From 25559c22cef879c5cf7119540bfe21fb379d29f3 Mon Sep 17 00:00:00 2001
From: Jens Wiklander <jens.wiklander@linaro.org>
Date: Mon, 9 Jul 2018 08:15:49 +0200
Subject: tee: add kernel internal client interface

Adds a kernel internal TEE client interface to be used by other drivers.

Reviewed-by: Sumit Garg <sumit.garg@linaro.org>
Tested-by: Sumit Garg <sumit.garg@linaro.org>
Tested-by: Zeng Tao <prime.zeng@hisilicon.com>
Signed-off-by: Jens Wiklander <jens.wiklander@linaro.org>
---
 include/linux/tee_drv.h | 73 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h
index a2b3dfcee0b5..6cfe05893a76 100644
--- a/include/linux/tee_drv.h
+++ b/include/linux/tee_drv.h
@@ -453,6 +453,79 @@ static inline int tee_shm_get_id(struct tee_shm *shm)
  */
 struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id);
 
+/**
+ * tee_client_open_context() - Open a TEE context
+ * @start:	if not NULL, continue search after this context
+ * @match:	function to check TEE device
+ * @data:	data for match function
+ * @vers:	if not NULL, version data of TEE device of the context returned
+ *
+ * This function does an operation similar to open("/dev/teeX") in user space.
+ * A returned context must be released with tee_client_close_context().
+ *
+ * Returns a TEE context of the first TEE device matched by the match()
+ * callback or an ERR_PTR.
+ */
+struct tee_context *
+tee_client_open_context(struct tee_context *start,
+			int (*match)(struct tee_ioctl_version_data *,
+				     const void *),
+			const void *data, struct tee_ioctl_version_data *vers);
+
+/**
+ * tee_client_close_context() - Close a TEE context
+ * @ctx:	TEE context to close
+ *
+ * Note that all sessions previously opened with this context will be
+ * closed when this function is called.
+ */
+void tee_client_close_context(struct tee_context *ctx);
+
+/**
+ * tee_client_get_version() - Query version of TEE
+ * @ctx:	TEE context to TEE to query
+ * @vers:	Pointer to version data
+ */
+void tee_client_get_version(struct tee_context *ctx,
+			    struct tee_ioctl_version_data *vers);
+
+/**
+ * tee_client_open_session() - Open a session to a Trusted Application
+ * @ctx:	TEE context
+ * @arg:	Open session arguments, see description of
+ *		struct tee_ioctl_open_session_arg
+ * @param:	Parameters passed to the Trusted Application
+ *
+ * Returns < 0 on error else see @arg->ret for result. If @arg->ret
+ * is TEEC_SUCCESS the session identifier is available in @arg->session.
+ */
+int tee_client_open_session(struct tee_context *ctx,
+			    struct tee_ioctl_open_session_arg *arg,
+			    struct tee_param *param);
+
+/**
+ * tee_client_close_session() - Close a session to a Trusted Application
+ * @ctx:	TEE Context
+ * @session:	Session id
+ *
+ * Return < 0 on error else 0, regardless the session will not be
+ * valid after this function has returned.
+ */
+int tee_client_close_session(struct tee_context *ctx, u32 session);
+
+/**
+ * tee_client_invoke_func() - Invoke a function in a Trusted Application
+ * @ctx:	TEE Context
+ * @arg:	Invoke arguments, see description of
+ *		struct tee_ioctl_invoke_arg
+ * @param:	Parameters passed to the Trusted Application
+ *
+ * Returns < 0 on error else see @arg->ret for result.
+ */
+int tee_client_invoke_func(struct tee_context *ctx,
+			   struct tee_ioctl_invoke_arg *arg,
+			   struct tee_param *param);
+
 static inline bool tee_param_is_memref(struct tee_param *param)
 {
 	switch (param->attr & TEE_IOCTL_PARAM_ATTR_TYPE_MASK) {
-- 
cgit v1.2.3


From 0c4a1049cf298721eaec4553d3d5039798086e12 Mon Sep 17 00:00:00 2001
From: Sanyog Kale <sanyog.r.kale@intel.com>
Date: Fri, 27 Jul 2018 14:44:13 +0530
Subject: soundwire: Add support to lock across bus instances

Currently, the stream concept is limited to single Master and one
or more Codecs.

This patch extends the concept to support multiple Master(s)
sharing the same reference clock and synchronized in the hardware.
Modify sdw_stream_runtime to support a list of sdw_master_runtime
for the same. The existing reference to a single m_rt is removed
in the next patch.

Typically to lock, one would acquire a global lock and then lock
bus instances. In this case, the caller framework(ASoC DPCM)
guarantees that stream operations on a card are always serialized.
So, there is no race condition and hence no need for global lock.

Bus lock(s) are acquired to reconfigure the bus while the stream
is set-up.
So, we add sdw_acquire_bus_lock()/sdw_release_bus_lock() APIs which
are used only to reconfigure the bus.

Signed-off-by: Sanyog Kale <sanyog.r.kale@intel.com>
Signed-off-by: Shreyas NC <shreyas.nc@intel.com>
Acked-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/bus.h       |  2 ++
 drivers/soundwire/stream.c    | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/soundwire/sdw.h |  4 ++++
 3 files changed, 49 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/soundwire/bus.h b/drivers/soundwire/bus.h
index 3b15c4e25a3a..b6cfbdfc47d5 100644
--- a/drivers/soundwire/bus.h
+++ b/drivers/soundwire/bus.h
@@ -99,6 +99,7 @@ struct sdw_slave_runtime {
  * this stream, can be zero.
  * @slave_rt_list: Slave runtime list
  * @port_list: List of Master Ports configured for this stream, can be zero.
+ * @stream_node: sdw_stream_runtime master_list node
  * @bus_node: sdw_bus m_rt_list node
  */
 struct sdw_master_runtime {
@@ -108,6 +109,7 @@ struct sdw_master_runtime {
 	unsigned int ch_count;
 	struct list_head slave_rt_list;
 	struct list_head port_list;
+	struct list_head stream_node;
 	struct list_head bus_node;
 };
 
diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c
index e5c7e1ef6318..b52903fe6f31 100644
--- a/drivers/soundwire/stream.c
+++ b/drivers/soundwire/stream.c
@@ -747,6 +747,7 @@ struct sdw_stream_runtime *sdw_alloc_stream(char *stream_name)
 		return NULL;
 
 	stream->name = stream_name;
+	INIT_LIST_HEAD(&stream->master_list);
 	stream->state = SDW_STREAM_ALLOCATED;
 
 	return stream;
@@ -1245,6 +1246,48 @@ struct sdw_dpn_prop *sdw_get_slave_dpn_prop(struct sdw_slave *slave,
 	return NULL;
 }
 
+/**
+ * sdw_acquire_bus_lock: Acquire bus lock for all Master runtime(s)
+ *
+ * @stream: SoundWire stream
+ *
+ * Acquire bus_lock for each of the master runtime(m_rt) part of this
+ * stream to reconfigure the bus.
+ * NOTE: This function is called from SoundWire stream ops and is
+ * expected that a global lock is held before acquiring bus_lock.
+ */
+static void sdw_acquire_bus_lock(struct sdw_stream_runtime *stream)
+{
+	struct sdw_master_runtime *m_rt = NULL;
+	struct sdw_bus *bus = NULL;
+
+	/* Iterate for all Master(s) in Master list */
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+
+		mutex_lock(&bus->bus_lock);
+	}
+}
+
+/**
+ * sdw_release_bus_lock: Release bus lock for all Master runtime(s)
+ *
+ * @stream: SoundWire stream
+ *
+ * Release the previously held bus_lock after reconfiguring the bus.
+ */
+static void sdw_release_bus_lock(struct sdw_stream_runtime *stream)
+{
+	struct sdw_master_runtime *m_rt = NULL;
+	struct sdw_bus *bus = NULL;
+
+	/* Iterate for all Master(s) in Master list */
+	list_for_each_entry_reverse(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+		mutex_unlock(&bus->bus_lock);
+	}
+}
+
 static int _sdw_prepare_stream(struct sdw_stream_runtime *stream)
 {
 	struct sdw_master_runtime *m_rt = stream->m_rt;
diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 962971e6a9c7..ccd8dcdf06ab 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -769,6 +769,9 @@ struct sdw_stream_params {
  * @state: Current state of the stream
  * @type: Stream type PCM or PDM
  * @m_rt: Master runtime
+ * @master_list: List of Master runtime(s) in this stream.
+ * master_list can contain only one m_rt per Master instance
+ * for a stream
  */
 struct sdw_stream_runtime {
 	char *name;
@@ -776,6 +779,7 @@ struct sdw_stream_runtime {
 	enum sdw_stream_state state;
 	enum sdw_stream_type type;
 	struct sdw_master_runtime *m_rt;
+	struct list_head master_list;
 };
 
 struct sdw_stream_runtime *sdw_alloc_stream(char *stream_name);
-- 
cgit v1.2.3


From 48949722ced4daacfa32f13c221f173b87231ead Mon Sep 17 00:00:00 2001
From: Vinod Koul <vkoul@kernel.org>
Date: Fri, 27 Jul 2018 14:44:14 +0530
Subject: soundwire: Handle multiple master instances in a stream

For each SoundWire stream operation, we need to parse master
list and operate upon all master runtime.

This is a preparatory patch to do the boilerplate conversion
of stream handling from single master runtime to handle a
list of master runtime. The code to support bank switch for
multiple master instances is added in the next patch.

Signed-off-by: Sanyog Kale <sanyog.r.kale@intel.com>
Signed-off-by: Shreyas NC <shreyas.nc@intel.com>
Acked-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/stream.c    | 311 +++++++++++++++++++++++++-----------------
 include/linux/soundwire/sdw.h |   2 -
 2 files changed, 188 insertions(+), 125 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c
index b52903fe6f31..ee024d72dd7b 100644
--- a/drivers/soundwire/stream.c
+++ b/drivers/soundwire/stream.c
@@ -681,35 +681,45 @@ error_1:
 
 static int do_bank_switch(struct sdw_stream_runtime *stream)
 {
-	struct sdw_master_runtime *m_rt = stream->m_rt;
+	struct sdw_master_runtime *m_rt = NULL;
 	const struct sdw_master_ops *ops;
-	struct sdw_bus *bus = m_rt->bus;
+	struct sdw_bus *bus = NULL;
 	int ret = 0;
 
-	ops = bus->ops;
 
-	/* Pre-bank switch */
-	if (ops->pre_bank_switch) {
-		ret = ops->pre_bank_switch(bus);
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+		ops = bus->ops;
+
+		/* Pre-bank switch */
+		if (ops->pre_bank_switch) {
+			ret = ops->pre_bank_switch(bus);
+			if (ret < 0) {
+				dev_err(bus->dev,
+					"Pre bank switch op failed: %d", ret);
+				return ret;
+			}
+		}
+
+		/* Bank switch */
+		ret = sdw_bank_switch(bus);
 		if (ret < 0) {
-			dev_err(bus->dev, "Pre bank switch op failed: %d", ret);
+			dev_err(bus->dev, "Bank switch failed: %d", ret);
 			return ret;
 		}
 	}
 
-	/* Bank switch */
-	ret = sdw_bank_switch(bus);
-	if (ret < 0) {
-		dev_err(bus->dev, "Bank switch failed: %d", ret);
-		return ret;
-	}
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+		ops = bus->ops;
 
-	/* Post-bank switch */
-	if (ops->post_bank_switch) {
-		ret = ops->post_bank_switch(bus);
-		if (ret < 0) {
-			dev_err(bus->dev,
+		/* Post-bank switch */
+		if (ops->post_bank_switch) {
+			ret = ops->post_bank_switch(bus);
+			if (ret < 0) {
+				dev_err(bus->dev,
 					"Post bank switch op failed: %d", ret);
+			}
 		}
 	}
 
@@ -754,6 +764,21 @@ struct sdw_stream_runtime *sdw_alloc_stream(char *stream_name)
 }
 EXPORT_SYMBOL(sdw_alloc_stream);
 
+static struct sdw_master_runtime
+*sdw_find_master_rt(struct sdw_bus *bus,
+			struct sdw_stream_runtime *stream)
+{
+	struct sdw_master_runtime *m_rt = NULL;
+
+	/* Retrieve Bus handle if already available */
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		if (m_rt->bus == bus)
+			return m_rt;
+	}
+
+	return NULL;
+}
+
 /**
  * sdw_alloc_master_rt() - Allocates and initialize Master runtime handle
  *
@@ -770,12 +795,11 @@ static struct sdw_master_runtime
 {
 	struct sdw_master_runtime *m_rt;
 
-	m_rt = stream->m_rt;
-
 	/*
 	 * check if Master is already allocated (as a result of Slave adding
 	 * it first), if so skip allocation and go to configure
 	 */
+	m_rt = sdw_find_master_rt(bus, stream);
 	if (m_rt)
 		goto stream_config;
 
@@ -786,7 +810,7 @@ static struct sdw_master_runtime
 	/* Initialization of Master runtime handle */
 	INIT_LIST_HEAD(&m_rt->port_list);
 	INIT_LIST_HEAD(&m_rt->slave_rt_list);
-	stream->m_rt = m_rt;
+	list_add_tail(&m_rt->stream_node, &stream->master_list);
 
 	list_add_tail(&m_rt->bus_node, &bus->m_rt_list);
 
@@ -844,17 +868,21 @@ static void sdw_slave_port_release(struct sdw_bus *bus,
 			struct sdw_stream_runtime *stream)
 {
 	struct sdw_port_runtime *p_rt, *_p_rt;
-	struct sdw_master_runtime *m_rt = stream->m_rt;
+	struct sdw_master_runtime *m_rt;
 	struct sdw_slave_runtime *s_rt;
 
-	list_for_each_entry(s_rt, &m_rt->slave_rt_list, m_rt_node) {
-		if (s_rt->slave != slave)
-			continue;
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		list_for_each_entry(s_rt, &m_rt->slave_rt_list, m_rt_node) {
 
-		list_for_each_entry_safe(p_rt, _p_rt,
-				&s_rt->port_list, port_node) {
-			list_del(&p_rt->port_node);
-			kfree(p_rt);
+			if (s_rt->slave != slave)
+				continue;
+
+			list_for_each_entry_safe(p_rt, _p_rt,
+					&s_rt->port_list, port_node) {
+
+				list_del(&p_rt->port_node);
+				kfree(p_rt);
+			}
 		}
 	}
 }
@@ -871,16 +899,18 @@ static void sdw_release_slave_stream(struct sdw_slave *slave,
 			struct sdw_stream_runtime *stream)
 {
 	struct sdw_slave_runtime *s_rt, *_s_rt;
-	struct sdw_master_runtime *m_rt = stream->m_rt;
-
-	/* Retrieve Slave runtime handle */
-	list_for_each_entry_safe(s_rt, _s_rt,
-			&m_rt->slave_rt_list, m_rt_node) {
+	struct sdw_master_runtime *m_rt;
 
-		if (s_rt->slave == slave) {
-			list_del(&s_rt->m_rt_node);
-			kfree(s_rt);
-			return;
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		/* Retrieve Slave runtime handle */
+		list_for_each_entry_safe(s_rt, _s_rt,
+					&m_rt->slave_rt_list, m_rt_node) {
+
+			if (s_rt->slave == slave) {
+				list_del(&s_rt->m_rt_node);
+				kfree(s_rt);
+				return;
+			}
 		}
 	}
 }
@@ -888,6 +918,7 @@ static void sdw_release_slave_stream(struct sdw_slave *slave,
 /**
  * sdw_release_master_stream() - Free Master runtime handle
  *
+ * @m_rt: Master runtime node
  * @stream: Stream runtime handle.
  *
  * This function is to be called with bus_lock held
@@ -895,9 +926,9 @@ static void sdw_release_slave_stream(struct sdw_slave *slave,
  * handle. If this is called first then sdw_release_slave_stream() will have
  * no effect as Slave(s) runtime handle would already be freed up.
  */
-static void sdw_release_master_stream(struct sdw_stream_runtime *stream)
+static void sdw_release_master_stream(struct sdw_master_runtime *m_rt,
+			struct sdw_stream_runtime *stream)
 {
-	struct sdw_master_runtime *m_rt = stream->m_rt;
 	struct sdw_slave_runtime *s_rt, *_s_rt;
 
 	list_for_each_entry_safe(s_rt, _s_rt, &m_rt->slave_rt_list, m_rt_node) {
@@ -905,7 +936,9 @@ static void sdw_release_master_stream(struct sdw_stream_runtime *stream)
 		sdw_release_slave_stream(s_rt->slave, stream);
 	}
 
+	list_del(&m_rt->stream_node);
 	list_del(&m_rt->bus_node);
+	kfree(m_rt);
 }
 
 /**
@@ -919,13 +952,22 @@ static void sdw_release_master_stream(struct sdw_stream_runtime *stream)
 int sdw_stream_remove_master(struct sdw_bus *bus,
 		struct sdw_stream_runtime *stream)
 {
+	struct sdw_master_runtime *m_rt, *_m_rt;
+
 	mutex_lock(&bus->bus_lock);
 
-	sdw_release_master_stream(stream);
-	sdw_master_port_release(bus, stream->m_rt);
-	stream->state = SDW_STREAM_RELEASED;
-	kfree(stream->m_rt);
-	stream->m_rt = NULL;
+	list_for_each_entry_safe(m_rt, _m_rt,
+			&stream->master_list, stream_node) {
+
+		if (m_rt->bus != bus)
+			continue;
+
+		sdw_master_port_release(bus, m_rt);
+		sdw_release_master_stream(m_rt, stream);
+	}
+
+	if (list_empty(&stream->master_list))
+		stream->state = SDW_STREAM_RELEASED;
 
 	mutex_unlock(&bus->bus_lock);
 
@@ -1128,7 +1170,7 @@ int sdw_stream_add_master(struct sdw_bus *bus,
 	goto unlock;
 
 stream_error:
-	sdw_release_master_stream(stream);
+	sdw_release_master_stream(m_rt, stream);
 unlock:
 	mutex_unlock(&bus->bus_lock);
 	return ret;
@@ -1206,7 +1248,7 @@ stream_error:
 	 * we hit error so cleanup the stream, release all Slave(s) and
 	 * Master runtime
 	 */
-	sdw_release_master_stream(stream);
+	sdw_release_master_stream(m_rt, stream);
 error:
 	mutex_unlock(&slave->bus->bus_lock);
 	return ret;
@@ -1275,6 +1317,8 @@ static void sdw_acquire_bus_lock(struct sdw_stream_runtime *stream)
  * @stream: SoundWire stream
  *
  * Release the previously held bus_lock after reconfiguring the bus.
+ * NOTE: This function is called from SoundWire stream ops and is
+ * expected that a global lock is held before releasing bus_lock.
  */
 static void sdw_release_bus_lock(struct sdw_stream_runtime *stream)
 {
@@ -1290,31 +1334,36 @@ static void sdw_release_bus_lock(struct sdw_stream_runtime *stream)
 
 static int _sdw_prepare_stream(struct sdw_stream_runtime *stream)
 {
-	struct sdw_master_runtime *m_rt = stream->m_rt;
-	struct sdw_bus *bus = m_rt->bus;
+	struct sdw_master_runtime *m_rt = NULL;
+	struct sdw_bus *bus = NULL;
 	struct sdw_master_prop *prop = NULL;
 	struct sdw_bus_params params;
 	int ret;
 
-	prop = &bus->prop;
-	memcpy(&params, &bus->params, sizeof(params));
+	/* Prepare  Master(s) and Slave(s) port(s) associated with stream */
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+		prop = &bus->prop;
+		memcpy(&params, &bus->params, sizeof(params));
 
-	/* TODO: Support Asynchronous mode */
-	if ((prop->max_freq % stream->params.rate) != 0) {
-		dev_err(bus->dev, "Async mode not supported");
-		return -EINVAL;
-	}
+		/* TODO: Support Asynchronous mode */
+		if ((prop->max_freq % stream->params.rate) != 0) {
+			dev_err(bus->dev, "Async mode not supported");
+			return -EINVAL;
+		}
 
-	/* Increment cumulative bus bandwidth */
-	/* TODO: Update this during Device-Device support */
-	bus->params.bandwidth += m_rt->stream->params.rate *
-		m_rt->ch_count * m_rt->stream->params.bps;
+		/* Increment cumulative bus bandwidth */
+		/* TODO: Update this during Device-Device support */
+		bus->params.bandwidth += m_rt->stream->params.rate *
+			m_rt->ch_count * m_rt->stream->params.bps;
+
+		/* Program params */
+		ret = sdw_program_params(bus);
+		if (ret < 0) {
+			dev_err(bus->dev, "Program params failed: %d", ret);
+			goto restore_params;
+		}
 
-	/* Program params */
-	ret = sdw_program_params(bus);
-	if (ret < 0) {
-		dev_err(bus->dev, "Program params failed: %d", ret);
-		goto restore_params;
 	}
 
 	ret = do_bank_switch(stream);
@@ -1323,12 +1372,16 @@ static int _sdw_prepare_stream(struct sdw_stream_runtime *stream)
 		goto restore_params;
 	}
 
-	/* Prepare port(s) on the new clock configuration */
-	ret = sdw_prep_deprep_ports(m_rt, true);
-	if (ret < 0) {
-		dev_err(bus->dev, "Prepare port(s) failed ret = %d",
-				ret);
-		return ret;
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+
+		/* Prepare port(s) on the new clock configuration */
+		ret = sdw_prep_deprep_ports(m_rt, true);
+		if (ret < 0) {
+			dev_err(bus->dev, "Prepare port(s) failed ret = %d",
+					ret);
+			return ret;
+		}
 	}
 
 	stream->state = SDW_STREAM_PREPARED;
@@ -1356,35 +1409,40 @@ int sdw_prepare_stream(struct sdw_stream_runtime *stream)
 		return -EINVAL;
 	}
 
-	mutex_lock(&stream->m_rt->bus->bus_lock);
+	sdw_acquire_bus_lock(stream);
 
 	ret = _sdw_prepare_stream(stream);
 	if (ret < 0)
 		pr_err("Prepare for stream:%s failed: %d", stream->name, ret);
 
-	mutex_unlock(&stream->m_rt->bus->bus_lock);
+	sdw_release_bus_lock(stream);
 	return ret;
 }
 EXPORT_SYMBOL(sdw_prepare_stream);
 
 static int _sdw_enable_stream(struct sdw_stream_runtime *stream)
 {
-	struct sdw_master_runtime *m_rt = stream->m_rt;
-	struct sdw_bus *bus = m_rt->bus;
+	struct sdw_master_runtime *m_rt = NULL;
+	struct sdw_bus *bus = NULL;
 	int ret;
 
-	/* Program params */
-	ret = sdw_program_params(bus);
-	if (ret < 0) {
-		dev_err(bus->dev, "Program params failed: %d", ret);
-		return ret;
-	}
+	/* Enable Master(s) and Slave(s) port(s) associated with stream */
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
 
-	/* Enable port(s) */
-	ret = sdw_enable_disable_ports(m_rt, true);
-	if (ret < 0) {
-		dev_err(bus->dev, "Enable port(s) failed ret: %d", ret);
-		return ret;
+		/* Program params */
+		ret = sdw_program_params(bus);
+		if (ret < 0) {
+			dev_err(bus->dev, "Program params failed: %d", ret);
+			return ret;
+		}
+
+		/* Enable port(s) */
+		ret = sdw_enable_disable_ports(m_rt, true);
+		if (ret < 0) {
+			dev_err(bus->dev, "Enable port(s) failed ret: %d", ret);
+			return ret;
+		}
 	}
 
 	ret = do_bank_switch(stream);
@@ -1413,37 +1471,42 @@ int sdw_enable_stream(struct sdw_stream_runtime *stream)
 		return -EINVAL;
 	}
 
-	mutex_lock(&stream->m_rt->bus->bus_lock);
+	sdw_acquire_bus_lock(stream);
 
 	ret = _sdw_enable_stream(stream);
 	if (ret < 0)
 		pr_err("Enable for stream:%s failed: %d", stream->name, ret);
 
-	mutex_unlock(&stream->m_rt->bus->bus_lock);
+	sdw_release_bus_lock(stream);
 	return ret;
 }
 EXPORT_SYMBOL(sdw_enable_stream);
 
 static int _sdw_disable_stream(struct sdw_stream_runtime *stream)
 {
-	struct sdw_master_runtime *m_rt = stream->m_rt;
-	struct sdw_bus *bus = m_rt->bus;
+	struct sdw_master_runtime *m_rt = NULL;
+	struct sdw_bus *bus = NULL;
 	int ret;
 
-	/* Disable port(s) */
-	ret = sdw_enable_disable_ports(m_rt, false);
-	if (ret < 0) {
-		dev_err(bus->dev, "Disable port(s) failed: %d", ret);
-		return ret;
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+		/* Disable port(s) */
+		ret = sdw_enable_disable_ports(m_rt, false);
+		if (ret < 0) {
+			dev_err(bus->dev, "Disable port(s) failed: %d", ret);
+			return ret;
+		}
 	}
-
 	stream->state = SDW_STREAM_DISABLED;
 
-	/* Program params */
-	ret = sdw_program_params(bus);
-	if (ret < 0) {
-		dev_err(bus->dev, "Program params failed: %d", ret);
-		return ret;
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+		/* Program params */
+		ret = sdw_program_params(bus);
+		if (ret < 0) {
+			dev_err(bus->dev, "Program params failed: %d", ret);
+			return ret;
+		}
 	}
 
 	return do_bank_switch(stream);
@@ -1465,43 +1528,46 @@ int sdw_disable_stream(struct sdw_stream_runtime *stream)
 		return -EINVAL;
 	}
 
-	mutex_lock(&stream->m_rt->bus->bus_lock);
+	sdw_acquire_bus_lock(stream);
 
 	ret = _sdw_disable_stream(stream);
 	if (ret < 0)
 		pr_err("Disable for stream:%s failed: %d", stream->name, ret);
 
-	mutex_unlock(&stream->m_rt->bus->bus_lock);
+	sdw_release_bus_lock(stream);
 	return ret;
 }
 EXPORT_SYMBOL(sdw_disable_stream);
 
 static int _sdw_deprepare_stream(struct sdw_stream_runtime *stream)
 {
-	struct sdw_master_runtime *m_rt = stream->m_rt;
-	struct sdw_bus *bus = m_rt->bus;
+	struct sdw_master_runtime *m_rt = NULL;
+	struct sdw_bus *bus = NULL;
 	int ret = 0;
 
-	/* De-prepare port(s) */
-	ret = sdw_prep_deprep_ports(m_rt, false);
-	if (ret < 0) {
-		dev_err(bus->dev, "De-prepare port(s) failed: %d", ret);
-		return ret;
-	}
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+		bus = m_rt->bus;
+		/* De-prepare port(s) */
+		ret = sdw_prep_deprep_ports(m_rt, false);
+		if (ret < 0) {
+			dev_err(bus->dev, "De-prepare port(s) failed: %d", ret);
+			return ret;
+		}
 
-	stream->state = SDW_STREAM_DEPREPARED;
+		/* TODO: Update this during Device-Device support */
+		bus->params.bandwidth -= m_rt->stream->params.rate *
+			m_rt->ch_count * m_rt->stream->params.bps;
 
-	/* TODO: Update this during Device-Device support */
-	bus->params.bandwidth -= m_rt->stream->params.rate *
-		m_rt->ch_count * m_rt->stream->params.bps;
+		/* Program params */
+		ret = sdw_program_params(bus);
+		if (ret < 0) {
+			dev_err(bus->dev, "Program params failed: %d", ret);
+			return ret;
+		}
 
-	/* Program params */
-	ret = sdw_program_params(bus);
-	if (ret < 0) {
-		dev_err(bus->dev, "Program params failed: %d", ret);
-		return ret;
 	}
 
+	stream->state = SDW_STREAM_DEPREPARED;
 	return do_bank_switch(stream);
 }
 
@@ -1521,13 +1587,12 @@ int sdw_deprepare_stream(struct sdw_stream_runtime *stream)
 		return -EINVAL;
 	}
 
-	mutex_lock(&stream->m_rt->bus->bus_lock);
-
+	sdw_acquire_bus_lock(stream);
 	ret = _sdw_deprepare_stream(stream);
 	if (ret < 0)
 		pr_err("De-prepare for stream:%d failed: %d", ret, ret);
 
-	mutex_unlock(&stream->m_rt->bus->bus_lock);
+	sdw_release_bus_lock(stream);
 	return ret;
 }
 EXPORT_SYMBOL(sdw_deprepare_stream);
diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index ccd8dcdf06ab..03df709fb8ef 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -768,7 +768,6 @@ struct sdw_stream_params {
  * @params: Stream parameters
  * @state: Current state of the stream
  * @type: Stream type PCM or PDM
- * @m_rt: Master runtime
  * @master_list: List of Master runtime(s) in this stream.
  * master_list can contain only one m_rt per Master instance
  * for a stream
@@ -778,7 +777,6 @@ struct sdw_stream_runtime {
 	struct sdw_stream_params params;
 	enum sdw_stream_state state;
 	enum sdw_stream_type type;
-	struct sdw_master_runtime *m_rt;
 	struct list_head master_list;
 };
 
-- 
cgit v1.2.3


From ce6e74d008ff5c8b43e3bafaa7343bf7eb69593e Mon Sep 17 00:00:00 2001
From: Shreyas NC <shreyas.nc@intel.com>
Date: Fri, 27 Jul 2018 14:44:16 +0530
Subject: soundwire: Add support for multi link bank switch

In cases of multiple Masters in a stream, synchronization
between multiple Master(s) is achieved by performing bank switch
together and using Master methods.

Add sdw_ml_bank_switch() to wait for completion of bank switch.

Signed-off-by: Sanyog Kale <sanyog.r.kale@intel.com>
Signed-off-by: Shreyas NC <shreyas.nc@intel.com>
Acked-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/bus.c       |   5 ++
 drivers/soundwire/bus.h       |   2 +
 drivers/soundwire/stream.c    | 145 ++++++++++++++++++++++++++++++++++++++----
 include/linux/soundwire/sdw.h |   4 ++
 4 files changed, 144 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c
index dbabd5e69343..1cbfedfc20ef 100644
--- a/drivers/soundwire/bus.c
+++ b/drivers/soundwire/bus.c
@@ -35,6 +35,11 @@ int sdw_add_bus_master(struct sdw_bus *bus)
 	INIT_LIST_HEAD(&bus->slaves);
 	INIT_LIST_HEAD(&bus->m_rt_list);
 
+	/*
+	 * Initialize multi_link flag
+	 * TODO: populate this flag by reading property from FW node
+	 */
+	bus->multi_link = false;
 	if (bus->ops->read_prop) {
 		ret = bus->ops->read_prop(bus);
 		if (ret < 0) {
diff --git a/drivers/soundwire/bus.h b/drivers/soundwire/bus.h
index b6cfbdfc47d5..c77de05b8100 100644
--- a/drivers/soundwire/bus.h
+++ b/drivers/soundwire/bus.h
@@ -4,6 +4,8 @@
 #ifndef __SDW_BUS_H
 #define __SDW_BUS_H
 
+#define DEFAULT_BANK_SWITCH_TIMEOUT 3000
+
 #if IS_ENABLED(CONFIG_ACPI)
 int sdw_acpi_find_slaves(struct sdw_bus *bus);
 #else
diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c
index ee024d72dd7b..3d98f20cbd6a 100644
--- a/drivers/soundwire/stream.c
+++ b/drivers/soundwire/stream.c
@@ -626,9 +626,10 @@ static int sdw_program_params(struct sdw_bus *bus)
 	return ret;
 }
 
-static int sdw_bank_switch(struct sdw_bus *bus)
+static int sdw_bank_switch(struct sdw_bus *bus, int m_rt_count)
 {
 	int col_index, row_index;
+	bool multi_link;
 	struct sdw_msg *wr_msg;
 	u8 *wbuf = NULL;
 	int ret = 0;
@@ -638,6 +639,8 @@ static int sdw_bank_switch(struct sdw_bus *bus)
 	if (!wr_msg)
 		return -ENOMEM;
 
+	bus->defer_msg.msg = wr_msg;
+
 	wbuf = kzalloc(sizeof(*wbuf), GFP_KERNEL);
 	if (!wbuf) {
 		ret = -ENOMEM;
@@ -658,17 +661,29 @@ static int sdw_bank_switch(struct sdw_bus *bus)
 					SDW_MSG_FLAG_WRITE, wbuf);
 	wr_msg->ssp_sync = true;
 
-	ret = sdw_transfer(bus, wr_msg);
+	/*
+	 * Set the multi_link flag only when both the hardware supports
+	 * and there is a stream handled by multiple masters
+	 */
+	multi_link = bus->multi_link && (m_rt_count > 1);
+
+	if (multi_link)
+		ret = sdw_transfer_defer(bus, wr_msg, &bus->defer_msg);
+	else
+		ret = sdw_transfer(bus, wr_msg);
+
 	if (ret < 0) {
 		dev_err(bus->dev, "Slave frame_ctrl reg write failed");
 		goto error;
 	}
 
-	kfree(wr_msg);
-	kfree(wbuf);
-	bus->defer_msg.msg = NULL;
-	bus->params.curr_bank = !bus->params.curr_bank;
-	bus->params.next_bank = !bus->params.next_bank;
+	if (!multi_link) {
+		kfree(wr_msg);
+		kfree(wbuf);
+		bus->defer_msg.msg = NULL;
+		bus->params.curr_bank = !bus->params.curr_bank;
+		bus->params.next_bank = !bus->params.next_bank;
+	}
 
 	return 0;
 
@@ -679,36 +694,87 @@ error_1:
 	return ret;
 }
 
+/**
+ * sdw_ml_sync_bank_switch: Multilink register bank switch
+ *
+ * @bus: SDW bus instance
+ *
+ * Caller function should free the buffers on error
+ */
+static int sdw_ml_sync_bank_switch(struct sdw_bus *bus)
+{
+	unsigned long time_left;
+
+	if (!bus->multi_link)
+		return 0;
+
+	/* Wait for completion of transfer */
+	time_left = wait_for_completion_timeout(&bus->defer_msg.complete,
+						bus->bank_switch_timeout);
+
+	if (!time_left) {
+		dev_err(bus->dev, "Controller Timed out on bank switch");
+		return -ETIMEDOUT;
+	}
+
+	bus->params.curr_bank = !bus->params.curr_bank;
+	bus->params.next_bank = !bus->params.next_bank;
+
+	if (bus->defer_msg.msg) {
+		kfree(bus->defer_msg.msg->buf);
+		kfree(bus->defer_msg.msg);
+	}
+
+	return 0;
+}
+
 static int do_bank_switch(struct sdw_stream_runtime *stream)
 {
 	struct sdw_master_runtime *m_rt = NULL;
 	const struct sdw_master_ops *ops;
 	struct sdw_bus *bus = NULL;
+	bool multi_link = false;
 	int ret = 0;
 
-
 	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
 		bus = m_rt->bus;
 		ops = bus->ops;
 
+		if (bus->multi_link) {
+			multi_link = true;
+			mutex_lock(&bus->msg_lock);
+		}
+
 		/* Pre-bank switch */
 		if (ops->pre_bank_switch) {
 			ret = ops->pre_bank_switch(bus);
 			if (ret < 0) {
 				dev_err(bus->dev,
 					"Pre bank switch op failed: %d", ret);
-				return ret;
+				goto msg_unlock;
 			}
 		}
 
-		/* Bank switch */
-		ret = sdw_bank_switch(bus);
+		/*
+		 * Perform Bank switch operation.
+		 * For multi link cases, the actual bank switch is
+		 * synchronized across all Masters and happens later as a
+		 * part of post_bank_switch ops.
+		 */
+		ret = sdw_bank_switch(bus, stream->m_rt_count);
 		if (ret < 0) {
 			dev_err(bus->dev, "Bank switch failed: %d", ret);
-			return ret;
+			goto error;
+
 		}
 	}
 
+	/*
+	 * For multi link cases, it is expected that the bank switch is
+	 * triggered by the post_bank_switch for the first Master in the list
+	 * and for the other Masters the post_bank_switch() should return doing
+	 * nothing.
+	 */
 	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
 		bus = m_rt->bus;
 		ops = bus->ops;
@@ -719,7 +785,47 @@ static int do_bank_switch(struct sdw_stream_runtime *stream)
 			if (ret < 0) {
 				dev_err(bus->dev,
 					"Post bank switch op failed: %d", ret);
+				goto error;
 			}
+		} else if (bus->multi_link && stream->m_rt_count > 1) {
+			dev_err(bus->dev,
+				"Post bank switch ops not implemented");
+			goto error;
+		}
+
+		/* Set the bank switch timeout to default, if not set */
+		if (!bus->bank_switch_timeout)
+			bus->bank_switch_timeout = DEFAULT_BANK_SWITCH_TIMEOUT;
+
+		/* Check if bank switch was successful */
+		ret = sdw_ml_sync_bank_switch(bus);
+		if (ret < 0) {
+			dev_err(bus->dev,
+				"multi link bank switch failed: %d", ret);
+			goto error;
+		}
+
+		mutex_unlock(&bus->msg_lock);
+	}
+
+	return ret;
+
+error:
+	list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+
+		bus = m_rt->bus;
+
+		kfree(bus->defer_msg.msg->buf);
+		kfree(bus->defer_msg.msg);
+	}
+
+msg_unlock:
+
+	if (multi_link) {
+		list_for_each_entry(m_rt, &stream->master_list, stream_node) {
+			bus = m_rt->bus;
+			if (mutex_is_locked(&bus->msg_lock))
+				mutex_unlock(&bus->msg_lock);
 		}
 	}
 
@@ -964,6 +1070,7 @@ int sdw_stream_remove_master(struct sdw_bus *bus,
 
 		sdw_master_port_release(bus, m_rt);
 		sdw_release_master_stream(m_rt, stream);
+		stream->m_rt_count--;
 	}
 
 	if (list_empty(&stream->master_list))
@@ -1150,6 +1257,18 @@ int sdw_stream_add_master(struct sdw_bus *bus,
 
 	mutex_lock(&bus->bus_lock);
 
+	/*
+	 * For multi link streams, add the second master only if
+	 * the bus supports it.
+	 * Check if bus->multi_link is set
+	 */
+	if (!bus->multi_link && stream->m_rt_count > 0) {
+		dev_err(bus->dev,
+			"Multilink not supported, link %d", bus->link_id);
+		ret = -EINVAL;
+		goto unlock;
+	}
+
 	m_rt = sdw_alloc_master_rt(bus, stream_config, stream);
 	if (!m_rt) {
 		dev_err(bus->dev,
@@ -1167,6 +1286,8 @@ int sdw_stream_add_master(struct sdw_bus *bus,
 	if (ret)
 		goto stream_error;
 
+	stream->m_rt_count++;
+
 	goto unlock;
 
 stream_error:
diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index 03df709fb8ef..c6aa2bf847c7 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -678,6 +678,9 @@ struct sdw_master_ops {
  * @defer_msg: Defer message
  * @clk_stop_timeout: Clock stop timeout computed
  * @bank_switch_timeout: Bank switch timeout computed
+ * @multi_link: Store bus property that indicates if multi links
+ * are supported. This flag is populated by drivers after reading
+ * appropriate firmware (ACPI/DT).
  */
 struct sdw_bus {
 	struct device *dev;
@@ -694,6 +697,7 @@ struct sdw_bus {
 	struct sdw_defer defer_msg;
 	unsigned int clk_stop_timeout;
 	u32 bank_switch_timeout;
+	bool multi_link;
 };
 
 int sdw_add_bus_master(struct sdw_bus *bus);
-- 
cgit v1.2.3


From 9b5c132a1ec98895fe40ba73a19e0a17293122e5 Mon Sep 17 00:00:00 2001
From: Shreyas NC <shreyas.nc@intel.com>
Date: Fri, 27 Jul 2018 14:44:15 +0530
Subject: soundwire: keep track of Masters in a stream

A multi link bankswitch can be done if the hardware supports and
the stream is handled by multiple Master(s).

This preparatory patch adds support to track m_rt in a stream.
Modifying m_rt_count and usage is added as part of the next patch.

Signed-off-by: Shreyas NC <shreyas.nc@intel.com>
Acked-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/stream.c    | 1 +
 include/linux/soundwire/sdw.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c
index 3d98f20cbd6a..bd879b1a76c8 100644
--- a/drivers/soundwire/stream.c
+++ b/drivers/soundwire/stream.c
@@ -865,6 +865,7 @@ struct sdw_stream_runtime *sdw_alloc_stream(char *stream_name)
 	stream->name = stream_name;
 	INIT_LIST_HEAD(&stream->master_list);
 	stream->state = SDW_STREAM_ALLOCATED;
+	stream->m_rt_count = 0;
 
 	return stream;
 }
diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h
index c6aa2bf847c7..df313913e856 100644
--- a/include/linux/soundwire/sdw.h
+++ b/include/linux/soundwire/sdw.h
@@ -775,6 +775,7 @@ struct sdw_stream_params {
  * @master_list: List of Master runtime(s) in this stream.
  * master_list can contain only one m_rt per Master instance
  * for a stream
+ * @m_rt_count: Count of Master runtime(s) in this stream
  */
 struct sdw_stream_runtime {
 	char *name;
@@ -782,6 +783,7 @@ struct sdw_stream_runtime {
 	enum sdw_stream_state state;
 	enum sdw_stream_type type;
 	struct list_head master_list;
+	int m_rt_count;
 };
 
 struct sdw_stream_runtime *sdw_alloc_stream(char *stream_name);
-- 
cgit v1.2.3


From 976516404ff3fab2a8caa8bd6f5efc1437fed0b8 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 14 Aug 2018 14:02:57 +0200
Subject: y2038: remove unused time interfaces

After many small patches, at least some of the deprecated interfaces
have no remaining users any more and can be removed:

  current_kernel_time
  do_settimeofday
  get_monotonic_boottime
  get_monotonic_boottime64
  get_monotonic_coarse
  get_monotonic_coarse64
  getrawmonotonic64
  ktime_get_real_ts
  timekeeping_clocktai
  timespec_trunc
  timespec_valid_strict
  time_to_tm

For many of the remaining time functions, we are missing one or
two patches that failed to make it into 4.19, they will be removed
in the following merge window.

The replacement functions for the removed interfaces are documented in
Documentation/core-api/timekeeping.rst.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/time32.h        | 25 ----------------------
 include/linux/timekeeping.h   | 12 -----------
 include/linux/timekeeping32.h | 50 -------------------------------------------
 kernel/time/time.c            | 24 ---------------------
 4 files changed, 111 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/time32.h b/include/linux/time32.h
index d1ae43c13e25..92c0ca092d93 100644
--- a/include/linux/time32.h
+++ b/include/linux/time32.h
@@ -105,16 +105,6 @@ static inline bool timespec_valid(const struct timespec *ts)
 	return true;
 }
 
-static inline bool timespec_valid_strict(const struct timespec *ts)
-{
-	if (!timespec_valid(ts))
-		return false;
-	/* Disallow values that could overflow ktime_t */
-	if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
-		return false;
-	return true;
-}
-
 /**
  * timespec_to_ns - Convert timespec to nanoseconds
  * @ts:		pointer to the timespec variable to be converted
@@ -149,19 +139,6 @@ static __always_inline void timespec_add_ns(struct timespec *a, u64 ns)
 	a->tv_nsec = ns;
 }
 
-/**
- * time_to_tm - converts the calendar time to local broken-down time
- *
- * @totalsecs	the number of seconds elapsed since 00:00:00 on January 1, 1970,
- *		Coordinated Universal Time (UTC).
- * @offset	offset seconds adding to totalsecs.
- * @result	pointer to struct tm variable to receive broken-down time
- */
-static inline void time_to_tm(time_t totalsecs, int offset, struct tm *result)
-{
-	time64_to_tm(totalsecs, offset, result);
-}
-
 static inline unsigned long mktime(const unsigned int year,
 			const unsigned int mon, const unsigned int day,
 			const unsigned int hour, const unsigned int min,
@@ -183,8 +160,6 @@ static inline bool timeval_valid(const struct timeval *tv)
 	return true;
 }
 
-extern struct timespec timespec_trunc(struct timespec t, unsigned int gran);
-
 /**
  * timeval_to_ns - Convert timeval to nanoseconds
  * @ts:		pointer to the timeval variable to be converted
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 5d738804e3d6..3faba06411aa 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -266,9 +266,6 @@ extern int update_persistent_clock64(struct timespec64 now);
  * deprecated aliases, don't use in new code
  */
 #define getnstimeofday64(ts)		ktime_get_real_ts64(ts)
-#define get_monotonic_boottime64(ts)	ktime_get_boottime_ts64(ts)
-#define getrawmonotonic64(ts)		ktime_get_raw_ts64(ts)
-#define timekeeping_clocktai64(ts)	ktime_get_clocktai_ts64(ts)
 
 static inline struct timespec64 current_kernel_time64(void)
 {
@@ -279,13 +276,4 @@ static inline struct timespec64 current_kernel_time64(void)
 	return ts;
 }
 
-static inline struct timespec64 get_monotonic_coarse64(void)
-{
-	struct timespec64 ts;
-
-	ktime_get_coarse_ts64(&ts);
-
-	return ts;
-}
-
 #endif
diff --git a/include/linux/timekeeping32.h b/include/linux/timekeeping32.h
index 8762c2f45f8b..a4a4991160fb 100644
--- a/include/linux/timekeeping32.h
+++ b/include/linux/timekeeping32.h
@@ -9,26 +9,6 @@
 extern void do_gettimeofday(struct timeval *tv);
 unsigned long get_seconds(void);
 
-static inline struct timespec current_kernel_time(void)
-{
-	struct timespec64 ts64;
-
-	ktime_get_coarse_real_ts64(&ts64);
-
-	return timespec64_to_timespec(ts64);
-}
-
-/**
- * Deprecated. Use do_settimeofday64().
- */
-static inline int do_settimeofday(const struct timespec *ts)
-{
-	struct timespec64 ts64;
-
-	ts64 = timespec_to_timespec64(*ts);
-	return do_settimeofday64(&ts64);
-}
-
 static inline void getnstimeofday(struct timespec *ts)
 {
 	struct timespec64 ts64;
@@ -45,14 +25,6 @@ static inline void ktime_get_ts(struct timespec *ts)
 	*ts = timespec64_to_timespec(ts64);
 }
 
-static inline void ktime_get_real_ts(struct timespec *ts)
-{
-	struct timespec64 ts64;
-
-	ktime_get_real_ts64(&ts64);
-	*ts = timespec64_to_timespec(ts64);
-}
-
 static inline void getrawmonotonic(struct timespec *ts)
 {
 	struct timespec64 ts64;
@@ -61,15 +33,6 @@ static inline void getrawmonotonic(struct timespec *ts)
 	*ts = timespec64_to_timespec(ts64);
 }
 
-static inline struct timespec get_monotonic_coarse(void)
-{
-	struct timespec64 ts64;
-
-	ktime_get_coarse_ts64(&ts64);
-
-	return timespec64_to_timespec(ts64);
-}
-
 static inline void getboottime(struct timespec *ts)
 {
 	struct timespec64 ts64;
@@ -78,19 +41,6 @@ static inline void getboottime(struct timespec *ts)
 	*ts = timespec64_to_timespec(ts64);
 }
 
-/*
- * Timespec interfaces utilizing the ktime based ones
- */
-static inline void get_monotonic_boottime(struct timespec *ts)
-{
-	*ts = ktime_to_timespec(ktime_get_boottime());
-}
-
-static inline void timekeeping_clocktai(struct timespec *ts)
-{
-	*ts = ktime_to_timespec(ktime_get_clocktai());
-}
-
 /*
  * Persistent clock related interfaces
  */
diff --git a/kernel/time/time.c b/kernel/time/time.c
index ccdb351277ee..712543011106 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -342,30 +342,6 @@ unsigned int jiffies_to_usecs(const unsigned long j)
 }
 EXPORT_SYMBOL(jiffies_to_usecs);
 
-/**
- * timespec_trunc - Truncate timespec to a granularity
- * @t: Timespec
- * @gran: Granularity in ns.
- *
- * Truncate a timespec to a granularity. Always rounds down. gran must
- * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
- */
-struct timespec timespec_trunc(struct timespec t, unsigned gran)
-{
-	/* Avoid division in the common cases 1 ns and 1 s. */
-	if (gran == 1) {
-		/* nothing */
-	} else if (gran == NSEC_PER_SEC) {
-		t.tv_nsec = 0;
-	} else if (gran > 1 && gran < NSEC_PER_SEC) {
-		t.tv_nsec -= t.tv_nsec % gran;
-	} else {
-		WARN(1, "illegal file time granularity: %u", gran);
-	}
-	return t;
-}
-EXPORT_SYMBOL(timespec_trunc);
-
 /*
  * mktime64 - Converts date to seconds.
  * Converts Gregorian date to seconds since 1970-01-01 00:00:00.
-- 
cgit v1.2.3


From 33e26418193f58d1895f2f968e1953b1caf8deb7 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 14 Aug 2018 15:18:20 +0200
Subject: y2038: make do_gettimeofday() and get_seconds() inline

get_seconds() and do_gettimeofday() are only used by a few modules now any
more (waiting for the respective patches to get accepted), and they are
among the last holdouts of code that is not y2038 safe in the core kernel.

Move the implementation into the timekeeping32.h header to clean up
the core kernel and isolate the old interfaces further.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/timekeeping32.h | 15 +++++++++++++--
 kernel/time/time.c            | 15 +++++++++------
 kernel/time/timekeeping.c     | 24 ------------------------
 3 files changed, 22 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timekeeping32.h b/include/linux/timekeeping32.h
index a4a4991160fb..a502616f7e1c 100644
--- a/include/linux/timekeeping32.h
+++ b/include/linux/timekeeping32.h
@@ -6,8 +6,19 @@
  * over time so we can remove the file here.
  */
 
-extern void do_gettimeofday(struct timeval *tv);
-unsigned long get_seconds(void);
+static inline void do_gettimeofday(struct timeval *tv)
+{
+	struct timespec64 now;
+
+	ktime_get_real_ts64(&now);
+	tv->tv_sec = now.tv_sec;
+	tv->tv_usec = now.tv_nsec/1000;
+}
+
+static inline unsigned long get_seconds(void)
+{
+	return ktime_get_real_seconds();
+}
 
 static inline void getnstimeofday(struct timespec *ts)
 {
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 712543011106..de332250d6fa 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -144,9 +144,11 @@ SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
 		struct timezone __user *, tz)
 {
 	if (likely(tv != NULL)) {
-		struct timeval ktv;
-		do_gettimeofday(&ktv);
-		if (copy_to_user(tv, &ktv, sizeof(ktv)))
+		struct timespec64 ts;
+
+		ktime_get_real_ts64(&ts);
+		if (put_user(ts.tv_sec, &tv->tv_sec) ||
+		    put_user(ts.tv_nsec / 1000, &tv->tv_usec))
 			return -EFAULT;
 	}
 	if (unlikely(tz != NULL)) {
@@ -227,10 +229,11 @@ COMPAT_SYSCALL_DEFINE2(gettimeofday, struct compat_timeval __user *, tv,
 		       struct timezone __user *, tz)
 {
 	if (tv) {
-		struct timeval ktv;
+		struct timespec64 ts;
 
-		do_gettimeofday(&ktv);
-		if (compat_put_timeval(&ktv, tv))
+		ktime_get_real_ts64(&ts);
+		if (put_user(ts.tv_sec, &tv->tv_sec) ||
+		    put_user(ts.tv_nsec / 1000, &tv->tv_usec))
 			return -EFAULT;
 	}
 	if (tz) {
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f3b22f456fac..2d110c948805 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1211,22 +1211,6 @@ int get_device_system_crosststamp(int (*get_time_fn)
 }
 EXPORT_SYMBOL_GPL(get_device_system_crosststamp);
 
-/**
- * do_gettimeofday - Returns the time of day in a timeval
- * @tv:		pointer to the timeval to be set
- *
- * NOTE: Users should be converted to using getnstimeofday()
- */
-void do_gettimeofday(struct timeval *tv)
-{
-	struct timespec64 now;
-
-	getnstimeofday64(&now);
-	tv->tv_sec = now.tv_sec;
-	tv->tv_usec = now.tv_nsec/1000;
-}
-EXPORT_SYMBOL(do_gettimeofday);
-
 /**
  * do_settimeofday64 - Sets the time of day.
  * @ts:     pointer to the timespec64 variable containing the new time
@@ -2174,14 +2158,6 @@ void getboottime64(struct timespec64 *ts)
 }
 EXPORT_SYMBOL_GPL(getboottime64);
 
-unsigned long get_seconds(void)
-{
-	struct timekeeper *tk = &tk_core.timekeeper;
-
-	return tk->xtime_sec;
-}
-EXPORT_SYMBOL(get_seconds);
-
 void ktime_get_coarse_real_ts64(struct timespec64 *ts)
 {
 	struct timekeeper *tk = &tk_core.timekeeper;
-- 
cgit v1.2.3


From 9afc5eee65ca7d717a99d6fe8f4adfe32a40940a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 13 Jul 2018 12:52:28 +0200
Subject: y2038: globally rename compat_time to old_time32

Christoph Hellwig suggested a slightly different path for handling
backwards compatibility with the 32-bit time_t based system calls:

Rather than simply reusing the compat_sys_* entry points on 32-bit
architectures unchanged, we get rid of those entry points and the
compat_time types by renaming them to something that makes more sense
on 32-bit architectures (which don't have a compat mode otherwise),
and then share the entry points under the new name with the 64-bit
architectures that use them for implementing the compatibility.

The following types and interfaces are renamed here, and moved
from linux/compat_time.h to linux/time32.h:

old				new
---				---
compat_time_t			old_time32_t
struct compat_timeval		struct old_timeval32
struct compat_timespec		struct old_timespec32
struct compat_itimerspec	struct old_itimerspec32
ns_to_compat_timeval()		ns_to_old_timeval32()
get_compat_itimerspec64()	get_old_itimerspec32()
put_compat_itimerspec64()	put_old_itimerspec32()
compat_get_timespec64()		get_old_timespec32()
compat_put_timespec64()		put_old_timespec32()

As we already have aliases in place, this patch addresses only the
instances that are relevant to the system call interface in particular,
not those that occur in device drivers and other modules. Those
will get handled separately, while providing the 64-bit version
of the respective interfaces.

I'm not renaming the timex, rusage and itimerval structures, as we are
still debating what the new interface will look like, and whether we
will need a replacement at all.

This also doesn't change the names of the syscall entry points, which can
be done more easily when we actually switch over the 32-bit architectures
to use them, at that point we need to change COMPAT_SYSCALL_DEFINEx to
SYSCALL_DEFINEx with a new name, e.g. with a _time32 suffix.

Suggested-by: Christoph Hellwig <hch@infradead.org>
Link: https://lore.kernel.org/lkml/20180705222110.GA5698@infradead.org/
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm64/include/asm/compat.h   |  6 +--
 arch/arm64/include/asm/stat.h     |  2 +-
 arch/mips/include/asm/compat.h    |  6 +--
 arch/mips/kernel/binfmt_elfn32.c  | 14 +++---
 arch/mips/kernel/binfmt_elfo32.c  | 14 +++---
 arch/parisc/include/asm/compat.h  |  6 +--
 arch/powerpc/include/asm/compat.h |  6 +--
 arch/powerpc/kernel/asm-offsets.c |  8 ++--
 arch/powerpc/oprofile/backtrace.c |  2 +-
 arch/sparc/include/asm/compat.h   |  6 +--
 fs/aio.c                          |  8 ++--
 fs/compat_binfmt_elf.c            |  2 +-
 fs/select.c                       | 20 ++++----
 fs/timerfd.c                      | 12 ++---
 fs/utimes.c                       | 12 ++---
 include/linux/compat.h            | 96 +++++++++++++++++++--------------------
 include/linux/compat_time.h       | 32 -------------
 include/linux/elfcore-compat.h    |  8 ++--
 include/linux/restart_block.h     |  4 +-
 include/linux/syscalls.h          |  2 +-
 include/linux/time32.h            | 48 +++++++++++++++-----
 ipc/mqueue.c                      |  8 ++--
 ipc/msg.c                         |  6 +--
 ipc/sem.c                         | 10 ++--
 ipc/shm.c                         |  6 +--
 ipc/syscall.c                     |  2 +-
 ipc/util.h                        |  2 +-
 kernel/compat.c                   |  8 ++--
 kernel/futex_compat.c             |  2 +-
 kernel/sched/core.c               |  4 +-
 kernel/signal.c                   |  2 +-
 kernel/time/hrtimer.c             |  8 ++--
 kernel/time/posix-stubs.c         | 18 ++++----
 kernel/time/posix-timers.c        | 30 ++++++------
 kernel/time/time.c                | 58 +++++++++++------------
 net/compat.c                      |  4 +-
 36 files changed, 237 insertions(+), 245 deletions(-)
 delete mode 100644 include/linux/compat_time.h

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 1a037b94eba1..4a89007db14a 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -86,11 +86,11 @@ struct compat_stat {
 	compat_off_t	st_size;
 	compat_off_t	st_blksize;
 	compat_off_t	st_blocks;
-	compat_time_t	st_atime;
+	old_time32_t	st_atime;
 	compat_ulong_t	st_atime_nsec;
-	compat_time_t	st_mtime;
+	old_time32_t	st_mtime;
 	compat_ulong_t	st_mtime_nsec;
-	compat_time_t	st_ctime;
+	old_time32_t	st_ctime;
 	compat_ulong_t	st_ctime_nsec;
 	compat_ulong_t	__unused4[2];
 };
diff --git a/arch/arm64/include/asm/stat.h b/arch/arm64/include/asm/stat.h
index eab738019707..397c6ccd04e7 100644
--- a/arch/arm64/include/asm/stat.h
+++ b/arch/arm64/include/asm/stat.h
@@ -20,7 +20,7 @@
 
 #ifdef CONFIG_COMPAT
 
-#include <linux/compat_time.h>
+#include <linux/time.h>
 #include <asm/compat.h>
 
 /*
diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
index 78675f19440f..7dcbd855814e 100644
--- a/arch/mips/include/asm/compat.h
+++ b/arch/mips/include/asm/compat.h
@@ -59,11 +59,11 @@ struct compat_stat {
 	s32		st_pad2[2];
 	compat_off_t	st_size;
 	s32		st_pad3;
-	compat_time_t	st_atime;
+	old_time32_t	st_atime;
 	s32		st_atime_nsec;
-	compat_time_t	st_mtime;
+	old_time32_t	st_mtime;
 	s32		st_mtime_nsec;
-	compat_time_t	st_ctime;
+	old_time32_t	st_ctime;
 	s32		st_ctime_nsec;
 	s32		st_blksize;
 	s32		st_blocks;
diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c
index 89b234844534..7a12763d553a 100644
--- a/arch/mips/kernel/binfmt_elfn32.c
+++ b/arch/mips/kernel/binfmt_elfn32.c
@@ -54,10 +54,10 @@ struct elf_prstatus32
 	pid_t	pr_ppid;
 	pid_t	pr_pgrp;
 	pid_t	pr_sid;
-	struct compat_timeval pr_utime; /* User time */
-	struct compat_timeval pr_stime; /* System time */
-	struct compat_timeval pr_cutime;/* Cumulative user time */
-	struct compat_timeval pr_cstime;/* Cumulative system time */
+	struct old_timeval32 pr_utime; /* User time */
+	struct old_timeval32 pr_stime; /* System time */
+	struct old_timeval32 pr_cutime;/* Cumulative user time */
+	struct old_timeval32 pr_cstime;/* Cumulative system time */
 	elf_gregset_t pr_reg;	/* GP registers */
 	int pr_fpvalid;		/* True if math co-processor being used.  */
 };
@@ -81,9 +81,9 @@ struct elf_prpsinfo32
 #define elf_caddr_t	u32
 #define init_elf_binfmt init_elfn32_binfmt
 
-#define jiffies_to_timeval jiffies_to_compat_timeval
+#define jiffies_to_timeval jiffies_to_old_timeval32
 static __inline__ void
-jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
+jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value)
 {
 	/*
 	 * Convert jiffies to nanoseconds and separate with
@@ -101,6 +101,6 @@ jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
 #define TASK_SIZE TASK_SIZE32
 
 #undef ns_to_timeval
-#define ns_to_timeval ns_to_compat_timeval
+#define ns_to_timeval ns_to_old_timeval32
 
 #include "../../../fs/binfmt_elf.c"
diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c
index a88c59db3d48..e6db06a1d31a 100644
--- a/arch/mips/kernel/binfmt_elfo32.c
+++ b/arch/mips/kernel/binfmt_elfo32.c
@@ -59,10 +59,10 @@ struct elf_prstatus32
 	pid_t	pr_ppid;
 	pid_t	pr_pgrp;
 	pid_t	pr_sid;
-	struct compat_timeval pr_utime; /* User time */
-	struct compat_timeval pr_stime; /* System time */
-	struct compat_timeval pr_cutime;/* Cumulative user time */
-	struct compat_timeval pr_cstime;/* Cumulative system time */
+	struct old_timeval32 pr_utime; /* User time */
+	struct old_timeval32 pr_stime; /* System time */
+	struct old_timeval32 pr_cutime;/* Cumulative user time */
+	struct old_timeval32 pr_cstime;/* Cumulative system time */
 	elf_gregset_t pr_reg;	/* GP registers */
 	int pr_fpvalid;		/* True if math co-processor being used.  */
 };
@@ -86,9 +86,9 @@ struct elf_prpsinfo32
 #define elf_caddr_t	u32
 #define init_elf_binfmt init_elf32_binfmt
 
-#define jiffies_to_timeval jiffies_to_compat_timeval
+#define jiffies_to_timeval jiffies_to_old_timeval32
 static inline void
-jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
+jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value)
 {
 	/*
 	 * Convert jiffies to nanoseconds and separate with
@@ -104,6 +104,6 @@ jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value)
 #define TASK_SIZE TASK_SIZE32
 
 #undef ns_to_timeval
-#define ns_to_timeval ns_to_compat_timeval
+#define ns_to_timeval ns_to_old_timeval32
 
 #include "../../../fs/binfmt_elf.c"
diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
index ab8a54771507..32dadaf2b534 100644
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h
@@ -48,11 +48,11 @@ struct compat_stat {
 	u16			st_reserved2;	/* old st_gid */
 	compat_dev_t		st_rdev;
 	compat_off_t		st_size;
-	compat_time_t		st_atime;
+	old_time32_t		st_atime;
 	u32			st_atime_nsec;
-	compat_time_t		st_mtime;
+	old_time32_t		st_mtime;
 	u32			st_mtime_nsec;
-	compat_time_t		st_ctime;
+	old_time32_t		st_ctime;
 	u32			st_ctime_nsec;
 	s32			st_blksize;
 	s32			st_blocks;
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 85c8af2bb272..93f79d1a03c3 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -55,11 +55,11 @@ struct compat_stat {
 	compat_off_t	st_size;
 	compat_off_t	st_blksize;
 	compat_off_t	st_blocks;
-	compat_time_t	st_atime;
+	old_time32_t	st_atime;
 	u32		st_atime_nsec;
-	compat_time_t	st_mtime;
+	old_time32_t	st_mtime;
 	u32		st_mtime_nsec;
-	compat_time_t	st_ctime;
+	old_time32_t	st_ctime;
 	u32		st_ctime_nsec;
 	u32		__unused4[2];
 };
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 89cf15566c4e..041a115789a1 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -387,12 +387,12 @@ int main(void)
 	OFFSET(CFG_SYSCALL_MAP64, vdso_data, syscall_map_64);
 	OFFSET(TVAL64_TV_SEC, timeval, tv_sec);
 	OFFSET(TVAL64_TV_USEC, timeval, tv_usec);
-	OFFSET(TVAL32_TV_SEC, compat_timeval, tv_sec);
-	OFFSET(TVAL32_TV_USEC, compat_timeval, tv_usec);
+	OFFSET(TVAL32_TV_SEC, old_timeval32, tv_sec);
+	OFFSET(TVAL32_TV_USEC, old_timeval32, tv_usec);
 	OFFSET(TSPC64_TV_SEC, timespec, tv_sec);
 	OFFSET(TSPC64_TV_NSEC, timespec, tv_nsec);
-	OFFSET(TSPC32_TV_SEC, compat_timespec, tv_sec);
-	OFFSET(TSPC32_TV_NSEC, compat_timespec, tv_nsec);
+	OFFSET(TSPC32_TV_SEC, old_timespec32, tv_sec);
+	OFFSET(TSPC32_TV_NSEC, old_timespec32, tv_nsec);
 #else
 	OFFSET(TVAL32_TV_SEC, timeval, tv_sec);
 	OFFSET(TVAL32_TV_USEC, timeval, tv_usec);
diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c
index ad054dd0d666..5df6290d1ccc 100644
--- a/arch/powerpc/oprofile/backtrace.c
+++ b/arch/powerpc/oprofile/backtrace.c
@@ -7,7 +7,7 @@
  * 2 of the License, or (at your option) any later version.
 **/
 
-#include <linux/compat_time.h>
+#include <linux/time.h>
 #include <linux/oprofile.h>
 #include <linux/sched.h>
 #include <asm/processor.h>
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index 4eb51d2dae98..7018cb60beef 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -47,11 +47,11 @@ struct compat_stat {
 	__compat_gid_t	st_gid;
 	compat_dev_t	st_rdev;
 	compat_off_t	st_size;
-	compat_time_t	st_atime;
+	old_time32_t	st_atime;
 	compat_ulong_t	st_atime_nsec;
-	compat_time_t	st_mtime;
+	old_time32_t	st_mtime;
 	compat_ulong_t	st_mtime_nsec;
-	compat_time_t	st_ctime;
+	old_time32_t	st_ctime;
 	compat_ulong_t	st_ctime_nsec;
 	compat_off_t	st_blksize;
 	compat_off_t	st_blocks;
diff --git a/fs/aio.c b/fs/aio.c
index b9350f3360c6..301e6314183b 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -2135,12 +2135,12 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
 		       compat_long_t, min_nr,
 		       compat_long_t, nr,
 		       struct io_event __user *, events,
-		       struct compat_timespec __user *, timeout)
+		       struct old_timespec32 __user *, timeout)
 {
 	struct timespec64 t;
 	int ret;
 
-	if (timeout && compat_get_timespec64(&t, timeout))
+	if (timeout && get_old_timespec32(&t, timeout))
 		return -EFAULT;
 
 	ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
@@ -2160,7 +2160,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
 		compat_long_t, min_nr,
 		compat_long_t, nr,
 		struct io_event __user *, events,
-		struct compat_timespec __user *, timeout,
+		struct old_timespec32 __user *, timeout,
 		const struct __compat_aio_sigset __user *, usig)
 {
 	struct __compat_aio_sigset ksig = { NULL, };
@@ -2168,7 +2168,7 @@ COMPAT_SYSCALL_DEFINE6(io_pgetevents,
 	struct timespec64 t;
 	int ret;
 
-	if (timeout && compat_get_timespec64(&t, timeout))
+	if (timeout && get_old_timespec32(&t, timeout))
 		return -EFAULT;
 
 	if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index 504b3c3539dc..15f6e96b3bd9 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -52,7 +52,7 @@
 #define elf_prpsinfo	compat_elf_prpsinfo
 
 #undef ns_to_timeval
-#define ns_to_timeval ns_to_compat_timeval
+#define ns_to_timeval ns_to_old_timeval32
 
 /*
  * To use this file, asm/elf.h must define compat_elf_check_arch.
diff --git a/fs/select.c b/fs/select.c
index 4a6b6e4b21cb..22b3bf89f051 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -1120,7 +1120,7 @@ int compat_poll_select_copy_remaining(struct timespec64 *end_time, void __user *
 		ts.tv_sec = ts.tv_nsec = 0;
 
 	if (timeval) {
-		struct compat_timeval rtv;
+		struct old_timeval32 rtv;
 
 		rtv.tv_sec = ts.tv_sec;
 		rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
@@ -1128,7 +1128,7 @@ int compat_poll_select_copy_remaining(struct timespec64 *end_time, void __user *
 		if (!copy_to_user(p, &rtv, sizeof(rtv)))
 			return ret;
 	} else {
-		if (!compat_put_timespec64(&ts, p))
+		if (!put_old_timespec32(&ts, p))
 			return ret;
 	}
 	/*
@@ -1257,10 +1257,10 @@ out_nofds:
 
 static int do_compat_select(int n, compat_ulong_t __user *inp,
 	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
-	struct compat_timeval __user *tvp)
+	struct old_timeval32 __user *tvp)
 {
 	struct timespec64 end_time, *to = NULL;
-	struct compat_timeval tv;
+	struct old_timeval32 tv;
 	int ret;
 
 	if (tvp) {
@@ -1282,7 +1282,7 @@ static int do_compat_select(int n, compat_ulong_t __user *inp,
 
 COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
 	compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
-	struct compat_timeval __user *, tvp)
+	struct old_timeval32 __user *, tvp)
 {
 	return do_compat_select(n, inp, outp, exp, tvp);
 }
@@ -1307,7 +1307,7 @@ COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg)
 
 static long do_compat_pselect(int n, compat_ulong_t __user *inp,
 	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
-	struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
+	struct old_timespec32 __user *tsp, compat_sigset_t __user *sigmask,
 	compat_size_t sigsetsize)
 {
 	sigset_t ksigmask, sigsaved;
@@ -1315,7 +1315,7 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp,
 	int ret;
 
 	if (tsp) {
-		if (compat_get_timespec64(&ts, tsp))
+		if (get_old_timespec32(&ts, tsp))
 			return -EFAULT;
 
 		to = &end_time;
@@ -1355,7 +1355,7 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp,
 
 COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
 	compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
-	struct compat_timespec __user *, tsp, void __user *, sig)
+	struct old_timespec32 __user *, tsp, void __user *, sig)
 {
 	compat_size_t sigsetsize = 0;
 	compat_uptr_t up = 0;
@@ -1373,7 +1373,7 @@ COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
 }
 
 COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
-	unsigned int,  nfds, struct compat_timespec __user *, tsp,
+	unsigned int,  nfds, struct old_timespec32 __user *, tsp,
 	const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
 {
 	sigset_t ksigmask, sigsaved;
@@ -1381,7 +1381,7 @@ COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
 	int ret;
 
 	if (tsp) {
-		if (compat_get_timespec64(&ts, tsp))
+		if (get_old_timespec32(&ts, tsp))
 			return -EFAULT;
 
 		to = &end_time;
diff --git a/fs/timerfd.c b/fs/timerfd.c
index d69ad801eb80..803ca070d42e 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -561,29 +561,29 @@ SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *,
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
 COMPAT_SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
-		const struct compat_itimerspec __user *, utmr,
-		struct compat_itimerspec __user *, otmr)
+		const struct old_itimerspec32 __user *, utmr,
+		struct old_itimerspec32 __user *, otmr)
 {
 	struct itimerspec64 new, old;
 	int ret;
 
-	if (get_compat_itimerspec64(&new, utmr))
+	if (get_old_itimerspec32(&new, utmr))
 		return -EFAULT;
 	ret = do_timerfd_settime(ufd, flags, &new, &old);
 	if (ret)
 		return ret;
-	if (otmr && put_compat_itimerspec64(&old, otmr))
+	if (otmr && put_old_itimerspec32(&old, otmr))
 		return -EFAULT;
 	return ret;
 }
 
 COMPAT_SYSCALL_DEFINE2(timerfd_gettime, int, ufd,
-		struct compat_itimerspec __user *, otmr)
+		struct old_itimerspec32 __user *, otmr)
 {
 	struct itimerspec64 kotmr;
 	int ret = do_timerfd_gettime(ufd, &kotmr);
 	if (ret)
 		return ret;
-	return put_compat_itimerspec64(&kotmr, otmr) ? -EFAULT : 0;
+	return put_old_itimerspec32(&kotmr, otmr) ? -EFAULT : 0;
 }
 #endif
diff --git a/fs/utimes.c b/fs/utimes.c
index 69d4b6ba1bfb..79a65c163f40 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -245,13 +245,13 @@ COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename,
 	return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0);
 }
 
-COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct compat_timespec __user *, t, int, flags)
+COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct old_timespec32 __user *, t, int, flags)
 {
 	struct timespec64 tv[2];
 
 	if  (t) {
-		if (compat_get_timespec64(&tv[0], &t[0]) ||
-		    compat_get_timespec64(&tv[1], &t[1]))
+		if (get_old_timespec32(&tv[0], &t[0]) ||
+		    get_old_timespec32(&tv[1], &t[1]))
 			return -EFAULT;
 
 		if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT)
@@ -261,7 +261,7 @@ COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filena
 }
 
 static long do_compat_futimesat(unsigned int dfd, const char __user *filename,
-				struct compat_timeval __user *t)
+				struct old_timeval32 __user *t)
 {
 	struct timespec64 tv[2];
 
@@ -282,12 +282,12 @@ static long do_compat_futimesat(unsigned int dfd, const char __user *filename,
 
 COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd,
 		       const char __user *, filename,
-		       struct compat_timeval __user *, t)
+		       struct old_timeval32 __user *, t)
 {
 	return do_compat_futimesat(dfd, filename, t);
 }
 
-COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct compat_timeval __user *, t)
+COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct old_timeval32 __user *, t)
 {
 	return do_compat_futimesat(AT_FDCWD, filename, t);
 }
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 1a3c4f37e908..0e058792ecf6 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -7,7 +7,7 @@
  */
 
 #include <linux/types.h>
-#include <linux/compat_time.h>
+#include <linux/time.h>
 
 #include <linux/stat.h>
 #include <linux/param.h>	/* for HZ */
@@ -116,13 +116,13 @@ struct compat_sel_arg_struct;
 struct rusage;
 
 struct compat_utimbuf {
-	compat_time_t		actime;
-	compat_time_t		modtime;
+	old_time32_t		actime;
+	old_time32_t		modtime;
 };
 
 struct compat_itimerval {
-	struct compat_timeval	it_interval;
-	struct compat_timeval	it_value;
+	struct old_timeval32	it_interval;
+	struct old_timeval32	it_value;
 };
 
 struct itimerval;
@@ -146,7 +146,7 @@ struct compat_timex {
 	compat_long_t constant;
 	compat_long_t precision;
 	compat_long_t tolerance;
-	struct compat_timeval time;
+	struct old_timeval32 time;
 	compat_long_t tick;
 	compat_long_t ppsfreq;
 	compat_long_t jitter;
@@ -307,8 +307,8 @@ struct compat_rlimit {
 };
 
 struct compat_rusage {
-	struct compat_timeval ru_utime;
-	struct compat_timeval ru_stime;
+	struct old_timeval32 ru_utime;
+	struct old_timeval32 ru_stime;
 	compat_long_t	ru_maxrss;
 	compat_long_t	ru_ixrss;
 	compat_long_t	ru_idrss;
@@ -457,8 +457,8 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *fr
 int get_compat_sigevent(struct sigevent *event,
 		const struct compat_sigevent __user *u_event);
 
-static inline int compat_timeval_compare(struct compat_timeval *lhs,
-					struct compat_timeval *rhs)
+static inline int old_timeval32_compare(struct old_timeval32 *lhs,
+					struct old_timeval32 *rhs)
 {
 	if (lhs->tv_sec < rhs->tv_sec)
 		return -1;
@@ -467,8 +467,8 @@ static inline int compat_timeval_compare(struct compat_timeval *lhs,
 	return lhs->tv_usec - rhs->tv_usec;
 }
 
-static inline int compat_timespec_compare(struct compat_timespec *lhs,
-					struct compat_timespec *rhs)
+static inline int old_timespec32_compare(struct old_timespec32 *lhs,
+					struct old_timespec32 *rhs)
 {
 	if (lhs->tv_sec < rhs->tv_sec)
 		return -1;
@@ -552,12 +552,12 @@ asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id,
 					compat_long_t min_nr,
 					compat_long_t nr,
 					struct io_event __user *events,
-					struct compat_timespec __user *timeout);
+					struct old_timespec32 __user *timeout);
 asmlinkage long compat_sys_io_pgetevents(compat_aio_context_t ctx_id,
 					compat_long_t min_nr,
 					compat_long_t nr,
 					struct io_event __user *events,
-					struct compat_timespec __user *timeout,
+					struct old_timespec32 __user *timeout,
 					const struct __compat_aio_sigset __user *usig);
 
 /* fs/cookies.c */
@@ -642,11 +642,11 @@ asmlinkage long compat_sys_sendfile64(int out_fd, int in_fd,
 asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp,
 				    compat_ulong_t __user *outp,
 				    compat_ulong_t __user *exp,
-				    struct compat_timespec __user *tsp,
+				    struct old_timespec32 __user *tsp,
 				    void __user *sig);
 asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
 				 unsigned int nfds,
-				 struct compat_timespec __user *tsp,
+				 struct old_timespec32 __user *tsp,
 				 const compat_sigset_t __user *sigmask,
 				 compat_size_t sigsetsize);
 
@@ -671,15 +671,15 @@ asmlinkage long compat_sys_newfstat(unsigned int fd,
 
 /* fs/timerfd.c */
 asmlinkage long compat_sys_timerfd_gettime(int ufd,
-				   struct compat_itimerspec __user *otmr);
+				   struct old_itimerspec32 __user *otmr);
 asmlinkage long compat_sys_timerfd_settime(int ufd, int flags,
-				   const struct compat_itimerspec __user *utmr,
-				   struct compat_itimerspec __user *otmr);
+				   const struct old_itimerspec32 __user *utmr,
+				   struct old_itimerspec32 __user *otmr);
 
 /* fs/utimes.c */
 asmlinkage long compat_sys_utimensat(unsigned int dfd,
 				     const char __user *filename,
-				     struct compat_timespec __user *t,
+				     struct old_timespec32 __user *t,
 				     int flags);
 
 /* kernel/exit.c */
@@ -691,7 +691,7 @@ asmlinkage long compat_sys_waitid(int, compat_pid_t,
 
 /* kernel/futex.c */
 asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
-		struct compat_timespec __user *utime, u32 __user *uaddr2,
+		struct old_timespec32 __user *utime, u32 __user *uaddr2,
 		u32 val3);
 asmlinkage long
 compat_sys_set_robust_list(struct compat_robust_list_head __user *head,
@@ -701,8 +701,8 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
 			   compat_size_t __user *len_ptr);
 
 /* kernel/hrtimer.c */
-asmlinkage long compat_sys_nanosleep(struct compat_timespec __user *rqtp,
-				     struct compat_timespec __user *rmtp);
+asmlinkage long compat_sys_nanosleep(struct old_timespec32 __user *rqtp,
+				     struct old_timespec32 __user *rmtp);
 
 /* kernel/itimer.c */
 asmlinkage long compat_sys_getitimer(int which,
@@ -722,19 +722,19 @@ asmlinkage long compat_sys_timer_create(clockid_t which_clock,
 			struct compat_sigevent __user *timer_event_spec,
 			timer_t __user *created_timer_id);
 asmlinkage long compat_sys_timer_gettime(timer_t timer_id,
-				 struct compat_itimerspec __user *setting);
+				 struct old_itimerspec32 __user *setting);
 asmlinkage long compat_sys_timer_settime(timer_t timer_id, int flags,
-					 struct compat_itimerspec __user *new,
-					 struct compat_itimerspec __user *old);
+					 struct old_itimerspec32 __user *new,
+					 struct old_itimerspec32 __user *old);
 asmlinkage long compat_sys_clock_settime(clockid_t which_clock,
-					 struct compat_timespec __user *tp);
+					 struct old_timespec32 __user *tp);
 asmlinkage long compat_sys_clock_gettime(clockid_t which_clock,
-					 struct compat_timespec __user *tp);
+					 struct old_timespec32 __user *tp);
 asmlinkage long compat_sys_clock_getres(clockid_t which_clock,
-					struct compat_timespec __user *tp);
+					struct old_timespec32 __user *tp);
 asmlinkage long compat_sys_clock_nanosleep(clockid_t which_clock, int flags,
-					   struct compat_timespec __user *rqtp,
-					   struct compat_timespec __user *rmtp);
+					   struct old_timespec32 __user *rqtp,
+					   struct old_timespec32 __user *rmtp);
 
 /* kernel/ptrace.c */
 asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
@@ -748,7 +748,7 @@ asmlinkage long compat_sys_sched_getaffinity(compat_pid_t pid,
 				     unsigned int len,
 				     compat_ulong_t __user *user_mask_ptr);
 asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid,
-						 struct compat_timespec __user *interval);
+						 struct old_timespec32 __user *interval);
 
 /* kernel/signal.c */
 asmlinkage long compat_sys_sigaltstack(const compat_stack_t __user *uss_ptr,
@@ -768,7 +768,7 @@ asmlinkage long compat_sys_rt_sigpending(compat_sigset_t __user *uset,
 					 compat_size_t sigsetsize);
 asmlinkage long compat_sys_rt_sigtimedwait(compat_sigset_t __user *uthese,
 		struct compat_siginfo __user *uinfo,
-		struct compat_timespec __user *uts, compat_size_t sigsetsize);
+		struct old_timespec32 __user *uts, compat_size_t sigsetsize);
 asmlinkage long compat_sys_rt_sigqueueinfo(compat_pid_t pid, int sig,
 				struct compat_siginfo __user *uinfo);
 /* No generic prototype for rt_sigreturn */
@@ -782,9 +782,9 @@ asmlinkage long compat_sys_setrlimit(unsigned int resource,
 asmlinkage long compat_sys_getrusage(int who, struct compat_rusage __user *ru);
 
 /* kernel/time.c */
-asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv,
+asmlinkage long compat_sys_gettimeofday(struct old_timeval32 __user *tv,
 		struct timezone __user *tz);
-asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv,
+asmlinkage long compat_sys_settimeofday(struct old_timeval32 __user *tv,
 		struct timezone __user *tz);
 asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp);
 
@@ -798,11 +798,11 @@ asmlinkage long compat_sys_mq_open(const char __user *u_name,
 asmlinkage long compat_sys_mq_timedsend(mqd_t mqdes,
 			const char __user *u_msg_ptr,
 			compat_size_t msg_len, unsigned int msg_prio,
-			const struct compat_timespec __user *u_abs_timeout);
+			const struct old_timespec32 __user *u_abs_timeout);
 asmlinkage ssize_t compat_sys_mq_timedreceive(mqd_t mqdes,
 			char __user *u_msg_ptr,
 			compat_size_t msg_len, unsigned int __user *u_msg_prio,
-			const struct compat_timespec __user *u_abs_timeout);
+			const struct old_timespec32 __user *u_abs_timeout);
 asmlinkage long compat_sys_mq_notify(mqd_t mqdes,
 			const struct compat_sigevent __user *u_notification);
 asmlinkage long compat_sys_mq_getsetattr(mqd_t mqdes,
@@ -819,7 +819,7 @@ asmlinkage long compat_sys_msgsnd(int msqid, compat_uptr_t msgp,
 /* ipc/sem.c */
 asmlinkage long compat_sys_semctl(int semid, int semnum, int cmd, int arg);
 asmlinkage long compat_sys_semtimedop(int semid, struct sembuf __user *tsems,
-		unsigned nsems, const struct compat_timespec __user *timeout);
+		unsigned nsems, const struct old_timespec32 __user *timeout);
 
 /* ipc/shm.c */
 asmlinkage long compat_sys_shmctl(int first, int second, void __user *uptr);
@@ -876,7 +876,7 @@ asmlinkage long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid,
 					struct compat_siginfo __user *uinfo);
 asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
 				    unsigned vlen, unsigned int flags,
-				    struct compat_timespec __user *timeout);
+				    struct old_timespec32 __user *timeout);
 asmlinkage long compat_sys_wait4(compat_pid_t pid,
 				 compat_uint_t __user *stat_addr, int options,
 				 struct compat_rusage __user *ru);
@@ -928,7 +928,7 @@ asmlinkage long compat_sys_pwritev64v2(unsigned long fd,
 asmlinkage long compat_sys_open(const char __user *filename, int flags,
 				umode_t mode);
 asmlinkage long compat_sys_utimes(const char __user *filename,
-				  struct compat_timeval __user *t);
+				  struct old_timeval32 __user *t);
 
 /* __ARCH_WANT_SYSCALL_NO_FLAGS */
 asmlinkage long compat_sys_signalfd(int ufd,
@@ -942,15 +942,15 @@ asmlinkage long compat_sys_newlstat(const char __user *filename,
 				    struct compat_stat __user *statbuf);
 
 /* __ARCH_WANT_SYSCALL_DEPRECATED */
-asmlinkage long compat_sys_time(compat_time_t __user *tloc);
+asmlinkage long compat_sys_time(old_time32_t __user *tloc);
 asmlinkage long compat_sys_utime(const char __user *filename,
 				 struct compat_utimbuf __user *t);
 asmlinkage long compat_sys_futimesat(unsigned int dfd,
 				     const char __user *filename,
-				     struct compat_timeval __user *t);
+				     struct old_timeval32 __user *t);
 asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
 		compat_ulong_t __user *outp, compat_ulong_t __user *exp,
-		struct compat_timeval __user *tvp);
+		struct old_timeval32 __user *tvp);
 asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u32);
 asmlinkage long compat_sys_recv(int fd, void __user *buf, compat_size_t len,
 				unsigned flags);
@@ -983,7 +983,7 @@ asmlinkage long compat_sys_sigaction(int sig,
 #endif
 
 /* obsolete: kernel/time/time.c */
-asmlinkage long compat_sys_stime(compat_time_t __user *tptr);
+asmlinkage long compat_sys_stime(old_time32_t __user *tptr);
 
 /* obsolete: net/socket.c */
 asmlinkage long compat_sys_socketcall(int call, u32 __user *args);
@@ -1002,15 +1002,15 @@ static inline bool in_compat_syscall(void) { return is_compat_task(); }
 #endif
 
 /**
- * ns_to_compat_timeval - Compat version of ns_to_timeval
+ * ns_to_old_timeval32 - Compat version of ns_to_timeval
  * @nsec:	the nanoseconds value to be converted
  *
- * Returns the compat_timeval representation of the nsec parameter.
+ * Returns the old_timeval32 representation of the nsec parameter.
  */
-static inline struct compat_timeval ns_to_compat_timeval(s64 nsec)
+static inline struct old_timeval32 ns_to_old_timeval32(s64 nsec)
 {
 	struct timeval tv;
-	struct compat_timeval ctv;
+	struct old_timeval32 ctv;
 
 	tv = ns_to_timeval(nsec);
 	ctv.tv_sec = tv.tv_sec;
diff --git a/include/linux/compat_time.h b/include/linux/compat_time.h
deleted file mode 100644
index e70bfd1d2c3f..000000000000
--- a/include/linux/compat_time.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_COMPAT_TIME_H
-#define _LINUX_COMPAT_TIME_H
-
-#include <linux/types.h>
-#include <linux/time64.h>
-
-typedef s32		compat_time_t;
-
-struct compat_timespec {
-	compat_time_t	tv_sec;
-	s32		tv_nsec;
-};
-
-struct compat_timeval {
-	compat_time_t	tv_sec;
-	s32		tv_usec;
-};
-
-struct compat_itimerspec {
-	struct compat_timespec it_interval;
-	struct compat_timespec it_value;
-};
-
-extern int compat_get_timespec64(struct timespec64 *, const void __user *);
-extern int compat_put_timespec64(const struct timespec64 *, void __user *);
-extern int get_compat_itimerspec64(struct itimerspec64 *its,
-			const struct compat_itimerspec __user *uits);
-extern int put_compat_itimerspec64(const struct itimerspec64 *its,
-			struct compat_itimerspec __user *uits);
-
-#endif /* _LINUX_COMPAT_TIME_H */
diff --git a/include/linux/elfcore-compat.h b/include/linux/elfcore-compat.h
index b5f2efdd05e0..7a37f4ce9fd2 100644
--- a/include/linux/elfcore-compat.h
+++ b/include/linux/elfcore-compat.h
@@ -27,10 +27,10 @@ struct compat_elf_prstatus
 	compat_pid_t			pr_ppid;
 	compat_pid_t			pr_pgrp;
 	compat_pid_t			pr_sid;
-	struct compat_timeval		pr_utime;
-	struct compat_timeval		pr_stime;
-	struct compat_timeval		pr_cutime;
-	struct compat_timeval		pr_cstime;
+	struct old_timeval32		pr_utime;
+	struct old_timeval32		pr_stime;
+	struct old_timeval32		pr_cutime;
+	struct old_timeval32		pr_cstime;
 	compat_elf_gregset_t		pr_reg;
 #ifdef CONFIG_BINFMT_ELF_FDPIC
 	compat_ulong_t			pr_exec_fdpic_loadmap;
diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h
index 5d83d0c1d06c..bba2920e9c05 100644
--- a/include/linux/restart_block.h
+++ b/include/linux/restart_block.h
@@ -10,7 +10,7 @@
 #include <linux/time64.h>
 
 struct timespec;
-struct compat_timespec;
+struct old_timespec32;
 struct pollfd;
 
 enum timespec_type {
@@ -40,7 +40,7 @@ struct restart_block {
 			enum timespec_type type;
 			union {
 				struct __kernel_timespec __user *rmtp;
-				struct compat_timespec __user *compat_rmtp;
+				struct old_timespec32 __user *compat_rmtp;
 			};
 			u64 expires;
 		} nanosleep;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 2ff814c92f7f..b3e27e5ee322 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -60,7 +60,7 @@ struct tms;
 struct utimbuf;
 struct mq_attr;
 struct compat_stat;
-struct compat_timeval;
+struct old_timeval32;
 struct robust_list_head;
 struct getcpu_cache;
 struct old_linux_dirent;
diff --git a/include/linux/time32.h b/include/linux/time32.h
index 92c0ca092d93..0e0d7304d1a8 100644
--- a/include/linux/time32.h
+++ b/include/linux/time32.h
@@ -13,6 +13,31 @@
 
 #define TIME_T_MAX	(time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1)
 
+typedef s32		old_time32_t;
+
+struct old_timespec32 {
+	old_time32_t	tv_sec;
+	s32		tv_nsec;
+};
+
+struct old_timeval32 {
+	old_time32_t	tv_sec;
+	s32		tv_usec;
+};
+
+struct old_itimerspec32 {
+	struct old_timespec32 it_interval;
+	struct old_timespec32 it_value;
+};
+
+extern int get_old_timespec32(struct timespec64 *, const void __user *);
+extern int put_old_timespec32(const struct timespec64 *, void __user *);
+extern int get_old_itimerspec32(struct itimerspec64 *its,
+			const struct old_itimerspec32 __user *uits);
+extern int put_old_itimerspec32(const struct itimerspec64 *its,
+			struct old_itimerspec32 __user *uits);
+
+
 #if __BITS_PER_LONG == 64
 
 /* timespec64 is defined as timespec here */
@@ -183,18 +208,17 @@ extern struct timeval ns_to_timeval(const s64 nsec);
 extern struct __kernel_old_timeval ns_to_kernel_old_timeval(s64 nsec);
 
 /*
- * New aliases for compat time functions. These will be used to replace
- * the compat code so it can be shared between 32-bit and 64-bit builds
- * both of which provide compatibility with old 32-bit tasks.
+ * Old names for the 32-bit time_t interfaces, these will be removed
+ * when everything uses the new names.
  */
-#define old_time32_t		compat_time_t
-#define old_timeval32		compat_timeval
-#define old_timespec32		compat_timespec
-#define old_itimerspec32	compat_itimerspec
-#define ns_to_old_timeval32	ns_to_compat_timeval
-#define get_old_itimerspec32	get_compat_itimerspec64
-#define put_old_itimerspec32	put_compat_itimerspec64
-#define get_old_timespec32	compat_get_timespec64
-#define put_old_timespec32	compat_put_timespec64
+#define compat_time_t		old_time32_t
+#define compat_timeval		old_timeval32
+#define compat_timespec		old_timespec32
+#define compat_itimerspec	old_itimerspec32
+#define ns_to_compat_timeval	ns_to_old_timeval32
+#define get_compat_itimerspec64	get_old_itimerspec32
+#define put_compat_itimerspec64	put_old_itimerspec32
+#define compat_get_timespec64	get_old_timespec32
+#define compat_put_timespec64	put_old_timespec32
 
 #endif
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index c0d58f390c3b..db7833370351 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -1461,10 +1461,10 @@ COMPAT_SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes,
 #endif
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
-static int compat_prepare_timeout(const struct compat_timespec __user *p,
+static int compat_prepare_timeout(const struct old_timespec32 __user *p,
 				   struct timespec64 *ts)
 {
-	if (compat_get_timespec64(ts, p))
+	if (get_old_timespec32(ts, p))
 		return -EFAULT;
 	if (!timespec64_valid(ts))
 		return -EINVAL;
@@ -1474,7 +1474,7 @@ static int compat_prepare_timeout(const struct compat_timespec __user *p,
 COMPAT_SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes,
 		       const char __user *, u_msg_ptr,
 		       compat_size_t, msg_len, unsigned int, msg_prio,
-		       const struct compat_timespec __user *, u_abs_timeout)
+		       const struct old_timespec32 __user *, u_abs_timeout)
 {
 	struct timespec64 ts, *p = NULL;
 	if (u_abs_timeout) {
@@ -1489,7 +1489,7 @@ COMPAT_SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes,
 COMPAT_SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes,
 		       char __user *, u_msg_ptr,
 		       compat_size_t, msg_len, unsigned int __user *, u_msg_prio,
-		       const struct compat_timespec __user *, u_abs_timeout)
+		       const struct old_timespec32 __user *, u_abs_timeout)
 {
 	struct timespec64 ts, *p = NULL;
 	if (u_abs_timeout) {
diff --git a/ipc/msg.c b/ipc/msg.c
index 883642cf2b27..0833c6405915 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -622,9 +622,9 @@ struct compat_msqid_ds {
 	struct compat_ipc_perm msg_perm;
 	compat_uptr_t msg_first;
 	compat_uptr_t msg_last;
-	compat_time_t msg_stime;
-	compat_time_t msg_rtime;
-	compat_time_t msg_ctime;
+	old_time32_t msg_stime;
+	old_time32_t msg_rtime;
+	old_time32_t msg_ctime;
 	compat_ulong_t msg_lcbytes;
 	compat_ulong_t msg_lqbytes;
 	unsigned short msg_cbytes;
diff --git a/ipc/sem.c b/ipc/sem.c
index 26f8e37fcdcb..745dc6187e84 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1698,8 +1698,8 @@ SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg)
 
 struct compat_semid_ds {
 	struct compat_ipc_perm sem_perm;
-	compat_time_t sem_otime;
-	compat_time_t sem_ctime;
+	old_time32_t sem_otime;
+	old_time32_t sem_ctime;
 	compat_uptr_t sem_base;
 	compat_uptr_t sem_pending;
 	compat_uptr_t sem_pending_last;
@@ -2214,11 +2214,11 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
 #ifdef CONFIG_COMPAT_32BIT_TIME
 long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
 			    unsigned int nsops,
-			    const struct compat_timespec __user *timeout)
+			    const struct old_timespec32 __user *timeout)
 {
 	if (timeout) {
 		struct timespec64 ts;
-		if (compat_get_timespec64(&ts, timeout))
+		if (get_old_timespec32(&ts, timeout))
 			return -EFAULT;
 		return do_semtimedop(semid, tsems, nsops, &ts);
 	}
@@ -2227,7 +2227,7 @@ long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
 
 COMPAT_SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsems,
 		       unsigned int, nsops,
-		       const struct compat_timespec __user *, timeout)
+		       const struct old_timespec32 __user *, timeout)
 {
 	return compat_ksys_semtimedop(semid, tsems, nsops, timeout);
 }
diff --git a/ipc/shm.c b/ipc/shm.c
index b0eb3757ab89..2657692199eb 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1201,9 +1201,9 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
 struct compat_shmid_ds {
 	struct compat_ipc_perm shm_perm;
 	int shm_segsz;
-	compat_time_t shm_atime;
-	compat_time_t shm_dtime;
-	compat_time_t shm_ctime;
+	old_time32_t shm_atime;
+	old_time32_t shm_dtime;
+	old_time32_t shm_ctime;
 	compat_ipc_pid_t shm_cpid;
 	compat_ipc_pid_t shm_lpid;
 	unsigned short shm_nattch;
diff --git a/ipc/syscall.c b/ipc/syscall.c
index 65d405f1ba0c..1ac06e3983c0 100644
--- a/ipc/syscall.c
+++ b/ipc/syscall.c
@@ -35,7 +35,7 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second,
 			        (const struct __kernel_timespec __user *)fifth);
 		else if (IS_ENABLED(CONFIG_COMPAT_32BIT_TIME))
 			return compat_ksys_semtimedop(first, ptr, second,
-			        (const struct compat_timespec __user *)fifth);
+			        (const struct old_timespec32 __user *)fifth);
 		else
 			return -ENOSYS;
 
diff --git a/ipc/util.h b/ipc/util.h
index 0a159f69b3bb..1ee81bce25e9 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -266,7 +266,7 @@ long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf);
 /* for CONFIG_ARCH_WANT_OLD_COMPAT_IPC */
 long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
 			    unsigned int nsops,
-			    const struct compat_timespec __user *timeout);
+			    const struct old_timespec32 __user *timeout);
 #ifdef CONFIG_COMPAT
 long compat_ksys_semctl(int semid, int semnum, int cmd, int arg);
 long compat_ksys_msgctl(int msqid, int cmd, void __user *uptr);
diff --git a/kernel/compat.c b/kernel/compat.c
index 8e40efc2928a..089d00d0da9c 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -93,28 +93,28 @@ int compat_put_timex(struct compat_timex __user *utp, const struct timex *txc)
 	return 0;
 }
 
-static int __compat_get_timeval(struct timeval *tv, const struct compat_timeval __user *ctv)
+static int __compat_get_timeval(struct timeval *tv, const struct old_timeval32 __user *ctv)
 {
 	return (!access_ok(VERIFY_READ, ctv, sizeof(*ctv)) ||
 			__get_user(tv->tv_sec, &ctv->tv_sec) ||
 			__get_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
 }
 
-static int __compat_put_timeval(const struct timeval *tv, struct compat_timeval __user *ctv)
+static int __compat_put_timeval(const struct timeval *tv, struct old_timeval32 __user *ctv)
 {
 	return (!access_ok(VERIFY_WRITE, ctv, sizeof(*ctv)) ||
 			__put_user(tv->tv_sec, &ctv->tv_sec) ||
 			__put_user(tv->tv_usec, &ctv->tv_usec)) ? -EFAULT : 0;
 }
 
-static int __compat_get_timespec(struct timespec *ts, const struct compat_timespec __user *cts)
+static int __compat_get_timespec(struct timespec *ts, const struct old_timespec32 __user *cts)
 {
 	return (!access_ok(VERIFY_READ, cts, sizeof(*cts)) ||
 			__get_user(ts->tv_sec, &cts->tv_sec) ||
 			__get_user(ts->tv_nsec, &cts->tv_nsec)) ? -EFAULT : 0;
 }
 
-static int __compat_put_timespec(const struct timespec *ts, struct compat_timespec __user *cts)
+static int __compat_put_timespec(const struct timespec *ts, struct old_timespec32 __user *cts)
 {
 	return (!access_ok(VERIFY_WRITE, cts, sizeof(*cts)) ||
 			__put_user(ts->tv_sec, &cts->tv_sec) ||
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 83f830acbb5f..410a77a8f6e2 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -173,7 +173,7 @@ err_unlock:
 }
 
 COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
-		struct compat_timespec __user *, utime, u32 __user *, uaddr2,
+		struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
 		u32, val3)
 {
 	struct timespec ts;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 625bc9897f62..8287b75ed961 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5257,13 +5257,13 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 #ifdef CONFIG_COMPAT
 COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval,
 		       compat_pid_t, pid,
-		       struct compat_timespec __user *, interval)
+		       struct old_timespec32 __user *, interval)
 {
 	struct timespec64 t;
 	int retval = sched_rr_get_interval(pid, &t);
 
 	if (retval == 0)
-		retval = compat_put_timespec64(&t, interval);
+		retval = put_old_timespec32(&t, interval);
 	return retval;
 }
 #endif
diff --git a/kernel/signal.c b/kernel/signal.c
index 5843c541fda9..a4db724e14c1 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3173,7 +3173,7 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
 #ifdef CONFIG_COMPAT
 COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese,
 		struct compat_siginfo __user *, uinfo,
-		struct compat_timespec __user *, uts, compat_size_t, sigsetsize)
+		struct old_timespec32 __user *, uts, compat_size_t, sigsetsize)
 {
 	sigset_t s;
 	struct timespec t;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index e1a549c9e399..9cdd74bd2d27 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1660,7 +1660,7 @@ int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
 	switch(restart->nanosleep.type) {
 #ifdef CONFIG_COMPAT_32BIT_TIME
 	case TT_COMPAT:
-		if (compat_put_timespec64(ts, restart->nanosleep.compat_rmtp))
+		if (put_old_timespec32(ts, restart->nanosleep.compat_rmtp))
 			return -EFAULT;
 		break;
 #endif
@@ -1780,12 +1780,12 @@ SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp,
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
 
-COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp,
-		       struct compat_timespec __user *, rmtp)
+COMPAT_SYSCALL_DEFINE2(nanosleep, struct old_timespec32 __user *, rqtp,
+		       struct old_timespec32 __user *, rmtp)
 {
 	struct timespec64 tu;
 
-	if (compat_get_timespec64(&tu, rqtp))
+	if (get_old_timespec32(&tu, rqtp))
 		return -EFAULT;
 
 	if (!timespec64_valid(&tu))
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index 2c6847d5d69b..989ccf028bde 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -162,20 +162,20 @@ COMPAT_SYS_NI(setitimer);
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
 COMPAT_SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
-		       struct compat_timespec __user *, tp)
+		       struct old_timespec32 __user *, tp)
 {
 	struct timespec64 new_tp;
 
 	if (which_clock != CLOCK_REALTIME)
 		return -EINVAL;
-	if (compat_get_timespec64(&new_tp, tp))
+	if (get_old_timespec32(&new_tp, tp))
 		return -EFAULT;
 
 	return do_sys_settimeofday64(&new_tp, NULL);
 }
 
 COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
-		       struct compat_timespec __user *, tp)
+		       struct old_timespec32 __user *, tp)
 {
 	int ret;
 	struct timespec64 kernel_tp;
@@ -184,13 +184,13 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
 	if (ret)
 		return ret;
 
-	if (compat_put_timespec64(&kernel_tp, tp))
+	if (put_old_timespec32(&kernel_tp, tp))
 		return -EFAULT;
 	return 0;
 }
 
 COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
-		       struct compat_timespec __user *, tp)
+		       struct old_timespec32 __user *, tp)
 {
 	struct timespec64 rtn_tp = {
 		.tv_sec = 0,
@@ -201,7 +201,7 @@ COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
 	case CLOCK_REALTIME:
 	case CLOCK_MONOTONIC:
 	case CLOCK_BOOTTIME:
-		if (compat_put_timespec64(&rtn_tp, tp))
+		if (put_old_timespec32(&rtn_tp, tp))
 			return -EFAULT;
 		return 0;
 	default:
@@ -210,8 +210,8 @@ COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
 }
 
 COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
-		       struct compat_timespec __user *, rqtp,
-		       struct compat_timespec __user *, rmtp)
+		       struct old_timespec32 __user *, rqtp,
+		       struct old_timespec32 __user *, rmtp)
 {
 	struct timespec64 t;
 
@@ -224,7 +224,7 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
 		return -EINVAL;
 	}
 
-	if (compat_get_timespec64(&t, rqtp))
+	if (get_old_timespec32(&t, rqtp))
 		return -EFAULT;
 	if (!timespec64_valid(&t))
 		return -EINVAL;
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 4b9127e95430..3e71921668ba 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -755,13 +755,13 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
 #ifdef CONFIG_COMPAT_32BIT_TIME
 
 COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
-		       struct compat_itimerspec __user *, setting)
+		       struct old_itimerspec32 __user *, setting)
 {
 	struct itimerspec64 cur_setting;
 
 	int ret = do_timer_gettime(timer_id, &cur_setting);
 	if (!ret) {
-		if (put_compat_itimerspec64(&cur_setting, setting))
+		if (put_old_itimerspec32(&cur_setting, setting))
 			ret = -EFAULT;
 	}
 	return ret;
@@ -928,8 +928,8 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
 
 #ifdef CONFIG_COMPAT_32BIT_TIME
 COMPAT_SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
-		       struct compat_itimerspec __user *, new,
-		       struct compat_itimerspec __user *, old)
+		       struct old_itimerspec32 __user *, new,
+		       struct old_itimerspec32 __user *, old)
 {
 	struct itimerspec64 new_spec, old_spec;
 	struct itimerspec64 *rtn = old ? &old_spec : NULL;
@@ -937,12 +937,12 @@ COMPAT_SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
 
 	if (!new)
 		return -EINVAL;
-	if (get_compat_itimerspec64(&new_spec, new))
+	if (get_old_itimerspec32(&new_spec, new))
 		return -EFAULT;
 
 	error = do_timer_settime(timer_id, flags, &new_spec, rtn);
 	if (!error && old) {
-		if (put_compat_itimerspec64(&old_spec, old))
+		if (put_old_itimerspec32(&old_spec, old))
 			error = -EFAULT;
 	}
 	return error;
@@ -1115,7 +1115,7 @@ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
 #ifdef CONFIG_COMPAT_32BIT_TIME
 
 COMPAT_SYSCALL_DEFINE2(clock_settime, clockid_t, which_clock,
-		       struct compat_timespec __user *, tp)
+		       struct old_timespec32 __user *, tp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 ts;
@@ -1123,14 +1123,14 @@ COMPAT_SYSCALL_DEFINE2(clock_settime, clockid_t, which_clock,
 	if (!kc || !kc->clock_set)
 		return -EINVAL;
 
-	if (compat_get_timespec64(&ts, tp))
+	if (get_old_timespec32(&ts, tp))
 		return -EFAULT;
 
 	return kc->clock_set(which_clock, &ts);
 }
 
 COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
-		       struct compat_timespec __user *, tp)
+		       struct old_timespec32 __user *, tp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 ts;
@@ -1141,7 +1141,7 @@ COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
 
 	err = kc->clock_get(which_clock, &ts);
 
-	if (!err && compat_put_timespec64(&ts, tp))
+	if (!err && put_old_timespec32(&ts, tp))
 		err = -EFAULT;
 
 	return err;
@@ -1180,7 +1180,7 @@ COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock,
 #ifdef CONFIG_COMPAT_32BIT_TIME
 
 COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
-		       struct compat_timespec __user *, tp)
+		       struct old_timespec32 __user *, tp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 ts;
@@ -1190,7 +1190,7 @@ COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
 		return -EINVAL;
 
 	err = kc->clock_getres(which_clock, &ts);
-	if (!err && tp && compat_put_timespec64(&ts, tp))
+	if (!err && tp && put_old_timespec32(&ts, tp))
 		return -EFAULT;
 
 	return err;
@@ -1237,8 +1237,8 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
 #ifdef CONFIG_COMPAT_32BIT_TIME
 
 COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
-		       struct compat_timespec __user *, rqtp,
-		       struct compat_timespec __user *, rmtp)
+		       struct old_timespec32 __user *, rqtp,
+		       struct old_timespec32 __user *, rmtp)
 {
 	const struct k_clock *kc = clockid_to_kclock(which_clock);
 	struct timespec64 t;
@@ -1248,7 +1248,7 @@ COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
 	if (!kc->nsleep)
 		return -EOPNOTSUPP;
 
-	if (compat_get_timespec64(&t, rqtp))
+	if (get_old_timespec32(&t, rqtp))
 		return -EFAULT;
 
 	if (!timespec64_valid(&t))
diff --git a/kernel/time/time.c b/kernel/time/time.c
index de332250d6fa..f1983f468fe3 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -104,12 +104,12 @@ SYSCALL_DEFINE1(stime, time_t __user *, tptr)
 #ifdef CONFIG_COMPAT
 #ifdef __ARCH_WANT_COMPAT_SYS_TIME
 
-/* compat_time_t is a 32 bit "long" and needs to get converted. */
-COMPAT_SYSCALL_DEFINE1(time, compat_time_t __user *, tloc)
+/* old_time32_t is a 32 bit "long" and needs to get converted. */
+COMPAT_SYSCALL_DEFINE1(time, old_time32_t __user *, tloc)
 {
-	compat_time_t i;
+	old_time32_t i;
 
-	i = (compat_time_t)ktime_get_real_seconds();
+	i = (old_time32_t)ktime_get_real_seconds();
 
 	if (tloc) {
 		if (put_user(i,tloc))
@@ -119,7 +119,7 @@ COMPAT_SYSCALL_DEFINE1(time, compat_time_t __user *, tloc)
 	return i;
 }
 
-COMPAT_SYSCALL_DEFINE1(stime, compat_time_t __user *, tptr)
+COMPAT_SYSCALL_DEFINE1(stime, old_time32_t __user *, tptr)
 {
 	struct timespec64 tv;
 	int err;
@@ -225,7 +225,7 @@ SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv,
 }
 
 #ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE2(gettimeofday, struct compat_timeval __user *, tv,
+COMPAT_SYSCALL_DEFINE2(gettimeofday, struct old_timeval32 __user *, tv,
 		       struct timezone __user *, tz)
 {
 	if (tv) {
@@ -244,7 +244,7 @@ COMPAT_SYSCALL_DEFINE2(gettimeofday, struct compat_timeval __user *, tv,
 	return 0;
 }
 
-COMPAT_SYSCALL_DEFINE2(settimeofday, struct compat_timeval __user *, tv,
+COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv,
 		       struct timezone __user *, tz)
 {
 	struct timespec64 new_ts;
@@ -863,10 +863,10 @@ int put_timespec64(const struct timespec64 *ts,
 }
 EXPORT_SYMBOL_GPL(put_timespec64);
 
-int __compat_get_timespec64(struct timespec64 *ts64,
-				   const struct compat_timespec __user *cts)
+int __get_old_timespec32(struct timespec64 *ts64,
+				   const struct old_timespec32 __user *cts)
 {
-	struct compat_timespec ts;
+	struct old_timespec32 ts;
 	int ret;
 
 	ret = copy_from_user(&ts, cts, sizeof(ts));
@@ -879,33 +879,33 @@ int __compat_get_timespec64(struct timespec64 *ts64,
 	return 0;
 }
 
-int __compat_put_timespec64(const struct timespec64 *ts64,
-				   struct compat_timespec __user *cts)
+int __put_old_timespec32(const struct timespec64 *ts64,
+				   struct old_timespec32 __user *cts)
 {
-	struct compat_timespec ts = {
+	struct old_timespec32 ts = {
 		.tv_sec = ts64->tv_sec,
 		.tv_nsec = ts64->tv_nsec
 	};
 	return copy_to_user(cts, &ts, sizeof(ts)) ? -EFAULT : 0;
 }
 
-int compat_get_timespec64(struct timespec64 *ts, const void __user *uts)
+int get_old_timespec32(struct timespec64 *ts, const void __user *uts)
 {
 	if (COMPAT_USE_64BIT_TIME)
 		return copy_from_user(ts, uts, sizeof(*ts)) ? -EFAULT : 0;
 	else
-		return __compat_get_timespec64(ts, uts);
+		return __get_old_timespec32(ts, uts);
 }
-EXPORT_SYMBOL_GPL(compat_get_timespec64);
+EXPORT_SYMBOL_GPL(get_old_timespec32);
 
-int compat_put_timespec64(const struct timespec64 *ts, void __user *uts)
+int put_old_timespec32(const struct timespec64 *ts, void __user *uts)
 {
 	if (COMPAT_USE_64BIT_TIME)
 		return copy_to_user(uts, ts, sizeof(*ts)) ? -EFAULT : 0;
 	else
-		return __compat_put_timespec64(ts, uts);
+		return __put_old_timespec32(ts, uts);
 }
-EXPORT_SYMBOL_GPL(compat_put_timespec64);
+EXPORT_SYMBOL_GPL(put_old_timespec32);
 
 int get_itimerspec64(struct itimerspec64 *it,
 			const struct __kernel_itimerspec __user *uit)
@@ -937,23 +937,23 @@ int put_itimerspec64(const struct itimerspec64 *it,
 }
 EXPORT_SYMBOL_GPL(put_itimerspec64);
 
-int get_compat_itimerspec64(struct itimerspec64 *its,
-			const struct compat_itimerspec __user *uits)
+int get_old_itimerspec32(struct itimerspec64 *its,
+			const struct old_itimerspec32 __user *uits)
 {
 
-	if (__compat_get_timespec64(&its->it_interval, &uits->it_interval) ||
-	    __compat_get_timespec64(&its->it_value, &uits->it_value))
+	if (__get_old_timespec32(&its->it_interval, &uits->it_interval) ||
+	    __get_old_timespec32(&its->it_value, &uits->it_value))
 		return -EFAULT;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(get_compat_itimerspec64);
+EXPORT_SYMBOL_GPL(get_old_itimerspec32);
 
-int put_compat_itimerspec64(const struct itimerspec64 *its,
-			struct compat_itimerspec __user *uits)
+int put_old_itimerspec32(const struct itimerspec64 *its,
+			struct old_itimerspec32 __user *uits)
 {
-	if (__compat_put_timespec64(&its->it_interval, &uits->it_interval) ||
-	    __compat_put_timespec64(&its->it_value, &uits->it_value))
+	if (__put_old_timespec32(&its->it_interval, &uits->it_interval) ||
+	    __put_old_timespec32(&its->it_value, &uits->it_value))
 		return -EFAULT;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(put_compat_itimerspec64);
+EXPORT_SYMBOL_GPL(put_old_itimerspec32);
diff --git a/net/compat.c b/net/compat.c
index 3b2105f6549d..2ef160876bc1 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -812,7 +812,7 @@ COMPAT_SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, buf, compat_size_t, len
 
 static int __compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
 				 unsigned int vlen, unsigned int flags,
-				 struct compat_timespec __user *timeout)
+				 struct old_timespec32 __user *timeout)
 {
 	int datagrams;
 	struct timespec ktspec;
@@ -834,7 +834,7 @@ static int __compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
 
 COMPAT_SYSCALL_DEFINE5(recvmmsg, int, fd, struct compat_mmsghdr __user *, mmsg,
 		       unsigned int, vlen, unsigned int, flags,
-		       struct compat_timespec __user *, timeout)
+		       struct old_timespec32 __user *, timeout)
 {
 	return __compat_sys_recvmmsg(fd, mmsg, vlen, flags, timeout);
 }
-- 
cgit v1.2.3


From eeaceb8b7d1fb64b6030249ca0dd1d902ef3069e Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Thu, 16 Aug 2018 20:54:49 +0800
Subject: spi: Introduce one new field to set word delay

For some SPI controllers, after each word size (specified by bits_per_word)
transimission, the hardware need some delay to make sure the slave has enough
time to receive the whole data.

So introducing one new 'word_delay' field of struct spi_tansfer for slave
devices to set this inter word delay time.

Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index a64235e05321..d698f9db3484 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -711,6 +711,8 @@ extern void spi_res_release(struct spi_controller *ctlr,
  * @delay_usecs: microseconds to delay after this transfer before
  *	(optionally) changing the chipselect status, then starting
  *	the next transfer or completing this @spi_message.
+ * @word_delay: clock cycles to inter word delay after each word size
+ *	(set by bits_per_word) transmission.
  * @transfer_list: transfers are sequenced through @spi_message.transfers
  * @tx_sg: Scatterlist for transmit, currently not for client use
  * @rx_sg: Scatterlist for receive, currently not for client use
@@ -793,6 +795,7 @@ struct spi_transfer {
 	u8		bits_per_word;
 	u16		delay_usecs;
 	u32		speed_hz;
+	u16		word_delay;
 
 	struct list_head transfer_list;
 };
-- 
cgit v1.2.3


From fb3739759474d150a9927b920a80ea2afb4c2a51 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 18 Apr 2018 22:01:26 +0200
Subject: asm-generic: Move common compat types to asm-generic/compat.h

While converting compat system call handlers to work on 32-bit
architectures, I found a number of types used in those handlers
that are identical between all architectures.

Let's move all the identical ones into asm-generic/compat.h to avoid
having to add even more identical definitions of those types.

For unknown reasons, mips defines __compat_gid32_t, __compat_uid32_t
and compat_caddr_t as signed, while all others have them unsigned.
This seems to be a mistake, but I'm leaving it alone here. The other
types all differ by size or alignment on at least on architecture.

compat_aio_context_t is currently defined in linux/compat.h but
also needed for compat_sys_io_getevents(), so let's move it into
the same place.

While we still have not decided whether the 32-bit time handling
will always use the compat syscalls, or in which form, I think this
is a useful cleanup that we can merge regardless.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm64/include/asm/compat.h   | 20 ++------------------
 arch/mips/include/asm/compat.h    | 22 ++--------------------
 arch/parisc/include/asm/compat.h  | 18 ++----------------
 arch/powerpc/include/asm/compat.h | 18 ++----------------
 arch/s390/include/asm/compat.h    | 18 ++----------------
 arch/sparc/include/asm/compat.h   | 19 ++-----------------
 arch/x86/include/asm/compat.h     | 19 ++-----------------
 include/asm-generic/compat.h      | 24 +++++++++++++++++++++++-
 include/linux/compat.h            |  2 --
 9 files changed, 37 insertions(+), 123 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index 4a89007db14a..ee689d8a026d 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -25,6 +25,8 @@
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
 
+#include <asm-generic/compat.h>
+
 #define COMPAT_USER_HZ		100
 #ifdef __AARCH64EB__
 #define COMPAT_UTS_MACHINE	"armv8b\0\0"
@@ -32,10 +34,6 @@
 #define COMPAT_UTS_MACHINE	"armv8l\0\0"
 #endif
 
-typedef u32		compat_size_t;
-typedef s32		compat_ssize_t;
-typedef s32		compat_clock_t;
-typedef s32		compat_pid_t;
 typedef u16		__compat_uid_t;
 typedef u16		__compat_gid_t;
 typedef u16		__compat_uid16_t;
@@ -43,27 +41,13 @@ typedef u16		__compat_gid16_t;
 typedef u32		__compat_uid32_t;
 typedef u32		__compat_gid32_t;
 typedef u16		compat_mode_t;
-typedef u32		compat_ino_t;
 typedef u32		compat_dev_t;
-typedef s32		compat_off_t;
-typedef s64		compat_loff_t;
 typedef s32		compat_nlink_t;
 typedef u16		compat_ipc_pid_t;
-typedef s32		compat_daddr_t;
 typedef u32		compat_caddr_t;
 typedef __kernel_fsid_t	compat_fsid_t;
-typedef s32		compat_key_t;
-typedef s32		compat_timer_t;
-
-typedef s16		compat_short_t;
-typedef s32		compat_int_t;
-typedef s32		compat_long_t;
 typedef s64		compat_s64;
-typedef u16		compat_ushort_t;
-typedef u32		compat_uint_t;
-typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
-typedef u32		compat_uptr_t;
 
 struct compat_stat {
 #ifdef __AARCH64EB__
diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
index 7dcbd855814e..c99166eadbde 100644
--- a/arch/mips/include/asm/compat.h
+++ b/arch/mips/include/asm/compat.h
@@ -9,43 +9,25 @@
 #include <asm/page.h>
 #include <asm/ptrace.h>
 
+#include <asm-generic/compat.h>
+
 #define COMPAT_USER_HZ		100
 #define COMPAT_UTS_MACHINE	"mips\0\0\0"
 
-typedef u32		compat_size_t;
-typedef s32		compat_ssize_t;
-typedef s32		compat_clock_t;
-typedef s32		compat_suseconds_t;
-
-typedef s32		compat_pid_t;
 typedef s32		__compat_uid_t;
 typedef s32		__compat_gid_t;
 typedef __compat_uid_t	__compat_uid32_t;
 typedef __compat_gid_t	__compat_gid32_t;
 typedef u32		compat_mode_t;
-typedef u32		compat_ino_t;
 typedef u32		compat_dev_t;
-typedef s32		compat_off_t;
-typedef s64		compat_loff_t;
 typedef u32		compat_nlink_t;
 typedef s32		compat_ipc_pid_t;
-typedef s32		compat_daddr_t;
 typedef s32		compat_caddr_t;
 typedef struct {
 	s32	val[2];
 } compat_fsid_t;
-typedef s32		compat_timer_t;
-typedef s32		compat_key_t;
-
-typedef s16		compat_short_t;
-typedef s32		compat_int_t;
-typedef s32		compat_long_t;
 typedef s64		compat_s64;
-typedef u16		compat_ushort_t;
-typedef u32		compat_uint_t;
-typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
-typedef u32		compat_uptr_t;
 
 struct compat_stat {
 	compat_dev_t	st_dev;
diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
index 32dadaf2b534..e03e3c849f40 100644
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h
@@ -8,36 +8,22 @@
 #include <linux/sched.h>
 #include <linux/thread_info.h>
 
+#include <asm-generic/compat.h>
+
 #define COMPAT_USER_HZ 		100
 #define COMPAT_UTS_MACHINE	"parisc\0\0"
 
-typedef u32	compat_size_t;
-typedef s32	compat_ssize_t;
-typedef s32	compat_clock_t;
-typedef s32	compat_pid_t;
 typedef u32	__compat_uid_t;
 typedef u32	__compat_gid_t;
 typedef u32	__compat_uid32_t;
 typedef u32	__compat_gid32_t;
 typedef u16	compat_mode_t;
-typedef u32	compat_ino_t;
 typedef u32	compat_dev_t;
-typedef s32	compat_off_t;
-typedef s64	compat_loff_t;
 typedef u16	compat_nlink_t;
 typedef u16	compat_ipc_pid_t;
-typedef s32	compat_daddr_t;
 typedef u32	compat_caddr_t;
-typedef s32	compat_key_t;
-typedef s32	compat_timer_t;
-
-typedef s32	compat_int_t;
-typedef s32	compat_long_t;
 typedef s64	compat_s64;
-typedef u32	compat_uint_t;
-typedef u32	compat_ulong_t;
 typedef u64	compat_u64;
-typedef u32	compat_uptr_t;
 
 struct compat_stat {
 	compat_dev_t		st_dev;	/* dev_t is 32 bits on parisc */
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 93f79d1a03c3..74d0db511099 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -8,6 +8,8 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 
+#include <asm-generic/compat.h>
+
 #define COMPAT_USER_HZ		100
 #ifdef __BIG_ENDIAN__
 #define COMPAT_UTS_MACHINE	"ppc\0\0"
@@ -15,34 +17,18 @@
 #define COMPAT_UTS_MACHINE	"ppcle\0\0"
 #endif
 
-typedef u32		compat_size_t;
-typedef s32		compat_ssize_t;
-typedef s32		compat_clock_t;
-typedef s32		compat_pid_t;
 typedef u32		__compat_uid_t;
 typedef u32		__compat_gid_t;
 typedef u32		__compat_uid32_t;
 typedef u32		__compat_gid32_t;
 typedef u32		compat_mode_t;
-typedef u32		compat_ino_t;
 typedef u32		compat_dev_t;
-typedef s32		compat_off_t;
-typedef s64		compat_loff_t;
 typedef s16		compat_nlink_t;
 typedef u16		compat_ipc_pid_t;
-typedef s32		compat_daddr_t;
 typedef u32		compat_caddr_t;
 typedef __kernel_fsid_t	compat_fsid_t;
-typedef s32		compat_key_t;
-typedef s32		compat_timer_t;
-
-typedef s32		compat_int_t;
-typedef s32		compat_long_t;
 typedef s64		compat_s64;
-typedef u32		compat_uint_t;
-typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
-typedef u32		compat_uptr_t;
 
 struct compat_stat {
 	compat_dev_t	st_dev;
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 97db2fba546a..63b46e30b2c3 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -9,6 +9,8 @@
 #include <linux/sched/task_stack.h>
 #include <linux/thread_info.h>
 
+#include <asm-generic/compat.h>
+
 #define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p( \
 				typeof(0?(__force t)0:0ULL), u64))
 
@@ -51,34 +53,18 @@
 #define COMPAT_USER_HZ		100
 #define COMPAT_UTS_MACHINE	"s390\0\0\0\0"
 
-typedef u32		compat_size_t;
-typedef s32		compat_ssize_t;
-typedef s32		compat_clock_t;
-typedef s32		compat_pid_t;
 typedef u16		__compat_uid_t;
 typedef u16		__compat_gid_t;
 typedef u32		__compat_uid32_t;
 typedef u32		__compat_gid32_t;
 typedef u16		compat_mode_t;
-typedef u32		compat_ino_t;
 typedef u16		compat_dev_t;
-typedef s32		compat_off_t;
-typedef s64		compat_loff_t;
 typedef u16		compat_nlink_t;
 typedef u16		compat_ipc_pid_t;
-typedef s32		compat_daddr_t;
 typedef u32		compat_caddr_t;
 typedef __kernel_fsid_t	compat_fsid_t;
-typedef s32		compat_key_t;
-typedef s32		compat_timer_t;
-
-typedef s32		compat_int_t;
-typedef s32		compat_long_t;
 typedef s64		compat_s64;
-typedef u32		compat_uint_t;
-typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
-typedef u32		compat_uptr_t;
 
 typedef struct {
 	u32 mask;
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index 7018cb60beef..30b1763580b1 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -6,38 +6,23 @@
  */
 #include <linux/types.h>
 
+#include <asm-generic/compat.h>
+
 #define COMPAT_USER_HZ		100
 #define COMPAT_UTS_MACHINE	"sparc\0\0"
 
-typedef u32		compat_size_t;
-typedef s32		compat_ssize_t;
-typedef s32		compat_clock_t;
-typedef s32		compat_pid_t;
 typedef u16		__compat_uid_t;
 typedef u16		__compat_gid_t;
 typedef u32		__compat_uid32_t;
 typedef u32		__compat_gid32_t;
 typedef u16		compat_mode_t;
-typedef u32		compat_ino_t;
 typedef u16		compat_dev_t;
-typedef s32		compat_off_t;
-typedef s64		compat_loff_t;
 typedef s16		compat_nlink_t;
 typedef u16		compat_ipc_pid_t;
-typedef s32		compat_daddr_t;
 typedef u32		compat_caddr_t;
 typedef __kernel_fsid_t	compat_fsid_t;
-typedef s32		compat_key_t;
-typedef s32		compat_timer_t;
-
-typedef s32		compat_int_t;
-typedef s32		compat_long_t;
 typedef s64		compat_s64;
-typedef u32		compat_uint_t;
-typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
-typedef u32		compat_uptr_t;
-
 struct compat_stat {
 	compat_dev_t	st_dev;
 	compat_ino_t	st_ino;
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index fb97cf7c4137..0ce6f452d334 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -12,38 +12,23 @@
 #include <asm/user32.h>
 #include <asm/unistd.h>
 
+#include <asm-generic/compat.h>
+
 #define COMPAT_USER_HZ		100
 #define COMPAT_UTS_MACHINE	"i686\0\0"
 
-typedef u32		compat_size_t;
-typedef s32		compat_ssize_t;
-typedef s32		compat_clock_t;
-typedef s32		compat_pid_t;
 typedef u16		__compat_uid_t;
 typedef u16		__compat_gid_t;
 typedef u32		__compat_uid32_t;
 typedef u32		__compat_gid32_t;
 typedef u16		compat_mode_t;
-typedef u32		compat_ino_t;
 typedef u16		compat_dev_t;
-typedef s32		compat_off_t;
-typedef s64		compat_loff_t;
 typedef u16		compat_nlink_t;
 typedef u16		compat_ipc_pid_t;
-typedef s32		compat_daddr_t;
 typedef u32		compat_caddr_t;
 typedef __kernel_fsid_t	compat_fsid_t;
-typedef s32		compat_timer_t;
-typedef s32		compat_key_t;
-
-typedef s32		compat_int_t;
-typedef s32		compat_long_t;
 typedef s64 __attribute__((aligned(4))) compat_s64;
-typedef u32		compat_uint_t;
-typedef u32		compat_ulong_t;
-typedef u32		compat_u32;
 typedef u64 __attribute__((aligned(4))) compat_u64;
-typedef u32		compat_uptr_t;
 
 struct compat_stat {
 	compat_dev_t	st_dev;
diff --git a/include/asm-generic/compat.h b/include/asm-generic/compat.h
index 28819451b6d1..a86f65bffab8 100644
--- a/include/asm-generic/compat.h
+++ b/include/asm-generic/compat.h
@@ -1,3 +1,25 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_GENERIC_COMPAT_H
+#define __ASM_GENERIC_COMPAT_H
 
-/* This is an empty stub for 32-bit-only architectures */
+/* These types are common across all compat ABIs */
+typedef u32 compat_size_t;
+typedef s32 compat_ssize_t;
+typedef s32 compat_clock_t;
+typedef s32 compat_pid_t;
+typedef u32 compat_ino_t;
+typedef s32 compat_off_t;
+typedef s64 compat_loff_t;
+typedef s32 compat_daddr_t;
+typedef s32 compat_timer_t;
+typedef s32 compat_key_t;
+typedef s16 compat_short_t;
+typedef s32 compat_int_t;
+typedef s32 compat_long_t;
+typedef u16 compat_ushort_t;
+typedef u32 compat_uint_t;
+typedef u32 compat_ulong_t;
+typedef u32 compat_uptr_t;
+typedef u32 compat_aio_context_t;
+
+#endif
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 0e058792ecf6..d376fa8be00d 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -110,8 +110,6 @@ typedef struct compat_sigaltstack {
 typedef __compat_uid32_t	compat_uid_t;
 typedef __compat_gid32_t	compat_gid_t;
 
-typedef	compat_ulong_t		compat_aio_context_t;
-
 struct compat_sel_arg_struct;
 struct rusage;
 
-- 
cgit v1.2.3


From a4f7a3004630f1a0fb130ab1824942a49ce33140 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 17 Apr 2018 09:11:58 +0200
Subject: y2038: Change sys_utimensat() to use __kernel_timespec

When 32-bit architectures get changed to support 64-bit time_t,
utimensat() needs to use the new __kernel_timespec structure as its
argument.

The older utime(), utimes() and futimesat() system calls don't need a
corresponding change as they are no longer used on C libraries that have
64-bit time support.

As we do for the other syscalls that have timespec arguments, we reuse
the 'compat' syscall entry points to implement the traditional four
interfaces, and only leave the new utimensat() as a native handler,
so that the same code gets used on both 32-bit and 64-bit kernels
on each syscall.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 fs/utimes.c              | 2 +-
 include/linux/syscalls.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/utimes.c b/fs/utimes.c
index 79a65c163f40..d30f409ecc1a 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -166,7 +166,7 @@ out:
 }
 
 SYSCALL_DEFINE4(utimensat, int, dfd, const char __user *, filename,
-		struct timespec __user *, utimes, int, flags)
+		struct __kernel_timespec __user *, utimes, int, flags)
 {
 	struct timespec64 tstimes[2];
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b3e27e5ee322..420ac881a610 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -513,7 +513,8 @@ asmlinkage long sys_timerfd_gettime(int ufd, struct __kernel_itimerspec __user *
 
 /* fs/utimes.c */
 asmlinkage long sys_utimensat(int dfd, const char __user *filename,
-				struct timespec __user *utimes, int flags);
+				struct __kernel_timespec __user *utimes,
+				int flags);
 
 /* kernel/acct.c */
 asmlinkage long sys_acct(const char __user *name);
-- 
cgit v1.2.3


From 185cfaf7641e14af85635bb2750da302e32b04e3 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 17 Apr 2018 09:11:58 +0200
Subject: y2038: Compile utimes()/futimesat() conditionally

There are four generations of utimes() syscalls: utime(), utimes(),
futimesat() and utimensat(), each one being a superset of the previous
one. For y2038 support, we have to add another one, which is the same
as the existing utimensat() but always passes 64-bit times_t based
timespec values.

There are currently 10 architectures that only use utimensat(), two
that use utimes(), futimesat() and utimensat() but not utime(), and 11
architectures that have all four, and those define __ARCH_WANT_SYS_UTIME
in order to get a sys_utime implementation. Since all the new
architectures only want utimensat(), moving all the legacy entry points
into a common __ARCH_WANT_SYS_UTIME guard simplifies the logic. Only alpha
and ia64 grow a tiny bit as they now also get an unused sys_utime(),
but it didn't seem worth the extra complexity of adding yet another
ifdef for those.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/alpha/include/asm/unistd.h |  1 +
 arch/arm/include/asm/unistd.h   |  2 +-
 arch/ia64/include/asm/unistd.h  |  1 +
 fs/utimes.c                     | 51 ++++++++++++++++++-----------------------
 include/linux/syscalls.h        | 10 ++++----
 5 files changed, 31 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h
index edc090470023..9ff37aa1165f 100644
--- a/arch/alpha/include/asm/unistd.h
+++ b/arch/alpha/include/asm/unistd.h
@@ -14,6 +14,7 @@
 #define __ARCH_WANT_SYS_GETPGRP
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_SIGPENDING
+#define __ARCH_WANT_SYS_UTIME
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 #define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 8f1159c26f20..88ef2ce1f69a 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -26,13 +26,13 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_OLD_MMAP
 #define __ARCH_WANT_SYS_OLD_SELECT
+#define __ARCH_WANT_SYS_UTIME
 
 #if !defined(CONFIG_AEABI) || defined(CONFIG_OABI_COMPAT)
 #define __ARCH_WANT_SYS_TIME
 #define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_OLDUMOUNT
 #define __ARCH_WANT_SYS_ALARM
-#define __ARCH_WANT_SYS_UTIME
 #define __ARCH_WANT_SYS_OLD_GETRLIMIT
 #define __ARCH_WANT_OLD_READDIR
 #define __ARCH_WANT_SYS_SOCKETCALL
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index c5b2620c4a4c..49e34db2529c 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -29,6 +29,7 @@
 #define __IGNORE_umount2	/* umount() */
 
 #define __ARCH_WANT_NEW_STAT
+#define __ARCH_WANT_SYS_UTIME
 
 #if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
 
diff --git a/fs/utimes.c b/fs/utimes.c
index d30f409ecc1a..2f6f08061a26 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -8,35 +8,6 @@
 #include <linux/compat.h>
 #include <asm/unistd.h>
 
-#ifdef __ARCH_WANT_SYS_UTIME
-
-/*
- * sys_utime() can be implemented in user-level using sys_utimes().
- * Is this for backwards compatibility?  If so, why not move it
- * into the appropriate arch directory (for those architectures that
- * need it).
- */
-
-/* If times==NULL, set access and modification to current time,
- * must be owner or have write permission.
- * Else, update from *times, must be owner or super user.
- */
-SYSCALL_DEFINE2(utime, char __user *, filename, struct utimbuf __user *, times)
-{
-	struct timespec64 tv[2];
-
-	if (times) {
-		if (get_user(tv[0].tv_sec, &times->actime) ||
-		    get_user(tv[1].tv_sec, &times->modtime))
-			return -EFAULT;
-		tv[0].tv_nsec = 0;
-		tv[1].tv_nsec = 0;
-	}
-	return do_utimes(AT_FDCWD, filename, times ? tv : NULL, 0);
-}
-
-#endif
-
 static bool nsec_valid(long nsec)
 {
 	if (nsec == UTIME_OMIT || nsec == UTIME_NOW)
@@ -184,6 +155,13 @@ SYSCALL_DEFINE4(utimensat, int, dfd, const char __user *, filename,
 	return do_utimes(dfd, filename, utimes ? tstimes : NULL, flags);
 }
 
+#ifdef __ARCH_WANT_SYS_UTIME
+/*
+ * futimesat(), utimes() and utime() are older versions of utimensat()
+ * that are provided for compatibility with traditional C libraries.
+ * On modern architectures, we always use libc wrappers around
+ * utimensat() instead.
+ */
 static long do_futimesat(int dfd, const char __user *filename,
 			 struct timeval __user *utimes)
 {
@@ -225,6 +203,21 @@ SYSCALL_DEFINE2(utimes, char __user *, filename,
 	return do_futimesat(AT_FDCWD, filename, utimes);
 }
 
+SYSCALL_DEFINE2(utime, char __user *, filename, struct utimbuf __user *, times)
+{
+	struct timespec64 tv[2];
+
+	if (times) {
+		if (get_user(tv[0].tv_sec, &times->actime) ||
+		    get_user(tv[1].tv_sec, &times->modtime))
+			return -EFAULT;
+		tv[0].tv_nsec = 0;
+		tv[1].tv_nsec = 0;
+	}
+	return do_utimes(AT_FDCWD, filename, times ? tv : NULL, 0);
+}
+#endif
+
 #ifdef CONFIG_COMPAT
 /*
  * Not all architectures have sys_utime, so implement this in terms
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 420ac881a610..95e795fb0593 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -955,8 +955,6 @@ asmlinkage long sys_access(const char __user *filename, int mode);
 asmlinkage long sys_rename(const char __user *oldname,
 				const char __user *newname);
 asmlinkage long sys_symlink(const char __user *old, const char __user *new);
-asmlinkage long sys_utimes(char __user *filename,
-				struct timeval __user *utimes);
 #if defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_COMPAT_STAT64)
 asmlinkage long sys_stat64(const char __user *filename,
 				struct stat64 __user *statbuf);
@@ -986,14 +984,18 @@ asmlinkage long sys_alarm(unsigned int seconds);
 asmlinkage long sys_getpgrp(void);
 asmlinkage long sys_pause(void);
 asmlinkage long sys_time(time_t __user *tloc);
+#ifdef __ARCH_WANT_SYS_UTIME
 asmlinkage long sys_utime(char __user *filename,
 				struct utimbuf __user *times);
+asmlinkage long sys_utimes(char __user *filename,
+				struct timeval __user *utimes);
+asmlinkage long sys_futimesat(int dfd, const char __user *filename,
+			      struct timeval __user *utimes);
+#endif
 asmlinkage long sys_creat(const char __user *pathname, umode_t mode);
 asmlinkage long sys_getdents(unsigned int fd,
 				struct linux_dirent __user *dirent,
 				unsigned int count);
-asmlinkage long sys_futimesat(int dfd, const char __user *filename,
-			      struct timeval __user *utimes);
 asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
 			fd_set __user *exp, struct timeval __user *tvp);
 asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
-- 
cgit v1.2.3


From 4faea239e529d1d6b3b93fbf08d5e90427961a41 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 17 Apr 2018 12:03:19 +0200
Subject: y2038: utimes: Rework #ifdef guards for compat syscalls

After changing over to 64-bit time_t syscalls, many architectures will
want compat_sys_utimensat() but not respective handlers for utime(),
utimes() and futimesat(). This adds a new __ARCH_WANT_SYS_UTIME32 to
complement __ARCH_WANT_SYS_UTIME. For now, all 64-bit architectures that
support CONFIG_COMPAT set it, but future 64-bit architectures will not
(tile would not have needed it either, but got removed).

As older 32-bit architectures get converted to using CONFIG_64BIT_TIME,
they will have to use __ARCH_WANT_SYS_UTIME32 instead of
__ARCH_WANT_SYS_UTIME. Architectures using the generic syscall ABI don't
need either of them as they never had a utime syscall.

Since the compat_utimbuf structure is now required outside of
CONFIG_COMPAT, I'm moving it into compat_time.h.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
changed from last version:
- renamed __ARCH_WANT_COMPAT_SYS_UTIME to __ARCH_WANT_SYS_UTIME32
---
 arch/arm64/include/asm/unistd.h   | 1 +
 arch/mips/include/asm/unistd.h    | 1 +
 arch/parisc/include/asm/unistd.h  | 1 +
 arch/powerpc/include/asm/unistd.h | 1 +
 arch/s390/include/asm/unistd.h    | 1 +
 arch/sparc/include/asm/unistd.h   | 1 +
 arch/x86/include/asm/unistd.h     | 1 +
 fs/utimes.c                       | 8 ++++++--
 include/linux/compat.h            | 7 +------
 include/linux/time32.h            | 5 +++++
 10 files changed, 19 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 2af308f13463..b13ca091f833 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -22,6 +22,7 @@
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
+#define __ARCH_WANT_SYS_UTIME32
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
 
diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h
index c2174b80e50e..c68b8ae3efcb 100644
--- a/arch/mips/include/asm/unistd.h
+++ b/arch/mips/include/asm/unistd.h
@@ -31,6 +31,7 @@
 #define __ARCH_WANT_SYS_IPC
 #define __ARCH_WANT_SYS_PAUSE
 #define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_UTIME32
 #define __ARCH_WANT_SYS_WAITPID
 #define __ARCH_WANT_SYS_SOCKETCALL
 #define __ARCH_WANT_SYS_GETPGRP
diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h
index a0c38374fff0..bc37a4953eaa 100644
--- a/arch/parisc/include/asm/unistd.h
+++ b/arch/parisc/include/asm/unistd.h
@@ -152,6 +152,7 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5)	\
 #define __ARCH_WANT_COMPAT_SYS_TIME
 #define __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL
 #define __ARCH_WANT_SYS_UTIME
+#define __ARCH_WANT_SYS_UTIME32
 #define __ARCH_WANT_SYS_WAITPID
 #define __ARCH_WANT_SYS_SOCKETCALL
 #define __ARCH_WANT_SYS_FADVISE64
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 9326c9133516..b0de85b477e1 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -47,6 +47,7 @@
 #endif
 #ifdef CONFIG_PPC64
 #define __ARCH_WANT_COMPAT_SYS_TIME
+#define __ARCH_WANT_SYS_UTIME32
 #define __ARCH_WANT_SYS_NEWFSTATAT
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
 #endif
diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index a2d583ea526d..a1fbf15d53aa 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -34,6 +34,7 @@
 #define __ARCH_WANT_SYS_SIGPROCMASK
 # ifdef CONFIG_COMPAT
 #   define __ARCH_WANT_COMPAT_SYS_TIME
+#   define __ARCH_WANT_SYS_UTIME32
 # endif
 #define __ARCH_WANT_SYS_FORK
 #define __ARCH_WANT_SYS_VFORK
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index f7514d3eae7b..00f87dbd0b17 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -42,6 +42,7 @@
 #define __ARCH_WANT_SYS_IPC
 #else
 #define __ARCH_WANT_COMPAT_SYS_TIME
+#define __ARCH_WANT_SYS_UTIME32
 #define __ARCH_WANT_COMPAT_SYS_SENDFILE
 #endif
 
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 153d2a5e6b02..dc4ed8bc2382 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -24,6 +24,7 @@
 #  include <asm/unistd_64.h>
 #  include <asm/unistd_64_x32.h>
 #  define __ARCH_WANT_COMPAT_SYS_TIME
+#  define __ARCH_WANT_SYS_UTIME32
 #  define __ARCH_WANT_COMPAT_SYS_PREADV64
 #  define __ARCH_WANT_COMPAT_SYS_PWRITEV64
 #  define __ARCH_WANT_COMPAT_SYS_PREADV64V2
diff --git a/fs/utimes.c b/fs/utimes.c
index 2f6f08061a26..bdcf2daf39c1 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -218,13 +218,14 @@ SYSCALL_DEFINE2(utime, char __user *, filename, struct utimbuf __user *, times)
 }
 #endif
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
 /*
  * Not all architectures have sys_utime, so implement this in terms
  * of sys_utimes.
  */
+#ifdef __ARCH_WANT_SYS_UTIME32
 COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename,
-		       struct compat_utimbuf __user *, t)
+		       struct old_utimbuf32 __user *, t)
 {
 	struct timespec64 tv[2];
 
@@ -237,6 +238,7 @@ COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename,
 	}
 	return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0);
 }
+#endif
 
 COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct old_timespec32 __user *, t, int, flags)
 {
@@ -253,6 +255,7 @@ COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filena
 	return do_utimes(dfd, filename, t ? tv : NULL, flags);
 }
 
+#ifdef __ARCH_WANT_SYS_UTIME32
 static long do_compat_futimesat(unsigned int dfd, const char __user *filename,
 				struct old_timeval32 __user *t)
 {
@@ -285,3 +288,4 @@ COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct old_timeval
 	return do_compat_futimesat(AT_FDCWD, filename, t);
 }
 #endif
+#endif
diff --git a/include/linux/compat.h b/include/linux/compat.h
index d376fa8be00d..6fb5abdb87be 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -113,11 +113,6 @@ typedef __compat_gid32_t	compat_gid_t;
 struct compat_sel_arg_struct;
 struct rusage;
 
-struct compat_utimbuf {
-	old_time32_t		actime;
-	old_time32_t		modtime;
-};
-
 struct compat_itimerval {
 	struct old_timeval32	it_interval;
 	struct old_timeval32	it_value;
@@ -942,7 +937,7 @@ asmlinkage long compat_sys_newlstat(const char __user *filename,
 /* __ARCH_WANT_SYSCALL_DEPRECATED */
 asmlinkage long compat_sys_time(old_time32_t __user *tloc);
 asmlinkage long compat_sys_utime(const char __user *filename,
-				 struct compat_utimbuf __user *t);
+				 struct old_utimbuf32 __user *t);
 asmlinkage long compat_sys_futimesat(unsigned int dfd,
 				     const char __user *filename,
 				     struct old_timeval32 __user *t);
diff --git a/include/linux/time32.h b/include/linux/time32.h
index 0e0d7304d1a8..61904a6c098f 100644
--- a/include/linux/time32.h
+++ b/include/linux/time32.h
@@ -30,6 +30,11 @@ struct old_itimerspec32 {
 	struct old_timespec32 it_value;
 };
 
+struct old_utimbuf32 {
+	old_time32_t	actime;
+	old_time32_t	modtime;
+};
+
 extern int get_old_timespec32(struct timespec64 *, const void __user *);
 extern int put_old_timespec32(const struct timespec64 *, void __user *);
 extern int get_old_itimerspec32(struct itimerspec64 *its,
-- 
cgit v1.2.3


From 474b9c777b20b8340a6ee0f7ba6ebbd6a4bf47e2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 17 Apr 2018 21:59:47 +0200
Subject: y2038: sched: Change sched_rr_get_interval to use __kernel_timespec

This is a preparation patch for converting sys_sched_rr_get_interval to
work with 64-bit time_t on 32-bit architectures. The 'interval' argument
is changed to struct __kernel_timespec, which will be redefined using
64-bit time_t in the future. The compat version of the system call in
turn is enabled for compilation with CONFIG_COMPAT_32BIT_TIME so
the individual 32-bit architectures can share the handling of the
traditional argument with 64-bit architectures providing it for their
compat mode.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/syscalls.h | 2 +-
 kernel/sched/core.c      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 95e795fb0593..5642016a312d 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -614,7 +614,7 @@ asmlinkage long sys_sched_yield(void);
 asmlinkage long sys_sched_get_priority_max(int policy);
 asmlinkage long sys_sched_get_priority_min(int policy);
 asmlinkage long sys_sched_rr_get_interval(pid_t pid,
-					struct timespec __user *interval);
+				struct __kernel_timespec __user *interval);
 
 /* kernel/signal.c */
 asmlinkage long sys_restart_syscall(void);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8287b75ed961..39af2bec2b39 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5243,7 +5243,7 @@ out_unlock:
  * an error code.
  */
 SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
-		struct timespec __user *, interval)
+		struct __kernel_timespec __user *, interval)
 {
 	struct timespec64 t;
 	int retval = sched_rr_get_interval(pid, &t);
@@ -5254,7 +5254,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 	return retval;
 }
 
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
 COMPAT_SYSCALL_DEFINE2(sched_rr_get_interval,
 		       compat_pid_t, pid,
 		       struct old_timespec32 __user *, interval)
-- 
cgit v1.2.3


From c2e6c8567acdba8db1055b242c34ceb123c6a253 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 18 Apr 2018 13:42:25 +0200
Subject: y2038: socket: Change recvmmsg to use __kernel_timespec

This converts the recvmmsg() system call in all its variations to use
'timespec64' internally for its timeout, and have a __kernel_timespec64
argument in the native entry point. This lets us change the type to use
64-bit time_t at a later point while using the 32-bit compat system call
emulation for existing user space.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/socket.h   |  4 ++--
 include/linux/syscalls.h |  2 +-
 net/compat.c             |  6 +++---
 net/socket.c             | 18 ++++++++----------
 4 files changed, 14 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 7ed4713d5337..8b571e9b9f76 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -348,7 +348,7 @@ struct ucred {
 extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr);
 extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
 
-struct timespec;
+struct timespec64;
 
 /* The __sys_...msg variants allow MSG_CMSG_COMPAT iff
  * forbid_cmsg_compat==false
@@ -358,7 +358,7 @@ extern long __sys_recvmsg(int fd, struct user_msghdr __user *msg,
 extern long __sys_sendmsg(int fd, struct user_msghdr __user *msg,
 			  unsigned int flags, bool forbid_cmsg_compat);
 extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
-			  unsigned int flags, struct timespec *timeout);
+			  unsigned int flags, struct timespec64 *timeout);
 extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
 			  unsigned int vlen, unsigned int flags,
 			  bool forbid_cmsg_compat);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 5642016a312d..fb3a05fab8aa 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -830,7 +830,7 @@ asmlinkage long sys_perf_event_open(
 asmlinkage long sys_accept4(int, struct sockaddr __user *, int __user *, int);
 asmlinkage long sys_recvmmsg(int fd, struct mmsghdr __user *msg,
 			     unsigned int vlen, unsigned flags,
-			     struct timespec __user *timeout);
+			     struct __kernel_timespec __user *timeout);
 
 asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr,
 				int options, struct rusage __user *ru);
diff --git a/net/compat.c b/net/compat.c
index 2ef160876bc1..47a614b370cd 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -815,18 +815,18 @@ static int __compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
 				 struct old_timespec32 __user *timeout)
 {
 	int datagrams;
-	struct timespec ktspec;
+	struct timespec64 ktspec;
 
 	if (timeout == NULL)
 		return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
 				      flags | MSG_CMSG_COMPAT, NULL);
 
-	if (compat_get_timespec(&ktspec, timeout))
+	if (compat_get_timespec64(&ktspec, timeout))
 		return -EFAULT;
 
 	datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
 				   flags | MSG_CMSG_COMPAT, &ktspec);
-	if (datagrams > 0 && compat_put_timespec(&ktspec, timeout))
+	if (datagrams > 0 && compat_put_timespec64(&ktspec, timeout))
 		datagrams = -EFAULT;
 
 	return datagrams;
diff --git a/net/socket.c b/net/socket.c
index e6945e318f02..b9d71b503720 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2340,7 +2340,7 @@ SYSCALL_DEFINE3(recvmsg, int, fd, struct user_msghdr __user *, msg,
  */
 
 int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
-		   unsigned int flags, struct timespec *timeout)
+		   unsigned int flags, struct timespec64 *timeout)
 {
 	int fput_needed, err, datagrams;
 	struct socket *sock;
@@ -2405,8 +2405,7 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
 
 		if (timeout) {
 			ktime_get_ts64(&timeout64);
-			*timeout = timespec64_to_timespec(
-					timespec64_sub(end_time, timeout64));
+			*timeout = timespec64_sub(end_time, timeout64);
 			if (timeout->tv_sec < 0) {
 				timeout->tv_sec = timeout->tv_nsec = 0;
 				break;
@@ -2452,10 +2451,10 @@ out_put:
 
 static int do_sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
 			   unsigned int vlen, unsigned int flags,
-			   struct timespec __user *timeout)
+			   struct __kernel_timespec __user *timeout)
 {
 	int datagrams;
-	struct timespec timeout_sys;
+	struct timespec64 timeout_sys;
 
 	if (flags & MSG_CMSG_COMPAT)
 		return -EINVAL;
@@ -2463,13 +2462,12 @@ static int do_sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
 	if (!timeout)
 		return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL);
 
-	if (copy_from_user(&timeout_sys, timeout, sizeof(timeout_sys)))
+	if (get_timespec64(&timeout_sys, timeout))
 		return -EFAULT;
 
 	datagrams = __sys_recvmmsg(fd, mmsg, vlen, flags, &timeout_sys);
 
-	if (datagrams > 0 &&
-	    copy_to_user(timeout, &timeout_sys, sizeof(timeout_sys)))
+	if (datagrams > 0 && put_timespec64(&timeout_sys, timeout))
 		datagrams = -EFAULT;
 
 	return datagrams;
@@ -2477,7 +2475,7 @@ static int do_sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
 
 SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
 		unsigned int, vlen, unsigned int, flags,
-		struct timespec __user *, timeout)
+		struct __kernel_timespec __user *, timeout)
 {
 	return do_sys_recvmmsg(fd, mmsg, vlen, flags, timeout);
 }
@@ -2601,7 +2599,7 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
 		break;
 	case SYS_RECVMMSG:
 		err = do_sys_recvmmsg(a0, (struct mmsghdr __user *)a1, a[2],
-				      a[3], (struct timespec __user *)a[4]);
+				      a[3], (struct __kernel_timespec __user *)a[4]);
 		break;
 	case SYS_ACCEPT4:
 		err = __sys_accept4(a0, (struct sockaddr __user *)a1,
-- 
cgit v1.2.3


From 49c39f8464a9af702e9d45800c00a572753aeb06 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 18 Apr 2018 15:56:13 +0200
Subject: y2038: signal: Change rt_sigtimedwait to use __kernel_timespec

This changes sys_rt_sigtimedwait() to use get_timespec64(), changing
the timeout type to __kernel_timespec, which will be changed to use
a 64-bit time_t in the future. Since the do_sigtimedwait() core
function changes, we also have to modify the compat version of this
system call in the same way.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/linux/syscalls.h |  2 +-
 kernel/signal.c          | 17 +++++++++--------
 2 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index fb3a05fab8aa..2ac3d13a915b 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -635,7 +635,7 @@ asmlinkage long sys_rt_sigprocmask(int how, sigset_t __user *set,
 asmlinkage long sys_rt_sigpending(sigset_t __user *set, size_t sigsetsize);
 asmlinkage long sys_rt_sigtimedwait(const sigset_t __user *uthese,
 				siginfo_t __user *uinfo,
-				const struct timespec __user *uts,
+				const struct __kernel_timespec __user *uts,
 				size_t sigsetsize);
 asmlinkage long sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo);
 
diff --git a/kernel/signal.c b/kernel/signal.c
index a4db724e14c1..0831d56a731a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3082,7 +3082,7 @@ int copy_siginfo_from_user32(struct siginfo *to,
  *  @ts: upper bound on process time suspension
  */
 static int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
-		    const struct timespec *ts)
+		    const struct timespec64 *ts)
 {
 	ktime_t *to = NULL, timeout = KTIME_MAX;
 	struct task_struct *tsk = current;
@@ -3090,9 +3090,9 @@ static int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
 	int sig, ret = 0;
 
 	if (ts) {
-		if (!timespec_valid(ts))
+		if (!timespec64_valid(ts))
 			return -EINVAL;
-		timeout = timespec_to_ktime(*ts);
+		timeout = timespec64_to_ktime(*ts);
 		to = &timeout;
 	}
 
@@ -3140,11 +3140,12 @@ static int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
  *  @sigsetsize: size of sigset_t type
  */
 SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
-		siginfo_t __user *, uinfo, const struct timespec __user *, uts,
+		siginfo_t __user *, uinfo,
+		const struct __kernel_timespec __user *, uts,
 		size_t, sigsetsize)
 {
 	sigset_t these;
-	struct timespec ts;
+	struct timespec64 ts;
 	siginfo_t info;
 	int ret;
 
@@ -3156,7 +3157,7 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
 		return -EFAULT;
 
 	if (uts) {
-		if (copy_from_user(&ts, uts, sizeof(ts)))
+		if (get_timespec64(&ts, uts))
 			return -EFAULT;
 	}
 
@@ -3176,7 +3177,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese,
 		struct old_timespec32 __user *, uts, compat_size_t, sigsetsize)
 {
 	sigset_t s;
-	struct timespec t;
+	struct timespec64 t;
 	siginfo_t info;
 	long ret;
 
@@ -3187,7 +3188,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese,
 		return -EFAULT;
 
 	if (uts) {
-		if (compat_get_timespec(&t, uts))
+		if (get_old_timespec32(&t, uts))
 			return -EFAULT;
 	}
 
-- 
cgit v1.2.3


From 823f18f8b860526fc099c222619a126d57d2ad8c Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Wed, 29 Aug 2018 15:36:10 +0300
Subject: regulator: bd71837: Disable voltage monitoring for LDO3/4

There is a HW quirk in BD71837. The shutdown sequence timings for
bucks/LDOs which are enabled via register interface are changed.
At PMIC poweroff the voltage for BUCK6/7 is cut immediately at the
beginning of shut-down sequence. This causes LDO5/6 voltage
monitoring to detect under voltage and force PMIC to emergency
state instead of poweroff. Disable voltage monitoring for LDO5 and
LDO6 at probe to avoid this.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: stable@vger.kernel.org
---
 drivers/regulator/bd71837-regulator.c | 19 +++++++++++++++++++
 include/linux/mfd/rohm-bd718x7.h      | 33 ++++++++++++++++++++++++++++++---
 2 files changed, 49 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/bd71837-regulator.c b/drivers/regulator/bd71837-regulator.c
index 0f8ac8dec3e1..a1bd8aaf4d98 100644
--- a/drivers/regulator/bd71837-regulator.c
+++ b/drivers/regulator/bd71837-regulator.c
@@ -569,6 +569,25 @@ static int bd71837_probe(struct platform_device *pdev)
 			BD71837_REG_REGLOCK);
 	}
 
+	/*
+	 * There is a HW quirk in BD71837. The shutdown sequence timings for
+	 * bucks/LDOs which are controlled via register interface are changed.
+	 * At PMIC poweroff the voltage for BUCK6/7 is cut immediately at the
+	 * beginning of shut-down sequence. As bucks 6 and 7 are parent
+	 * supplies for LDO5 and LDO6 - this causes LDO5/6 voltage
+	 * monitoring to errorneously detect under voltage and force PMIC to
+	 * emergency state instead of poweroff. In order to avoid this we
+	 * disable voltage monitoring for LDO5 and LDO6
+	 */
+	err = regmap_update_bits(pmic->mfd->regmap, BD718XX_REG_MVRFLTMASK2,
+				 BD718XX_LDO5_VRMON80 | BD718XX_LDO6_VRMON80,
+				 BD718XX_LDO5_VRMON80 | BD718XX_LDO6_VRMON80);
+	if (err) {
+		dev_err(&pmic->pdev->dev,
+			"Failed to disable voltage monitoring\n");
+		goto err;
+	}
+
 	for (i = 0; i < ARRAY_SIZE(pmic_regulator_inits); i++) {
 
 		struct regulator_desc *desc;
diff --git a/include/linux/mfd/rohm-bd718x7.h b/include/linux/mfd/rohm-bd718x7.h
index a528747f8aed..e8338e5dc10b 100644
--- a/include/linux/mfd/rohm-bd718x7.h
+++ b/include/linux/mfd/rohm-bd718x7.h
@@ -78,9 +78,9 @@ enum {
 	BD71837_REG_TRANS_COND0        = 0x1F,
 	BD71837_REG_TRANS_COND1        = 0x20,
 	BD71837_REG_VRFAULTEN          = 0x21,
-	BD71837_REG_MVRFLTMASK0        = 0x22,
-	BD71837_REG_MVRFLTMASK1        = 0x23,
-	BD71837_REG_MVRFLTMASK2        = 0x24,
+	BD718XX_REG_MVRFLTMASK0        = 0x22,
+	BD718XX_REG_MVRFLTMASK1        = 0x23,
+	BD718XX_REG_MVRFLTMASK2        = 0x24,
 	BD71837_REG_RCVCFG             = 0x25,
 	BD71837_REG_RCVNUM             = 0x26,
 	BD71837_REG_PWRONCONFIG0       = 0x27,
@@ -159,6 +159,33 @@ enum {
 #define BUCK8_MASK		0x3F
 #define BUCK8_DEFAULT		0x1E
 
+/* BD718XX Voltage monitoring masks */
+#define BD718XX_BUCK1_VRMON80           0x1
+#define BD718XX_BUCK1_VRMON130          0x2
+#define BD718XX_BUCK2_VRMON80           0x4
+#define BD718XX_BUCK2_VRMON130          0x8
+#define BD718XX_1ST_NODVS_BUCK_VRMON80  0x1
+#define BD718XX_1ST_NODVS_BUCK_VRMON130 0x2
+#define BD718XX_2ND_NODVS_BUCK_VRMON80  0x4
+#define BD718XX_2ND_NODVS_BUCK_VRMON130 0x8
+#define BD718XX_3RD_NODVS_BUCK_VRMON80  0x10
+#define BD718XX_3RD_NODVS_BUCK_VRMON130 0x20
+#define BD718XX_4TH_NODVS_BUCK_VRMON80  0x40
+#define BD718XX_4TH_NODVS_BUCK_VRMON130 0x80
+#define BD718XX_LDO1_VRMON80            0x1
+#define BD718XX_LDO2_VRMON80            0x2
+#define BD718XX_LDO3_VRMON80            0x4
+#define BD718XX_LDO4_VRMON80            0x8
+#define BD718XX_LDO5_VRMON80            0x10
+#define BD718XX_LDO6_VRMON80            0x20
+
+/* BD71837 specific voltage monitoring masks */
+#define BD71837_BUCK3_VRMON80           0x10
+#define BD71837_BUCK3_VRMON130          0x20
+#define BD71837_BUCK4_VRMON80           0x40
+#define BD71837_BUCK4_VRMON130          0x80
+#define BD71837_LDO7_VRMON80            0x40
+
 /* BD71837_REG_IRQ bits */
 #define IRQ_SWRST		0x40
 #define IRQ_PWRON_S		0x20
-- 
cgit v1.2.3


From 1b27291b1ea4f1f2090fb07c3425db474cdb99ba Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Wed, 18 Jul 2018 14:32:31 -0700
Subject: rcutorture: Add forward-progress tests for RCU grace periods

This commit adds a kthread that loops going into and out of RCU
read-side critical sections, but also including a cond_resched(),
optionally guarded by a check of need_resched(), in that same loop.
This commit relies solely on rcu_torture_writer() progress to judge
the forward progress of grace periods.

Note that Tasks RCU and SRCU are exempted from forward-progress testing
due their (intentionally) less-robust forward-progress guarantees.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcutiny.h |  1 +
 kernel/rcu/rcutorture.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++-
 kernel/rcu/update.c     |  1 +
 3 files changed, 74 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 8d9a0ea8f0b5..a6353f3d6094 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -108,6 +108,7 @@ static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
  */
 static inline void rcu_virt_note_context_switch(int cpu) { }
 static inline void rcu_cpu_stall_reset(void) { }
+static inline int rcu_jiffies_till_stall_check(void) { return 21 * HZ; }
 static inline void rcu_idle_enter(void) { }
 static inline void rcu_idle_exit(void) { }
 static inline void rcu_irq_enter(void) { }
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 5df2411f7aee..fd3ce6cc8eea 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -89,6 +89,12 @@ torture_param(int, fqs_duration, 0,
 	      "Duration of fqs bursts (us), 0 to disable");
 torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)");
 torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)");
+torture_param(bool, fwd_progress, 1, "Test grace-period forward progress");
+torture_param(int, fwd_progress_div, 4, "Fraction of CPU stall to wait");
+torture_param(int, fwd_progress_holdoff, 60,
+	      "Time between forward-progress tests (s)");
+torture_param(bool, fwd_progress_need_resched, 1,
+	      "Hide cond_resched() behind need_resched()");
 torture_param(bool, gp_cond, false, "Use conditional/async GP wait primitives");
 torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
 torture_param(bool, gp_normal, false,
@@ -137,6 +143,7 @@ static struct task_struct **cbflood_task;
 static struct task_struct *fqs_task;
 static struct task_struct *boost_tasks[NR_CPUS];
 static struct task_struct *stall_task;
+static struct task_struct *fwd_prog_task;
 static struct task_struct **barrier_cbs_tasks;
 static struct task_struct *barrier_task;
 
@@ -291,6 +298,7 @@ struct rcu_torture_ops {
 	void (*cb_barrier)(void);
 	void (*fqs)(void);
 	void (*stats)(void);
+	int (*stall_dur)(void);
 	int irq_capable;
 	int can_boost;
 	int extendables;
@@ -429,6 +437,7 @@ static struct rcu_torture_ops rcu_ops = {
 	.cb_barrier	= rcu_barrier,
 	.fqs		= rcu_force_quiescent_state,
 	.stats		= NULL,
+	.stall_dur	= rcu_jiffies_till_stall_check,
 	.irq_capable	= 1,
 	.can_boost	= rcu_can_boost(),
 	.name		= "rcu"
@@ -1116,7 +1125,8 @@ rcu_torture_writer(void *arg)
 				break;
 			}
 		}
-		rcu_torture_current_version++;
+		WRITE_ONCE(rcu_torture_current_version,
+			   rcu_torture_current_version + 1);
 		/* Cycle through nesting levels of rcu_expedite_gp() calls. */
 		if (can_expedite &&
 		    !(torture_random(&rand) & 0xff & (!!expediting - 1))) {
@@ -1660,6 +1670,63 @@ static int __init rcu_torture_stall_init(void)
 	return torture_create_kthread(rcu_torture_stall, NULL, stall_task);
 }
 
+/* Carry out grace-period forward-progress testing. */
+static int rcu_torture_fwd_prog(void *args)
+{
+	unsigned long cvar;
+	int idx;
+	unsigned long stopat;
+	bool tested = false;
+
+	VERBOSE_TOROUT_STRING("rcu_torture_fwd_progress task started");
+	do {
+		schedule_timeout_interruptible(fwd_progress_holdoff * HZ);
+		cvar = READ_ONCE(rcu_torture_current_version);
+		stopat = jiffies + cur_ops->stall_dur() / fwd_progress_div;
+		while (time_before(jiffies, stopat) && !torture_must_stop()) {
+			idx = cur_ops->readlock();
+			udelay(10);
+			cur_ops->readunlock(idx);
+			if (!fwd_progress_need_resched || need_resched())
+				cond_resched();
+		}
+		if (!time_before(jiffies, stopat) && !torture_must_stop()) {
+			tested = true;
+			WARN_ON_ONCE(cvar ==
+				     READ_ONCE(rcu_torture_current_version));
+		}
+		/* Avoid slow periods, better to test when busy. */
+		stutter_wait("rcu_torture_fwd_prog");
+	} while (!torture_must_stop());
+	WARN_ON(!tested);
+	torture_kthread_stopping("rcu_torture_fwd_prog");
+	return 0;
+}
+
+/* If forward-progress checking is requested and feasible, spawn the thread. */
+static int __init rcu_torture_fwd_prog_init(void)
+{
+	if (!fwd_progress)
+		return 0; /* Not requested, so don't do it. */
+	if (!cur_ops->stall_dur || cur_ops->stall_dur() <= 0) {
+		VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, unsupported by RCU flavor under test");
+		return 0;
+	}
+	if (stall_cpu > 0) {
+		VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, conflicts with CPU-stall testing");
+		if (IS_MODULE(CONFIG_RCU_TORTURE_TESTS))
+			return -EINVAL; /* In module, can fail back to user. */
+		WARN_ON(1); /* Make sure rcutorture notices conflict. */
+		return 0;
+	}
+	if (fwd_progress_holdoff <= 0)
+		fwd_progress_holdoff = 1;
+	if (fwd_progress_div <= 0)
+		fwd_progress_div = 4;
+	return torture_create_kthread(rcu_torture_fwd_prog,
+				      NULL, fwd_prog_task);
+}
+
 /* Callback function for RCU barrier testing. */
 static void rcu_torture_barrier_cbf(struct rcu_head *rcu)
 {
@@ -1833,6 +1900,7 @@ rcu_torture_cleanup(void)
 	}
 
 	rcu_torture_barrier_cleanup();
+	torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_task);
 	torture_stop_kthread(rcu_torture_stall, stall_task);
 	torture_stop_kthread(rcu_torture_writer, writer_task);
 
@@ -2104,6 +2172,9 @@ rcu_torture_init(void)
 	if (firsterr)
 		goto unwind;
 	firsterr = rcu_torture_stall_init();
+	if (firsterr)
+		goto unwind;
+	firsterr = rcu_torture_fwd_prog_init();
 	if (firsterr)
 		goto unwind;
 	firsterr = rcu_torture_barrier_init();
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 39cb23d22109..a6b860422d18 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -472,6 +472,7 @@ int rcu_jiffies_till_stall_check(void)
 	}
 	return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
 }
+EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check);
 
 void rcu_sysrq_start(void)
 {
-- 
cgit v1.2.3


From 474e59b476b3390ef9f730515439f21640b61623 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 7 Aug 2018 14:34:44 -0700
Subject: rcutorture: Check GP completion at stutter end

The rcu_torture_writer() function invokes stutter_wait() at the end of
each writer pass, which occasionally blocks for an extended time period
in order to ensure that RCU can handle intermittent loads.  But part of
handling a busy period is invoking all the callbacks before the end of
the idle period induced by stutter_wait().

This commit therefore adds a return value to stutter_wait() indicating
whether stutter_wait() actually waited.  In addition, this commit causes
rcu_torture_writer() to test this value and if set, checks that all the
elements of the rcu_tortures[] array have been freed up.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/torture.h | 2 +-
 kernel/rcu/rcutorture.c | 5 ++++-
 kernel/torture.c        | 3 ++-
 3 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/torture.h b/include/linux/torture.h
index 61dfd93b6ee4..48fad21109fc 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -77,7 +77,7 @@ void torture_shutdown_absorb(const char *title);
 int torture_shutdown_init(int ssecs, void (*cleanup)(void));
 
 /* Task stuttering, which forces load/no-load transitions. */
-void stutter_wait(const char *title);
+bool stutter_wait(const char *title);
 int torture_stutter_init(int s);
 
 /* Initialization and cleanup. */
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 7df8142a6a22..ae10ad531993 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -1144,7 +1144,10 @@ rcu_torture_writer(void *arg)
 				       !rcu_gp_is_normal();
 		}
 		rcu_torture_writer_state = RTWS_STUTTER;
-		stutter_wait("rcu_torture_writer");
+		if (stutter_wait("rcu_torture_writer"))
+			for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++)
+				if (list_empty(&rcu_tortures[i].rtort_free))
+					WARN_ON_ONCE(1);
 	} while (!torture_must_stop());
 	/* Reset expediting back to unexpedited. */
 	if (expediting > 0)
diff --git a/kernel/torture.c b/kernel/torture.c
index 1ac24a826589..17d91f5fba2a 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -573,7 +573,7 @@ static int stutter;
  * Block until the stutter interval ends.  This must be called periodically
  * by all running kthreads that need to be subject to stuttering.
  */
-void stutter_wait(const char *title)
+bool stutter_wait(const char *title)
 {
 	int spt;
 
@@ -590,6 +590,7 @@ void stutter_wait(const char *title)
 		}
 		torture_shutdown_absorb(title);
 	}
+	return !!spt;
 }
 EXPORT_SYMBOL_GPL(stutter_wait);
 
-- 
cgit v1.2.3


From 6c5c9581044dd6e0cd284ab653502fb9264f08b6 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 28 Aug 2018 14:44:28 +0200
Subject: net: add napi_if_scheduled_mark_missed

The function napi_if_scheduled_mark_missed is used to check if the
NAPI context is scheduled, if so set NAPIF_STATE_MISSED and return
true. Used by the AF_XDP zero-copy i40e Tx code implementation in
order to make sure that irq affinity is honored by the napi context.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/netdevice.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ca5ab98053c8..4271f6b4e419 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -535,6 +535,32 @@ static inline void napi_synchronize(const struct napi_struct *n)
 		barrier();
 }
 
+/**
+ *	napi_if_scheduled_mark_missed - if napi is running, set the
+ *	NAPIF_STATE_MISSED
+ *	@n: NAPI context
+ *
+ * If napi is running, set the NAPIF_STATE_MISSED, and return true if
+ * NAPI is scheduled.
+ **/
+static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n)
+{
+	unsigned long val, new;
+
+	do {
+		val = READ_ONCE(n->state);
+		if (val & NAPIF_STATE_DISABLE)
+			return true;
+
+		if (!(val & NAPIF_STATE_SCHED))
+			return false;
+
+		new = val | NAPIF_STATE_MISSED;
+	} while (cmpxchg(&n->state, val, new) != val);
+
+	return true;
+}
+
 enum netdev_queue_state_t {
 	__QUEUE_STATE_DRV_XOFF,
 	__QUEUE_STATE_STACK_XOFF,
-- 
cgit v1.2.3


From 679c782de14bd48c19dd74cd1af20a2bc05dd936 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Wed, 22 Aug 2018 20:02:19 +0100
Subject: bpf/verifier: per-register parent pointers

By giving each register its own liveness chain, we elide the skip_callee()
 logic.  Instead, each register's parent is the state it inherits from;
 both check_func_call() and prepare_func_exit() automatically connect
 reg states to the correct chain since when they copy the reg state across
 (r1-r5 into the callee as args, and r0 out as the return value) they also
 copy the parent pointer.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |   8 +-
 kernel/bpf/verifier.c        | 184 +++++++++++--------------------------------
 2 files changed, 47 insertions(+), 145 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 38b04f559ad3..b42b60a83e19 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -41,6 +41,7 @@ enum bpf_reg_liveness {
 };
 
 struct bpf_reg_state {
+	/* Ordering of fields matters.  See states_equal() */
 	enum bpf_reg_type type;
 	union {
 		/* valid when type == PTR_TO_PACKET */
@@ -59,7 +60,6 @@ struct bpf_reg_state {
 	 * came from, when one is tested for != NULL.
 	 */
 	u32 id;
-	/* Ordering of fields matters.  See states_equal() */
 	/* For scalar types (SCALAR_VALUE), this represents our knowledge of
 	 * the actual value.
 	 * For pointer types, this represents the variable part of the offset
@@ -76,15 +76,15 @@ struct bpf_reg_state {
 	s64 smax_value; /* maximum possible (s64)value */
 	u64 umin_value; /* minimum possible (u64)value */
 	u64 umax_value; /* maximum possible (u64)value */
+	/* parentage chain for liveness checking */
+	struct bpf_reg_state *parent;
 	/* Inside the callee two registers can be both PTR_TO_STACK like
 	 * R1=fp-8 and R2=fp-8, but one of them points to this function stack
 	 * while another to the caller's stack. To differentiate them 'frameno'
 	 * is used which is an index in bpf_verifier_state->frame[] array
 	 * pointing to bpf_func_state.
-	 * This field must be second to last, for states_equal() reasons.
 	 */
 	u32 frameno;
-	/* This field must be last, for states_equal() reasons. */
 	enum bpf_reg_liveness live;
 };
 
@@ -107,7 +107,6 @@ struct bpf_stack_state {
  */
 struct bpf_func_state {
 	struct bpf_reg_state regs[MAX_BPF_REG];
-	struct bpf_verifier_state *parent;
 	/* index of call instruction that called into this func */
 	int callsite;
 	/* stack frame number of this function state from pov of
@@ -129,7 +128,6 @@ struct bpf_func_state {
 struct bpf_verifier_state {
 	/* call stack tracking */
 	struct bpf_func_state *frame[MAX_CALL_FRAMES];
-	struct bpf_verifier_state *parent;
 	u32 curframe;
 };
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 92246117d2b0..68568d22d6bd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -380,9 +380,9 @@ static int copy_stack_state(struct bpf_func_state *dst,
 /* do_check() starts with zero-sized stack in struct bpf_verifier_state to
  * make it consume minimal amount of memory. check_stack_write() access from
  * the program calls into realloc_func_state() to grow the stack size.
- * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
- * which this function copies over. It points to previous bpf_verifier_state
- * which is never reallocated
+ * Note there is a non-zero parent pointer inside each reg of bpf_verifier_state
+ * which this function copies over. It points to corresponding reg in previous
+ * bpf_verifier_state which is never reallocated
  */
 static int realloc_func_state(struct bpf_func_state *state, int size,
 			      bool copy_old)
@@ -466,7 +466,6 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 		dst_state->frame[i] = NULL;
 	}
 	dst_state->curframe = src->curframe;
-	dst_state->parent = src->parent;
 	for (i = 0; i <= src->curframe; i++) {
 		dst = dst_state->frame[i];
 		if (!dst) {
@@ -732,6 +731,7 @@ static void init_reg_state(struct bpf_verifier_env *env,
 	for (i = 0; i < MAX_BPF_REG; i++) {
 		mark_reg_not_init(env, regs, i);
 		regs[i].live = REG_LIVE_NONE;
+		regs[i].parent = NULL;
 	}
 
 	/* frame pointer */
@@ -876,74 +876,21 @@ next:
 	return 0;
 }
 
-static
-struct bpf_verifier_state *skip_callee(struct bpf_verifier_env *env,
-				       const struct bpf_verifier_state *state,
-				       struct bpf_verifier_state *parent,
-				       u32 regno)
-{
-	struct bpf_verifier_state *tmp = NULL;
-
-	/* 'parent' could be a state of caller and
-	 * 'state' could be a state of callee. In such case
-	 * parent->curframe < state->curframe
-	 * and it's ok for r1 - r5 registers
-	 *
-	 * 'parent' could be a callee's state after it bpf_exit-ed.
-	 * In such case parent->curframe > state->curframe
-	 * and it's ok for r0 only
-	 */
-	if (parent->curframe == state->curframe ||
-	    (parent->curframe < state->curframe &&
-	     regno >= BPF_REG_1 && regno <= BPF_REG_5) ||
-	    (parent->curframe > state->curframe &&
-	       regno == BPF_REG_0))
-		return parent;
-
-	if (parent->curframe > state->curframe &&
-	    regno >= BPF_REG_6) {
-		/* for callee saved regs we have to skip the whole chain
-		 * of states that belong to callee and mark as LIVE_READ
-		 * the registers before the call
-		 */
-		tmp = parent;
-		while (tmp && tmp->curframe != state->curframe) {
-			tmp = tmp->parent;
-		}
-		if (!tmp)
-			goto bug;
-		parent = tmp;
-	} else {
-		goto bug;
-	}
-	return parent;
-bug:
-	verbose(env, "verifier bug regno %d tmp %p\n", regno, tmp);
-	verbose(env, "regno %d parent frame %d current frame %d\n",
-		regno, parent->curframe, state->curframe);
-	return NULL;
-}
-
+/* Parentage chain of this register (or stack slot) should take care of all
+ * issues like callee-saved registers, stack slot allocation time, etc.
+ */
 static int mark_reg_read(struct bpf_verifier_env *env,
-			 const struct bpf_verifier_state *state,
-			 struct bpf_verifier_state *parent,
-			 u32 regno)
+			 const struct bpf_reg_state *state,
+			 struct bpf_reg_state *parent)
 {
 	bool writes = parent == state->parent; /* Observe write marks */
 
-	if (regno == BPF_REG_FP)
-		/* We don't need to worry about FP liveness because it's read-only */
-		return 0;
-
 	while (parent) {
 		/* if read wasn't screened by an earlier write ... */
-		if (writes && state->frame[state->curframe]->regs[regno].live & REG_LIVE_WRITTEN)
+		if (writes && state->live & REG_LIVE_WRITTEN)
 			break;
-		parent = skip_callee(env, state, parent, regno);
-		if (!parent)
-			return -EFAULT;
 		/* ... then we depend on parent's value */
-		parent->frame[parent->curframe]->regs[regno].live |= REG_LIVE_READ;
+		parent->live |= REG_LIVE_READ;
 		state = parent;
 		parent = state->parent;
 		writes = true;
@@ -969,7 +916,10 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
 			verbose(env, "R%d !read_ok\n", regno);
 			return -EACCES;
 		}
-		return mark_reg_read(env, vstate, vstate->parent, regno);
+		/* We don't need to worry about FP liveness because it's read-only */
+		if (regno != BPF_REG_FP)
+			return mark_reg_read(env, &regs[regno],
+					     regs[regno].parent);
 	} else {
 		/* check whether register used as dest operand can be written to */
 		if (regno == BPF_REG_FP) {
@@ -1080,8 +1030,8 @@ static int check_stack_write(struct bpf_verifier_env *env,
 	} else {
 		u8 type = STACK_MISC;
 
-		/* regular write of data into stack */
-		state->stack[spi].spilled_ptr = (struct bpf_reg_state) {};
+		/* regular write of data into stack destroys any spilled ptr */
+		state->stack[spi].spilled_ptr.type = NOT_INIT;
 
 		/* only mark the slot as written if all 8 bytes were written
 		 * otherwise read propagation may incorrectly stop too soon
@@ -1106,61 +1056,6 @@ static int check_stack_write(struct bpf_verifier_env *env,
 	return 0;
 }
 
-/* registers of every function are unique and mark_reg_read() propagates
- * the liveness in the following cases:
- * - from callee into caller for R1 - R5 that were used as arguments
- * - from caller into callee for R0 that used as result of the call
- * - from caller to the same caller skipping states of the callee for R6 - R9,
- *   since R6 - R9 are callee saved by implicit function prologue and
- *   caller's R6 != callee's R6, so when we propagate liveness up to
- *   parent states we need to skip callee states for R6 - R9.
- *
- * stack slot marking is different, since stacks of caller and callee are
- * accessible in both (since caller can pass a pointer to caller's stack to
- * callee which can pass it to another function), hence mark_stack_slot_read()
- * has to propagate the stack liveness to all parent states at given frame number.
- * Consider code:
- * f1() {
- *   ptr = fp - 8;
- *   *ptr = ctx;
- *   call f2 {
- *      .. = *ptr;
- *   }
- *   .. = *ptr;
- * }
- * First *ptr is reading from f1's stack and mark_stack_slot_read() has
- * to mark liveness at the f1's frame and not f2's frame.
- * Second *ptr is also reading from f1's stack and mark_stack_slot_read() has
- * to propagate liveness to f2 states at f1's frame level and further into
- * f1 states at f1's frame level until write into that stack slot
- */
-static void mark_stack_slot_read(struct bpf_verifier_env *env,
-				 const struct bpf_verifier_state *state,
-				 struct bpf_verifier_state *parent,
-				 int slot, int frameno)
-{
-	bool writes = parent == state->parent; /* Observe write marks */
-
-	while (parent) {
-		if (parent->frame[frameno]->allocated_stack <= slot * BPF_REG_SIZE)
-			/* since LIVE_WRITTEN mark is only done for full 8-byte
-			 * write the read marks are conservative and parent
-			 * state may not even have the stack allocated. In such case
-			 * end the propagation, since the loop reached beginning
-			 * of the function
-			 */
-			break;
-		/* if read wasn't screened by an earlier write ... */
-		if (writes && state->frame[frameno]->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
-			break;
-		/* ... then we depend on parent's value */
-		parent->frame[frameno]->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
-		state = parent;
-		parent = state->parent;
-		writes = true;
-	}
-}
-
 static int check_stack_read(struct bpf_verifier_env *env,
 			    struct bpf_func_state *reg_state /* func where register points to */,
 			    int off, int size, int value_regno)
@@ -1198,8 +1093,8 @@ static int check_stack_read(struct bpf_verifier_env *env,
 			 */
 			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
 		}
-		mark_stack_slot_read(env, vstate, vstate->parent, spi,
-				     reg_state->frameno);
+		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
+			      reg_state->stack[spi].spilled_ptr.parent);
 		return 0;
 	} else {
 		int zeros = 0;
@@ -1215,8 +1110,8 @@ static int check_stack_read(struct bpf_verifier_env *env,
 				off, i, size);
 			return -EACCES;
 		}
-		mark_stack_slot_read(env, vstate, vstate->parent, spi,
-				     reg_state->frameno);
+		mark_reg_read(env, &reg_state->stack[spi].spilled_ptr,
+			      reg_state->stack[spi].spilled_ptr.parent);
 		if (value_regno >= 0) {
 			if (zeros == size) {
 				/* any size read into register is zero extended,
@@ -1908,8 +1803,8 @@ mark:
 		/* reading any byte out of 8-byte 'spill_slot' will cause
 		 * the whole slot to be marked as 'read'
 		 */
-		mark_stack_slot_read(env, env->cur_state, env->cur_state->parent,
-				     spi, state->frameno);
+		mark_reg_read(env, &state->stack[spi].spilled_ptr,
+			      state->stack[spi].spilled_ptr.parent);
 	}
 	return update_stack_depth(env, state, off);
 }
@@ -2366,11 +2261,13 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			state->curframe + 1 /* frameno within this callchain */,
 			subprog /* subprog number within this prog */);
 
-	/* copy r1 - r5 args that callee can access */
+	/* copy r1 - r5 args that callee can access.  The copy includes parent
+	 * pointers, which connects us up to the liveness chain
+	 */
 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
 		callee->regs[i] = caller->regs[i];
 
-	/* after the call regsiters r0 - r5 were scratched */
+	/* after the call registers r0 - r5 were scratched */
 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
 		mark_reg_not_init(env, caller->regs, caller_saved[i]);
 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
@@ -4370,7 +4267,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 		/* explored state didn't use this */
 		return true;
 
-	equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, frameno)) == 0;
+	equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
 
 	if (rold->type == PTR_TO_STACK)
 		/* two stack pointers are equal only if they're pointing to
@@ -4603,7 +4500,7 @@ static bool states_equal(struct bpf_verifier_env *env,
  * equivalent state (jump target or such) we didn't arrive by the straight-line
  * code, so read marks in the state must propagate to the parent regardless
  * of the state's write marks. That's what 'parent == state->parent' comparison
- * in mark_reg_read() and mark_stack_slot_read() is for.
+ * in mark_reg_read() is for.
  */
 static int propagate_liveness(struct bpf_verifier_env *env,
 			      const struct bpf_verifier_state *vstate,
@@ -4624,7 +4521,8 @@ static int propagate_liveness(struct bpf_verifier_env *env,
 		if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
 			continue;
 		if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
-			err = mark_reg_read(env, vstate, vparent, i);
+			err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i],
+					    &vparent->frame[vstate->curframe]->regs[i]);
 			if (err)
 				return err;
 		}
@@ -4639,7 +4537,8 @@ static int propagate_liveness(struct bpf_verifier_env *env,
 			if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
 				continue;
 			if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
-				mark_stack_slot_read(env, vstate, vparent, i, frame);
+				mark_reg_read(env, &state->stack[i].spilled_ptr,
+					      &parent->stack[i].spilled_ptr);
 		}
 	}
 	return err;
@@ -4649,7 +4548,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 {
 	struct bpf_verifier_state_list *new_sl;
 	struct bpf_verifier_state_list *sl;
-	struct bpf_verifier_state *cur = env->cur_state;
+	struct bpf_verifier_state *cur = env->cur_state, *new;
 	int i, j, err;
 
 	sl = env->explored_states[insn_idx];
@@ -4691,16 +4590,18 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 		return -ENOMEM;
 
 	/* add new state to the head of linked list */
-	err = copy_verifier_state(&new_sl->state, cur);
+	new = &new_sl->state;
+	err = copy_verifier_state(new, cur);
 	if (err) {
-		free_verifier_state(&new_sl->state, false);
+		free_verifier_state(new, false);
 		kfree(new_sl);
 		return err;
 	}
 	new_sl->next = env->explored_states[insn_idx];
 	env->explored_states[insn_idx] = new_sl;
 	/* connect new state to parentage chain */
-	cur->parent = &new_sl->state;
+	for (i = 0; i < BPF_REG_FP; i++)
+		cur_regs(env)[i].parent = &new->frame[new->curframe]->regs[i];
 	/* clear write marks in current state: the writes we did are not writes
 	 * our child did, so they don't screen off its reads from us.
 	 * (There are no read marks in current state, because reads always mark
@@ -4713,9 +4614,13 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	/* all stack frames are accessible from callee, clear them all */
 	for (j = 0; j <= cur->curframe; j++) {
 		struct bpf_func_state *frame = cur->frame[j];
+		struct bpf_func_state *newframe = new->frame[j];
 
-		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++)
+		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
 			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
+			frame->stack[i].spilled_ptr.parent =
+						&newframe->stack[i].spilled_ptr;
+		}
 	}
 	return 0;
 }
@@ -4734,7 +4639,6 @@ static int do_check(struct bpf_verifier_env *env)
 	if (!state)
 		return -ENOMEM;
 	state->curframe = 0;
-	state->parent = NULL;
 	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
 	if (!state->frame[0]) {
 		kfree(state);
-- 
cgit v1.2.3


From 9b3004953503462a4fab31b85e44ae446d48f0bd Mon Sep 17 00:00:00 2001
From: Michal Kubecek <mkubecek@suse.cz>
Date: Tue, 28 Aug 2018 19:56:58 +0200
Subject: ethtool: drop get_settings and set_settings callbacks

Since [gs]et_settings ethtool_ops callbacks have been deprecated in
February 2016, all in tree NIC drivers have been converted to provide
[gs]et_link_ksettings() and out of tree drivers have had enough time to do
the same.

Drop get_settings() and set_settings() and implement both ETHTOOL_[GS]SET
and ETHTOOL_[GS]LINKSETTINGS only using [gs]et_link_ksettings().

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/ABI/testing/sysfs-class-net |   4 +-
 include/linux/ethtool.h                   |  33 ++-----
 include/uapi/linux/ethtool.h              |  15 +--
 net/core/ethtool.c                        | 158 +++++++-----------------------
 4 files changed, 50 insertions(+), 160 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index 2f1788111cd9..e2e0fe553ad8 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -117,7 +117,7 @@ Description:
 		full: full duplex
 
 		Note: This attribute is only valid for interfaces that implement
-		the ethtool get_settings method (mostly Ethernet).
+		the ethtool get_link_ksettings method (mostly Ethernet).
 
 What:		/sys/class/net/<iface>/flags
 Date:		April 2005
@@ -224,7 +224,7 @@ Description:
 		an integer representing the link speed in Mbits/sec.
 
 		Note: this attribute is only valid for interfaces that implement
-		the ethtool get_settings method (mostly Ethernet ).
+		the ethtool get_link_ksettings method (mostly Ethernet).
 
 What:		/sys/class/net/<iface>/tx_queue_len
 Date:		April 2005
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index f8a2245b70ac..afd9596ce636 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -183,14 +183,6 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
 
 /**
  * struct ethtool_ops - optional netdev operations
- * @get_settings: DEPRECATED, use %get_link_ksettings/%set_link_ksettings
- *	API. Get various device settings including Ethernet link
- *	settings. The @cmd parameter is expected to have been cleared
- *	before get_settings is called. Returns a negative error code
- *	or zero.
- * @set_settings: DEPRECATED, use %get_link_ksettings/%set_link_ksettings
- *	API. Set various device settings including Ethernet link
- *	settings.  Returns a negative error code or zero.
  * @get_drvinfo: Report driver/device information.  Should only set the
  *	@driver, @version, @fw_version and @bus_info fields.  If not
  *	implemented, the @driver and @bus_info fields will be filled in
@@ -297,19 +289,16 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
  *	a TX queue has this number, return -EINVAL. If only a RX queue or a TX
  *	queue has this number, ignore the inapplicable fields.
  *	Returns a negative error code or zero.
- * @get_link_ksettings: When defined, takes precedence over the
- *	%get_settings method. Get various device settings
- *	including Ethernet link settings. The %cmd and
- *	%link_mode_masks_nwords fields should be ignored (use
- *	%__ETHTOOL_LINK_MODE_MASK_NBITS instead of the latter), any
- *	change to them will be overwritten by kernel. Returns a
- *	negative error code or zero.
- * @set_link_ksettings: When defined, takes precedence over the
- *	%set_settings method. Set various device settings including
- *	Ethernet link settings. The %cmd and %link_mode_masks_nwords
- *	fields should be ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS
- *	instead of the latter), any change to them will be overwritten
- *	by kernel. Returns a negative error code or zero.
+ * @get_link_ksettings: Get various device settings including Ethernet link
+ *	settings. The %cmd and %link_mode_masks_nwords fields should be
+ *	ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS instead of the latter),
+ *	any change to them will be overwritten by kernel. Returns a negative
+ *	error code or zero.
+ * @set_link_ksettings: Set various device settings including Ethernet link
+ *	settings. The %cmd and %link_mode_masks_nwords fields should be
+ *	ignored (use %__ETHTOOL_LINK_MODE_MASK_NBITS instead of the latter),
+ *	any change to them will be overwritten by kernel. Returns a negative
+ *	error code or zero.
  * @get_fecparam: Get the network device Forward Error Correction parameters.
  * @set_fecparam: Set the network device Forward Error Correction parameters.
  * @get_ethtool_phy_stats: Return extended statistics about the PHY device.
@@ -329,8 +318,6 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
  * of the generic netdev features interface.
  */
 struct ethtool_ops {
-	int	(*get_settings)(struct net_device *, struct ethtool_cmd *);
-	int	(*set_settings)(struct net_device *, struct ethtool_cmd *);
 	void	(*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *);
 	int	(*get_regs_len)(struct net_device *);
 	void	(*get_regs)(struct net_device *, struct ethtool_regs *, void *);
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index dc69391d2bba..c8f8e2455bf3 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -91,10 +91,6 @@
  * %ETHTOOL_GSET to get the current values before making specific
  * changes and then applying them with %ETHTOOL_SSET.
  *
- * Drivers that implement set_settings() should validate all fields
- * other than @cmd that are not described as read-only or deprecated,
- * and must ignore all fields described as read-only.
- *
  * Deprecated fields should be ignored by both users and drivers.
  */
 struct ethtool_cmd {
@@ -1800,14 +1796,9 @@ enum ethtool_reset_flags {
  * rejected.
  *
  * Deprecated %ethtool_cmd fields transceiver, maxtxpkt and maxrxpkt
- * are not available in %ethtool_link_settings. Until all drivers are
- * converted to ignore them or to the new %ethtool_link_settings API,
- * for both queries and changes, users should always try
- * %ETHTOOL_GLINKSETTINGS first, and if it fails with -ENOTSUPP stick
- * only to %ETHTOOL_GSET and %ETHTOOL_SSET consistently. If it
- * succeeds, then users should stick to %ETHTOOL_GLINKSETTINGS and
- * %ETHTOOL_SLINKSETTINGS (which would support drivers implementing
- * either %ethtool_cmd or %ethtool_link_settings).
+ * are not available in %ethtool_link_settings. These fields will be
+ * always set to zero in %ETHTOOL_GSET reply and %ETHTOOL_SSET will
+ * fail if any of them is set to non-zero value.
  *
  * Users should assume that all fields not marked read-only are
  * writable and subject to validation by the driver.  They should use
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index c9993c6c2fd4..9d4e56d97080 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -539,47 +539,17 @@ struct ethtool_link_usettings {
 	} link_modes;
 };
 
-/* Internal kernel helper to query a device ethtool_link_settings.
- *
- * Backward compatibility note: for compatibility with legacy drivers
- * that implement only the ethtool_cmd API, this has to work with both
- * drivers implementing get_link_ksettings API and drivers
- * implementing get_settings API. When drivers implement get_settings
- * and report ethtool_cmd deprecated fields
- * (transceiver/maxrxpkt/maxtxpkt), these fields are silently ignored
- * because the resulting struct ethtool_link_settings does not report them.
- */
+/* Internal kernel helper to query a device ethtool_link_settings. */
 int __ethtool_get_link_ksettings(struct net_device *dev,
 				 struct ethtool_link_ksettings *link_ksettings)
 {
-	int err;
-	struct ethtool_cmd cmd;
-
 	ASSERT_RTNL();
 
-	if (dev->ethtool_ops->get_link_ksettings) {
-		memset(link_ksettings, 0, sizeof(*link_ksettings));
-		return dev->ethtool_ops->get_link_ksettings(dev,
-							    link_ksettings);
-	}
-
-	/* driver doesn't support %ethtool_link_ksettings API. revert to
-	 * legacy %ethtool_cmd API, unless it's not supported either.
-	 * TODO: remove when ethtool_ops::get_settings disappears internally
-	 */
-	if (!dev->ethtool_ops->get_settings)
+	if (!dev->ethtool_ops->get_link_ksettings)
 		return -EOPNOTSUPP;
 
-	memset(&cmd, 0, sizeof(cmd));
-	cmd.cmd = ETHTOOL_GSET;
-	err = dev->ethtool_ops->get_settings(dev, &cmd);
-	if (err < 0)
-		return err;
-
-	/* we ignore deprecated fields transceiver/maxrxpkt/maxtxpkt
-	 */
-	convert_legacy_settings_to_link_ksettings(link_ksettings, &cmd);
-	return err;
+	memset(link_ksettings, 0, sizeof(*link_ksettings));
+	return dev->ethtool_ops->get_link_ksettings(dev, link_ksettings);
 }
 EXPORT_SYMBOL(__ethtool_get_link_ksettings);
 
@@ -635,16 +605,7 @@ store_link_ksettings_for_user(void __user *to,
 	return 0;
 }
 
-/* Query device for its ethtool_link_settings.
- *
- * Backward compatibility note: this function must fail when driver
- * does not implement ethtool::get_link_ksettings, even if legacy
- * ethtool_ops::get_settings is implemented. This tells new versions
- * of ethtool that they should use the legacy API %ETHTOOL_GSET for
- * this driver, so that they can correctly access the ethtool_cmd
- * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver
- * implements ethtool_ops::get_settings anymore.
- */
+/* Query device for its ethtool_link_settings. */
 static int ethtool_get_link_ksettings(struct net_device *dev,
 				      void __user *useraddr)
 {
@@ -652,7 +613,6 @@ static int ethtool_get_link_ksettings(struct net_device *dev,
 	struct ethtool_link_ksettings link_ksettings;
 
 	ASSERT_RTNL();
-
 	if (!dev->ethtool_ops->get_link_ksettings)
 		return -EOPNOTSUPP;
 
@@ -699,16 +659,7 @@ static int ethtool_get_link_ksettings(struct net_device *dev,
 	return store_link_ksettings_for_user(useraddr, &link_ksettings);
 }
 
-/* Update device ethtool_link_settings.
- *
- * Backward compatibility note: this function must fail when driver
- * does not implement ethtool::set_link_ksettings, even if legacy
- * ethtool_ops::set_settings is implemented. This tells new versions
- * of ethtool that they should use the legacy API %ETHTOOL_SSET for
- * this driver, so that they can correctly update the ethtool_cmd
- * deprecated fields (transceiver/maxrxpkt/maxtxpkt), until no driver
- * implements ethtool_ops::get_settings anymore.
- */
+/* Update device ethtool_link_settings. */
 static int ethtool_set_link_ksettings(struct net_device *dev,
 				      void __user *useraddr)
 {
@@ -746,51 +697,31 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
 
 /* Query device for its ethtool_cmd settings.
  *
- * Backward compatibility note: for compatibility with legacy ethtool,
- * this has to work with both drivers implementing get_link_ksettings
- * API and drivers implementing get_settings API. When drivers
- * implement get_link_ksettings and report higher link mode bits, a
- * kernel warning is logged once (with name of 1st driver/device) to
- * recommend user to upgrade ethtool, but the command is successful
- * (only the lower link mode bits reported back to user).
+ * Backward compatibility note: for compatibility with legacy ethtool, this is
+ * now implemented via get_link_ksettings. When driver reports higher link mode
+ * bits, a kernel warning is logged once (with name of 1st driver/device) to
+ * recommend user to upgrade ethtool, but the command is successful (only the
+ * lower link mode bits reported back to user). Deprecated fields from
+ * ethtool_cmd (transceiver/maxrxpkt/maxtxpkt) are always set to zero.
  */
 static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
 {
+	struct ethtool_link_ksettings link_ksettings;
 	struct ethtool_cmd cmd;
+	int err;
 
 	ASSERT_RTNL();
+	if (!dev->ethtool_ops->get_link_ksettings)
+		return -EOPNOTSUPP;
 
-	if (dev->ethtool_ops->get_link_ksettings) {
-		/* First, use link_ksettings API if it is supported */
-		int err;
-		struct ethtool_link_ksettings link_ksettings;
-
-		memset(&link_ksettings, 0, sizeof(link_ksettings));
-		err = dev->ethtool_ops->get_link_ksettings(dev,
-							   &link_ksettings);
-		if (err < 0)
-			return err;
-		convert_link_ksettings_to_legacy_settings(&cmd,
-							  &link_ksettings);
-
-		/* send a sensible cmd tag back to user */
-		cmd.cmd = ETHTOOL_GSET;
-	} else {
-		/* driver doesn't support %ethtool_link_ksettings
-		 * API. revert to legacy %ethtool_cmd API, unless it's
-		 * not supported either.
-		 */
-		int err;
-
-		if (!dev->ethtool_ops->get_settings)
-			return -EOPNOTSUPP;
+	memset(&link_ksettings, 0, sizeof(link_ksettings));
+	err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings);
+	if (err < 0)
+		return err;
+	convert_link_ksettings_to_legacy_settings(&cmd, &link_ksettings);
 
-		memset(&cmd, 0, sizeof(cmd));
-		cmd.cmd = ETHTOOL_GSET;
-		err = dev->ethtool_ops->get_settings(dev, &cmd);
-		if (err < 0)
-			return err;
-	}
+	/* send a sensible cmd tag back to user */
+	cmd.cmd = ETHTOOL_GSET;
 
 	if (copy_to_user(useraddr, &cmd, sizeof(cmd)))
 		return -EFAULT;
@@ -800,48 +731,29 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr)
 
 /* Update device link settings with given ethtool_cmd.
  *
- * Backward compatibility note: for compatibility with legacy ethtool,
- * this has to work with both drivers implementing set_link_ksettings
- * API and drivers implementing set_settings API. When drivers
- * implement set_link_ksettings and user's request updates deprecated
- * ethtool_cmd fields (transceiver/maxrxpkt/maxtxpkt), a kernel
- * warning is logged once (with name of 1st driver/device) to
- * recommend user to upgrade ethtool, and the request is rejected.
+ * Backward compatibility note: for compatibility with legacy ethtool, this is
+ * now always implemented via set_link_settings. When user's request updates
+ * deprecated ethtool_cmd fields (transceiver/maxrxpkt/maxtxpkt), a kernel
+ * warning is logged once (with name of 1st driver/device) to recommend user to
+ * upgrade ethtool, and the request is rejected.
  */
 static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
 {
+	struct ethtool_link_ksettings link_ksettings;
 	struct ethtool_cmd cmd;
 
 	ASSERT_RTNL();
 
 	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
 		return -EFAULT;
-
-	/* first, try new %ethtool_link_ksettings API. */
-	if (dev->ethtool_ops->set_link_ksettings) {
-		struct ethtool_link_ksettings link_ksettings;
-
-		if (!convert_legacy_settings_to_link_ksettings(&link_ksettings,
-							       &cmd))
-			return -EINVAL;
-
-		link_ksettings.base.cmd = ETHTOOL_SLINKSETTINGS;
-		link_ksettings.base.link_mode_masks_nwords
-			= __ETHTOOL_LINK_MODE_MASK_NU32;
-		return dev->ethtool_ops->set_link_ksettings(dev,
-							    &link_ksettings);
-	}
-
-	/* legacy %ethtool_cmd API */
-
-	/* TODO: return -EOPNOTSUPP when ethtool_ops::get_settings
-	 * disappears internally
-	 */
-
-	if (!dev->ethtool_ops->set_settings)
+	if (!dev->ethtool_ops->set_link_ksettings)
 		return -EOPNOTSUPP;
 
-	return dev->ethtool_ops->set_settings(dev, &cmd);
+	if (!convert_legacy_settings_to_link_ksettings(&link_ksettings, &cmd))
+		return -EINVAL;
+	link_ksettings.base.link_mode_masks_nwords =
+		__ETHTOOL_LINK_MODE_MASK_NU32;
+	return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
 }
 
 static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
-- 
cgit v1.2.3


From 45cd0faae3715e305bc46e23b34c5ed4d185ceb8 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Mon, 27 Aug 2018 15:56:02 +0300
Subject: vfs: add the fadvise() file operation

This is going to be used by overlayfs and possibly useful
for other filesystems.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 Documentation/filesystems/vfs.txt |  3 ++
 include/linux/fs.h                |  5 +++
 mm/fadvise.c                      | 78 ++++++++++++++++++++++-----------------
 3 files changed, 53 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index ec2142c8dbd3..a6c6a8af48a2 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -885,6 +885,7 @@ struct file_operations {
 	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
 	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
 	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
+	int (*fadvise)(struct file *, loff_t, loff_t, int);
 };
 
 Again, all methods are called without any locks being held, unless
@@ -965,6 +966,8 @@ otherwise noted.
   dedupe_file_range: called by the ioctl(2) system call for FIDEDUPERANGE
 	command.
 
+  fadvise: possibly called by the fadvise64() system call.
+
 Note that the file operations are implemented by the specific
 filesystem in which the inode resides. When opening a device node
 (character or block special) most filesystems will call special
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 33322702c910..6c0b4a1c22ff 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1763,6 +1763,7 @@ struct file_operations {
 			u64);
 	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
 			u64);
+	int (*fadvise)(struct file *, loff_t, loff_t, int);
 } __randomize_layout;
 
 struct inode_operations {
@@ -3459,4 +3460,8 @@ static inline bool dir_relax_shared(struct inode *inode)
 extern bool path_noexec(const struct path *path);
 extern void inode_nohighmem(struct inode *inode);
 
+/* mm/fadvise.c */
+extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len,
+		       int advice);
+
 #endif /* _LINUX_FS_H */
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 2d8376e3c640..2f59bac1cb77 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -27,9 +27,9 @@
  * deactivate the pages and clear PG_Referenced.
  */
 
-int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
+static int generic_fadvise(struct file *file, loff_t offset, loff_t len,
+			   int advice)
 {
-	struct fd f = fdget(fd);
 	struct inode *inode;
 	struct address_space *mapping;
 	struct backing_dev_info *bdi;
@@ -37,22 +37,14 @@ int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
 	pgoff_t start_index;
 	pgoff_t end_index;
 	unsigned long nrpages;
-	int ret = 0;
 
-	if (!f.file)
-		return -EBADF;
+	inode = file_inode(file);
+	if (S_ISFIFO(inode->i_mode))
+		return -ESPIPE;
 
-	inode = file_inode(f.file);
-	if (S_ISFIFO(inode->i_mode)) {
-		ret = -ESPIPE;
-		goto out;
-	}
-
-	mapping = f.file->f_mapping;
-	if (!mapping || len < 0) {
-		ret = -EINVAL;
-		goto out;
-	}
+	mapping = file->f_mapping;
+	if (!mapping || len < 0)
+		return -EINVAL;
 
 	bdi = inode_to_bdi(mapping->host);
 
@@ -67,9 +59,9 @@ int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
 			/* no bad return value, but ignore advice */
 			break;
 		default:
-			ret = -EINVAL;
+			return -EINVAL;
 		}
-		goto out;
+		return 0;
 	}
 
 	/*
@@ -85,21 +77,21 @@ int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
 
 	switch (advice) {
 	case POSIX_FADV_NORMAL:
-		f.file->f_ra.ra_pages = bdi->ra_pages;
-		spin_lock(&f.file->f_lock);
-		f.file->f_mode &= ~FMODE_RANDOM;
-		spin_unlock(&f.file->f_lock);
+		file->f_ra.ra_pages = bdi->ra_pages;
+		spin_lock(&file->f_lock);
+		file->f_mode &= ~FMODE_RANDOM;
+		spin_unlock(&file->f_lock);
 		break;
 	case POSIX_FADV_RANDOM:
-		spin_lock(&f.file->f_lock);
-		f.file->f_mode |= FMODE_RANDOM;
-		spin_unlock(&f.file->f_lock);
+		spin_lock(&file->f_lock);
+		file->f_mode |= FMODE_RANDOM;
+		spin_unlock(&file->f_lock);
 		break;
 	case POSIX_FADV_SEQUENTIAL:
-		f.file->f_ra.ra_pages = bdi->ra_pages * 2;
-		spin_lock(&f.file->f_lock);
-		f.file->f_mode &= ~FMODE_RANDOM;
-		spin_unlock(&f.file->f_lock);
+		file->f_ra.ra_pages = bdi->ra_pages * 2;
+		spin_lock(&file->f_lock);
+		file->f_mode &= ~FMODE_RANDOM;
+		spin_unlock(&file->f_lock);
 		break;
 	case POSIX_FADV_WILLNEED:
 		/* First and last PARTIAL page! */
@@ -115,8 +107,7 @@ int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
 		 * Ignore return value because fadvise() shall return
 		 * success even if filesystem can't retrieve a hint,
 		 */
-		force_page_cache_readahead(mapping, f.file, start_index,
-					   nrpages);
+		force_page_cache_readahead(mapping, file, start_index, nrpages);
 		break;
 	case POSIX_FADV_NOREUSE:
 		break;
@@ -183,9 +174,30 @@ int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
 		}
 		break;
 	default:
-		ret = -EINVAL;
+		return -EINVAL;
 	}
-out:
+	return 0;
+}
+
+int vfs_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
+{
+	if (file->f_op->fadvise)
+		return file->f_op->fadvise(file, offset, len, advice);
+
+	return generic_fadvise(file, offset, len, advice);
+}
+EXPORT_SYMBOL(vfs_fadvise);
+
+int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
+{
+	struct fd f = fdget(fd);
+	int ret;
+
+	if (!f.file)
+		return -EBADF;
+
+	ret = vfs_fadvise(f.file, offset, len, advice);
+
 	fdput(f);
 	return ret;
 }
-- 
cgit v1.2.3


From a4e0109a19c554e44c832958e426303781e1ad30 Mon Sep 17 00:00:00 2001
From: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Date: Mon, 20 Aug 2018 08:12:32 -0700
Subject: virtchnl: use u8 type for a field in the virtchnl_filter struct

The virtchnl_filter struct has a field called field_flags. A previous
commit mistakenly had the type to be a __u8. What we want is for the
field to be an unsigned 8 bit value, so let's just use the existing
kernel type u8 for that.

Signed-off-by: Harshitha Ramamurthy <harshitha.ramamurthy@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/avf/virtchnl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 212b3822d180..b41f7bc958ef 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -573,7 +573,7 @@ struct virtchnl_filter {
 	enum	virtchnl_flow_type flow_type;
 	enum	virtchnl_action action;
 	u32	action_meta;
-	__u8	field_flags;
+	u8	field_flags;
 };
 
 VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
-- 
cgit v1.2.3


From 3e31009898699dfca823893054748d85048dc7b3 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 21 Jun 2018 12:50:01 -0700
Subject: rcu: Defer reporting RCU-preempt quiescent states when disabled

This commit defers reporting of RCU-preempt quiescent states at
rcu_read_unlock_special() time when any of interrupts, softirq, or
preemption are disabled.  These deferred quiescent states are reported
at a later RCU_SOFTIRQ, context switch, idle entry, or CPU-hotplug
offline operation.  Of course, if another RCU read-side critical
section has started in the meantime, the reporting of the quiescent
state will be further deferred.

This also means that disabling preemption, interrupts, and/or
softirqs will act as an RCU-preempt read-side critical section.
This is enforced by checking preempt_count() as needed.

Some special cases must be handled on an ad-hoc basis, for example,
context switch is a quiescent state even though both the scheduler and
do_exit() disable preemption.  In these cases, additional calls to
rcu_preempt_deferred_qs() override the preemption disabling.  Similar
logic overrides disabled interrupts in rcu_preempt_check_callbacks()
because in this case the quiescent state happened just before the
corresponding scheduling-clock interrupt.

In theory, this change lifts a long-standing restriction that required
that if interrupts were disabled across a call to rcu_read_unlock()
that the matching rcu_read_lock() also be contained within that
interrupts-disabled region of code.  Because the reporting of the
corresponding RCU-preempt quiescent state is now deferred until
after interrupts have been enabled, it is no longer possible for this
situation to result in deadlocks involving the scheduler's runqueue and
priority-inheritance locks.  This may allow some code simplification that
might reduce interrupt latency a bit.  Unfortunately, in practice this
would also defer deboosting a low-priority task that had been subjected
to RCU priority boosting, so real-time-response considerations might
well force this restriction to remain in place.

Because RCU-preempt grace periods are now blocked not only by RCU
read-side critical sections, but also by disabling of interrupts,
preemption, and softirqs, it will be possible to eliminate RCU-bh and
RCU-sched in favor of RCU-preempt in CONFIG_PREEMPT=y kernels.  This may
require some additional plumbing to provide the network denial-of-service
guarantees that have been traditionally provided by RCU-bh.  Once these
are in place, CONFIG_PREEMPT=n kernels will be able to fold RCU-bh
into RCU-sched.  This would mean that all kernels would have but
one flavor of RCU, which would open the door to significant code
cleanup.

Moving to a single flavor of RCU would also have the beneficial effect
of reducing the NOCB kthreads by at least a factor of two.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Apply rcu_read_unlock_special() preempt_count() feedback
  from Joel Fernandes. ]
[ paulmck: Adjust rcu_eqs_enter() call to rcu_preempt_deferred_qs() in
  response to bug reports from kbuild test robot. ]
[ paulmck: Fix bug located by kbuild test robot involving recursion
  via rcu_preempt_deferred_qs(). ]
---
 .../RCU/Design/Requirements/Requirements.html      |  50 ++++---
 include/linux/rcutiny.h                            |   5 +
 kernel/rcu/tree.c                                  |   9 ++
 kernel/rcu/tree.h                                  |   3 +
 kernel/rcu/tree_exp.h                              |  71 +++++++---
 kernel/rcu/tree_plugin.h                           | 144 ++++++++++++++++-----
 6 files changed, 205 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index 49690228b1c6..038714475edb 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -2394,30 +2394,9 @@ when invoked from a CPU-hotplug notifier.
 <p>
 RCU depends on the scheduler, and the scheduler uses RCU to
 protect some of its data structures.
-This means the scheduler is forbidden from acquiring
-the runqueue locks and the priority-inheritance locks
-in the middle of an outermost RCU read-side critical section unless either
-(1)&nbsp;it releases them before exiting that same
-RCU read-side critical section, or
-(2)&nbsp;interrupts are disabled across
-that entire RCU read-side critical section.
-This same prohibition also applies (recursively!) to any lock that is acquired
-while holding any lock to which this prohibition applies.
-Adhering to this rule prevents preemptible RCU from invoking
-<tt>rcu_read_unlock_special()</tt> while either runqueue or
-priority-inheritance locks are held, thus avoiding deadlock.
-
-<p>
-Prior to v4.4, it was only necessary to disable preemption across
-RCU read-side critical sections that acquired scheduler locks.
-In v4.4, expedited grace periods started using IPIs, and these
-IPIs could force a <tt>rcu_read_unlock()</tt> to take the slowpath.
-Therefore, this expedited-grace-period change required disabling of
-interrupts, not just preemption.
-
-<p>
-For RCU's part, the preemptible-RCU <tt>rcu_read_unlock()</tt>
-implementation must be written carefully to avoid similar deadlocks.
+The preemptible-RCU <tt>rcu_read_unlock()</tt>
+implementation must therefore be written carefully to avoid deadlocks
+involving the scheduler's runqueue and priority-inheritance locks.
 In particular, <tt>rcu_read_unlock()</tt> must tolerate an
 interrupt where the interrupt handler invokes both
 <tt>rcu_read_lock()</tt> and <tt>rcu_read_unlock()</tt>.
@@ -2426,7 +2405,7 @@ negative nesting levels to avoid destructive recursion via
 interrupt handler's use of RCU.
 
 <p>
-This pair of mutual scheduler-RCU requirements came as a
+This scheduler-RCU requirement came as a
 <a href="https://lwn.net/Articles/453002/">complete surprise</a>.
 
 <p>
@@ -2437,9 +2416,28 @@ when running context-switch-heavy workloads when built with
 <tt>CONFIG_NO_HZ_FULL=y</tt>
 <a href="http://www.rdrop.com/users/paulmck/scalability/paper/BareMetal.2015.01.15b.pdf">did come as a surprise [PDF]</a>.
 RCU has made good progress towards meeting this requirement, even
-for context-switch-have <tt>CONFIG_NO_HZ_FULL=y</tt> workloads,
+for context-switch-heavy <tt>CONFIG_NO_HZ_FULL=y</tt> workloads,
 but there is room for further improvement.
 
+<p>
+In the past, it was forbidden to disable interrupts across an
+<tt>rcu_read_unlock()</tt> unless that interrupt-disabled region
+of code also included the matching <tt>rcu_read_lock()</tt>.
+Violating this restriction could result in deadlocks involving the
+scheduler's runqueue and priority-inheritance spinlocks.
+This restriction was lifted when interrupt-disabled calls to
+<tt>rcu_read_unlock()</tt> started deferring the reporting of
+the resulting RCU-preempt quiescent state until the end of that
+interrupts-disabled region.
+This deferred reporting means that the scheduler's runqueue and
+priority-inheritance locks cannot be held while reporting an RCU-preempt
+quiescent state, which lifts the earlier restriction, at least from
+a deadlock perspective.
+Unfortunately, real-time systems using RCU priority boosting may
+need this restriction to remain in effect because deferred
+quiescent-state reporting also defers deboosting, which in turn
+degrades real-time latencies.
+
 <h3><a name="Tracing and RCU">Tracing and RCU</a></h3>
 
 <p>
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 8d9a0ea8f0b5..f617ab19bb51 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -115,6 +115,11 @@ static inline void rcu_irq_exit_irqson(void) { }
 static inline void rcu_irq_enter_irqson(void) { }
 static inline void rcu_irq_exit(void) { }
 static inline void exit_rcu(void) { }
+static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t)
+{
+	return false;
+}
+static inline void rcu_preempt_deferred_qs(struct task_struct *t) { }
 #ifdef CONFIG_SRCU
 void rcu_scheduler_starting(void);
 #else /* #ifndef CONFIG_SRCU */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 36786789b625..346624716d6e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -422,6 +422,7 @@ static void rcu_momentary_dyntick_idle(void)
 	special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
 	/* It is illegal to call this from idle state. */
 	WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
+	rcu_preempt_deferred_qs(current);
 }
 
 /*
@@ -729,6 +730,7 @@ static void rcu_eqs_enter(bool user)
 		do_nocb_deferred_wakeup(rdp);
 	}
 	rcu_prepare_for_idle();
+	rcu_preempt_deferred_qs(current);
 	WRITE_ONCE(rdtp->dynticks_nesting, 0); /* Avoid irq-access tearing. */
 	rcu_dynticks_eqs_enter();
 	rcu_dynticks_task_enter();
@@ -2850,6 +2852,12 @@ __rcu_process_callbacks(struct rcu_state *rsp)
 
 	WARN_ON_ONCE(!rdp->beenonline);
 
+	/* Report any deferred quiescent states if preemption enabled. */
+	if (!(preempt_count() & PREEMPT_MASK))
+		rcu_preempt_deferred_qs(current);
+	else if (rcu_preempt_need_deferred_qs(current))
+		resched_cpu(rdp->cpu); /* Provoke future context switch. */
+
 	/* Update RCU state based on any recent quiescent states. */
 	rcu_check_quiescent_state(rsp, rdp);
 
@@ -3823,6 +3831,7 @@ void rcu_report_dead(unsigned int cpu)
 	rcu_report_exp_rdp(&rcu_sched_state,
 			   this_cpu_ptr(rcu_sched_state.rda), true);
 	preempt_enable();
+	rcu_preempt_deferred_qs(current);
 	for_each_rcu_flavor(rsp)
 		rcu_cleanup_dying_idle_cpu(cpu, rsp);
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 4e74df768c57..025bd2e5592b 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -195,6 +195,7 @@ struct rcu_data {
 	bool		core_needs_qs;	/* Core waits for quiesc state. */
 	bool		beenonline;	/* CPU online at least once. */
 	bool		gpwrap;		/* Possible ->gp_seq wrap. */
+	bool		deferred_qs;	/* This CPU awaiting a deferred QS? */
 	struct rcu_node *mynode;	/* This CPU's leaf of hierarchy */
 	unsigned long grpmask;		/* Mask to apply to leaf qsmask. */
 	unsigned long	ticks_this_gp;	/* The number of scheduling-clock */
@@ -461,6 +462,8 @@ static void rcu_cleanup_after_idle(void);
 static void rcu_prepare_for_idle(void);
 static void rcu_idle_count_callbacks_posted(void);
 static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
+static bool rcu_preempt_need_deferred_qs(struct task_struct *t);
+static void rcu_preempt_deferred_qs(struct task_struct *t);
 static void print_cpu_stall_info_begin(void);
 static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
 static void print_cpu_stall_info_end(void);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 0b2c2ad69629..f9d5bbd8adce 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -262,6 +262,7 @@ static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
 static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
 			       bool wake)
 {
+	WRITE_ONCE(rdp->deferred_qs, false);
 	rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake);
 }
 
@@ -735,32 +736,70 @@ EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
  */
 static void sync_rcu_exp_handler(void *info)
 {
-	struct rcu_data *rdp;
+	unsigned long flags;
 	struct rcu_state *rsp = info;
+	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
+	struct rcu_node *rnp = rdp->mynode;
 	struct task_struct *t = current;
 
 	/*
-	 * Within an RCU read-side critical section, request that the next
-	 * rcu_read_unlock() report.  Unless this RCU read-side critical
-	 * section has already blocked, in which case it is already set
-	 * up for the expedited grace period to wait on it.
+	 * First, the common case of not being in an RCU read-side
+	 * critical section.  If also enabled or idle, immediately
+	 * report the quiescent state, otherwise defer.
 	 */
-	if (t->rcu_read_lock_nesting > 0 &&
-	    !t->rcu_read_unlock_special.b.blocked) {
-		t->rcu_read_unlock_special.b.exp_need_qs = true;
+	if (!t->rcu_read_lock_nesting) {
+		if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
+		    rcu_dynticks_curr_cpu_in_eqs()) {
+			rcu_report_exp_rdp(rsp, rdp, true);
+		} else {
+			rdp->deferred_qs = true;
+			resched_cpu(rdp->cpu);
+		}
 		return;
 	}
 
 	/*
-	 * We are either exiting an RCU read-side critical section (negative
-	 * values of t->rcu_read_lock_nesting) or are not in one at all
-	 * (zero value of t->rcu_read_lock_nesting).  Or we are in an RCU
-	 * read-side critical section that blocked before this expedited
-	 * grace period started.  Either way, we can immediately report
-	 * the quiescent state.
+	 * Second, the less-common case of being in an RCU read-side
+	 * critical section.  In this case we can count on a future
+	 * rcu_read_unlock().  However, this rcu_read_unlock() might
+	 * execute on some other CPU, but in that case there will be
+	 * a future context switch.  Either way, if the expedited
+	 * grace period is still waiting on this CPU, set ->deferred_qs
+	 * so that the eventual quiescent state will be reported.
+	 * Note that there is a large group of race conditions that
+	 * can have caused this quiescent state to already have been
+	 * reported, so we really do need to check ->expmask.
 	 */
-	rdp = this_cpu_ptr(rsp->rda);
-	rcu_report_exp_rdp(rsp, rdp, true);
+	if (t->rcu_read_lock_nesting > 0) {
+		raw_spin_lock_irqsave_rcu_node(rnp, flags);
+		if (rnp->expmask & rdp->grpmask)
+			rdp->deferred_qs = true;
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+	}
+
+	/*
+	 * The final and least likely case is where the interrupted
+	 * code was just about to or just finished exiting the RCU-preempt
+	 * read-side critical section, and no, we can't tell which.
+	 * So either way, set ->deferred_qs to flag later code that
+	 * a quiescent state is required.
+	 *
+	 * If the CPU is fully enabled (or if some buggy RCU-preempt
+	 * read-side critical section is being used from idle), just
+	 * invoke rcu_preempt_defer_qs() to immediately report the
+	 * quiescent state.  We cannot use rcu_read_unlock_special()
+	 * because we are in an interrupt handler, which will cause that
+	 * function to take an early exit without doing anything.
+	 *
+	 * Otherwise, use resched_cpu() to force a context switch after
+	 * the CPU enables everything.
+	 */
+	rdp->deferred_qs = true;
+	if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
+	    WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs()))
+		rcu_preempt_deferred_qs(t);
+	else
+		resched_cpu(rdp->cpu);
 }
 
 /**
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index a97c20ea9bce..542791361908 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -371,6 +371,9 @@ static void rcu_preempt_note_context_switch(bool preempt)
 		 * behalf of preempted instance of __rcu_read_unlock().
 		 */
 		rcu_read_unlock_special(t);
+		rcu_preempt_deferred_qs(t);
+	} else {
+		rcu_preempt_deferred_qs(t);
 	}
 
 	/*
@@ -464,54 +467,51 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
 }
 
 /*
- * Handle special cases during rcu_read_unlock(), such as needing to
- * notify RCU core processing or task having blocked during the RCU
- * read-side critical section.
+ * Report deferred quiescent states.  The deferral time can
+ * be quite short, for example, in the case of the call from
+ * rcu_read_unlock_special().
  */
-static void rcu_read_unlock_special(struct task_struct *t)
+static void
+rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 {
 	bool empty_exp;
 	bool empty_norm;
 	bool empty_exp_now;
-	unsigned long flags;
 	struct list_head *np;
 	bool drop_boost_mutex = false;
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 	union rcu_special special;
 
-	/* NMI handlers cannot block and cannot safely manipulate state. */
-	if (in_nmi())
-		return;
-
-	local_irq_save(flags);
-
 	/*
 	 * If RCU core is waiting for this CPU to exit its critical section,
 	 * report the fact that it has exited.  Because irqs are disabled,
 	 * t->rcu_read_unlock_special cannot change.
 	 */
 	special = t->rcu_read_unlock_special;
+	rdp = this_cpu_ptr(rcu_state_p->rda);
+	if (!special.s && !rdp->deferred_qs) {
+		local_irq_restore(flags);
+		return;
+	}
 	if (special.b.need_qs) {
 		rcu_preempt_qs();
 		t->rcu_read_unlock_special.b.need_qs = false;
-		if (!t->rcu_read_unlock_special.s) {
+		if (!t->rcu_read_unlock_special.s && !rdp->deferred_qs) {
 			local_irq_restore(flags);
 			return;
 		}
 	}
 
 	/*
-	 * Respond to a request for an expedited grace period, but only if
-	 * we were not preempted, meaning that we were running on the same
-	 * CPU throughout.  If we were preempted, the exp_need_qs flag
-	 * would have been cleared at the time of the first preemption,
-	 * and the quiescent state would be reported when we were dequeued.
+	 * Respond to a request by an expedited grace period for a
+	 * quiescent state from this CPU.  Note that requests from
+	 * tasks are handled when removing the task from the
+	 * blocked-tasks list below.
 	 */
-	if (special.b.exp_need_qs) {
-		WARN_ON_ONCE(special.b.blocked);
+	if (special.b.exp_need_qs || rdp->deferred_qs) {
 		t->rcu_read_unlock_special.b.exp_need_qs = false;
-		rdp = this_cpu_ptr(rcu_state_p->rda);
+		rdp->deferred_qs = false;
 		rcu_report_exp_rdp(rcu_state_p, rdp, true);
 		if (!t->rcu_read_unlock_special.s) {
 			local_irq_restore(flags);
@@ -519,19 +519,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
 		}
 	}
 
-	/* Hardware IRQ handlers cannot block, complain if they get here. */
-	if (in_irq() || in_serving_softirq()) {
-		lockdep_rcu_suspicious(__FILE__, __LINE__,
-				       "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
-		pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n",
-			 t->rcu_read_unlock_special.s,
-			 t->rcu_read_unlock_special.b.blocked,
-			 t->rcu_read_unlock_special.b.exp_need_qs,
-			 t->rcu_read_unlock_special.b.need_qs);
-		local_irq_restore(flags);
-		return;
-	}
-
 	/* Clean up if blocked during RCU read-side critical section. */
 	if (special.b.blocked) {
 		t->rcu_read_unlock_special.b.blocked = false;
@@ -602,6 +589,72 @@ static void rcu_read_unlock_special(struct task_struct *t)
 	}
 }
 
+/*
+ * Is a deferred quiescent-state pending, and are we also not in
+ * an RCU read-side critical section?  It is the caller's responsibility
+ * to ensure it is otherwise safe to report any deferred quiescent
+ * states.  The reason for this is that it is safe to report a
+ * quiescent state during context switch even though preemption
+ * is disabled.  This function cannot be expected to understand these
+ * nuances, so the caller must handle them.
+ */
+static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
+{
+	return (this_cpu_ptr(&rcu_preempt_data)->deferred_qs ||
+		READ_ONCE(t->rcu_read_unlock_special.s)) &&
+	       !t->rcu_read_lock_nesting;
+}
+
+/*
+ * Report a deferred quiescent state if needed and safe to do so.
+ * As with rcu_preempt_need_deferred_qs(), "safe" involves only
+ * not being in an RCU read-side critical section.  The caller must
+ * evaluate safety in terms of interrupt, softirq, and preemption
+ * disabling.
+ */
+static void rcu_preempt_deferred_qs(struct task_struct *t)
+{
+	unsigned long flags;
+	bool couldrecurse = t->rcu_read_lock_nesting >= 0;
+
+	if (!rcu_preempt_need_deferred_qs(t))
+		return;
+	if (couldrecurse)
+		t->rcu_read_lock_nesting -= INT_MIN;
+	local_irq_save(flags);
+	rcu_preempt_deferred_qs_irqrestore(t, flags);
+	if (couldrecurse)
+		t->rcu_read_lock_nesting += INT_MIN;
+}
+
+/*
+ * Handle special cases during rcu_read_unlock(), such as needing to
+ * notify RCU core processing or task having blocked during the RCU
+ * read-side critical section.
+ */
+static void rcu_read_unlock_special(struct task_struct *t)
+{
+	unsigned long flags;
+	bool preempt_bh_were_disabled =
+			!!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK));
+	bool irqs_were_disabled;
+
+	/* NMI handlers cannot block and cannot safely manipulate state. */
+	if (in_nmi())
+		return;
+
+	local_irq_save(flags);
+	irqs_were_disabled = irqs_disabled_flags(flags);
+	if ((preempt_bh_were_disabled || irqs_were_disabled) &&
+	    t->rcu_read_unlock_special.b.blocked) {
+		/* Need to defer quiescent state until everything is enabled. */
+		raise_softirq_irqoff(RCU_SOFTIRQ);
+		local_irq_restore(flags);
+		return;
+	}
+	rcu_preempt_deferred_qs_irqrestore(t, flags);
+}
+
 /*
  * Dump detailed information for all tasks blocking the current RCU
  * grace period on the specified rcu_node structure.
@@ -737,10 +790,20 @@ static void rcu_preempt_check_callbacks(void)
 	struct rcu_state *rsp = &rcu_preempt_state;
 	struct task_struct *t = current;
 
-	if (t->rcu_read_lock_nesting == 0) {
-		rcu_preempt_qs();
+	if (t->rcu_read_lock_nesting > 0 ||
+	    (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
+		/* No QS, force context switch if deferred. */
+		if (rcu_preempt_need_deferred_qs(t))
+			resched_cpu(smp_processor_id());
+	} else if (rcu_preempt_need_deferred_qs(t)) {
+		rcu_preempt_deferred_qs(t); /* Report deferred QS. */
+		return;
+	} else if (!t->rcu_read_lock_nesting) {
+		rcu_preempt_qs(); /* Report immediate QS. */
 		return;
 	}
+
+	/* If GP is oldish, ask for help from rcu_read_unlock_special(). */
 	if (t->rcu_read_lock_nesting > 0 &&
 	    __this_cpu_read(rcu_data_p->core_needs_qs) &&
 	    __this_cpu_read(rcu_data_p->cpu_no_qs.b.norm) &&
@@ -859,6 +922,7 @@ void exit_rcu(void)
 	barrier();
 	t->rcu_read_unlock_special.b.blocked = true;
 	__rcu_read_unlock();
+	rcu_preempt_deferred_qs(current);
 }
 
 /*
@@ -940,6 +1004,16 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
 	return false;
 }
 
+/*
+ * Because there is no preemptible RCU, there can be no deferred quiescent
+ * states.
+ */
+static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
+{
+	return false;
+}
+static void rcu_preempt_deferred_qs(struct task_struct *t) { }
+
 /*
  * Because preemptible RCU does not exist, we never have to check for
  * tasks blocked within RCU read-side critical sections.
-- 
cgit v1.2.3


From fcc878e4dfb70128a73857c609d70570629b0d9e Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2018 07:39:59 -0700
Subject: rcu: Remove now-unused ->b.exp_need_qs field from the rcu_special
 union

The ->b.exp_need_qs field is now set only to false, so this commit
removes it.  The job this field used to do is now done by the rcu_data
structure's ->deferred_qs field, which is a consequence of a better
split between task-based (the rcu_node structure's ->exp_tasks field) and
CPU-based (the aforementioned rcu_data structure's ->deferred_qs field)
tracking of quiescent states for RCU-preempt expedited grace periods.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/sched.h    |  6 +-----
 kernel/rcu/tree_plugin.h | 13 ++++---------
 2 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 977cb57d7bc9..004ca21f7e80 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -571,12 +571,8 @@ union rcu_special {
 	struct {
 		u8			blocked;
 		u8			need_qs;
-		u8			exp_need_qs;
-
-		/* Otherwise the compiler can store garbage here: */
-		u8			pad;
 	} b; /* Bits. */
-	u32 s; /* Set of bits. */
+	u16 s; /* Set of bits. */
 };
 
 enum perf_event_task_context {
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 24c209676d20..527a52792dce 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -284,13 +284,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
 	 * no need to check for a subsequent expedited GP.  (Though we are
 	 * still in a quiescent state in any case.)
 	 */
-	if (blkd_state & RCU_EXP_BLKD &&
-	    t->rcu_read_unlock_special.b.exp_need_qs) {
-		t->rcu_read_unlock_special.b.exp_need_qs = false;
+	if (blkd_state & RCU_EXP_BLKD && rdp->deferred_qs)
 		rcu_report_exp_rdp(rdp->rsp, rdp, true);
-	} else {
-		WARN_ON_ONCE(t->rcu_read_unlock_special.b.exp_need_qs);
-	}
+	else
+		WARN_ON_ONCE(rdp->deferred_qs);
 }
 
 /*
@@ -509,9 +506,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 	 * tasks are handled when removing the task from the
 	 * blocked-tasks list below.
 	 */
-	if (special.b.exp_need_qs || rdp->deferred_qs) {
-		t->rcu_read_unlock_special.b.exp_need_qs = false;
-		rdp->deferred_qs = false;
+	if (rdp->deferred_qs) {
 		rcu_report_exp_rdp(rcu_state_p, rdp, true);
 		if (!t->rcu_read_unlock_special.s) {
 			local_irq_restore(flags);
-- 
cgit v1.2.3


From d28139c4e96713d52a300fb9036c5be2f45e0741 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2018 14:45:25 -0700
Subject: rcu: Apply RCU-bh QSes to RCU-sched and RCU-preempt when safe

One necessary step towards consolidating the three flavors of RCU is to
make sure that the resulting consolidated "one flavor to rule them all"
correctly handles networking denial-of-service attacks.  One thing that
allows RCU-bh to do so is that __do_softirq() invokes rcu_bh_qs() every
so often, and so something similar has to happen for consolidated RCU.

This must be done carefully.  For example, if a preemption-disabled
region of code takes an interrupt which does softirq processing before
returning, consolidated RCU must ignore the resulting rcu_bh_qs()
invocations -- preemption is still disabled, and that means an RCU
reader for the consolidated flavor.

This commit therefore creates a new rcu_softirq_qs() that is called only
from the ksoftirqd task, thus avoiding the interrupted-a-preempted-region
problem.  This new rcu_softirq_qs() function invokes rcu_sched_qs(),
rcu_preempt_qs(), and rcu_preempt_deferred_qs().  The latter call handles
any deferred quiescent states.

Note that __do_softirq() still invokes rcu_bh_qs().  It will continue to
do so until a later stage of cleanup when the RCU-bh flavor is removed.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Fix !SMP issue located by kbuild test robot. ]
---
 include/linux/rcutiny.h  | 5 +++++
 include/linux/rcutree.h  | 1 +
 kernel/rcu/tree.c        | 7 +++++++
 kernel/rcu/tree.h        | 1 +
 kernel/rcu/tree_plugin.h | 5 +++++
 kernel/softirq.c         | 2 ++
 6 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index f617ab19bb51..bcfbc40a7239 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -90,6 +90,11 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 	call_rcu(head, func);
 }
 
+static inline void rcu_softirq_qs(void)
+{
+	rcu_sched_qs();
+}
+
 #define rcu_note_context_switch(preempt) \
 	do { \
 		rcu_sched_qs(); \
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 914655848ef6..664b580695d6 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -30,6 +30,7 @@
 #ifndef __LINUX_RCUTREE_H
 #define __LINUX_RCUTREE_H
 
+void rcu_softirq_qs(void);
 void rcu_note_context_switch(bool preempt);
 int rcu_needs_cpu(u64 basem, u64 *nextevt);
 void rcu_cpu_stall_reset(void);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0b42249e2e40..cb35a417d947 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -255,6 +255,13 @@ void rcu_bh_qs(void)
 	}
 }
 
+void rcu_softirq_qs(void)
+{
+	rcu_sched_qs();
+	rcu_preempt_qs();
+	rcu_preempt_deferred_qs(current);
+}
+
 /*
  * Steal a bit from the bottom of ->dynticks for idle entry/exit
  * control.  Initially this is for TLB flushing.
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 025bd2e5592b..e02c882861eb 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -433,6 +433,7 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
 
 /* Forward declarations for rcutree_plugin.h */
 static void rcu_bootup_announce(void);
+static void rcu_preempt_qs(void);
 static void rcu_preempt_note_context_switch(bool preempt);
 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 527a52792dce..c686bf63bba5 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -974,6 +974,11 @@ static void __init rcu_bootup_announce(void)
 	rcu_bootup_announce_oddness();
 }
 
+/* Because preemptible RCU does not exist, we can ignore its QSes. */
+static void rcu_preempt_qs(void)
+{
+}
+
 /*
  * Because preemptible RCU does not exist, we never have to check for
  * CPUs being in quiescent states.
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 6f584861d329..ebd69694144a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -302,6 +302,8 @@ restart:
 	}
 
 	rcu_bh_qs();
+	if (__this_cpu_read(ksoftirqd) == current)
+		rcu_softirq_qs();
 	local_irq_disable();
 
 	pending = local_softirq_pending();
-- 
cgit v1.2.3


From 65cfe3583b612a22e12fba9a7bbd2d37ca5ad941 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 1 Jul 2018 07:40:52 -0700
Subject: rcu: Define RCU-bh update API in terms of RCU

Now that the main RCU API knows about softirq disabling and softirq's
quiescent states, the RCU-bh update code can be dispensed with.
This commit therefore removes the RCU-bh update-side implementation and
defines RCU-bh's update-side API in terms of that of either RCU-preempt or
RCU-sched, depending on the setting of the CONFIG_PREEMPT Kconfig option.

In kernels built with CONFIG_RCU_NOCB_CPU=y this has the knock-on effect
of reducing by one the number of rcuo kthreads per CPU.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h |  10 +++--
 include/linux/rcutiny.h  |  10 +++--
 include/linux/rcutree.h  |   8 +++-
 kernel/rcu/tiny.c        | 115 +++++++++--------------------------------------
 kernel/rcu/tree.c        |  97 ++++-----------------------------------
 kernel/rcu/tree_plugin.h |   1 -
 kernel/softirq.c         |   1 -
 7 files changed, 48 insertions(+), 194 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 75e5b393cf44..9ebfd436cec7 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -55,11 +55,15 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
 #define	call_rcu	call_rcu_sched
 #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
 
-void call_rcu_bh(struct rcu_head *head, rcu_callback_t func);
 void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
 void synchronize_sched(void);
 void rcu_barrier_tasks(void);
 
+static inline void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
+{
+	call_rcu(head, func);
+}
+
 #ifdef CONFIG_PREEMPT_RCU
 
 void __rcu_read_lock(void);
@@ -104,7 +108,6 @@ static inline int rcu_preempt_depth(void)
 void rcu_init(void);
 extern int rcu_scheduler_active __read_mostly;
 void rcu_sched_qs(void);
-void rcu_bh_qs(void);
 void rcu_check_callbacks(int user);
 void rcu_report_dead(unsigned int cpu);
 void rcutree_migrate_callbacks(int cpu);
@@ -326,8 +329,7 @@ static inline void rcu_preempt_sleep_check(void) { }
  * and rcu_assign_pointer().  Some of these could be folded into their
  * callers, but they are left separate in order to ease introduction of
  * multiple flavors of pointers to match the multiple flavors of RCU
- * (e.g., __rcu_bh, * __rcu_sched, and __srcu), should this make sense in
- * the future.
+ * (e.g., __rcu_sched, and __srcu), should this make sense in the future.
  */
 
 #ifdef __CHECKER__
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index bcfbc40a7239..ac26c27ccde8 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -56,19 +56,23 @@ static inline void cond_synchronize_sched(unsigned long oldstate)
 	might_sleep();
 }
 
-extern void rcu_barrier_bh(void);
-extern void rcu_barrier_sched(void);
-
 static inline void synchronize_rcu_expedited(void)
 {
 	synchronize_sched();	/* Only one CPU, so pretty fast anyway!!! */
 }
 
+extern void rcu_barrier_sched(void);
+
 static inline void rcu_barrier(void)
 {
 	rcu_barrier_sched();  /* Only one CPU, so only one list of callbacks! */
 }
 
+static inline void rcu_barrier_bh(void)
+{
+	rcu_barrier();
+}
+
 static inline void synchronize_rcu_bh(void)
 {
 	synchronize_sched();
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 664b580695d6..c789c302a2c9 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -45,7 +45,11 @@ static inline void rcu_virt_note_context_switch(int cpu)
 	rcu_note_context_switch(false);
 }
 
-void synchronize_rcu_bh(void);
+static inline void synchronize_rcu_bh(void)
+{
+	synchronize_rcu();
+}
+
 void synchronize_sched_expedited(void);
 void synchronize_rcu_expedited(void);
 
@@ -69,7 +73,7 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
  */
 static inline void synchronize_rcu_bh_expedited(void)
 {
-	synchronize_sched_expedited();
+	synchronize_rcu_expedited();
 }
 
 void rcu_barrier(void);
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index befc9321a89c..cadcf63c4889 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -51,64 +51,22 @@ static struct rcu_ctrlblk rcu_sched_ctrlblk = {
 	.curtail	= &rcu_sched_ctrlblk.rcucblist,
 };
 
-static struct rcu_ctrlblk rcu_bh_ctrlblk = {
-	.donetail	= &rcu_bh_ctrlblk.rcucblist,
-	.curtail	= &rcu_bh_ctrlblk.rcucblist,
-};
-
-void rcu_barrier_bh(void)
-{
-	wait_rcu_gp(call_rcu_bh);
-}
-EXPORT_SYMBOL(rcu_barrier_bh);
-
 void rcu_barrier_sched(void)
 {
 	wait_rcu_gp(call_rcu_sched);
 }
 EXPORT_SYMBOL(rcu_barrier_sched);
 
-/*
- * Helper function for rcu_sched_qs() and rcu_bh_qs().
- * Also irqs are disabled to avoid confusion due to interrupt handlers
- * invoking call_rcu().
- */
-static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
-{
-	if (rcp->donetail != rcp->curtail) {
-		rcp->donetail = rcp->curtail;
-		return 1;
-	}
-
-	return 0;
-}
-
-/*
- * Record an rcu quiescent state.  And an rcu_bh quiescent state while we
- * are at it, given that any rcu quiescent state is also an rcu_bh
- * quiescent state.  Use "+" instead of "||" to defeat short circuiting.
- */
+/* Record an rcu quiescent state.  */
 void rcu_sched_qs(void)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
-	if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
-	    rcu_qsctr_help(&rcu_bh_ctrlblk))
-		raise_softirq(RCU_SOFTIRQ);
-	local_irq_restore(flags);
-}
-
-/*
- * Record an rcu_bh quiescent state.
- */
-void rcu_bh_qs(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	if (rcu_qsctr_help(&rcu_bh_ctrlblk))
+	if (rcu_sched_ctrlblk.donetail != rcu_sched_ctrlblk.curtail) {
+		rcu_sched_ctrlblk.donetail = rcu_sched_ctrlblk.curtail;
 		raise_softirq(RCU_SOFTIRQ);
+	}
 	local_irq_restore(flags);
 }
 
@@ -122,32 +80,27 @@ void rcu_check_callbacks(int user)
 {
 	if (user)
 		rcu_sched_qs();
-	if (user || !in_softirq())
-		rcu_bh_qs();
 }
 
-/*
- * Invoke the RCU callbacks on the specified rcu_ctrlkblk structure
- * whose grace period has elapsed.
- */
-static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
+/* Invoke the RCU callbacks whose grace period has elapsed.  */
+static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
 {
 	struct rcu_head *next, *list;
 	unsigned long flags;
 
 	/* Move the ready-to-invoke callbacks to a local list. */
 	local_irq_save(flags);
-	if (rcp->donetail == &rcp->rcucblist) {
+	if (rcu_sched_ctrlblk.donetail == &rcu_sched_ctrlblk.rcucblist) {
 		/* No callbacks ready, so just leave. */
 		local_irq_restore(flags);
 		return;
 	}
-	list = rcp->rcucblist;
-	rcp->rcucblist = *rcp->donetail;
-	*rcp->donetail = NULL;
-	if (rcp->curtail == rcp->donetail)
-		rcp->curtail = &rcp->rcucblist;
-	rcp->donetail = &rcp->rcucblist;
+	list = rcu_sched_ctrlblk.rcucblist;
+	rcu_sched_ctrlblk.rcucblist = *rcu_sched_ctrlblk.donetail;
+	*rcu_sched_ctrlblk.donetail = NULL;
+	if (rcu_sched_ctrlblk.curtail == rcu_sched_ctrlblk.donetail)
+		rcu_sched_ctrlblk.curtail = &rcu_sched_ctrlblk.rcucblist;
+	rcu_sched_ctrlblk.donetail = &rcu_sched_ctrlblk.rcucblist;
 	local_irq_restore(flags);
 
 	/* Invoke the callbacks on the local list. */
@@ -162,19 +115,13 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 	}
 }
 
-static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
-{
-	__rcu_process_callbacks(&rcu_sched_ctrlblk);
-	__rcu_process_callbacks(&rcu_bh_ctrlblk);
-}
-
 /*
  * Wait for a grace period to elapse.  But it is illegal to invoke
  * synchronize_sched() from within an RCU read-side critical section.
  * Therefore, any legal call to synchronize_sched() is a quiescent
  * state, and so on a UP system, synchronize_sched() need do nothing.
- * Ditto for synchronize_rcu_bh().  (But Lai Jiangshan points out the
- * benefits of doing might_sleep() to reduce latency.)
+ * (But Lai Jiangshan points out the benefits of doing might_sleep()
+ * to reduce latency.)
  *
  * Cool, huh?  (Due to Josh Triplett.)
  */
@@ -188,11 +135,11 @@ void synchronize_sched(void)
 EXPORT_SYMBOL_GPL(synchronize_sched);
 
 /*
- * Helper function for call_rcu() and call_rcu_bh().
+ * Post an RCU callback to be invoked after the end of an RCU-sched grace
+ * period.  But since we have but one CPU, that would be after any
+ * quiescent state.
  */
-static void __call_rcu(struct rcu_head *head,
-		       rcu_callback_t func,
-		       struct rcu_ctrlblk *rcp)
+void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
 {
 	unsigned long flags;
 
@@ -201,8 +148,8 @@ static void __call_rcu(struct rcu_head *head,
 	head->next = NULL;
 
 	local_irq_save(flags);
-	*rcp->curtail = head;
-	rcp->curtail = &head->next;
+	*rcu_sched_ctrlblk.curtail = head;
+	rcu_sched_ctrlblk.curtail = &head->next;
 	local_irq_restore(flags);
 
 	if (unlikely(is_idle_task(current))) {
@@ -210,28 +157,8 @@ static void __call_rcu(struct rcu_head *head,
 		resched_cpu(0);
 	}
 }
-
-/*
- * Post an RCU callback to be invoked after the end of an RCU-sched grace
- * period.  But since we have but one CPU, that would be after any
- * quiescent state.
- */
-void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
-{
-	__call_rcu(head, func, &rcu_sched_ctrlblk);
-}
 EXPORT_SYMBOL_GPL(call_rcu_sched);
 
-/*
- * Post an RCU bottom-half callback to be invoked after any subsequent
- * quiescent state.
- */
-void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
-{
-	__call_rcu(head, func, &rcu_bh_ctrlblk);
-}
-EXPORT_SYMBOL_GPL(call_rcu_bh);
-
 void __init rcu_init(void)
 {
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index cb35a417d947..aedf81a0abd8 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -108,7 +108,6 @@ struct rcu_state sname##_state = { \
 }
 
 RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
-RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
 
 static struct rcu_state *const rcu_state_p;
 LIST_HEAD(rcu_struct_flavors);
@@ -244,17 +243,6 @@ void rcu_sched_qs(void)
 			   this_cpu_ptr(&rcu_sched_data), true);
 }
 
-void rcu_bh_qs(void)
-{
-	RCU_LOCKDEP_WARN(preemptible(), "rcu_bh_qs() invoked with preemption enabled!!!");
-	if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) {
-		trace_rcu_grace_period(TPS("rcu_bh"),
-				       __this_cpu_read(rcu_bh_data.gp_seq),
-				       TPS("cpuqs"));
-		__this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false);
-	}
-}
-
 void rcu_softirq_qs(void)
 {
 	rcu_sched_qs();
@@ -581,7 +569,7 @@ EXPORT_SYMBOL_GPL(rcu_sched_get_gp_seq);
  */
 unsigned long rcu_bh_get_gp_seq(void)
 {
-	return READ_ONCE(rcu_bh_state.gp_seq);
+	return READ_ONCE(rcu_state_p->gp_seq);
 }
 EXPORT_SYMBOL_GPL(rcu_bh_get_gp_seq);
 
@@ -621,7 +609,7 @@ EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
  */
 void rcu_bh_force_quiescent_state(void)
 {
-	force_quiescent_state(&rcu_bh_state);
+	force_quiescent_state(rcu_state_p);
 }
 EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
 
@@ -680,10 +668,8 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
 
 	switch (test_type) {
 	case RCU_FLAVOR:
-		rsp = rcu_state_p;
-		break;
 	case RCU_BH_FLAVOR:
-		rsp = &rcu_bh_state;
+		rsp = rcu_state_p;
 		break;
 	case RCU_SCHED_FLAVOR:
 		rsp = &rcu_sched_state;
@@ -2673,26 +2659,15 @@ void rcu_check_callbacks(int user)
 		 * nested interrupt.  In this case, the CPU is in
 		 * a quiescent state, so note it.
 		 *
-		 * No memory barrier is required here because both
-		 * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local
-		 * variables that other CPUs neither access nor modify,
-		 * at least not while the corresponding CPU is online.
+		 * No memory barrier is required here because
+		 * rcu_sched_qs() references only CPU-local variables
+		 * that other CPUs neither access nor modify, at least
+		 * not while the corresponding CPU is online.
 		 */
 
 		rcu_sched_qs();
-		rcu_bh_qs();
 		rcu_note_voluntary_context_switch(current);
 
-	} else if (!in_softirq()) {
-
-		/*
-		 * Get here if this CPU did not take its interrupt from
-		 * softirq, in other words, if it is not interrupting
-		 * a rcu_bh read-side critical section.  This is an _bh
-		 * critical section, so note it.
-		 */
-
-		rcu_bh_qs();
 	}
 	rcu_preempt_check_callbacks();
 	if (rcu_pending())
@@ -3079,34 +3054,6 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL_GPL(call_rcu_sched);
 
-/**
- * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
- * read-side critical sections have completed. call_rcu_bh() assumes
- * that the read-side critical sections end on completion of a softirq
- * handler. This means that read-side critical sections in process
- * context must not be interrupted by softirqs. This interface is to be
- * used when most of the read-side critical sections are in softirq context.
- * RCU read-side critical sections are delimited by:
- *
- * - rcu_read_lock() and  rcu_read_unlock(), if in interrupt context, OR
- * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
- *
- * These may be nested.
- *
- * See the description of call_rcu() for more detailed information on
- * memory ordering guarantees.
- */
-void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
-{
-	__call_rcu(head, func, &rcu_bh_state, -1, 0);
-}
-EXPORT_SYMBOL_GPL(call_rcu_bh);
-
 /*
  * Queue an RCU callback for lazy invocation after a grace period.
  * This will likely be later named something like "call_rcu_lazy()",
@@ -3191,33 +3138,6 @@ void synchronize_sched(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_sched);
 
-/**
- * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
- *
- * Control will return to the caller some time after a full rcu_bh grace
- * period has elapsed, in other words after all currently executing rcu_bh
- * read-side critical sections have completed.  RCU read-side critical
- * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
- * and may be nested.
- *
- * See the description of synchronize_sched() for more detailed information
- * on memory ordering guarantees.
- */
-void synchronize_rcu_bh(void)
-{
-	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
-			 lock_is_held(&rcu_lock_map) ||
-			 lock_is_held(&rcu_sched_lock_map),
-			 "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
-	if (rcu_blocking_is_gp())
-		return;
-	if (rcu_gp_is_expedited())
-		synchronize_rcu_bh_expedited();
-	else
-		wait_rcu_gp(call_rcu_bh);
-}
-EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
-
 /**
  * get_state_synchronize_rcu - Snapshot current RCU state
  *
@@ -3529,7 +3449,7 @@ static void _rcu_barrier(struct rcu_state *rsp)
  */
 void rcu_barrier_bh(void)
 {
-	_rcu_barrier(&rcu_bh_state);
+	_rcu_barrier(rcu_state_p);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_bh);
 
@@ -4180,7 +4100,6 @@ void __init rcu_init(void)
 
 	rcu_bootup_announce();
 	rcu_init_geometry();
-	rcu_init_one(&rcu_bh_state);
 	rcu_init_one(&rcu_sched_state);
 	if (dump_tree)
 		rcu_dump_rcu_node_tree(&rcu_sched_state);
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 0d7107fb3dec..1ff742a3c8d1 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1320,7 +1320,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 static void rcu_kthread_do_work(void)
 {
 	rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
-	rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
 	rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
 }
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index ebd69694144a..7a0720a20003 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -301,7 +301,6 @@ restart:
 		pending >>= softirq_bit;
 	}
 
-	rcu_bh_qs();
 	if (__this_cpu_read(ksoftirqd) == current)
 		rcu_softirq_qs();
 	local_irq_disable();
-- 
cgit v1.2.3


From 82fcecfa81855924cc69f3078113cf63dd6c2964 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 2 Jul 2018 09:04:27 -0700
Subject: rcu: Update comments and help text for no more RCU-bh updaters

This commit updates comments and help text to account for the fact that
RCU-bh update-side functions are now simple wrappers for their RCU or
RCU-sched counterparts.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h      | 12 ++++--------
 include/linux/rcupdate_wait.h |  6 +++---
 include/linux/rcutree.h       | 14 ++------------
 kernel/rcu/Kconfig            | 10 +++++-----
 kernel/rcu/tree.c             | 17 +++++++++--------
 kernel/rcu/update.c           |  2 +-
 6 files changed, 24 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9ebfd436cec7..8d5740edd63c 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -688,14 +688,10 @@ static inline void rcu_read_unlock(void)
 /**
  * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section
  *
- * This is equivalent of rcu_read_lock(), but to be used when updates
- * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since
- * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a
- * softirq handler to be a quiescent state, a process in RCU read-side
- * critical section must be protected by disabling softirqs. Read-side
- * critical sections in interrupt context can use just rcu_read_lock(),
- * though this should at least be commented to avoid confusing people
- * reading the code.
+ * This is equivalent of rcu_read_lock(), but also disables softirqs.
+ * Note that synchronize_rcu() and friends may be used for the update
+ * side, although synchronize_rcu_bh() is available as a wrapper in the
+ * short term.  Longer term, the _bh update-side API will be eliminated.
  *
  * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh()
  * must occur in the same context, for example, it is illegal to invoke
diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h
index 57f371344152..bc104699560e 100644
--- a/include/linux/rcupdate_wait.h
+++ b/include/linux/rcupdate_wait.h
@@ -36,13 +36,13 @@ do {									\
  * @...: List of call_rcu() functions for the flavors to wait on.
  *
  * This macro waits concurrently for multiple flavors of RCU grace periods.
- * For example, synchronize_rcu_mult(call_rcu, call_rcu_bh) would wait
- * on concurrent RCU and RCU-bh grace periods.  Waiting on a give SRCU
+ * For example, synchronize_rcu_mult(call_rcu, call_rcu_sched) would wait
+ * on concurrent RCU and RCU-sched grace periods.  Waiting on a give SRCU
  * domain requires you to write a wrapper function for that SRCU domain's
  * call_srcu() function, supplying the corresponding srcu_struct.
  *
  * If Tiny RCU, tell _wait_rcu_gp() not to bother waiting for RCU
- * or RCU-bh, given that anywhere synchronize_rcu_mult() can be called
+ * or RCU-sched, given that anywhere synchronize_rcu_mult() can be called
  * is automatically a grace period.
  */
 #define synchronize_rcu_mult(...) \
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index c789c302a2c9..f7a41323aa54 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -58,18 +58,8 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
 /**
  * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period
  *
- * Wait for an RCU-bh grace period to elapse, but use a "big hammer"
- * approach to force the grace period to end quickly.  This consumes
- * significant time on all CPUs and is unfriendly to real-time workloads,
- * so is thus not recommended for any sort of common-case code.  In fact,
- * if you are using synchronize_rcu_bh_expedited() in a loop, please
- * restructure your code to batch your updates, and then use a single
- * synchronize_rcu_bh() instead.
- *
- * Note that it is illegal to call this function while holding any lock
- * that is acquired by a CPU-hotplug notifier.  And yes, it is also illegal
- * to call this function from a CPU-hotplug notifier.  Failing to observe
- * these restriction will result in deadlock.
+ * This is a transitional API and will soon be removed, with all
+ * callers converted to synchronize_rcu_expedited().
  */
 static inline void synchronize_rcu_bh_expedited(void)
 {
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 9210379c0353..a0b7f0103ca9 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -229,11 +229,11 @@ config RCU_NOCB_CPU
 	  CPUs specified at boot time by the rcu_nocbs parameter.
 	  For each such CPU, a kthread ("rcuox/N") will be created to
 	  invoke callbacks, where the "N" is the CPU being offloaded,
-	  and where the "x" is "b" for RCU-bh, "p" for RCU-preempt, and
-	  "s" for RCU-sched.  Nothing prevents this kthread from running
-	  on the specified CPUs, but (1) the kthreads may be preempted
-	  between each callback, and (2) affinity or cgroups can be used
-	  to force the kthreads to run on whatever set of CPUs is desired.
+	  and where the "p" for RCU-preempt and "s" for RCU-sched.
+	  Nothing prevents this kthread from running on the specified
+	  CPUs, but (1) the kthreads may be preempted between each
+	  callback, and (2) affinity or cgroups can be used to force
+	  the kthreads to run on whatever set of CPUs is desired.
 
 	  Say Y here if you want to help to debug reduced OS jitter.
 	  Say N here if you are unsure.
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index aedf81a0abd8..158c58d47b07 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -565,7 +565,8 @@ unsigned long rcu_sched_get_gp_seq(void)
 EXPORT_SYMBOL_GPL(rcu_sched_get_gp_seq);
 
 /*
- * Return the number of RCU-bh GPs completed thus far for debug & stats.
+ * Return the number of RCU GPs completed thus far for debug & stats.
+ * This is a transitional API and will soon be removed.
  */
 unsigned long rcu_bh_get_gp_seq(void)
 {
@@ -3069,13 +3070,13 @@ void kfree_call_rcu(struct rcu_head *head,
 EXPORT_SYMBOL_GPL(kfree_call_rcu);
 
 /*
- * Because a context switch is a grace period for RCU-sched and RCU-bh,
- * any blocking grace-period wait automatically implies a grace period
- * if there is only one CPU online at any point time during execution
- * of either synchronize_sched() or synchronize_rcu_bh().  It is OK to
- * occasionally incorrectly indicate that there are multiple CPUs online
- * when there was in fact only one the whole time, as this just adds
- * some overhead: RCU still operates correctly.
+ * Because a context switch is a grace period for RCU-sched, any blocking
+ * grace-period wait automatically implies a grace period if there
+ * is only one CPU online at any point time during execution of either
+ * synchronize_sched() or synchronize_rcu_bh().  It is OK to occasionally
+ * incorrectly indicate that there are multiple CPUs online when there
+ * was in fact only one the whole time, as this just adds some overhead:
+ * RCU still operates correctly.
  */
 static int rcu_blocking_is_gp(void)
 {
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 39cb23d22109..9ea87d0aa386 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -298,7 +298,7 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_held);
  *
  * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
  *
- * Note that rcu_read_lock() is disallowed if the CPU is either idle or
+ * Note that rcu_read_lock_bh() is disallowed if the CPU is either idle or
  * offline from an RCU perspective, so check for those as well.
  */
 int rcu_read_lock_bh_held(void)
-- 
cgit v1.2.3


From 45975c7d21a1c0aba97e3d8007e2a7c123145748 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 2 Jul 2018 14:30:37 -0700
Subject: rcu: Define RCU-sched API in terms of RCU for Tree RCU PREEMPT builds

Now that RCU-preempt knows about preemption disabling, its implementation
of synchronize_rcu() works for synchronize_sched(), and likewise for the
other RCU-sched update-side API members.  This commit therefore confines
the RCU-sched update-side code to CONFIG_PREEMPT=n builds, and defines
RCU-sched's update-side API members in terms of those of RCU-preempt.

This means that any given build of the Linux kernel has only one
update-side flavor of RCU, namely RCU-preempt for CONFIG_PREEMPT=y builds
and RCU-sched for CONFIG_PREEMPT=n builds.  This in turn means that kernels
built with CONFIG_RCU_NOCB_CPU=y have only one rcuo kthread per CPU.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
---
 include/linux/rcupdate.h |  14 +--
 include/linux/rcutiny.h  |   7 ++
 include/linux/rcutree.h  |   7 +-
 kernel/rcu/tree.c        | 301 ++++++++++++++++-------------------------------
 kernel/rcu/tree.h        |   9 +-
 kernel/rcu/tree_exp.h    | 153 ++++++++++++------------
 kernel/rcu/tree_plugin.h | 297 +++++++++++++++++-----------------------------
 7 files changed, 308 insertions(+), 480 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 8d5740edd63c..94474bb6b5c4 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -49,11 +49,11 @@
 
 /* Exported common interfaces */
 
-#ifdef CONFIG_PREEMPT_RCU
-void call_rcu(struct rcu_head *head, rcu_callback_t func);
-#else /* #ifdef CONFIG_PREEMPT_RCU */
+#ifdef CONFIG_TINY_RCU
 #define	call_rcu	call_rcu_sched
-#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+#else
+void call_rcu(struct rcu_head *head, rcu_callback_t func);
+#endif
 
 void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
 void synchronize_sched(void);
@@ -92,11 +92,6 @@ static inline void __rcu_read_unlock(void)
 		preempt_enable();
 }
 
-static inline void synchronize_rcu(void)
-{
-	synchronize_sched();
-}
-
 static inline int rcu_preempt_depth(void)
 {
 	return 0;
@@ -107,7 +102,6 @@ static inline int rcu_preempt_depth(void)
 /* Internal to kernel */
 void rcu_init(void);
 extern int rcu_scheduler_active __read_mostly;
-void rcu_sched_qs(void);
 void rcu_check_callbacks(int user);
 void rcu_report_dead(unsigned int cpu);
 void rcutree_migrate_callbacks(int cpu);
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index ac26c27ccde8..df2c0895c5e7 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -36,6 +36,11 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
 /* Never flag non-existent other CPUs! */
 static inline bool rcu_eqs_special_set(int cpu) { return false; }
 
+static inline void synchronize_rcu(void)
+{
+	synchronize_sched();
+}
+
 static inline unsigned long get_state_synchronize_rcu(void)
 {
 	return 0;
@@ -94,6 +99,8 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 	call_rcu(head, func);
 }
 
+void rcu_sched_qs(void);
+
 static inline void rcu_softirq_qs(void)
 {
 	rcu_sched_qs();
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index f7a41323aa54..0c44720f0e84 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -45,14 +45,19 @@ static inline void rcu_virt_note_context_switch(int cpu)
 	rcu_note_context_switch(false);
 }
 
+void synchronize_rcu(void);
 static inline void synchronize_rcu_bh(void)
 {
 	synchronize_rcu();
 }
 
-void synchronize_sched_expedited(void);
 void synchronize_rcu_expedited(void);
 
+static inline void synchronize_sched_expedited(void)
+{
+	synchronize_rcu_expedited();
+}
+
 void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
 
 /**
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 61c15de884b0..5f79315f094e 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -92,24 +92,29 @@ static const char *tp_##sname##_varname __used __tracepoint_string = sname##_var
 
 #define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
 DEFINE_RCU_TPS(sname) \
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \
-struct rcu_state sname##_state = { \
-	.level = { &sname##_state.node[0] }, \
-	.rda = &sname##_data, \
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data); \
+struct rcu_state rcu_state = { \
+	.level = { &rcu_state.node[0] }, \
+	.rda = &rcu_data, \
 	.call = cr, \
 	.gp_state = RCU_GP_IDLE, \
 	.gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT, \
-	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
+	.barrier_mutex = __MUTEX_INITIALIZER(rcu_state.barrier_mutex), \
 	.name = RCU_STATE_NAME(sname), \
 	.abbr = sabbr, \
-	.exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \
-	.exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \
-	.ofl_lock = __SPIN_LOCK_UNLOCKED(sname##_state.ofl_lock), \
+	.exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex), \
+	.exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex), \
+	.ofl_lock = __SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock), \
 }
 
-RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
+#ifdef CONFIG_PREEMPT_RCU
+RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
+#else
+RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu);
+#endif
 
-static struct rcu_state *const rcu_state_p;
+static struct rcu_state *const rcu_state_p = &rcu_state;
+static struct rcu_data __percpu *const rcu_data_p = &rcu_data;
 LIST_HEAD(rcu_struct_flavors);
 
 /* Dump rcu_node combining tree at boot to verify correct setup. */
@@ -220,31 +225,9 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
 	return rcu_seq_state(rcu_seq_current(&rsp->gp_seq));
 }
 
-/*
- * Note a quiescent state.  Because we do not need to know
- * how many quiescent states passed, just if there was at least
- * one since the start of the grace period, this just sets a flag.
- * The caller must have disabled preemption.
- */
-void rcu_sched_qs(void)
-{
-	RCU_LOCKDEP_WARN(preemptible(), "rcu_sched_qs() invoked with preemption enabled!!!");
-	if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s))
-		return;
-	trace_rcu_grace_period(TPS("rcu_sched"),
-			       __this_cpu_read(rcu_sched_data.gp_seq),
-			       TPS("cpuqs"));
-	__this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false);
-	if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
-		return;
-	__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false);
-	rcu_report_exp_rdp(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
-}
-
 void rcu_softirq_qs(void)
 {
-	rcu_sched_qs();
-	rcu_preempt_qs();
+	rcu_qs();
 	rcu_preempt_deferred_qs(current);
 }
 
@@ -418,31 +401,18 @@ static void rcu_momentary_dyntick_idle(void)
 	rcu_preempt_deferred_qs(current);
 }
 
-/*
- * Note a context switch.  This is a quiescent state for RCU-sched,
- * and requires special handling for preemptible RCU.
- * The caller must have disabled interrupts.
+/**
+ * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
+ *
+ * If the current CPU is idle or running at a first-level (not nested)
+ * interrupt from idle, return true.  The caller must have at least
+ * disabled preemption.
  */
-void rcu_note_context_switch(bool preempt)
+static int rcu_is_cpu_rrupt_from_idle(void)
 {
-	barrier(); /* Avoid RCU read-side critical sections leaking down. */
-	trace_rcu_utilization(TPS("Start context switch"));
-	rcu_sched_qs();
-	rcu_preempt_note_context_switch(preempt);
-	/* Load rcu_urgent_qs before other flags. */
-	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs)))
-		goto out;
-	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
-	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
-		rcu_momentary_dyntick_idle();
-	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
-	if (!preempt)
-		rcu_tasks_qs(current);
-out:
-	trace_rcu_utilization(TPS("End context switch"));
-	barrier(); /* Avoid RCU read-side critical sections leaking up. */
+	return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 0 &&
+	       __this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1;
 }
-EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
 /*
  * Register a quiescent state for all RCU flavors.  If there is an
@@ -476,8 +446,8 @@ void rcu_all_qs(void)
 		rcu_momentary_dyntick_idle();
 		local_irq_restore(flags);
 	}
-	if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)))
-		rcu_sched_qs();
+	if (unlikely(raw_cpu_read(rcu_data.cpu_no_qs.b.exp)))
+		rcu_qs();
 	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
 	barrier(); /* Avoid RCU read-side critical sections leaking up. */
 	preempt_enable();
@@ -558,7 +528,7 @@ EXPORT_SYMBOL_GPL(rcu_get_gp_seq);
  */
 unsigned long rcu_sched_get_gp_seq(void)
 {
-	return READ_ONCE(rcu_sched_state.gp_seq);
+	return rcu_get_gp_seq();
 }
 EXPORT_SYMBOL_GPL(rcu_sched_get_gp_seq);
 
@@ -590,7 +560,7 @@ EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);
  */
 unsigned long rcu_exp_batches_completed_sched(void)
 {
-	return rcu_sched_state.expedited_sequence;
+	return rcu_state.expedited_sequence;
 }
 EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched);
 
@@ -617,7 +587,7 @@ EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
  */
 void rcu_sched_force_quiescent_state(void)
 {
-	force_quiescent_state(&rcu_sched_state);
+	rcu_force_quiescent_state();
 }
 EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
 
@@ -668,10 +638,8 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
 	switch (test_type) {
 	case RCU_FLAVOR:
 	case RCU_BH_FLAVOR:
-		rsp = rcu_state_p;
-		break;
 	case RCU_SCHED_FLAVOR:
-		rsp = &rcu_sched_state;
+		rsp = rcu_state_p;
 		break;
 	default:
 		break;
@@ -1107,19 +1075,6 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
 
 #endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
 
-/**
- * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
- *
- * If the current CPU is idle or running at a first-level (not nested)
- * interrupt from idle, return true.  The caller must have at least
- * disabled preemption.
- */
-static int rcu_is_cpu_rrupt_from_idle(void)
-{
-	return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 0 &&
-	       __this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1;
-}
-
 /*
  * We are reporting a quiescent state on behalf of some other CPU, so
  * it is our responsibility to check for and handle potential overflow
@@ -2364,7 +2319,7 @@ rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
 	struct rcu_node *rnp_p;
 
 	raw_lockdep_assert_held_rcu_node(rnp);
-	if (WARN_ON_ONCE(rcu_state_p == &rcu_sched_state) ||
+	if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)) ||
 	    WARN_ON_ONCE(rsp != rcu_state_p) ||
 	    WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)) ||
 	    rnp->qsmask != 0) {
@@ -2650,25 +2605,7 @@ void rcu_check_callbacks(int user)
 {
 	trace_rcu_utilization(TPS("Start scheduler-tick"));
 	increment_cpu_stall_ticks();
-	if (user || rcu_is_cpu_rrupt_from_idle()) {
-
-		/*
-		 * Get here if this CPU took its interrupt from user
-		 * mode or from the idle loop, and if this is not a
-		 * nested interrupt.  In this case, the CPU is in
-		 * a quiescent state, so note it.
-		 *
-		 * No memory barrier is required here because
-		 * rcu_sched_qs() references only CPU-local variables
-		 * that other CPUs neither access nor modify, at least
-		 * not while the corresponding CPU is online.
-		 */
-
-		rcu_sched_qs();
-		rcu_note_voluntary_context_switch(current);
-
-	}
-	rcu_preempt_check_callbacks();
+	rcu_flavor_check_callbacks(user);
 	if (rcu_pending())
 		invoke_rcu_core();
 
@@ -2694,7 +2631,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
 		mask = 0;
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 		if (rnp->qsmask == 0) {
-			if (rcu_state_p == &rcu_sched_state ||
+			if (!IS_ENABLED(CONFIG_PREEMPT) ||
 			    rsp != rcu_state_p ||
 			    rcu_preempt_blocked_readers_cgp(rnp)) {
 				/*
@@ -3028,28 +2965,56 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
 }
 
 /**
- * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
+ * call_rcu() - Queue an RCU callback for invocation after a grace period.
  * @head: structure to be used for queueing the RCU updates.
  * @func: actual callback function to be invoked after the grace period
  *
  * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
- * read-side critical sections have completed. call_rcu_sched() assumes
- * that the read-side critical sections end on enabling of preemption
- * or on voluntary preemption.
- * RCU read-side critical sections are delimited by:
- *
- * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR
- * - anything that disables preemption.
- *
- *  These may be nested.
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed.  However, the callback function
+ * might well execute concurrently with RCU read-side critical sections
+ * that started after call_rcu() was invoked.  RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(), and
+ * may be nested.  In addition, regions of code across which interrupts,
+ * preemption, or softirqs have been disabled also serve as RCU read-side
+ * critical sections.  This includes hardware interrupt handlers, softirq
+ * handlers, and NMI handlers.
+ *
+ * Note that all CPUs must agree that the grace period extended beyond
+ * all pre-existing RCU read-side critical section.  On systems with more
+ * than one CPU, this means that when "func()" is invoked, each CPU is
+ * guaranteed to have executed a full memory barrier since the end of its
+ * last RCU read-side critical section whose beginning preceded the call
+ * to call_rcu().  It also means that each CPU executing an RCU read-side
+ * critical section that continues beyond the start of "func()" must have
+ * executed a memory barrier after the call_rcu() but before the beginning
+ * of that RCU read-side critical section.  Note that these guarantees
+ * include CPUs that are offline, idle, or executing in user mode, as
+ * well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
+ * resulting RCU callback function "func()", then both CPU A and CPU B are
+ * guaranteed to execute a full memory barrier during the time interval
+ * between the call to call_rcu() and the invocation of "func()" -- even
+ * if CPU A and CPU B are the same CPU (but again only if the system has
+ * more than one CPU).
+ */
+void call_rcu(struct rcu_head *head, rcu_callback_t func)
+{
+	__call_rcu(head, func, rcu_state_p, -1, 0);
+}
+EXPORT_SYMBOL_GPL(call_rcu);
+
+/**
+ * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
+ * @head: structure to be used for queueing the RCU updates.
+ * @func: actual callback function to be invoked after the grace period
  *
- * See the description of call_rcu() for more detailed information on
- * memory ordering guarantees.
+ * This is transitional.
  */
 void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
 {
-	__call_rcu(head, func, &rcu_sched_state, -1, 0);
+	call_rcu(head, func);
 }
 EXPORT_SYMBOL_GPL(call_rcu_sched);
 
@@ -3067,73 +3032,14 @@ void kfree_call_rcu(struct rcu_head *head,
 }
 EXPORT_SYMBOL_GPL(kfree_call_rcu);
 
-/*
- * Because a context switch is a grace period for RCU-sched, any blocking
- * grace-period wait automatically implies a grace period if there
- * is only one CPU online at any point time during execution of either
- * synchronize_sched() or synchronize_rcu_bh().  It is OK to occasionally
- * incorrectly indicate that there are multiple CPUs online when there
- * was in fact only one the whole time, as this just adds some overhead:
- * RCU still operates correctly.
- */
-static int rcu_blocking_is_gp(void)
-{
-	int ret;
-
-	might_sleep();  /* Check for RCU read-side critical section. */
-	preempt_disable();
-	ret = num_online_cpus() <= 1;
-	preempt_enable();
-	return ret;
-}
-
 /**
  * synchronize_sched - wait until an rcu-sched grace period has elapsed.
  *
- * Control will return to the caller some time after a full rcu-sched
- * grace period has elapsed, in other words after all currently executing
- * rcu-sched read-side critical sections have completed.   These read-side
- * critical sections are delimited by rcu_read_lock_sched() and
- * rcu_read_unlock_sched(), and may be nested.  Note that preempt_disable(),
- * local_irq_disable(), and so on may be used in place of
- * rcu_read_lock_sched().
- *
- * This means that all preempt_disable code sequences, including NMI and
- * non-threaded hardware-interrupt handlers, in progress on entry will
- * have completed before this primitive returns.  However, this does not
- * guarantee that softirq handlers will have completed, since in some
- * kernels, these handlers can run in process context, and can block.
- *
- * Note that this guarantee implies further memory-ordering guarantees.
- * On systems with more than one CPU, when synchronize_sched() returns,
- * each CPU is guaranteed to have executed a full memory barrier since the
- * end of its last RCU-sched read-side critical section whose beginning
- * preceded the call to synchronize_sched().  In addition, each CPU having
- * an RCU read-side critical section that extends beyond the return from
- * synchronize_sched() is guaranteed to have executed a full memory barrier
- * after the beginning of synchronize_sched() and before the beginning of
- * that RCU read-side critical section.  Note that these guarantees include
- * CPUs that are offline, idle, or executing in user mode, as well as CPUs
- * that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked synchronize_sched(), which returned
- * to its caller on CPU B, then both CPU A and CPU B are guaranteed
- * to have executed a full memory barrier during the execution of
- * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
- * again only if the system has more than one CPU).
+ * This is transitional.
  */
 void synchronize_sched(void)
 {
-	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
-			 lock_is_held(&rcu_lock_map) ||
-			 lock_is_held(&rcu_sched_lock_map),
-			 "Illegal synchronize_sched() in RCU-sched read-side critical section");
-	if (rcu_blocking_is_gp())
-		return;
-	if (rcu_gp_is_expedited())
-		synchronize_sched_expedited();
-	else
-		wait_rcu_gp(call_rcu_sched);
+	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(synchronize_sched);
 
@@ -3181,41 +3087,23 @@ EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
 /**
  * get_state_synchronize_sched - Snapshot current RCU-sched state
  *
- * Returns a cookie that is used by a later call to cond_synchronize_sched()
- * to determine whether or not a full grace period has elapsed in the
- * meantime.
+ * This is transitional, and only used by rcutorture.
  */
 unsigned long get_state_synchronize_sched(void)
 {
-	/*
-	 * Any prior manipulation of RCU-protected data must happen
-	 * before the load from ->gp_seq.
-	 */
-	smp_mb();  /* ^^^ */
-	return rcu_seq_snap(&rcu_sched_state.gp_seq);
+	return get_state_synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(get_state_synchronize_sched);
 
 /**
  * cond_synchronize_sched - Conditionally wait for an RCU-sched grace period
- *
  * @oldstate: return value from earlier call to get_state_synchronize_sched()
  *
- * If a full RCU-sched grace period has elapsed since the earlier call to
- * get_state_synchronize_sched(), just return.  Otherwise, invoke
- * synchronize_sched() to wait for a full grace period.
- *
- * Yes, this function does not take counter wrap into account.  But
- * counter wrap is harmless.  If the counter wraps, we have waited for
- * more than 2 billion grace periods (and way more on a 64-bit system!),
- * so waiting for one additional grace period should be just fine.
+ * This is transitional and only used by rcutorture.
  */
 void cond_synchronize_sched(unsigned long oldstate)
 {
-	if (!rcu_seq_done(&rcu_sched_state.gp_seq, oldstate))
-		synchronize_sched();
-	else
-		smp_mb(); /* Ensure GP ends before subsequent accesses. */
+	cond_synchronize_rcu(oldstate);
 }
 EXPORT_SYMBOL_GPL(cond_synchronize_sched);
 
@@ -3452,12 +3340,28 @@ void rcu_barrier_bh(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_bh);
 
+/**
+ * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
+ *
+ * Note that this primitive does not necessarily wait for an RCU grace period
+ * to complete.  For example, if there are no RCU callbacks queued anywhere
+ * in the system, then rcu_barrier() is within its rights to return
+ * immediately, without waiting for anything, much less an RCU grace period.
+ */
+void rcu_barrier(void)
+{
+	_rcu_barrier(rcu_state_p);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
 /**
  * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
+ *
+ * This is transitional.
  */
 void rcu_barrier_sched(void)
 {
-	_rcu_barrier(&rcu_sched_state);
+	rcu_barrier();
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
 
@@ -3756,7 +3660,7 @@ void rcu_report_dead(unsigned int cpu)
 
 	/* QS for any half-done expedited RCU-sched GP. */
 	preempt_disable();
-	rcu_report_exp_rdp(&rcu_sched_state, this_cpu_ptr(rcu_sched_state.rda));
+	rcu_report_exp_rdp(&rcu_state, this_cpu_ptr(rcu_state.rda));
 	preempt_enable();
 	rcu_preempt_deferred_qs(current);
 	for_each_rcu_flavor(rsp)
@@ -4098,10 +4002,9 @@ void __init rcu_init(void)
 
 	rcu_bootup_announce();
 	rcu_init_geometry();
-	rcu_init_one(&rcu_sched_state);
+	rcu_init_one(&rcu_state);
 	if (dump_tree)
-		rcu_dump_rcu_node_tree(&rcu_sched_state);
-	__rcu_init_preempt();
+		rcu_dump_rcu_node_tree(&rcu_state);
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 
 	/*
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index e02c882861eb..38658ca87dcb 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -225,9 +225,6 @@ struct rcu_data {
 
 	/* 5) _rcu_barrier(), OOM callbacks, and expediting. */
 	struct rcu_head barrier_head;
-#ifdef CONFIG_RCU_FAST_NO_HZ
-	struct rcu_head oom_head;
-#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
 	int exp_dynticks_snap;		/* Double-check need for IPI. */
 
 	/* 6) Callback offloading. */
@@ -433,8 +430,7 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
 
 /* Forward declarations for rcutree_plugin.h */
 static void rcu_bootup_announce(void);
-static void rcu_preempt_qs(void);
-static void rcu_preempt_note_context_switch(bool preempt);
+static void rcu_qs(void);
 static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
 #ifdef CONFIG_HOTPLUG_CPU
 static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
@@ -444,9 +440,8 @@ static int rcu_print_task_stall(struct rcu_node *rnp);
 static int rcu_print_task_exp_stall(struct rcu_node *rnp);
 static void rcu_preempt_check_blocked_tasks(struct rcu_state *rsp,
 					    struct rcu_node *rnp);
-static void rcu_preempt_check_callbacks(void);
+static void rcu_flavor_check_callbacks(int user);
 void call_rcu(struct rcu_head *head, rcu_callback_t func);
-static void __init __rcu_init_preempt(void);
 static void dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp,
 			    int ncheck);
 static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 0f8f225c1b46..5619edfd414e 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -265,7 +265,7 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp)
 	rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, true);
 }
 
-/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
+/* Common code for work-done checking. */
 static bool sync_exp_work_done(struct rcu_state *rsp, unsigned long s)
 {
 	if (rcu_exp_gp_seq_done(rsp, s)) {
@@ -337,45 +337,6 @@ fastpath:
 	return false;
 }
 
-/* Invoked on each online non-idle CPU for expedited quiescent state. */
-static void sync_sched_exp_handler(void *data)
-{
-	struct rcu_data *rdp;
-	struct rcu_node *rnp;
-	struct rcu_state *rsp = data;
-
-	rdp = this_cpu_ptr(rsp->rda);
-	rnp = rdp->mynode;
-	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
-	    __this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
-		return;
-	if (rcu_is_cpu_rrupt_from_idle()) {
-		rcu_report_exp_rdp(&rcu_sched_state,
-				   this_cpu_ptr(&rcu_sched_data));
-		return;
-	}
-	__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
-	/* Store .exp before .rcu_urgent_qs. */
-	smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
-	resched_cpu(smp_processor_id());
-}
-
-/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
-static void sync_sched_exp_online_cleanup(int cpu)
-{
-	struct rcu_data *rdp;
-	int ret;
-	struct rcu_node *rnp;
-	struct rcu_state *rsp = &rcu_sched_state;
-
-	rdp = per_cpu_ptr(rsp->rda, cpu);
-	rnp = rdp->mynode;
-	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
-		return;
-	ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
-	WARN_ON_ONCE(ret);
-}
-
 /*
  * Select the CPUs within the specified rcu_node that the upcoming
  * expedited grace period needs to wait for.
@@ -691,39 +652,6 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
 	mutex_unlock(&rsp->exp_mutex);
 }
 
-/**
- * synchronize_sched_expedited - Brute-force RCU-sched grace period
- *
- * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
- * approach to force the grace period to end quickly.  This consumes
- * significant time on all CPUs and is unfriendly to real-time workloads,
- * so is thus not recommended for any sort of common-case code.  In fact,
- * if you are using synchronize_sched_expedited() in a loop, please
- * restructure your code to batch your updates, and then use a single
- * synchronize_sched() instead.
- *
- * This implementation can be thought of as an application of sequence
- * locking to expedited grace periods, but using the sequence counter to
- * determine when someone else has already done the work instead of for
- * retrying readers.
- */
-void synchronize_sched_expedited(void)
-{
-	struct rcu_state *rsp = &rcu_sched_state;
-
-	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
-			 lock_is_held(&rcu_lock_map) ||
-			 lock_is_held(&rcu_sched_lock_map),
-			 "Illegal synchronize_sched_expedited() in RCU read-side critical section");
-
-	/* If only one CPU, this is automatically a grace period. */
-	if (rcu_blocking_is_gp())
-		return;
-
-	_synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
-}
-EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
-
 #ifdef CONFIG_PREEMPT_RCU
 
 /*
@@ -801,6 +729,11 @@ static void sync_rcu_exp_handler(void *info)
 		resched_cpu(rdp->cpu);
 }
 
+/* PREEMPT=y, so no RCU-sched to clean up after. */
+static void sync_sched_exp_online_cleanup(int cpu)
+{
+}
+
 /**
  * synchronize_rcu_expedited - Brute-force RCU grace period
  *
@@ -818,6 +751,8 @@ static void sync_rcu_exp_handler(void *info)
  * you are using synchronize_rcu_expedited() in a loop, please restructure
  * your code to batch your updates, and then Use a single synchronize_rcu()
  * instead.
+ *
+ * This has the same semantics as (but is more brutal than) synchronize_rcu().
  */
 void synchronize_rcu_expedited(void)
 {
@@ -836,13 +771,79 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
 #else /* #ifdef CONFIG_PREEMPT_RCU */
 
+/* Invoked on each online non-idle CPU for expedited quiescent state. */
+static void sync_sched_exp_handler(void *data)
+{
+	struct rcu_data *rdp;
+	struct rcu_node *rnp;
+	struct rcu_state *rsp = data;
+
+	rdp = this_cpu_ptr(rsp->rda);
+	rnp = rdp->mynode;
+	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
+	    __this_cpu_read(rcu_data.cpu_no_qs.b.exp))
+		return;
+	if (rcu_is_cpu_rrupt_from_idle()) {
+		rcu_report_exp_rdp(&rcu_state, this_cpu_ptr(&rcu_data));
+		return;
+	}
+	__this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
+	/* Store .exp before .rcu_urgent_qs. */
+	smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
+	resched_cpu(smp_processor_id());
+}
+
+/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
+static void sync_sched_exp_online_cleanup(int cpu)
+{
+	struct rcu_data *rdp;
+	int ret;
+	struct rcu_node *rnp;
+	struct rcu_state *rsp = &rcu_state;
+
+	rdp = per_cpu_ptr(rsp->rda, cpu);
+	rnp = rdp->mynode;
+	if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
+		return;
+	ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
+	WARN_ON_ONCE(ret);
+}
+
 /*
- * Wait for an rcu-preempt grace period, but make it happen quickly.
- * But because preemptible RCU does not exist, map to rcu-sched.
+ * Because a context switch is a grace period for RCU-sched, any blocking
+ * grace-period wait automatically implies a grace period if there
+ * is only one CPU online at any point time during execution of either
+ * synchronize_sched() or synchronize_rcu_bh().  It is OK to occasionally
+ * incorrectly indicate that there are multiple CPUs online when there
+ * was in fact only one the whole time, as this just adds some overhead:
+ * RCU still operates correctly.
  */
+static int rcu_blocking_is_gp(void)
+{
+	int ret;
+
+	might_sleep();  /* Check for RCU read-side critical section. */
+	preempt_disable();
+	ret = num_online_cpus() <= 1;
+	preempt_enable();
+	return ret;
+}
+
+/* PREEMPT=n implementation of synchronize_rcu_expedited(). */
 void synchronize_rcu_expedited(void)
 {
-	synchronize_sched_expedited();
+	struct rcu_state *rsp = &rcu_state;
+
+	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
+			 lock_is_held(&rcu_lock_map) ||
+			 lock_is_held(&rcu_sched_lock_map),
+			 "Illegal synchronize_sched_expedited() in RCU read-side critical section");
+
+	/* If only one CPU, this is automatically a grace period. */
+	if (rcu_blocking_is_gp())
+		return;
+
+	_synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 9f0d054e6c20..2c81f8dd63b4 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -123,10 +123,6 @@ static void __init rcu_bootup_announce_oddness(void)
 
 #ifdef CONFIG_PREEMPT_RCU
 
-RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
-static struct rcu_state *const rcu_state_p = &rcu_preempt_state;
-static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data;
-
 static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 			       bool wake);
 static void rcu_read_unlock_special(struct task_struct *t);
@@ -303,15 +299,15 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
  *
  * Callers to this function must disable preemption.
  */
-static void rcu_preempt_qs(void)
+static void rcu_qs(void)
 {
-	RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_qs() invoked with preemption enabled!!!\n");
+	RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n");
 	if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) {
 		trace_rcu_grace_period(TPS("rcu_preempt"),
 				       __this_cpu_read(rcu_data_p->gp_seq),
 				       TPS("cpuqs"));
 		__this_cpu_write(rcu_data_p->cpu_no_qs.b.norm, false);
-		barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
+		barrier(); /* Coordinate with rcu_flavor_check_callbacks(). */
 		current->rcu_read_unlock_special.b.need_qs = false;
 	}
 }
@@ -329,12 +325,14 @@ static void rcu_preempt_qs(void)
  *
  * Caller must disable interrupts.
  */
-static void rcu_preempt_note_context_switch(bool preempt)
+void rcu_note_context_switch(bool preempt)
 {
 	struct task_struct *t = current;
 	struct rcu_data *rdp = this_cpu_ptr(rcu_state_p->rda);
 	struct rcu_node *rnp;
 
+	barrier(); /* Avoid RCU read-side critical sections leaking down. */
+	trace_rcu_utilization(TPS("Start context switch"));
 	lockdep_assert_irqs_disabled();
 	WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
 	if (t->rcu_read_lock_nesting > 0 &&
@@ -381,10 +379,13 @@ static void rcu_preempt_note_context_switch(bool preempt)
 	 * grace period, then the fact that the task has been enqueued
 	 * means that we continue to block the current grace period.
 	 */
-	rcu_preempt_qs();
+	rcu_qs();
 	if (rdp->deferred_qs)
 		rcu_report_exp_rdp(rcu_state_p, rdp);
+	trace_rcu_utilization(TPS("End context switch"));
+	barrier(); /* Avoid RCU read-side critical sections leaking up. */
 }
+EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
 /*
  * Check for preempted RCU readers blocking the current grace period
@@ -493,7 +494,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 		return;
 	}
 	if (special.b.need_qs) {
-		rcu_preempt_qs();
+		rcu_qs();
 		t->rcu_read_unlock_special.b.need_qs = false;
 		if (!t->rcu_read_unlock_special.s && !rdp->deferred_qs) {
 			local_irq_restore(flags);
@@ -596,7 +597,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
  */
 static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
 {
-	return (this_cpu_ptr(&rcu_preempt_data)->deferred_qs ||
+	return (this_cpu_ptr(&rcu_data)->deferred_qs ||
 		READ_ONCE(t->rcu_read_unlock_special.s)) &&
 	       t->rcu_read_lock_nesting <= 0;
 }
@@ -781,11 +782,14 @@ rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
  *
  * Caller must disable hard irqs.
  */
-static void rcu_preempt_check_callbacks(void)
+static void rcu_flavor_check_callbacks(int user)
 {
-	struct rcu_state *rsp = &rcu_preempt_state;
+	struct rcu_state *rsp = &rcu_state;
 	struct task_struct *t = current;
 
+	if (user || rcu_is_cpu_rrupt_from_idle()) {
+		rcu_note_voluntary_context_switch(current);
+	}
 	if (t->rcu_read_lock_nesting > 0 ||
 	    (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
 		/* No QS, force context switch if deferred. */
@@ -795,7 +799,7 @@ static void rcu_preempt_check_callbacks(void)
 		rcu_preempt_deferred_qs(t); /* Report deferred QS. */
 		return;
 	} else if (!t->rcu_read_lock_nesting) {
-		rcu_preempt_qs(); /* Report immediate QS. */
+		rcu_qs(); /* Report immediate QS. */
 		return;
 	}
 
@@ -808,44 +812,6 @@ static void rcu_preempt_check_callbacks(void)
 		t->rcu_read_unlock_special.b.need_qs = true;
 }
 
-/**
- * call_rcu() - Queue an RCU callback for invocation after a grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * The callback function will be invoked some time after a full grace
- * period elapses, in other words after all pre-existing RCU read-side
- * critical sections have completed.  However, the callback function
- * might well execute concurrently with RCU read-side critical sections
- * that started after call_rcu() was invoked.  RCU read-side critical
- * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
- * and may be nested.
- *
- * Note that all CPUs must agree that the grace period extended beyond
- * all pre-existing RCU read-side critical section.  On systems with more
- * than one CPU, this means that when "func()" is invoked, each CPU is
- * guaranteed to have executed a full memory barrier since the end of its
- * last RCU read-side critical section whose beginning preceded the call
- * to call_rcu().  It also means that each CPU executing an RCU read-side
- * critical section that continues beyond the start of "func()" must have
- * executed a memory barrier after the call_rcu() but before the beginning
- * of that RCU read-side critical section.  Note that these guarantees
- * include CPUs that are offline, idle, or executing in user mode, as
- * well as CPUs that are executing in the kernel.
- *
- * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
- * resulting RCU callback function "func()", then both CPU A and CPU B are
- * guaranteed to execute a full memory barrier during the time interval
- * between the call to call_rcu() and the invocation of "func()" -- even
- * if CPU A and CPU B are the same CPU (but again only if the system has
- * more than one CPU).
- */
-void call_rcu(struct rcu_head *head, rcu_callback_t func)
-{
-	__call_rcu(head, func, rcu_state_p, -1, 0);
-}
-EXPORT_SYMBOL_GPL(call_rcu);
-
 /**
  * synchronize_rcu - wait until a grace period has elapsed.
  *
@@ -856,14 +822,28 @@ EXPORT_SYMBOL_GPL(call_rcu);
  * concurrently with new RCU read-side critical sections that began while
  * synchronize_rcu() was waiting.  RCU read-side critical sections are
  * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
+ * In addition, regions of code across which interrupts, preemption, or
+ * softirqs have been disabled also serve as RCU read-side critical
+ * sections.  This includes hardware interrupt handlers, softirq handlers,
+ * and NMI handlers.
+ *
+ * Note that this guarantee implies further memory-ordering guarantees.
+ * On systems with more than one CPU, when synchronize_rcu() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since the
+ * end of its last RCU-sched read-side critical section whose beginning
+ * preceded the call to synchronize_rcu().  In addition, each CPU having
+ * an RCU read-side critical section that extends beyond the return from
+ * synchronize_rcu() is guaranteed to have executed a full memory barrier
+ * after the beginning of synchronize_rcu() and before the beginning of
+ * that RCU read-side critical section.  Note that these guarantees include
+ * CPUs that are offline, idle, or executing in user mode, as well as CPUs
+ * that are executing in the kernel.
  *
- * See the description of synchronize_sched() for more detailed
- * information on memory-ordering guarantees.  However, please note
- * that -only- the memory-ordering guarantees apply.  For example,
- * synchronize_rcu() is -not- guaranteed to wait on things like code
- * protected by preempt_disable(), instead, synchronize_rcu() is -only-
- * guaranteed to wait on RCU read-side critical sections, that is, sections
- * of code protected by rcu_read_lock().
+ * Furthermore, if CPU A invoked synchronize_rcu(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_rcu() -- even if CPU A and CPU B are the same CPU (but
+ * again only if the system has more than one CPU).
  */
 void synchronize_rcu(void)
 {
@@ -880,28 +860,6 @@ void synchronize_rcu(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
-/**
- * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
- *
- * Note that this primitive does not necessarily wait for an RCU grace period
- * to complete.  For example, if there are no RCU callbacks queued anywhere
- * in the system, then rcu_barrier() is within its rights to return
- * immediately, without waiting for anything, much less an RCU grace period.
- */
-void rcu_barrier(void)
-{
-	_rcu_barrier(rcu_state_p);
-}
-EXPORT_SYMBOL_GPL(rcu_barrier);
-
-/*
- * Initialize preemptible RCU's state structures.
- */
-static void __init __rcu_init_preempt(void)
-{
-	rcu_init_one(rcu_state_p);
-}
-
 /*
  * Check for a task exiting while in a preemptible-RCU read-side
  * critical section, clean up if so.  No need to issue warnings,
@@ -964,8 +922,6 @@ dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck)
 
 #else /* #ifdef CONFIG_PREEMPT_RCU */
 
-static struct rcu_state *const rcu_state_p = &rcu_sched_state;
-
 /*
  * Tell them what RCU they are running.
  */
@@ -975,18 +931,48 @@ static void __init rcu_bootup_announce(void)
 	rcu_bootup_announce_oddness();
 }
 
-/* Because preemptible RCU does not exist, we can ignore its QSes. */
-static void rcu_preempt_qs(void)
+/*
+ * Note a quiescent state for PREEMPT=n.  Because we do not need to know
+ * how many quiescent states passed, just if there was at least one since
+ * the start of the grace period, this just sets a flag.  The caller must
+ * have disabled preemption.
+ */
+static void rcu_qs(void)
 {
+	RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!");
+	if (!__this_cpu_read(rcu_data.cpu_no_qs.s))
+		return;
+	trace_rcu_grace_period(TPS("rcu_sched"),
+			       __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs"));
+	__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
+	if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
+		return;
+	__this_cpu_write(rcu_data.cpu_no_qs.b.exp, false);
+	rcu_report_exp_rdp(&rcu_state, this_cpu_ptr(&rcu_data));
 }
 
 /*
- * Because preemptible RCU does not exist, we never have to check for
- * CPUs being in quiescent states.
+ * Note a PREEMPT=n context switch.  The caller must have disabled interrupts.
  */
-static void rcu_preempt_note_context_switch(bool preempt)
+void rcu_note_context_switch(bool preempt)
 {
+	barrier(); /* Avoid RCU read-side critical sections leaking down. */
+	trace_rcu_utilization(TPS("Start context switch"));
+	rcu_qs();
+	/* Load rcu_urgent_qs before other flags. */
+	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs)))
+		goto out;
+	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
+	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
+		rcu_momentary_dyntick_idle();
+	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
+	if (!preempt)
+		rcu_tasks_qs(current);
+out:
+	trace_rcu_utilization(TPS("End context switch"));
+	barrier(); /* Avoid RCU read-side critical sections leaking up. */
 }
+EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
 /*
  * Because preemptible RCU does not exist, there are never any preempted
@@ -1054,29 +1040,48 @@ rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
 }
 
 /*
- * Because preemptible RCU does not exist, it never has any callbacks
- * to check.
+ * Check to see if this CPU is in a non-context-switch quiescent state
+ * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
+ * Also schedule RCU core processing.
+ *
+ * This function must be called from hardirq context.  It is normally
+ * invoked from the scheduling-clock interrupt.
  */
-static void rcu_preempt_check_callbacks(void)
+static void rcu_flavor_check_callbacks(int user)
 {
-}
+	if (user || rcu_is_cpu_rrupt_from_idle()) {
 
-/*
- * Because preemptible RCU does not exist, rcu_barrier() is just
- * another name for rcu_barrier_sched().
- */
-void rcu_barrier(void)
-{
-	rcu_barrier_sched();
+		/*
+		 * Get here if this CPU took its interrupt from user
+		 * mode or from the idle loop, and if this is not a
+		 * nested interrupt.  In this case, the CPU is in
+		 * a quiescent state, so note it.
+		 *
+		 * No memory barrier is required here because rcu_qs()
+		 * references only CPU-local variables that other CPUs
+		 * neither access nor modify, at least not while the
+		 * corresponding CPU is online.
+		 */
+
+		rcu_qs();
+	}
 }
-EXPORT_SYMBOL_GPL(rcu_barrier);
 
-/*
- * Because preemptible RCU does not exist, it need not be initialized.
- */
-static void __init __rcu_init_preempt(void)
+/* PREEMPT=n implementation of synchronize_rcu(). */
+void synchronize_rcu(void)
 {
+	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
+			 lock_is_held(&rcu_lock_map) ||
+			 lock_is_held(&rcu_sched_lock_map),
+			 "Illegal synchronize_rcu() in RCU-sched read-side critical section");
+	if (rcu_blocking_is_gp())
+		return;
+	if (rcu_gp_is_expedited())
+		synchronize_rcu_expedited();
+	else
+		wait_rcu_gp(call_rcu);
 }
+EXPORT_SYMBOL_GPL(synchronize_rcu);
 
 /*
  * Because preemptible RCU does not exist, tasks cannot possibly exit
@@ -1319,8 +1324,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 
 static void rcu_kthread_do_work(void)
 {
-	rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
-	rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
+	rcu_do_batch(&rcu_state, this_cpu_ptr(&rcu_data));
 }
 
 static void rcu_cpu_kthread_setup(unsigned int cpu)
@@ -1727,87 +1731,6 @@ static void rcu_idle_count_callbacks_posted(void)
 	__this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
 }
 
-/*
- * Data for flushing lazy RCU callbacks at OOM time.
- */
-static atomic_t oom_callback_count;
-static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);
-
-/*
- * RCU OOM callback -- decrement the outstanding count and deliver the
- * wake-up if we are the last one.
- */
-static void rcu_oom_callback(struct rcu_head *rhp)
-{
-	if (atomic_dec_and_test(&oom_callback_count))
-		wake_up(&oom_callback_wq);
-}
-
-/*
- * Post an rcu_oom_notify callback on the current CPU if it has at
- * least one lazy callback.  This will unnecessarily post callbacks
- * to CPUs that already have a non-lazy callback at the end of their
- * callback list, but this is an infrequent operation, so accept some
- * extra overhead to keep things simple.
- */
-static void rcu_oom_notify_cpu(void *unused)
-{
-	struct rcu_state *rsp;
-	struct rcu_data *rdp;
-
-	for_each_rcu_flavor(rsp) {
-		rdp = raw_cpu_ptr(rsp->rda);
-		if (rcu_segcblist_n_lazy_cbs(&rdp->cblist)) {
-			atomic_inc(&oom_callback_count);
-			rsp->call(&rdp->oom_head, rcu_oom_callback);
-		}
-	}
-}
-
-/*
- * If low on memory, ensure that each CPU has a non-lazy callback.
- * This will wake up CPUs that have only lazy callbacks, in turn
- * ensuring that they free up the corresponding memory in a timely manner.
- * Because an uncertain amount of memory will be freed in some uncertain
- * timeframe, we do not claim to have freed anything.
- */
-static int rcu_oom_notify(struct notifier_block *self,
-			  unsigned long notused, void *nfreed)
-{
-	int cpu;
-
-	/* Wait for callbacks from earlier instance to complete. */
-	wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
-	smp_mb(); /* Ensure callback reuse happens after callback invocation. */
-
-	/*
-	 * Prevent premature wakeup: ensure that all increments happen
-	 * before there is a chance of the counter reaching zero.
-	 */
-	atomic_set(&oom_callback_count, 1);
-
-	for_each_online_cpu(cpu) {
-		smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
-		cond_resched_tasks_rcu_qs();
-	}
-
-	/* Unconditionally decrement: no need to wake ourselves up. */
-	atomic_dec(&oom_callback_count);
-
-	return NOTIFY_OK;
-}
-
-static struct notifier_block rcu_oom_nb = {
-	.notifier_call = rcu_oom_notify
-};
-
-static int __init rcu_register_oom_notifier(void)
-{
-	register_oom_notifier(&rcu_oom_nb);
-	return 0;
-}
-early_initcall(rcu_register_oom_notifier);
-
 #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
 
 #ifdef CONFIG_RCU_FAST_NO_HZ
-- 
cgit v1.2.3


From 709fdce7545c978e69f52eb19082ea3af44332f5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 3 Jul 2018 10:44:44 -0700
Subject: rcu: Express Tiny RCU updates in terms of RCU rather than RCU-sched

This commit renames Tiny RCU functions so that the lowest level of
functionality is RCU (e.g., synchronize_rcu()) rather than RCU-sched
(e.g., synchronize_sched()).  This provides greater naming compatibility
with Tree RCU, which will in turn permit more LoC removal once
the RCU-sched and RCU-bh update-side API is removed.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Fix Tiny call_rcu()'s EXPORT_SYMBOL() in response to a bug
  report from kbuild test robot. ]
---
 include/linux/rcupdate.h | 12 +++++-------
 include/linux/rcutiny.h  | 34 +++++++++++++++++++---------------
 include/linux/rcutree.h  |  1 -
 kernel/rcu/tiny.c        | 48 ++++++++++++++++++++++++------------------------
 4 files changed, 48 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 94474bb6b5c4..1207c6c9bd8b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -49,15 +49,14 @@
 
 /* Exported common interfaces */
 
-#ifdef CONFIG_TINY_RCU
-#define	call_rcu	call_rcu_sched
-#else
-void call_rcu(struct rcu_head *head, rcu_callback_t func);
+#ifndef CONFIG_TINY_RCU
+void synchronize_sched(void);
+void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
 #endif
 
-void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
-void synchronize_sched(void);
+void call_rcu(struct rcu_head *head, rcu_callback_t func);
 void rcu_barrier_tasks(void);
+void synchronize_rcu(void);
 
 static inline void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
 {
@@ -68,7 +67,6 @@ static inline void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
 
 void __rcu_read_lock(void);
 void __rcu_read_unlock(void);
-void synchronize_rcu(void);
 
 /*
  * Defined as a macro as it is a very low level header included from
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index df2c0895c5e7..e66fb8bc2127 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -36,9 +36,9 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
 /* Never flag non-existent other CPUs! */
 static inline bool rcu_eqs_special_set(int cpu) { return false; }
 
-static inline void synchronize_rcu(void)
+static inline void synchronize_sched(void)
 {
-	synchronize_sched();
+	synchronize_rcu();
 }
 
 static inline unsigned long get_state_synchronize_rcu(void)
@@ -61,16 +61,11 @@ static inline void cond_synchronize_sched(unsigned long oldstate)
 	might_sleep();
 }
 
-static inline void synchronize_rcu_expedited(void)
-{
-	synchronize_sched();	/* Only one CPU, so pretty fast anyway!!! */
-}
+extern void rcu_barrier(void);
 
-extern void rcu_barrier_sched(void);
-
-static inline void rcu_barrier(void)
+static inline void rcu_barrier_sched(void)
 {
-	rcu_barrier_sched();  /* Only one CPU, so only one list of callbacks! */
+	rcu_barrier();  /* Only one CPU, so only one list of callbacks! */
 }
 
 static inline void rcu_barrier_bh(void)
@@ -88,27 +83,36 @@ static inline void synchronize_rcu_bh_expedited(void)
 	synchronize_sched();
 }
 
+static inline void synchronize_rcu_expedited(void)
+{
+	synchronize_sched();
+}
+
 static inline void synchronize_sched_expedited(void)
 {
 	synchronize_sched();
 }
 
-static inline void kfree_call_rcu(struct rcu_head *head,
-				  rcu_callback_t func)
+static inline void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
+{
+	call_rcu(head, func);
+}
+
+static inline void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
 	call_rcu(head, func);
 }
 
-void rcu_sched_qs(void);
+void rcu_qs(void);
 
 static inline void rcu_softirq_qs(void)
 {
-	rcu_sched_qs();
+	rcu_qs();
 }
 
 #define rcu_note_context_switch(preempt) \
 	do { \
-		rcu_sched_qs(); \
+		rcu_qs(); \
 		rcu_tasks_qs(current); \
 	} while (0)
 
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 0c44720f0e84..6d30a0809300 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -45,7 +45,6 @@ static inline void rcu_virt_note_context_switch(int cpu)
 	rcu_note_context_switch(false);
 }
 
-void synchronize_rcu(void);
 static inline void synchronize_rcu_bh(void)
 {
 	synchronize_rcu();
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index cadcf63c4889..30826fb6e438 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -46,25 +46,25 @@ struct rcu_ctrlblk {
 };
 
 /* Definition for rcupdate control block. */
-static struct rcu_ctrlblk rcu_sched_ctrlblk = {
-	.donetail	= &rcu_sched_ctrlblk.rcucblist,
-	.curtail	= &rcu_sched_ctrlblk.rcucblist,
+static struct rcu_ctrlblk rcu_ctrlblk = {
+	.donetail	= &rcu_ctrlblk.rcucblist,
+	.curtail	= &rcu_ctrlblk.rcucblist,
 };
 
-void rcu_barrier_sched(void)
+void rcu_barrier(void)
 {
-	wait_rcu_gp(call_rcu_sched);
+	wait_rcu_gp(call_rcu);
 }
-EXPORT_SYMBOL(rcu_barrier_sched);
+EXPORT_SYMBOL(rcu_barrier);
 
 /* Record an rcu quiescent state.  */
-void rcu_sched_qs(void)
+void rcu_qs(void)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
-	if (rcu_sched_ctrlblk.donetail != rcu_sched_ctrlblk.curtail) {
-		rcu_sched_ctrlblk.donetail = rcu_sched_ctrlblk.curtail;
+	if (rcu_ctrlblk.donetail != rcu_ctrlblk.curtail) {
+		rcu_ctrlblk.donetail = rcu_ctrlblk.curtail;
 		raise_softirq(RCU_SOFTIRQ);
 	}
 	local_irq_restore(flags);
@@ -79,7 +79,7 @@ void rcu_sched_qs(void)
 void rcu_check_callbacks(int user)
 {
 	if (user)
-		rcu_sched_qs();
+		rcu_qs();
 }
 
 /* Invoke the RCU callbacks whose grace period has elapsed.  */
@@ -90,17 +90,17 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused
 
 	/* Move the ready-to-invoke callbacks to a local list. */
 	local_irq_save(flags);
-	if (rcu_sched_ctrlblk.donetail == &rcu_sched_ctrlblk.rcucblist) {
+	if (rcu_ctrlblk.donetail == &rcu_ctrlblk.rcucblist) {
 		/* No callbacks ready, so just leave. */
 		local_irq_restore(flags);
 		return;
 	}
-	list = rcu_sched_ctrlblk.rcucblist;
-	rcu_sched_ctrlblk.rcucblist = *rcu_sched_ctrlblk.donetail;
-	*rcu_sched_ctrlblk.donetail = NULL;
-	if (rcu_sched_ctrlblk.curtail == rcu_sched_ctrlblk.donetail)
-		rcu_sched_ctrlblk.curtail = &rcu_sched_ctrlblk.rcucblist;
-	rcu_sched_ctrlblk.donetail = &rcu_sched_ctrlblk.rcucblist;
+	list = rcu_ctrlblk.rcucblist;
+	rcu_ctrlblk.rcucblist = *rcu_ctrlblk.donetail;
+	*rcu_ctrlblk.donetail = NULL;
+	if (rcu_ctrlblk.curtail == rcu_ctrlblk.donetail)
+		rcu_ctrlblk.curtail = &rcu_ctrlblk.rcucblist;
+	rcu_ctrlblk.donetail = &rcu_ctrlblk.rcucblist;
 	local_irq_restore(flags);
 
 	/* Invoke the callbacks on the local list. */
@@ -125,21 +125,21 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused
  *
  * Cool, huh?  (Due to Josh Triplett.)
  */
-void synchronize_sched(void)
+void synchronize_rcu(void)
 {
 	RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
 			 lock_is_held(&rcu_lock_map) ||
 			 lock_is_held(&rcu_sched_lock_map),
 			 "Illegal synchronize_sched() in RCU read-side critical section");
 }
-EXPORT_SYMBOL_GPL(synchronize_sched);
+EXPORT_SYMBOL_GPL(synchronize_rcu);
 
 /*
  * Post an RCU callback to be invoked after the end of an RCU-sched grace
  * period.  But since we have but one CPU, that would be after any
  * quiescent state.
  */
-void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
+void call_rcu(struct rcu_head *head, rcu_callback_t func)
 {
 	unsigned long flags;
 
@@ -148,16 +148,16 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
 	head->next = NULL;
 
 	local_irq_save(flags);
-	*rcu_sched_ctrlblk.curtail = head;
-	rcu_sched_ctrlblk.curtail = &head->next;
+	*rcu_ctrlblk.curtail = head;
+	rcu_ctrlblk.curtail = &head->next;
 	local_irq_restore(flags);
 
 	if (unlikely(is_idle_task(current))) {
-		/* force scheduling for rcu_sched_qs() */
+		/* force scheduling for rcu_qs() */
 		resched_cpu(0);
 	}
 }
-EXPORT_SYMBOL_GPL(call_rcu_sched);
+EXPORT_SYMBOL_GPL(call_rcu);
 
 void __init rcu_init(void)
 {
-- 
cgit v1.2.3


From 4c7e9c1434c6fc960774a5475f2fbccbf557fdeb Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 6 Jul 2018 09:54:25 -0700
Subject: rcu: Consolidate RCU-bh update-side function definitions

This commit saves a few lines by consolidating the RCU-bh function
definitions at the end of include/linux/rcupdate.h.  This consolidation
also makes it easier to remove them all when the time comes.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 27 ++++++++++++++++++++++-----
 include/linux/rcutiny.h  | 15 ---------------
 include/linux/rcutree.h  | 17 -----------------
 kernel/rcu/tree.c        |  9 ---------
 4 files changed, 22 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 1207c6c9bd8b..e530f5739033 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -58,11 +58,6 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
 void rcu_barrier_tasks(void);
 void synchronize_rcu(void);
 
-static inline void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
-{
-	call_rcu(head, func);
-}
-
 #ifdef CONFIG_PREEMPT_RCU
 
 void __rcu_read_lock(void);
@@ -875,4 +870,26 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 #endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */
 
 
+/* Transitional pre-consolidation compatibility definitions. */
+
+static inline void synchronize_rcu_bh(void)
+{
+	synchronize_rcu();
+}
+
+static inline void synchronize_rcu_bh_expedited(void)
+{
+	synchronize_rcu_expedited();
+}
+
+static inline void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
+{
+	call_rcu(head, func);
+}
+
+static inline void rcu_barrier_bh(void)
+{
+	rcu_barrier();
+}
+
 #endif /* __LINUX_RCUPDATE_H */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index e66fb8bc2127..df82bada9b19 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -68,21 +68,6 @@ static inline void rcu_barrier_sched(void)
 	rcu_barrier();  /* Only one CPU, so only one list of callbacks! */
 }
 
-static inline void rcu_barrier_bh(void)
-{
-	rcu_barrier();
-}
-
-static inline void synchronize_rcu_bh(void)
-{
-	synchronize_sched();
-}
-
-static inline void synchronize_rcu_bh_expedited(void)
-{
-	synchronize_sched();
-}
-
 static inline void synchronize_rcu_expedited(void)
 {
 	synchronize_sched();
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 6d30a0809300..94820156aa62 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -45,11 +45,6 @@ static inline void rcu_virt_note_context_switch(int cpu)
 	rcu_note_context_switch(false);
 }
 
-static inline void synchronize_rcu_bh(void)
-{
-	synchronize_rcu();
-}
-
 void synchronize_rcu_expedited(void);
 
 static inline void synchronize_sched_expedited(void)
@@ -59,19 +54,7 @@ static inline void synchronize_sched_expedited(void)
 
 void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
 
-/**
- * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period
- *
- * This is a transitional API and will soon be removed, with all
- * callers converted to synchronize_rcu_expedited().
- */
-static inline void synchronize_rcu_bh_expedited(void)
-{
-	synchronize_rcu_expedited();
-}
-
 void rcu_barrier(void);
-void rcu_barrier_bh(void);
 void rcu_barrier_sched(void);
 bool rcu_eqs_special_set(int cpu);
 unsigned long get_state_synchronize_rcu(void);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 53ba7747878c..8d5dadaf3c53 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3252,15 +3252,6 @@ static void _rcu_barrier(void)
 	mutex_unlock(&rcu_state.barrier_mutex);
 }
 
-/**
- * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
- */
-void rcu_barrier_bh(void)
-{
-	_rcu_barrier();
-}
-EXPORT_SYMBOL_GPL(rcu_barrier_bh);
-
 /**
  * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
  *
-- 
cgit v1.2.3


From a8bb74acd8efe2eb934d524ae20859980975b602 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 6 Jul 2018 11:46:47 -0700
Subject: rcu: Consolidate RCU-sched update-side function definitions

This commit saves a few lines by consolidating the RCU-sched function
definitions at the end of include/linux/rcupdate.h.  This consolidation
also makes it easier to remove them all when the time comes.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 38 +++++++++++++++++++++++++------
 include/linux/rcutiny.h  | 32 +-------------------------
 include/linux/rcutree.h  |  9 --------
 kernel/rcu/tree.c        | 58 ------------------------------------------------
 4 files changed, 32 insertions(+), 105 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index e530f5739033..12103e1bbe67 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -48,12 +48,6 @@
 #define ulong2long(a)		(*(long *)(&(a)))
 
 /* Exported common interfaces */
-
-#ifndef CONFIG_TINY_RCU
-void synchronize_sched(void);
-void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
-#endif
-
 void call_rcu(struct rcu_head *head, rcu_callback_t func);
 void rcu_barrier_tasks(void);
 void synchronize_rcu(void);
@@ -170,7 +164,7 @@ void exit_tasks_rcu_finish(void);
 #define rcu_tasks_qs(t)	do { } while (0)
 #define rcu_note_voluntary_context_switch(t)		rcu_all_qs()
 #define call_rcu_tasks call_rcu_sched
-#define synchronize_rcu_tasks synchronize_sched
+#define synchronize_rcu_tasks synchronize_rcu
 static inline void exit_tasks_rcu_start(void) { }
 static inline void exit_tasks_rcu_finish(void) { }
 #endif /* #else #ifdef CONFIG_TASKS_RCU */
@@ -892,4 +886,34 @@ static inline void rcu_barrier_bh(void)
 	rcu_barrier();
 }
 
+static inline void synchronize_sched(void)
+{
+	synchronize_rcu();
+}
+
+static inline void synchronize_sched_expedited(void)
+{
+	synchronize_rcu_expedited();
+}
+
+static inline void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
+{
+	call_rcu(head, func);
+}
+
+static inline void rcu_barrier_sched(void)
+{
+	rcu_barrier();
+}
+
+static inline unsigned long get_state_synchronize_sched(void)
+{
+	return get_state_synchronize_rcu();
+}
+
+static inline void cond_synchronize_sched(unsigned long oldstate)
+{
+	cond_synchronize_rcu(oldstate);
+}
+
 #endif /* __LINUX_RCUPDATE_H */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index df82bada9b19..7fa4fb9e899e 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -36,11 +36,6 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
 /* Never flag non-existent other CPUs! */
 static inline bool rcu_eqs_special_set(int cpu) { return false; }
 
-static inline void synchronize_sched(void)
-{
-	synchronize_rcu();
-}
-
 static inline unsigned long get_state_synchronize_rcu(void)
 {
 	return 0;
@@ -51,36 +46,11 @@ static inline void cond_synchronize_rcu(unsigned long oldstate)
 	might_sleep();
 }
 
-static inline unsigned long get_state_synchronize_sched(void)
-{
-	return 0;
-}
-
-static inline void cond_synchronize_sched(unsigned long oldstate)
-{
-	might_sleep();
-}
-
 extern void rcu_barrier(void);
 
-static inline void rcu_barrier_sched(void)
-{
-	rcu_barrier();  /* Only one CPU, so only one list of callbacks! */
-}
-
 static inline void synchronize_rcu_expedited(void)
 {
-	synchronize_sched();
-}
-
-static inline void synchronize_sched_expedited(void)
-{
-	synchronize_sched();
-}
-
-static inline void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
-{
-	call_rcu(head, func);
+	synchronize_rcu();
 }
 
 static inline void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 94820156aa62..d09a9abe9440 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -46,21 +46,12 @@ static inline void rcu_virt_note_context_switch(int cpu)
 }
 
 void synchronize_rcu_expedited(void);
-
-static inline void synchronize_sched_expedited(void)
-{
-	synchronize_rcu_expedited();
-}
-
 void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
 
 void rcu_barrier(void);
-void rcu_barrier_sched(void);
 bool rcu_eqs_special_set(int cpu);
 unsigned long get_state_synchronize_rcu(void);
 void cond_synchronize_rcu(unsigned long oldstate);
-unsigned long get_state_synchronize_sched(void);
-void cond_synchronize_sched(unsigned long oldstate);
 
 void rcu_idle_enter(void);
 void rcu_idle_exit(void);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8d5dadaf3c53..1a2551a4d583 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2950,19 +2950,6 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
-/**
- * call_rcu_sched() - Queue an RCU for invocation after sched grace period.
- * @head: structure to be used for queueing the RCU updates.
- * @func: actual callback function to be invoked after the grace period
- *
- * This is transitional.
- */
-void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
-{
-	call_rcu(head, func);
-}
-EXPORT_SYMBOL_GPL(call_rcu_sched);
-
 /*
  * Queue an RCU callback for lazy invocation after a grace period.
  * This will likely be later named something like "call_rcu_lazy()",
@@ -2976,17 +2963,6 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL_GPL(kfree_call_rcu);
 
-/**
- * synchronize_sched - wait until an rcu-sched grace period has elapsed.
- *
- * This is transitional.
- */
-void synchronize_sched(void)
-{
-	synchronize_rcu();
-}
-EXPORT_SYMBOL_GPL(synchronize_sched);
-
 /**
  * get_state_synchronize_rcu - Snapshot current RCU state
  *
@@ -3028,29 +3004,6 @@ void cond_synchronize_rcu(unsigned long oldstate)
 }
 EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
 
-/**
- * get_state_synchronize_sched - Snapshot current RCU-sched state
- *
- * This is transitional, and only used by rcutorture.
- */
-unsigned long get_state_synchronize_sched(void)
-{
-	return get_state_synchronize_rcu();
-}
-EXPORT_SYMBOL_GPL(get_state_synchronize_sched);
-
-/**
- * cond_synchronize_sched - Conditionally wait for an RCU-sched grace period
- * @oldstate: return value from earlier call to get_state_synchronize_sched()
- *
- * This is transitional and only used by rcutorture.
- */
-void cond_synchronize_sched(unsigned long oldstate)
-{
-	cond_synchronize_rcu(oldstate);
-}
-EXPORT_SYMBOL_GPL(cond_synchronize_sched);
-
 /*
  * Check to see if there is any immediate RCU-related work to be done by
  * the current CPU, for the specified type of RCU, returning 1 if so and
@@ -3266,17 +3219,6 @@ void rcu_barrier(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier);
 
-/**
- * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
- *
- * This is transitional.
- */
-void rcu_barrier_sched(void)
-{
-	rcu_barrier();
-}
-EXPORT_SYMBOL_GPL(rcu_barrier_sched);
-
 /*
  * Propagate ->qsinitmask bits up the rcu_node tree to account for the
  * first CPU in a given leaf rcu_node structure coming online.  The caller
-- 
cgit v1.2.3


From 2bd8b1a2afc4463cc503665e98faa5909d1ac462 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 7 Jul 2018 18:12:26 -0700
Subject: rcu: Clean up flavor-related definitions and comments in rcupdate.h

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 27 ++++++++++++---------------
 1 file changed, 12 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 12103e1bbe67..d6d543b60a9f 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -119,11 +119,10 @@ static inline void rcu_init_nohz(void) { }
  * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
  * @a: Code that RCU needs to pay attention to.
  *
- * RCU, RCU-bh, and RCU-sched read-side critical sections are forbidden
- * in the inner idle loop, that is, between the rcu_idle_enter() and
- * the rcu_idle_exit() -- RCU will happily ignore any such read-side
- * critical sections.  However, things like powertop need tracepoints
- * in the inner idle loop.
+ * RCU read-side critical sections are forbidden in the inner idle loop,
+ * that is, between the rcu_idle_enter() and the rcu_idle_exit() -- RCU
+ * will happily ignore any such read-side critical sections.  However,
+ * things like powertop need tracepoints in the inner idle loop.
  *
  * This macro provides the way out:  RCU_NONIDLE(do_something_with_RCU())
  * will tell RCU that it needs to pay attention, invoke its argument
@@ -163,7 +162,7 @@ void exit_tasks_rcu_finish(void);
 #else /* #ifdef CONFIG_TASKS_RCU */
 #define rcu_tasks_qs(t)	do { } while (0)
 #define rcu_note_voluntary_context_switch(t)		rcu_all_qs()
-#define call_rcu_tasks call_rcu_sched
+#define call_rcu_tasks call_rcu
 #define synchronize_rcu_tasks synchronize_rcu
 static inline void exit_tasks_rcu_start(void) { }
 static inline void exit_tasks_rcu_finish(void) { }
@@ -309,8 +308,8 @@ static inline void rcu_preempt_sleep_check(void) { }
  * Helper functions for rcu_dereference_check(), rcu_dereference_protected()
  * and rcu_assign_pointer().  Some of these could be folded into their
  * callers, but they are left separate in order to ease introduction of
- * multiple flavors of pointers to match the multiple flavors of RCU
- * (e.g., __rcu_sched, and __srcu), should this make sense in the future.
+ * multiple pointers markings to match different RCU implementations
+ * (e.g., __srcu), should this make sense in the future.
  */
 
 #ifdef __CHECKER__
@@ -670,9 +669,8 @@ static inline void rcu_read_unlock(void)
  * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section
  *
  * This is equivalent of rcu_read_lock(), but also disables softirqs.
- * Note that synchronize_rcu() and friends may be used for the update
- * side, although synchronize_rcu_bh() is available as a wrapper in the
- * short term.  Longer term, the _bh update-side API will be eliminated.
+ * Note that anything else that disables softirqs can also serve as
+ * an RCU read-side critical section.
  *
  * Note that rcu_read_lock_bh() and the matching rcu_read_unlock_bh()
  * must occur in the same context, for example, it is illegal to invoke
@@ -705,10 +703,9 @@ static inline void rcu_read_unlock_bh(void)
 /**
  * rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section
  *
- * This is equivalent of rcu_read_lock(), but to be used when updates
- * are being done using call_rcu_sched() or synchronize_rcu_sched().
- * Read-side critical sections can also be introduced by anything that
- * disables preemption, including local_irq_disable() and friends.
+ * This is equivalent of rcu_read_lock(), but disables preemption.
+ * Read-side critical sections can also be introduced by anything else
+ * that disables preemption, including local_irq_disable() and friends.
  *
  * Note that rcu_read_lock_sched() and the matching rcu_read_unlock_sched()
  * must occur in the same context, for example, it is illegal to invoke
-- 
cgit v1.2.3


From aff5f0369e312b0ab0ca7a2a12dd64b7e39c7091 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 7 Jul 2018 18:12:26 -0700
Subject: rcu: Clean up flavor-related definitions and comments in rculist.h

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rculist.h | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 4786c2235b98..e91ec9ddcd30 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -182,7 +182,7 @@ static inline void list_replace_rcu(struct list_head *old,
  * @list:	the RCU-protected list to splice
  * @prev:	points to the last element of the existing list
  * @next:	points to the first element of the existing list
- * @sync:	function to sync: synchronize_rcu(), synchronize_sched(), ...
+ * @sync:	synchronize_rcu, synchronize_rcu_expedited, ...
  *
  * The list pointed to by @prev and @next can be RCU-read traversed
  * concurrently with this function.
@@ -240,7 +240,7 @@ static inline void __list_splice_init_rcu(struct list_head *list,
  *                        designed for stacks.
  * @list:	the RCU-protected list to splice
  * @head:	the place in the existing list to splice the first list into
- * @sync:	function to sync: synchronize_rcu(), synchronize_sched(), ...
+ * @sync:	synchronize_rcu, synchronize_rcu_expedited, ...
  */
 static inline void list_splice_init_rcu(struct list_head *list,
 					struct list_head *head,
@@ -255,7 +255,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
  *                             list, designed for queues.
  * @list:	the RCU-protected list to splice
  * @head:	the place in the existing list to splice the first list into
- * @sync:	function to sync: synchronize_rcu(), synchronize_sched(), ...
+ * @sync:	synchronize_rcu, synchronize_rcu_expedited, ...
  */
 static inline void list_splice_tail_init_rcu(struct list_head *list,
 					     struct list_head *head,
@@ -359,13 +359,12 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
  * @type:       the type of the struct this is embedded in.
  * @member:     the name of the list_head within the struct.
  *
- * This primitive may safely run concurrently with the _rcu list-mutation
- * primitives such as list_add_rcu(), but requires some implicit RCU
- * read-side guarding.  One example is running within a special
- * exception-time environment where preemption is disabled and where
- * lockdep cannot be invoked (in which case updaters must use RCU-sched,
- * as in synchronize_sched(), call_rcu_sched(), and friends).  Another
- * example is when items are added to the list, but never deleted.
+ * This primitive may safely run concurrently with the _rcu
+ * list-mutation primitives such as list_add_rcu(), but requires some
+ * implicit RCU read-side guarding.  One example is running within a special
+ * exception-time environment where preemption is disabled and where lockdep
+ * cannot be invoked.  Another example is when items are added to the list,
+ * but never deleted.
  */
 #define list_entry_lockless(ptr, type, member) \
 	container_of((typeof(ptr))READ_ONCE(ptr), type, member)
@@ -376,13 +375,12 @@ static inline void list_splice_tail_init_rcu(struct list_head *list,
  * @head:	the head for your list.
  * @member:	the name of the list_struct within the struct.
  *
- * This primitive may safely run concurrently with the _rcu list-mutation
- * primitives such as list_add_rcu(), but requires some implicit RCU
- * read-side guarding.  One example is running within a special
- * exception-time environment where preemption is disabled and where
- * lockdep cannot be invoked (in which case updaters must use RCU-sched,
- * as in synchronize_sched(), call_rcu_sched(), and friends).  Another
- * example is when items are added to the list, but never deleted.
+ * This primitive may safely run concurrently with the _rcu
+ * list-mutation primitives such as list_add_rcu(), but requires some
+ * implicit RCU read-side guarding.  One example is running within a special
+ * exception-time environment where preemption is disabled and where lockdep
+ * cannot be invoked.  Another example is when items are added to the list,
+ * but never deleted.
  */
 #define list_for_each_entry_lockless(pos, head, member) \
 	for (pos = list_entry_lockless((head)->next, typeof(*pos), member); \
-- 
cgit v1.2.3


From df8561a0d7e4f5cb72d0aa6be43e154b2027bba6 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sat, 7 Jul 2018 18:12:26 -0700
Subject: rcu: Clean up flavor-related definitions and comments in
 rcupdate_wait.h

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate_wait.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h
index bc104699560e..8a16c3eb3dd0 100644
--- a/include/linux/rcupdate_wait.h
+++ b/include/linux/rcupdate_wait.h
@@ -33,17 +33,17 @@ do {									\
 
 /**
  * synchronize_rcu_mult - Wait concurrently for multiple grace periods
- * @...: List of call_rcu() functions for the flavors to wait on.
+ * @...: List of call_rcu() functions for different grace periods to wait on
  *
- * This macro waits concurrently for multiple flavors of RCU grace periods.
- * For example, synchronize_rcu_mult(call_rcu, call_rcu_sched) would wait
- * on concurrent RCU and RCU-sched grace periods.  Waiting on a give SRCU
+ * This macro waits concurrently for multiple types of RCU grace periods.
+ * For example, synchronize_rcu_mult(call_rcu, call_rcu_tasks) would wait
+ * on concurrent RCU and RCU-tasks grace periods.  Waiting on a give SRCU
  * domain requires you to write a wrapper function for that SRCU domain's
  * call_srcu() function, supplying the corresponding srcu_struct.
  *
- * If Tiny RCU, tell _wait_rcu_gp() not to bother waiting for RCU
- * or RCU-sched, given that anywhere synchronize_rcu_mult() can be called
- * is automatically a grace period.
+ * If Tiny RCU, tell _wait_rcu_gp() does not bother waiting for RCU,
+ * given that anywhere synchronize_rcu_mult() can be called is automatically
+ * a grace period.
  */
 #define synchronize_rcu_mult(...) \
 	_wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__)
-- 
cgit v1.2.3


From 4d232dfe1df35254298e7986c4de8c9f63f58c79 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 10 Jul 2018 12:53:40 -0700
Subject: rcu: Remove !PREEMPT code from rcu_note_voluntary_context_switch()

Because RCU-tasks exists only in PREEMPT kernels and because RCU-sched
no longer exists in PREEMPT kernels, it is no longer necessary for the
rcu_note_voluntary_context_switch() macro to do anything for !PREEMPT
kernels.  This commit therefore removes !PREEMPT-related code from
this macro, namely, the rcu_all_qs().

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index d6d543b60a9f..e4f821165d0b 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -150,18 +150,14 @@ static inline void rcu_init_nohz(void) { }
 		if (READ_ONCE((t)->rcu_tasks_holdout)) \
 			WRITE_ONCE((t)->rcu_tasks_holdout, false); \
 	} while (0)
-#define rcu_note_voluntary_context_switch(t) \
-	do { \
-		rcu_all_qs(); \
-		rcu_tasks_qs(t); \
-	} while (0)
+#define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t)
 void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
 void synchronize_rcu_tasks(void);
 void exit_tasks_rcu_start(void);
 void exit_tasks_rcu_finish(void);
 #else /* #ifdef CONFIG_TASKS_RCU */
 #define rcu_tasks_qs(t)	do { } while (0)
-#define rcu_note_voluntary_context_switch(t)		rcu_all_qs()
+#define rcu_note_voluntary_context_switch(t) do { } while (0)
 #define call_rcu_tasks call_rcu
 #define synchronize_rcu_tasks synchronize_rcu
 static inline void exit_tasks_rcu_start(void) { }
-- 
cgit v1.2.3


From 395a2f097ebdddf2bfa286b6119f1b231025c2f1 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 10 Jul 2018 14:00:14 -0700
Subject: rcu: Define rcu_all_qs() only in !PREEMPT builds

Now that rcu_all_qs() is used only in !PREEMPT builds, move it to
tree_plugin.h so that it is defined only in those builds.  This in
turn means that rcu_momentary_dyntick_idle() is only used in !PREEMPT
builds, but it is simply marked __maybe_unused in order to keep it
near the rest of the dyntick-idle code.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcutree.h  |  2 ++
 kernel/rcu/tree.c        | 41 +----------------------------------------
 kernel/rcu/tree_plugin.h | 39 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 42 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index d09a9abe9440..7f83179177d1 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -66,7 +66,9 @@ void rcu_scheduler_starting(void);
 extern int rcu_scheduler_active __read_mostly;
 void rcu_end_inkernel_boot(void);
 bool rcu_is_watching(void);
+#ifndef CONFIG_PREEMPT
 void rcu_all_qs(void);
+#endif
 
 /* RCUtree hotplug events */
 int rcutree_prepare_cpu(unsigned int cpu);
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index c8761e7c7c00..e140aaa78527 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -356,7 +356,7 @@ bool rcu_eqs_special_set(int cpu)
  *
  * The caller must have disabled interrupts and must not be idle.
  */
-static void rcu_momentary_dyntick_idle(void)
+static void __maybe_unused rcu_momentary_dyntick_idle(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 	int special;
@@ -381,45 +381,6 @@ static int rcu_is_cpu_rrupt_from_idle(void)
 	       __this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1;
 }
 
-/*
- * Register an urgently needed quiescent state.  If there is an
- * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
- * dyntick-idle quiescent state visible to other CPUs, which will in
- * some cases serve for expedited as well as normal grace periods.
- * Either way, register a lightweight quiescent state.
- *
- * The barrier() calls are redundant in the common case when this is
- * called externally, but just in case this is called from within this
- * file.
- *
- */
-void rcu_all_qs(void)
-{
-	unsigned long flags;
-
-	if (!raw_cpu_read(rcu_dynticks.rcu_urgent_qs))
-		return;
-	preempt_disable();
-	/* Load rcu_urgent_qs before other flags. */
-	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) {
-		preempt_enable();
-		return;
-	}
-	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
-	barrier(); /* Avoid RCU read-side critical sections leaking down. */
-	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) {
-		local_irq_save(flags);
-		rcu_momentary_dyntick_idle();
-		local_irq_restore(flags);
-	}
-	if (unlikely(raw_cpu_read(rcu_data.cpu_no_qs.b.exp)))
-		rcu_qs();
-	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
-	barrier(); /* Avoid RCU read-side critical sections leaking up. */
-	preempt_enable();
-}
-EXPORT_SYMBOL_GPL(rcu_all_qs);
-
 #define DEFAULT_RCU_BLIMIT 10     /* Maximum callbacks per rcu_do_batch. */
 static long blimit = DEFAULT_RCU_BLIMIT;
 #define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index cd4c1b979446..7add1c297500 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -947,6 +947,45 @@ static void rcu_qs(void)
 	rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
 }
 
+/*
+ * Register an urgently needed quiescent state.  If there is an
+ * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
+ * dyntick-idle quiescent state visible to other CPUs, which will in
+ * some cases serve for expedited as well as normal grace periods.
+ * Either way, register a lightweight quiescent state.
+ *
+ * The barrier() calls are redundant in the common case when this is
+ * called externally, but just in case this is called from within this
+ * file.
+ *
+ */
+void rcu_all_qs(void)
+{
+	unsigned long flags;
+
+	if (!raw_cpu_read(rcu_dynticks.rcu_urgent_qs))
+		return;
+	preempt_disable();
+	/* Load rcu_urgent_qs before other flags. */
+	if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) {
+		preempt_enable();
+		return;
+	}
+	this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
+	barrier(); /* Avoid RCU read-side critical sections leaking down. */
+	if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) {
+		local_irq_save(flags);
+		rcu_momentary_dyntick_idle();
+		local_irq_restore(flags);
+	}
+	if (unlikely(raw_cpu_read(rcu_data.cpu_no_qs.b.exp)))
+		rcu_qs();
+	this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
+	barrier(); /* Avoid RCU read-side critical sections leaking up. */
+	preempt_enable();
+}
+EXPORT_SYMBOL_GPL(rcu_all_qs);
+
 /*
  * Note a PREEMPT=n context switch.  The caller must have disabled interrupts.
  */
-- 
cgit v1.2.3


From 74de6960c99d8df0d09fb29a7b014cb9c5571e2b Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 24 Jul 2018 15:28:09 -0700
Subject: rcu: Provide functions for determining if call_rcu() has been invoked

This commit adds rcu_head_init() and rcu_head_after_call_rcu() functions
to help RCU users detect when another CPU has passed the specified
rcu_head structure and function to call_rcu().  The rcu_head_init()
should be invoked before making the structure visible to RCU readers,
and then the rcu_head_after_call_rcu() may be invoked from within
an RCU read-side critical section on an rcu_head structure that
was obtained during a traversal of the data structure in question.
The rcu_head_after_call_rcu() function will return true if the rcu_head
structure has already been passed (with the specified function) to
call_rcu(), otherwise it will return false.

If rcu_head_init() has not been invoked on the rcu_head structure
or if the rcu_head (AKA callback) has already been invoked, then
rcu_head_after_call_rcu() will do WARN_ON_ONCE().

Reported-by: NeilBrown <neilb@suse.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
[ paulmck: Apply neilb naming feedback. ]
---
 include/linux/rcupdate.h | 40 ++++++++++++++++++++++++++++++++++++++++
 kernel/rcu/rcu.h         |  5 ++++-
 2 files changed, 44 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index e4f821165d0b..4db8bcacc51a 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -857,6 +857,46 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
 #endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */
 
 
+/* Has the specified rcu_head structure been handed to call_rcu()? */
+
+/*
+ * rcu_head_init - Initialize rcu_head for rcu_head_after_call_rcu()
+ * @rhp: The rcu_head structure to initialize.
+ *
+ * If you intend to invoke rcu_head_after_call_rcu() to test whether a
+ * given rcu_head structure has already been passed to call_rcu(), then
+ * you must also invoke this rcu_head_init() function on it just after
+ * allocating that structure.  Calls to this function must not race with
+ * calls to call_rcu(), rcu_head_after_call_rcu(), or callback invocation.
+ */
+static inline void rcu_head_init(struct rcu_head *rhp)
+{
+	rhp->func = (rcu_callback_t)~0L;
+}
+
+/*
+ * rcu_head_after_call_rcu - Has this rcu_head been passed to call_rcu()?
+ * @rhp: The rcu_head structure to test.
+ * @func: The function passed to call_rcu() along with @rhp.
+ *
+ * Returns @true if the @rhp has been passed to call_rcu() with @func,
+ * and @false otherwise.  Emits a warning in any other case, including
+ * the case where @rhp has already been invoked after a grace period.
+ * Calls to this function must not race with callback invocation.  One way
+ * to avoid such races is to enclose the call to rcu_head_after_call_rcu()
+ * in an RCU read-side critical section that includes a read-side fetch
+ * of the pointer to the structure containing @rhp.
+ */
+static inline bool
+rcu_head_after_call_rcu(struct rcu_head *rhp, rcu_callback_t f)
+{
+	if (READ_ONCE(rhp->func) == f)
+		return true;
+	WARN_ON_ONCE(READ_ONCE(rhp->func) != (rcu_callback_t)~0L);
+	return false;
+}
+
+
 /* Transitional pre-consolidation compatibility definitions. */
 
 static inline void synchronize_rcu_bh(void)
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 5dec94509a7e..4c56c1d98fb3 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -224,6 +224,7 @@ void kfree(const void *);
  */
 static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
 {
+	rcu_callback_t f;
 	unsigned long offset = (unsigned long)head->func;
 
 	rcu_lock_acquire(&rcu_callback_map);
@@ -234,7 +235,9 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
 		return true;
 	} else {
 		RCU_TRACE(trace_rcu_invoke_callback(rn, head);)
-		head->func(head);
+		f = head->func;
+		WRITE_ONCE(head->func, (rcu_callback_t)0L);
+		f(head);
 		rcu_lock_release(&rcu_callback_map);
 		return false;
 	}
-- 
cgit v1.2.3


From 31ab604bf3232374e6471a2df9a83c4e75538390 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 3 Aug 2018 16:15:54 -0700
Subject: rcu: Remove unused rcu_dynticks_snap() from Tiny RCU

The rcu_dynticks_snap() function is defined in include/linux/rcutiny.h,
but is no longer used by Tiny RCU.  This commit therefore removes it.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcutiny.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 7fa4fb9e899e..f183683bdf79 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -27,12 +27,6 @@
 
 #include <linux/ktime.h>
 
-struct rcu_dynticks;
-static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
-{
-	return 0;
-}
-
 /* Never flag non-existent other CPUs! */
 static inline bool rcu_eqs_special_set(int cpu) { return false; }
 
-- 
cgit v1.2.3


From e0fcba9ac02af5aeb1e1c3e842eab987f817c309 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 14 Aug 2018 08:45:54 -0700
Subject: srcu: Make call_srcu() available during very early boot

Event tracing is moving to SRCU in order to take advantage of the fact
that SRCU may be safely used from idle and even offline CPUs.  However,
event tracing can invoke call_srcu() very early in the boot process,
even before workqueue_init_early() is invoked (let alone rcu_init()).
Therefore, call_srcu()'s attempts to queue work fail miserably.

This commit therefore detects this situation, and refrains from attempting
to queue work before rcu_init() time, but does everything else that it
would have done, and in addition, adds the srcu_struct to a global list.
The rcu_init() function now invokes a new srcu_init() function, which
is empty if CONFIG_SRCU=n.  Otherwise, srcu_init() queues work for
each srcu_struct on the list.  This all happens early enough in boot
that there is but a single CPU with interrupts disabled, which allows
synchronization to be dispensed with.

Of course, the queued work won't actually be invoked until after
workqueue_init() is invoked, which happens shortly after the scheduler
is up and running.  This means that although call_srcu() may be invoked
any time after per-CPU variables have been set up, there is still a very
narrow window when synchronize_srcu() won't work, and this window
extends from the time that the scheduler starts until the time that
workqueue_init() returns.  This can be fixed in a manner similar to
the fix for synchronize_rcu_expedited() and friends, but until someone
actually needs to use synchronize_srcu() during this window, this fix
is added churn for no benefit.

Finally, note that Tree SRCU's new srcu_init() function invokes
queue_work() rather than the queue_delayed_work() function that is
invoked post-boot.  The reason is that queue_delayed_work() will (as you
would expect) post a timer, and timers have not yet been initialized.
So use of queue_work() avoids the complaints about use of uninitialized
spinlocks that would otherwise result.  Besides, some delay is already
provide by the aforementioned fact that the queued work won't actually
be invoked until after the scheduler is up and running.

Requested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/srcutiny.h |  2 ++
 include/linux/srcutree.h | 14 ++++++++------
 kernel/rcu/rcu.h         |  6 ++++++
 kernel/rcu/srcutiny.c    | 29 +++++++++++++++++++++++++++--
 kernel/rcu/srcutree.c    | 26 ++++++++++++++++++++++++--
 kernel/rcu/tiny.c        |  1 +
 kernel/rcu/tree.c        |  1 +
 kernel/rcu/update.c      |  9 +++++++++
 8 files changed, 78 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index f41d2fb09f87..2b5c0822e683 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -36,6 +36,7 @@ struct srcu_struct {
 	struct rcu_head *srcu_cb_head;	/* Pending callbacks: Head. */
 	struct rcu_head **srcu_cb_tail;	/* Pending callbacks: Tail. */
 	struct work_struct srcu_work;	/* For driving grace periods. */
+	struct list_head srcu_boot_entry; /* Early-boot callbacks. */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -48,6 +49,7 @@ void srcu_drive_gp(struct work_struct *wp);
 	.srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq),	\
 	.srcu_cb_tail = &name.srcu_cb_head,				\
 	.srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp),	\
+	.srcu_boot_entry = LIST_HEAD_INIT(name.srcu_boot_entry),	\
 	__SRCU_DEP_MAP_INIT(name)					\
 }
 
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 745d4ca4dd50..9cfa4610113a 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -94,6 +94,7 @@ struct srcu_struct {
 						/*  callback for the barrier */
 						/*  operation. */
 	struct delayed_work work;
+	struct list_head srcu_boot_entry;	/* Early-boot callbacks. */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -105,12 +106,13 @@ struct srcu_struct {
 #define SRCU_STATE_SCAN2	2
 
 #define __SRCU_STRUCT_INIT(name, pcpu_name)				\
-	{								\
-		.sda = &pcpu_name,					\
-		.lock = __SPIN_LOCK_UNLOCKED(name.lock),		\
-		.srcu_gp_seq_needed = 0 - 1,				\
-		__SRCU_DEP_MAP_INIT(name)				\
-	}
+{									\
+	.sda = &pcpu_name,						\
+	.lock = __SPIN_LOCK_UNLOCKED(name.lock),			\
+	.srcu_gp_seq_needed = -1UL,					\
+	.srcu_boot_entry = LIST_HEAD_INIT(name.srcu_boot_entry),	\
+	__SRCU_DEP_MAP_INIT(name)					\
+}
 
 /*
  * Define and initialize a srcu struct at build time.
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 4d04683c31b2..e1b5aec5ec1c 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -435,6 +435,12 @@ do {									\
 
 #endif /* #if defined(SRCU) || !defined(TINY_RCU) */
 
+#ifdef CONFIG_SRCU
+void srcu_init(void);
+#else /* #ifdef CONFIG_SRCU */
+static inline void srcu_init(void) { }
+#endif /* #else #ifdef CONFIG_SRCU */
+
 #ifdef CONFIG_TINY_RCU
 /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
 static inline bool rcu_gp_is_normal(void) { return true; }
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 04fc2ed71af8..d233f0c63f6f 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -34,6 +34,8 @@
 #include "rcu.h"
 
 int rcu_scheduler_active __read_mostly;
+static LIST_HEAD(srcu_boot_list);
+static bool srcu_init_done;
 
 static int init_srcu_struct_fields(struct srcu_struct *sp)
 {
@@ -46,6 +48,7 @@ static int init_srcu_struct_fields(struct srcu_struct *sp)
 	sp->srcu_gp_waiting = false;
 	sp->srcu_idx = 0;
 	INIT_WORK(&sp->srcu_work, srcu_drive_gp);
+	INIT_LIST_HEAD(&sp->srcu_boot_entry);
 	return 0;
 }
 
@@ -179,8 +182,12 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 	*sp->srcu_cb_tail = rhp;
 	sp->srcu_cb_tail = &rhp->next;
 	local_irq_restore(flags);
-	if (!READ_ONCE(sp->srcu_gp_running))
-		schedule_work(&sp->srcu_work);
+	if (!READ_ONCE(sp->srcu_gp_running)) {
+		if (likely(srcu_init_done))
+			schedule_work(&sp->srcu_work);
+		else if (list_empty(&sp->srcu_boot_entry))
+			list_add(&sp->srcu_boot_entry, &srcu_boot_list);
+	}
 }
 EXPORT_SYMBOL_GPL(call_srcu);
 
@@ -204,3 +211,21 @@ void __init rcu_scheduler_starting(void)
 {
 	rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
 }
+
+/*
+ * Queue work for srcu_struct structures with early boot callbacks.
+ * The work won't actually execute until the workqueue initialization
+ * phase that takes place after the scheduler starts.
+ */
+void __init srcu_init(void)
+{
+	struct srcu_struct *sp;
+
+	srcu_init_done = true;
+	while (!list_empty(&srcu_boot_list)) {
+		sp = list_first_entry(&srcu_boot_list,
+				      struct srcu_struct, srcu_boot_entry);
+		list_del_init(&sp->srcu_boot_entry);
+		schedule_work(&sp->srcu_work);
+	}
+}
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 6c9866a854b1..2e7f6b460150 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -51,6 +51,10 @@ module_param(exp_holdoff, ulong, 0444);
 static ulong counter_wrap_check = (ULONG_MAX >> 2);
 module_param(counter_wrap_check, ulong, 0444);
 
+/* Early-boot callback-management, so early that no lock is required! */
+static LIST_HEAD(srcu_boot_list);
+static bool __read_mostly srcu_init_done;
+
 static void srcu_invoke_callbacks(struct work_struct *work);
 static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
 static void process_srcu(struct work_struct *work);
@@ -182,6 +186,7 @@ static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static)
 	mutex_init(&sp->srcu_barrier_mutex);
 	atomic_set(&sp->srcu_barrier_cpu_cnt, 0);
 	INIT_DELAYED_WORK(&sp->work, process_srcu);
+	INIT_LIST_HEAD(&sp->srcu_boot_entry);
 	if (!is_static)
 		sp->sda = alloc_percpu(struct srcu_data);
 	init_srcu_struct_nodes(sp, is_static);
@@ -235,7 +240,6 @@ static void check_init_srcu_struct(struct srcu_struct *sp)
 {
 	unsigned long flags;
 
-	WARN_ON_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INIT);
 	/* The smp_load_acquire() pairs with the smp_store_release(). */
 	if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/
 		return; /* Already initialized. */
@@ -701,7 +705,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
 	    rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) {
 		WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
 		srcu_gp_start(sp);
-		queue_delayed_work(rcu_gp_wq, &sp->work, srcu_get_delay(sp));
+		if (likely(srcu_init_done))
+			queue_delayed_work(rcu_gp_wq, &sp->work,
+					   srcu_get_delay(sp));
+		else if (list_empty(&sp->srcu_boot_entry))
+			list_add(&sp->srcu_boot_entry, &srcu_boot_list);
 	}
 	spin_unlock_irqrestore_rcu_node(sp, flags);
 }
@@ -1308,3 +1316,17 @@ static int __init srcu_bootup_announce(void)
 	return 0;
 }
 early_initcall(srcu_bootup_announce);
+
+void __init srcu_init(void)
+{
+	struct srcu_struct *sp;
+
+	srcu_init_done = true;
+	while (!list_empty(&srcu_boot_list)) {
+		sp = list_first_entry(&srcu_boot_list,
+				      struct srcu_struct, srcu_boot_entry);
+		check_init_srcu_struct(sp);
+		list_del_init(&sp->srcu_boot_entry);
+		queue_work(rcu_gp_wq, &sp->work.work);
+	}
+}
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index befc9321a89c..101ed5bb836c 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -236,4 +236,5 @@ void __init rcu_init(void)
 {
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 	rcu_early_boot_tests();
+	srcu_init();
 }
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0b760c1369f7..43c806291208 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4164,6 +4164,7 @@ void __init rcu_init(void)
 	WARN_ON(!rcu_gp_wq);
 	rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
 	WARN_ON(!rcu_par_gp_wq);
+	srcu_init();
 }
 
 #include "tree_exp.h"
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 39cb23d22109..7d057d0aaec4 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -888,11 +888,16 @@ static void test_callback(struct rcu_head *r)
 	pr_info("RCU test callback executed %d\n", rcu_self_test_counter);
 }
 
+DEFINE_STATIC_SRCU(early_srcu);
+
 static void early_boot_test_call_rcu(void)
 {
 	static struct rcu_head head;
+	static struct rcu_head shead;
 
 	call_rcu(&head, test_callback);
+	if (IS_ENABLED(CONFIG_SRCU))
+		call_srcu(&early_srcu, &shead, test_callback);
 }
 
 static void early_boot_test_call_rcu_bh(void)
@@ -930,6 +935,10 @@ static int rcu_verify_early_boot_tests(void)
 	if (rcu_self_test) {
 		early_boot_test_counter++;
 		rcu_barrier();
+		if (IS_ENABLED(CONFIG_SRCU)) {
+			early_boot_test_counter++;
+			srcu_barrier(&early_srcu);
+		}
 	}
 	if (rcu_self_test_bh) {
 		early_boot_test_counter++;
-- 
cgit v1.2.3


From 4e6ea4ef56f9425cd239ffdb6be45b3aeeb347fd Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 14 Aug 2018 14:41:49 -0700
Subject: srcu: Make early-boot call_srcu() reuse workqueue lists

Allocating a list_head structure that is almost never used, and, when
used, is used only during early boot (rcu_init() and earlier), is a bit
wasteful.  This commit therefore eliminates that list_head in favor of
the one in the work_struct structure.  This is safe because the work_struct
structure cannot be used until after rcu_init() returns.

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Tested-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/srcutiny.h |  2 --
 include/linux/srcutree.h |  3 +--
 kernel/rcu/srcutiny.c    | 10 +++++-----
 kernel/rcu/srcutree.c    | 11 +++++------
 4 files changed, 11 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 2b5c0822e683..f41d2fb09f87 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -36,7 +36,6 @@ struct srcu_struct {
 	struct rcu_head *srcu_cb_head;	/* Pending callbacks: Head. */
 	struct rcu_head **srcu_cb_tail;	/* Pending callbacks: Tail. */
 	struct work_struct srcu_work;	/* For driving grace periods. */
-	struct list_head srcu_boot_entry; /* Early-boot callbacks. */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -49,7 +48,6 @@ void srcu_drive_gp(struct work_struct *wp);
 	.srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq),	\
 	.srcu_cb_tail = &name.srcu_cb_head,				\
 	.srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp),	\
-	.srcu_boot_entry = LIST_HEAD_INIT(name.srcu_boot_entry),	\
 	__SRCU_DEP_MAP_INIT(name)					\
 }
 
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 9cfa4610113a..0ae91b3a7406 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -94,7 +94,6 @@ struct srcu_struct {
 						/*  callback for the barrier */
 						/*  operation. */
 	struct delayed_work work;
-	struct list_head srcu_boot_entry;	/* Early-boot callbacks. */
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -110,7 +109,7 @@ struct srcu_struct {
 	.sda = &pcpu_name,						\
 	.lock = __SPIN_LOCK_UNLOCKED(name.lock),			\
 	.srcu_gp_seq_needed = -1UL,					\
-	.srcu_boot_entry = LIST_HEAD_INIT(name.srcu_boot_entry),	\
+	.work = __DELAYED_WORK_INITIALIZER(name.work, NULL, 0),		\
 	__SRCU_DEP_MAP_INIT(name)					\
 }
 
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index d233f0c63f6f..b46e6683f8c9 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -48,7 +48,7 @@ static int init_srcu_struct_fields(struct srcu_struct *sp)
 	sp->srcu_gp_waiting = false;
 	sp->srcu_idx = 0;
 	INIT_WORK(&sp->srcu_work, srcu_drive_gp);
-	INIT_LIST_HEAD(&sp->srcu_boot_entry);
+	INIT_LIST_HEAD(&sp->srcu_work.entry);
 	return 0;
 }
 
@@ -185,8 +185,8 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
 	if (!READ_ONCE(sp->srcu_gp_running)) {
 		if (likely(srcu_init_done))
 			schedule_work(&sp->srcu_work);
-		else if (list_empty(&sp->srcu_boot_entry))
-			list_add(&sp->srcu_boot_entry, &srcu_boot_list);
+		else if (list_empty(&sp->srcu_work.entry))
+			list_add(&sp->srcu_work.entry, &srcu_boot_list);
 	}
 }
 EXPORT_SYMBOL_GPL(call_srcu);
@@ -224,8 +224,8 @@ void __init srcu_init(void)
 	srcu_init_done = true;
 	while (!list_empty(&srcu_boot_list)) {
 		sp = list_first_entry(&srcu_boot_list,
-				      struct srcu_struct, srcu_boot_entry);
-		list_del_init(&sp->srcu_boot_entry);
+				      struct srcu_struct, srcu_work.entry);
+		list_del_init(&sp->srcu_work.entry);
 		schedule_work(&sp->srcu_work);
 	}
 }
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 2e7f6b460150..86c7fd0a1bfe 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -186,7 +186,6 @@ static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static)
 	mutex_init(&sp->srcu_barrier_mutex);
 	atomic_set(&sp->srcu_barrier_cpu_cnt, 0);
 	INIT_DELAYED_WORK(&sp->work, process_srcu);
-	INIT_LIST_HEAD(&sp->srcu_boot_entry);
 	if (!is_static)
 		sp->sda = alloc_percpu(struct srcu_data);
 	init_srcu_struct_nodes(sp, is_static);
@@ -708,8 +707,8 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
 		if (likely(srcu_init_done))
 			queue_delayed_work(rcu_gp_wq, &sp->work,
 					   srcu_get_delay(sp));
-		else if (list_empty(&sp->srcu_boot_entry))
-			list_add(&sp->srcu_boot_entry, &srcu_boot_list);
+		else if (list_empty(&sp->work.work.entry))
+			list_add(&sp->work.work.entry, &srcu_boot_list);
 	}
 	spin_unlock_irqrestore_rcu_node(sp, flags);
 }
@@ -1323,10 +1322,10 @@ void __init srcu_init(void)
 
 	srcu_init_done = true;
 	while (!list_empty(&srcu_boot_list)) {
-		sp = list_first_entry(&srcu_boot_list,
-				      struct srcu_struct, srcu_boot_entry);
+		sp = list_first_entry(&srcu_boot_list, struct srcu_struct,
+				      work.work.entry);
 		check_init_srcu_struct(sp);
-		list_del_init(&sp->srcu_boot_entry);
+		list_del_init(&sp->work.work.entry);
 		queue_work(rcu_gp_wq, &sp->work.work);
 	}
 }
-- 
cgit v1.2.3


From ef394f3fbecbe61d69450ad8cf0fa8f713c8ce8f Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Fri, 31 Aug 2018 14:34:02 +0200
Subject: regulator: da9063: fix DT probing with constraints

Commit 1c892e38ce59 ("regulator: da9063: Handle less LDOs on DA9063L")
reordered the da9063_regulator_info[] array, but not the DA9063_ID_*
regulator ids and not the da9063_matches[] array, because ids are used
as indices in the array initializer. This mismatch between regulator id
and da9063_regulator_info[] array index causes the driver probe to fail
because constraints from DT are not applied to the correct regulator:

  da9063 0-0058: Device detected (chip-ID: 0x61, var-ID: 0x50)
  DA9063_BMEM: Bringing 900000uV into 3300000-3300000uV
  DA9063_LDO9: Bringing 3300000uV into 2500000-2500000uV
  DA9063_LDO1: Bringing 900000uV into 3300000-3300000uV
  DA9063_LDO1: failed to apply 3300000-3300000uV constraint(-22)

This patch reorders the DA9063_ID_* as apparently intended, and with
them the entries in the da90630_matches[] array.

Fixes: 1c892e38ce59 ("regulator: da9063: Handle less LDOs on DA9063L")
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/mfd/da9063/pdata.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/da9063/pdata.h b/include/linux/mfd/da9063/pdata.h
index 8a125701ef7b..50bed4f89c1a 100644
--- a/include/linux/mfd/da9063/pdata.h
+++ b/include/linux/mfd/da9063/pdata.h
@@ -21,7 +21,7 @@
 /*
  * Regulator configuration
  */
-/* DA9063 regulator IDs */
+/* DA9063 and DA9063L regulator IDs */
 enum {
 	/* BUCKs */
 	DA9063_ID_BCORE1,
@@ -37,18 +37,20 @@ enum {
 	DA9063_ID_BMEM_BIO_MERGED,
 	/* When two BUCKs are merged, they cannot be reused separately */
 
-	/* LDOs */
+	/* LDOs on both DA9063 and DA9063L */
+	DA9063_ID_LDO3,
+	DA9063_ID_LDO7,
+	DA9063_ID_LDO8,
+	DA9063_ID_LDO9,
+	DA9063_ID_LDO11,
+
+	/* DA9063-only LDOs */
 	DA9063_ID_LDO1,
 	DA9063_ID_LDO2,
-	DA9063_ID_LDO3,
 	DA9063_ID_LDO4,
 	DA9063_ID_LDO5,
 	DA9063_ID_LDO6,
-	DA9063_ID_LDO7,
-	DA9063_ID_LDO8,
-	DA9063_ID_LDO9,
 	DA9063_ID_LDO10,
-	DA9063_ID_LDO11,
 };
 
 /* Regulators platform data */
-- 
cgit v1.2.3


From 6b06546206868f723f2061d703a3c3c378dcbf4c Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Fri, 31 Aug 2018 16:22:42 -0400
Subject: Revert "blk-throttle: fix race between blkcg_bio_issue_check() and
 cgroup_rmdir()"

This reverts commit 4c6994806f708559c2812b73501406e21ae5dcd0.

Destroying blkgs is tricky because of the nature of the relationship. A
blkg should go away when either a blkcg or a request_queue goes away.
However, blkg's pin the blkcg to ensure they remain valid. To break this
cycle, when a blkcg is offlined, blkgs put back their css ref. This
eventually lets css_free() get called which frees the blkcg.

The above commit (4c6994806f70) breaks this order of events by trying to
destroy blkgs in css_free(). As the blkgs still hold references to the
blkcg, css_free() is never called.

The race between blkcg_bio_issue_check() and cgroup_rmdir() will be
addressed in the following patch by delaying destruction of a blkg until
all writeback associated with the blkcg has been finished.

Fixes: 4c6994806f70 ("blk-throttle: fix race between blkcg_bio_issue_check() and cgroup_rmdir()")
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Cc: Jiufei Xue <jiufei.xue@linux.alibaba.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c         | 78 ++++++++++------------------------------------
 include/linux/blk-cgroup.h |  1 -
 2 files changed, 16 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 694595b29b8f..2998e4f095d1 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -310,28 +310,11 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 	}
 }
 
-static void blkg_pd_offline(struct blkcg_gq *blkg)
-{
-	int i;
-
-	lockdep_assert_held(blkg->q->queue_lock);
-	lockdep_assert_held(&blkg->blkcg->lock);
-
-	for (i = 0; i < BLKCG_MAX_POLS; i++) {
-		struct blkcg_policy *pol = blkcg_policy[i];
-
-		if (blkg->pd[i] && !blkg->pd[i]->offline &&
-		    pol->pd_offline_fn) {
-			pol->pd_offline_fn(blkg->pd[i]);
-			blkg->pd[i]->offline = true;
-		}
-	}
-}
-
 static void blkg_destroy(struct blkcg_gq *blkg)
 {
 	struct blkcg *blkcg = blkg->blkcg;
 	struct blkcg_gq *parent = blkg->parent;
+	int i;
 
 	lockdep_assert_held(blkg->q->queue_lock);
 	lockdep_assert_held(&blkcg->lock);
@@ -340,6 +323,13 @@ static void blkg_destroy(struct blkcg_gq *blkg)
 	WARN_ON_ONCE(list_empty(&blkg->q_node));
 	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
 
+	for (i = 0; i < BLKCG_MAX_POLS; i++) {
+		struct blkcg_policy *pol = blkcg_policy[i];
+
+		if (blkg->pd[i] && pol->pd_offline_fn)
+			pol->pd_offline_fn(blkg->pd[i]);
+	}
+
 	if (parent) {
 		blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
 		blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
@@ -382,7 +372,6 @@ static void blkg_destroy_all(struct request_queue *q)
 		struct blkcg *blkcg = blkg->blkcg;
 
 		spin_lock(&blkcg->lock);
-		blkg_pd_offline(blkg);
 		blkg_destroy(blkg);
 		spin_unlock(&blkcg->lock);
 	}
@@ -1058,54 +1047,21 @@ static struct cftype blkcg_legacy_files[] = {
  * @css: css of interest
  *
  * This function is called when @css is about to go away and responsible
- * for offlining all blkgs pd and killing all wbs associated with @css.
- * blkgs pd offline should be done while holding both q and blkcg locks.
- * As blkcg lock is nested inside q lock, this function performs reverse
- * double lock dancing.
+ * for shooting down all blkgs associated with @css.  blkgs should be
+ * removed while holding both q and blkcg locks.  As blkcg lock is nested
+ * inside q lock, this function performs reverse double lock dancing.
  *
  * This is the blkcg counterpart of ioc_release_fn().
  */
 static void blkcg_css_offline(struct cgroup_subsys_state *css)
 {
 	struct blkcg *blkcg = css_to_blkcg(css);
-	struct blkcg_gq *blkg;
 
 	spin_lock_irq(&blkcg->lock);
 
-	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
-		struct request_queue *q = blkg->q;
-
-		if (spin_trylock(q->queue_lock)) {
-			blkg_pd_offline(blkg);
-			spin_unlock(q->queue_lock);
-		} else {
-			spin_unlock_irq(&blkcg->lock);
-			cpu_relax();
-			spin_lock_irq(&blkcg->lock);
-		}
-	}
-
-	spin_unlock_irq(&blkcg->lock);
-
-	wb_blkcg_offline(blkcg);
-}
-
-/**
- * blkcg_destroy_all_blkgs - destroy all blkgs associated with a blkcg
- * @blkcg: blkcg of interest
- *
- * This function is called when blkcg css is about to free and responsible for
- * destroying all blkgs associated with @blkcg.
- * blkgs should be removed while holding both q and blkcg locks. As blkcg lock
- * is nested inside q lock, this function performs reverse double lock dancing.
- */
-static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
-{
-	spin_lock_irq(&blkcg->lock);
 	while (!hlist_empty(&blkcg->blkg_list)) {
 		struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
-						    struct blkcg_gq,
-						    blkcg_node);
+						struct blkcg_gq, blkcg_node);
 		struct request_queue *q = blkg->q;
 
 		if (spin_trylock(q->queue_lock)) {
@@ -1117,7 +1073,10 @@ static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
 			spin_lock_irq(&blkcg->lock);
 		}
 	}
+
 	spin_unlock_irq(&blkcg->lock);
+
+	wb_blkcg_offline(blkcg);
 }
 
 static void blkcg_css_free(struct cgroup_subsys_state *css)
@@ -1125,8 +1084,6 @@ static void blkcg_css_free(struct cgroup_subsys_state *css)
 	struct blkcg *blkcg = css_to_blkcg(css);
 	int i;
 
-	blkcg_destroy_all_blkgs(blkcg);
-
 	mutex_lock(&blkcg_pol_mutex);
 
 	list_del(&blkcg->all_blkcgs_node);
@@ -1480,11 +1437,8 @@ void blkcg_deactivate_policy(struct request_queue *q,
 
 	list_for_each_entry(blkg, &q->blkg_list, q_node) {
 		if (blkg->pd[pol->plid]) {
-			if (!blkg->pd[pol->plid]->offline &&
-			    pol->pd_offline_fn) {
+			if (pol->pd_offline_fn)
 				pol->pd_offline_fn(blkg->pd[pol->plid]);
-				blkg->pd[pol->plid]->offline = true;
-			}
 			pol->pd_free_fn(blkg->pd[pol->plid]);
 			blkg->pd[pol->plid] = NULL;
 		}
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 34aec30e06c7..1615cdd4c797 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -89,7 +89,6 @@ struct blkg_policy_data {
 	/* the blkg and policy id this per-policy data belongs to */
 	struct blkcg_gq			*blkg;
 	int				plid;
-	bool				offline;
 };
 
 /*
-- 
cgit v1.2.3


From 59b57717fff8b562825d9d25e0180ad7e8048ca9 Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Fri, 31 Aug 2018 16:22:43 -0400
Subject: blkcg: delay blkg destruction until after writeback has finished

Currently, blkcg destruction relies on a sequence of events:
  1. Destruction starts. blkcg_css_offline() is called and blkgs
     release their reference to the blkcg. This immediately destroys
     the cgwbs (writeback).
  2. With blkgs giving up their reference, the blkcg ref count should
     become zero and eventually call blkcg_css_free() which finally
     frees the blkcg.

Jiufei Xue reported that there is a race between blkcg_bio_issue_check()
and cgroup_rmdir(). To remedy this, blkg destruction becomes contingent
on the completion of all writeback associated with the blkcg. A count of
the number of cgwbs is maintained and once that goes to zero, blkg
destruction can follow. This should prevent premature blkg destruction
related to writeback.

The new process for blkcg cleanup is as follows:
  1. Destruction starts. blkcg_css_offline() is called which offlines
     writeback. Blkg destruction is delayed on the cgwb_refcnt count to
     avoid punting potentially large amounts of outstanding writeback
     to root while maintaining any ongoing policies. Here, the base
     cgwb_refcnt is put back.
  2. When the cgwb_refcnt becomes zero, blkcg_destroy_blkgs() is called
     and handles destruction of blkgs. This is where the css reference
     held by each blkg is released.
  3. Once the blkcg ref count goes to zero, blkcg_css_free() is called.
     This finally frees the blkg.

It seems in the past blk-throttle didn't do the most understandable
things with taking data from a blkg while associating with current. So,
the simplification and unification of what blk-throttle is doing caused
this.

Fixes: 08e18eab0c579 ("block: add bi_blkg to the bio for cgroups")
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Cc: Jiufei Xue <jiufei.xue@linux.alibaba.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c         | 53 +++++++++++++++++++++++++++++++++++++++-------
 include/linux/blk-cgroup.h | 44 ++++++++++++++++++++++++++++++++++++++
 mm/backing-dev.c           |  5 +++++
 3 files changed, 94 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 2998e4f095d1..c19f9078da1e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1042,21 +1042,59 @@ static struct cftype blkcg_legacy_files[] = {
 	{ }	/* terminate */
 };
 
+/*
+ * blkcg destruction is a three-stage process.
+ *
+ * 1. Destruction starts.  The blkcg_css_offline() callback is invoked
+ *    which offlines writeback.  Here we tie the next stage of blkg destruction
+ *    to the completion of writeback associated with the blkcg.  This lets us
+ *    avoid punting potentially large amounts of outstanding writeback to root
+ *    while maintaining any ongoing policies.  The next stage is triggered when
+ *    the nr_cgwbs count goes to zero.
+ *
+ * 2. When the nr_cgwbs count goes to zero, blkcg_destroy_blkgs() is called
+ *    and handles the destruction of blkgs.  Here the css reference held by
+ *    the blkg is put back eventually allowing blkcg_css_free() to be called.
+ *    This work may occur in cgwb_release_workfn() on the cgwb_release
+ *    workqueue.  Any submitted ios that fail to get the blkg ref will be
+ *    punted to the root_blkg.
+ *
+ * 3. Once the blkcg ref count goes to zero, blkcg_css_free() is called.
+ *    This finally frees the blkcg.
+ */
+
 /**
  * blkcg_css_offline - cgroup css_offline callback
  * @css: css of interest
  *
- * This function is called when @css is about to go away and responsible
- * for shooting down all blkgs associated with @css.  blkgs should be
- * removed while holding both q and blkcg locks.  As blkcg lock is nested
- * inside q lock, this function performs reverse double lock dancing.
- *
- * This is the blkcg counterpart of ioc_release_fn().
+ * This function is called when @css is about to go away.  Here the cgwbs are
+ * offlined first and only once writeback associated with the blkcg has
+ * finished do we start step 2 (see above).
  */
 static void blkcg_css_offline(struct cgroup_subsys_state *css)
 {
 	struct blkcg *blkcg = css_to_blkcg(css);
 
+	/* this prevents anyone from attaching or migrating to this blkcg */
+	wb_blkcg_offline(blkcg);
+
+	/* put the base cgwb reference allowing step 2 to be triggered */
+	blkcg_cgwb_put(blkcg);
+}
+
+/**
+ * blkcg_destroy_blkgs - responsible for shooting down blkgs
+ * @blkcg: blkcg of interest
+ *
+ * blkgs should be removed while holding both q and blkcg locks.  As blkcg lock
+ * is nested inside q lock, this function performs reverse double lock dancing.
+ * Destroying the blkgs releases the reference held on the blkcg's css allowing
+ * blkcg_css_free to eventually be called.
+ *
+ * This is the blkcg counterpart of ioc_release_fn().
+ */
+void blkcg_destroy_blkgs(struct blkcg *blkcg)
+{
 	spin_lock_irq(&blkcg->lock);
 
 	while (!hlist_empty(&blkcg->blkg_list)) {
@@ -1075,8 +1113,6 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
 	}
 
 	spin_unlock_irq(&blkcg->lock);
-
-	wb_blkcg_offline(blkcg);
 }
 
 static void blkcg_css_free(struct cgroup_subsys_state *css)
@@ -1146,6 +1182,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
 	INIT_HLIST_HEAD(&blkcg->blkg_list);
 #ifdef CONFIG_CGROUP_WRITEBACK
 	INIT_LIST_HEAD(&blkcg->cgwb_list);
+	refcount_set(&blkcg->cgwb_refcnt, 1);
 #endif
 	list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
 
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 1615cdd4c797..6d766a19f2bb 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -56,6 +56,7 @@ struct blkcg {
 	struct list_head		all_blkcgs_node;
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct list_head		cgwb_list;
+	refcount_t			cgwb_refcnt;
 #endif
 };
 
@@ -386,6 +387,49 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
 	return cpd ? cpd->blkcg : NULL;
 }
 
+extern void blkcg_destroy_blkgs(struct blkcg *blkcg);
+
+#ifdef CONFIG_CGROUP_WRITEBACK
+
+/**
+ * blkcg_cgwb_get - get a reference for blkcg->cgwb_list
+ * @blkcg: blkcg of interest
+ *
+ * This is used to track the number of active wb's related to a blkcg.
+ */
+static inline void blkcg_cgwb_get(struct blkcg *blkcg)
+{
+	refcount_inc(&blkcg->cgwb_refcnt);
+}
+
+/**
+ * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list
+ * @blkcg: blkcg of interest
+ *
+ * This is used to track the number of active wb's related to a blkcg.
+ * When this count goes to zero, all active wb has finished so the
+ * blkcg can continue destruction by calling blkcg_destroy_blkgs().
+ * This work may occur in cgwb_release_workfn() on the cgwb_release
+ * workqueue.
+ */
+static inline void blkcg_cgwb_put(struct blkcg *blkcg)
+{
+	if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
+		blkcg_destroy_blkgs(blkcg);
+}
+
+#else
+
+static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }
+
+static inline void blkcg_cgwb_put(struct blkcg *blkcg)
+{
+	/* wb isn't being accounted, so trigger destruction right away */
+	blkcg_destroy_blkgs(blkcg);
+}
+
+#endif
+
 /**
  * blkg_path - format cgroup path of blkg
  * @blkg: blkg of interest
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f5981e9d6ae2..8a8bb8796c6c 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -491,6 +491,7 @@ static void cgwb_release_workfn(struct work_struct *work)
 {
 	struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
 						release_work);
+	struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);
 
 	mutex_lock(&wb->bdi->cgwb_release_mutex);
 	wb_shutdown(wb);
@@ -499,6 +500,9 @@ static void cgwb_release_workfn(struct work_struct *work)
 	css_put(wb->blkcg_css);
 	mutex_unlock(&wb->bdi->cgwb_release_mutex);
 
+	/* triggers blkg destruction if cgwb_refcnt becomes zero */
+	blkcg_cgwb_put(blkcg);
+
 	fprop_local_destroy_percpu(&wb->memcg_completions);
 	percpu_ref_exit(&wb->refcnt);
 	wb_exit(wb);
@@ -597,6 +601,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
 			list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
 			list_add(&wb->memcg_node, memcg_cgwb_list);
 			list_add(&wb->blkcg_node, blkcg_cgwb_list);
+			blkcg_cgwb_get(blkcg);
 			css_get(memcg_css);
 			css_get(blkcg_css);
 		}
-- 
cgit v1.2.3


From c43c5e9f524ec914e7e202f9c5ab91779629ccc6 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Mon, 3 Sep 2018 10:15:33 +0200
Subject: timekeeping: Fix declaration of
 read_persistent_wall_and_boot_offset()

It is read_persistent_wall_and_boot_offset() and not
read_persistent_clock_and_boot_offset()

Fixes: 3eca993740b8eb40f51 ("timekeeping: Replace read_boot_clock64() with read_persistent_wall_and_boot_offset()")
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Pavel Tatashin <pasha.tatashin@oracle.com>
Link: https://lkml.kernel.org/r/20180903081533.34366-1-borntraeger@de.ibm.com
---
 include/linux/timekeeping.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 5d738804e3d6..a5a3cfc3c2fa 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -258,8 +258,8 @@ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot);
 extern int persistent_clock_is_local;
 
 extern void read_persistent_clock64(struct timespec64 *ts);
-void read_persistent_clock_and_boot_offset(struct timespec64 *wall_clock,
-					   struct timespec64 *boot_offset);
+void read_persistent_wall_and_boot_offset(struct timespec64 *wall_clock,
+					  struct timespec64 *boot_offset);
 extern int update_persistent_clock64(struct timespec64 now);
 
 /*
-- 
cgit v1.2.3


From 4ac695464763ecf696eaba563eff1c2ab994f6d8 Mon Sep 17 00:00:00 2001
From: Eric Long <eric.long@spreadtrum.com>
Date: Tue, 28 Aug 2018 19:09:07 +0800
Subject: dmaengine: sprd: Support DMA link-list mode

The Spreadtrum DMA can support the link-list transaction mode, which means
DMA controller can do transaction one by one automatically once we linked
these transaction by link-list register.

Signed-off-by: Eric Long <eric.long@spreadtrum.com>
Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/sprd-dma.c       | 81 ++++++++++++++++++++++++++++++++++++++++----
 include/linux/dma/sprd-dma.h | 69 +++++++++++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/dma/sprd-dma.c b/drivers/dma/sprd-dma.c
index 55df0d41355b..38d4e4f07c66 100644
--- a/drivers/dma/sprd-dma.c
+++ b/drivers/dma/sprd-dma.c
@@ -68,6 +68,7 @@
 
 /* SPRD_DMA_CHN_CFG register definition */
 #define SPRD_DMA_CHN_EN			BIT(0)
+#define SPRD_DMA_LINKLIST_EN		BIT(4)
 #define SPRD_DMA_WAIT_BDONE_OFFSET	24
 #define SPRD_DMA_DONOT_WAIT_BDONE	1
 
@@ -103,7 +104,7 @@
 #define SPRD_DMA_REQ_MODE_MASK		GENMASK(1, 0)
 #define SPRD_DMA_FIX_SEL_OFFSET		21
 #define SPRD_DMA_FIX_EN_OFFSET		20
-#define SPRD_DMA_LLIST_END_OFFSET	19
+#define SPRD_DMA_LLIST_END		BIT(19)
 #define SPRD_DMA_FRG_LEN_MASK		GENMASK(16, 0)
 
 /* SPRD_DMA_CHN_BLK_LEN register definition */
@@ -164,6 +165,7 @@ struct sprd_dma_desc {
 struct sprd_dma_chn {
 	struct virt_dma_chan	vc;
 	void __iomem		*chn_base;
+	struct sprd_dma_linklist	linklist;
 	struct dma_slave_config	slave_cfg;
 	u32			chn_num;
 	u32			dev_id;
@@ -582,7 +584,8 @@ static int sprd_dma_get_step(enum dma_slave_buswidth buswidth)
 }
 
 static int sprd_dma_fill_desc(struct dma_chan *chan,
-			      struct sprd_dma_desc *sdesc,
+			      struct sprd_dma_chn_hw *hw,
+			      unsigned int sglen, int sg_index,
 			      dma_addr_t src, dma_addr_t dst, u32 len,
 			      enum dma_transfer_direction dir,
 			      unsigned long flags,
@@ -590,7 +593,6 @@ static int sprd_dma_fill_desc(struct dma_chan *chan,
 {
 	struct sprd_dma_dev *sdev = to_sprd_dma_dev(chan);
 	struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
-	struct sprd_dma_chn_hw *hw = &sdesc->chn_hw;
 	u32 req_mode = (flags >> SPRD_DMA_REQ_SHIFT) & SPRD_DMA_REQ_MODE_MASK;
 	u32 int_mode = flags & SPRD_DMA_INT_MASK;
 	int src_datawidth, dst_datawidth, src_step, dst_step;
@@ -670,12 +672,52 @@ static int sprd_dma_fill_desc(struct dma_chan *chan,
 	temp |= (src_step & SPRD_DMA_TRSF_STEP_MASK) << SPRD_DMA_SRC_TRSF_STEP_OFFSET;
 	hw->trsf_step = temp;
 
+	/* link-list configuration */
+	if (schan->linklist.phy_addr) {
+		if (sg_index == sglen - 1)
+			hw->frg_len |= SPRD_DMA_LLIST_END;
+
+		hw->cfg |= SPRD_DMA_LINKLIST_EN;
+
+		/* link-list index */
+		temp = (sg_index + 1) % sglen;
+		/* Next link-list configuration's physical address offset */
+		temp = temp * sizeof(*hw) + SPRD_DMA_CHN_SRC_ADDR;
+		/*
+		 * Set the link-list pointer point to next link-list
+		 * configuration's physical address.
+		 */
+		hw->llist_ptr = schan->linklist.phy_addr + temp;
+	} else {
+		hw->llist_ptr = 0;
+	}
+
 	hw->frg_step = 0;
 	hw->src_blk_step = 0;
 	hw->des_blk_step = 0;
 	return 0;
 }
 
+static int sprd_dma_fill_linklist_desc(struct dma_chan *chan,
+				       unsigned int sglen, int sg_index,
+				       dma_addr_t src, dma_addr_t dst, u32 len,
+				       enum dma_transfer_direction dir,
+				       unsigned long flags,
+				       struct dma_slave_config *slave_cfg)
+{
+	struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
+	struct sprd_dma_chn_hw *hw;
+
+	if (!schan->linklist.virt_addr)
+		return -EINVAL;
+
+	hw = (struct sprd_dma_chn_hw *)(schan->linklist.virt_addr +
+					sg_index * sizeof(*hw));
+
+	return sprd_dma_fill_desc(chan, hw, sglen, sg_index, src, dst, len,
+				  dir, flags, slave_cfg);
+}
+
 static struct dma_async_tx_descriptor *
 sprd_dma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 			 size_t len, unsigned long flags)
@@ -744,10 +786,20 @@ sprd_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	u32 len = 0;
 	int ret, i;
 
-	/* TODO: now we only support one sg for each DMA configuration. */
-	if (!is_slave_direction(dir) || sglen > 1)
+	if (!is_slave_direction(dir))
 		return NULL;
 
+	if (context) {
+		struct sprd_dma_linklist *ll_cfg =
+			(struct sprd_dma_linklist *)context;
+
+		schan->linklist.phy_addr = ll_cfg->phy_addr;
+		schan->linklist.virt_addr = ll_cfg->virt_addr;
+	} else {
+		schan->linklist.phy_addr = 0;
+		schan->linklist.virt_addr = 0;
+	}
+
 	sdesc = kzalloc(sizeof(*sdesc), GFP_NOWAIT);
 	if (!sdesc)
 		return NULL;
@@ -762,10 +814,25 @@ sprd_dma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			src = slave_cfg->src_addr;
 			dst = sg_dma_address(sg);
 		}
+
+		/*
+		 * The link-list mode needs at least 2 link-list
+		 * configurations. If there is only one sg, it doesn't
+		 * need to fill the link-list configuration.
+		 */
+		if (sglen < 2)
+			break;
+
+		ret = sprd_dma_fill_linklist_desc(chan, sglen, i, src, dst, len,
+						  dir, flags, slave_cfg);
+		if (ret) {
+			kfree(sdesc);
+			return NULL;
+		}
 	}
 
-	ret = sprd_dma_fill_desc(chan, sdesc, src, dst, len, dir, flags,
-				 slave_cfg);
+	ret = sprd_dma_fill_desc(chan, &sdesc->chn_hw, 0, 0, src, dst, len,
+				 dir, flags, slave_cfg);
 	if (ret) {
 		kfree(sdesc);
 		return NULL;
diff --git a/include/linux/dma/sprd-dma.h b/include/linux/dma/sprd-dma.h
index b0115e340fbc..b42b80e52cc2 100644
--- a/include/linux/dma/sprd-dma.h
+++ b/include/linux/dma/sprd-dma.h
@@ -58,4 +58,73 @@ enum sprd_dma_int_type {
 	SPRD_DMA_CFGERR_INT,
 };
 
+/*
+ * struct sprd_dma_linklist - DMA link-list address structure
+ * @virt_addr: link-list virtual address to configure link-list node
+ * @phy_addr: link-list physical address to link DMA transfer
+ *
+ * The Spreadtrum DMA controller supports the link-list mode, that means slaves
+ * can supply several groups configurations (each configuration represents one
+ * DMA transfer) saved in memory, and DMA controller will link these groups
+ * configurations by writing the physical address of each configuration into the
+ * link-list register.
+ *
+ * Just as shown below, the link-list pointer register will be pointed to the
+ * physical address of 'configuration 1', and the 'configuration 1' link-list
+ * pointer will be pointed to 'configuration 2', and so on.
+ * Once trigger the DMA transfer, the DMA controller will load 'configuration
+ * 1' to its registers automatically, after 'configuration 1' transaction is
+ * done, DMA controller will load 'configuration 2' automatically, until all
+ * DMA transactions are done.
+ *
+ * Note: The last link-list pointer should point to the physical address
+ * of 'configuration 1', which can avoid DMA controller loads incorrect
+ * configuration when the last configuration transaction is done.
+ *
+ *     DMA controller                    linklist memory
+ * ======================             -----------------------
+ *|                      |           |    configuration 1    |<---
+ *|   DMA controller     |   ------->|                       |   |
+ *|                      |   |       |                       |   |
+ *|                      |   |       |                       |   |
+ *|                      |   |       |                       |   |
+ *| linklist pointer reg |----   ----|    linklist pointer   |   |
+ * ======================        |    -----------------------    |
+ *                               |                               |
+ *                               |    -----------------------    |
+ *                               |   |    configuration 2    |   |
+ *                               --->|                       |   |
+ *                                   |                       |   |
+ *                                   |                       |   |
+ *                                   |                       |   |
+ *                               ----|    linklist pointer   |   |
+ *                               |    -----------------------    |
+ *                               |                               |
+ *                               |    -----------------------    |
+ *                               |   |    configuration 3    |   |
+ *                               --->|                       |   |
+ *                                   |                       |   |
+ *                                   |           .           |   |
+ *                                               .               |
+ *                                               .               |
+ *                                               .               |
+ *                               |               .               |
+ *                               |    -----------------------    |
+ *                               |   |    configuration n    |   |
+ *                               --->|                       |   |
+ *                                   |                       |   |
+ *                                   |                       |   |
+ *                                   |                       |   |
+ *                                   |    linklist pointer   |----
+ *                                    -----------------------
+ *
+ * To support the link-list mode, DMA slaves should allocate one segment memory
+ * from always-on IRAM or dma coherent memory to store these groups of DMA
+ * configuration, and pass the virtual and physical address to DMA controller.
+ */
+struct sprd_dma_linklist {
+	unsigned long virt_addr;
+	phys_addr_t phy_addr;
+};
+
 #endif
-- 
cgit v1.2.3


From 9da3f2b74054406f87dff7101a569217ffceb29b Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Tue, 28 Aug 2018 22:14:20 +0200
Subject: x86/fault: BUG() when uaccess helpers fault on kernel addresses

There have been multiple kernel vulnerabilities that permitted userspace to
pass completely unchecked pointers through to userspace accessors:

 - the waitid() bug - commit 96ca579a1ecc ("waitid(): Add missing
   access_ok() checks")
 - the sg/bsg read/write APIs
 - the infiniband read/write APIs

These don't happen all that often, but when they do happen, it is hard to
test for them properly; and it is probably also hard to discover them with
fuzzing. Even when an unmapped kernel address is supplied to such buggy
code, it just returns -EFAULT instead of doing a proper BUG() or at least
WARN().

Try to make such misbehaving code a bit more visible by refusing to do a
fixup in the pagefault handler code when a userspace accessor causes a #PF
on a kernel address and the current context isn't whitelisted.

Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Kees Cook <keescook@chromium.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: kernel-hardening@lists.openwall.com
Cc: dvyukov@google.com
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Cc: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Cc: Borislav Petkov <bp@alien8.de>
Link: https://lkml.kernel.org/r/20180828201421.157735-7-jannh@google.com
---
 arch/x86/mm/extable.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/namespace.c        |  2 ++
 include/linux/sched.h |  6 ++++++
 mm/maccess.c          |  6 ++++++
 4 files changed, 72 insertions(+)

(limited to 'include/linux')

diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 856fa409c536..6521134057e8 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -117,11 +117,67 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
 }
 EXPORT_SYMBOL_GPL(ex_handler_fprestore);
 
+/* Helper to check whether a uaccess fault indicates a kernel bug. */
+static bool bogus_uaccess(struct pt_regs *regs, int trapnr,
+			  unsigned long fault_addr)
+{
+	/* This is the normal case: #PF with a fault address in userspace. */
+	if (trapnr == X86_TRAP_PF && fault_addr < TASK_SIZE_MAX)
+		return false;
+
+	/*
+	 * This code can be reached for machine checks, but only if the #MC
+	 * handler has already decided that it looks like a candidate for fixup.
+	 * This e.g. happens when attempting to access userspace memory which
+	 * the CPU can't access because of uncorrectable bad memory.
+	 */
+	if (trapnr == X86_TRAP_MC)
+		return false;
+
+	/*
+	 * There are two remaining exception types we might encounter here:
+	 *  - #PF for faulting accesses to kernel addresses
+	 *  - #GP for faulting accesses to noncanonical addresses
+	 * Complain about anything else.
+	 */
+	if (trapnr != X86_TRAP_PF && trapnr != X86_TRAP_GP) {
+		WARN(1, "unexpected trap %d in uaccess\n", trapnr);
+		return false;
+	}
+
+	/*
+	 * This is a faulting memory access in kernel space, on a kernel
+	 * address, in a usercopy function. This can e.g. be caused by improper
+	 * use of helpers like __put_user and by improper attempts to access
+	 * userspace addresses in KERNEL_DS regions.
+	 * The one (semi-)legitimate exception are probe_kernel_{read,write}(),
+	 * which can be invoked from places like kgdb, /dev/mem (for reading)
+	 * and privileged BPF code (for reading).
+	 * The probe_kernel_*() functions set the kernel_uaccess_faults_ok flag
+	 * to tell us that faulting on kernel addresses, and even noncanonical
+	 * addresses, in a userspace accessor does not necessarily imply a
+	 * kernel bug, root might just be doing weird stuff.
+	 */
+	if (current->kernel_uaccess_faults_ok)
+		return false;
+
+	/* This is bad. Refuse the fixup so that we go into die(). */
+	if (trapnr == X86_TRAP_PF) {
+		pr_emerg("BUG: pagefault on kernel address 0x%lx in non-whitelisted uaccess\n",
+			 fault_addr);
+	} else {
+		pr_emerg("BUG: GPF in non-whitelisted uaccess (non-canonical address?)\n");
+	}
+	return true;
+}
+
 __visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
 				  struct pt_regs *regs, int trapnr,
 				  unsigned long error_code,
 				  unsigned long fault_addr)
 {
+	if (bogus_uaccess(regs, trapnr, fault_addr))
+		return false;
 	regs->ip = ex_fixup_addr(fixup);
 	return true;
 }
@@ -132,6 +188,8 @@ __visible bool ex_handler_ext(const struct exception_table_entry *fixup,
 			      unsigned long error_code,
 			      unsigned long fault_addr)
 {
+	if (bogus_uaccess(regs, trapnr, fault_addr))
+		return false;
 	/* Special hack for uaccess_err */
 	current->thread.uaccess_err = 1;
 	regs->ip = ex_fixup_addr(fixup);
diff --git a/fs/namespace.c b/fs/namespace.c
index 99186556f8d3..d86830c86ce8 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2642,6 +2642,7 @@ static long exact_copy_from_user(void *to, const void __user * from,
 	if (!access_ok(VERIFY_READ, from, n))
 		return n;
 
+	current->kernel_uaccess_faults_ok++;
 	while (n) {
 		if (__get_user(c, f)) {
 			memset(t, 0, n);
@@ -2651,6 +2652,7 @@ static long exact_copy_from_user(void *to, const void __user * from,
 		f++;
 		n--;
 	}
+	current->kernel_uaccess_faults_ok--;
 	return n;
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 977cb57d7bc9..56dd65f1be4f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -739,6 +739,12 @@ struct task_struct {
 	unsigned			use_memdelay:1;
 #endif
 
+	/*
+	 * May usercopy functions fault on kernel addresses?
+	 * This is not just a single bit because this can potentially nest.
+	 */
+	unsigned int			kernel_uaccess_faults_ok;
+
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
 	struct restart_block		restart_block;
diff --git a/mm/maccess.c b/mm/maccess.c
index ec00be51a24f..f3416632e5a4 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -30,8 +30,10 @@ long __probe_kernel_read(void *dst, const void *src, size_t size)
 
 	set_fs(KERNEL_DS);
 	pagefault_disable();
+	current->kernel_uaccess_faults_ok++;
 	ret = __copy_from_user_inatomic(dst,
 			(__force const void __user *)src, size);
+	current->kernel_uaccess_faults_ok--;
 	pagefault_enable();
 	set_fs(old_fs);
 
@@ -58,7 +60,9 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
 
 	set_fs(KERNEL_DS);
 	pagefault_disable();
+	current->kernel_uaccess_faults_ok++;
 	ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
+	current->kernel_uaccess_faults_ok--;
 	pagefault_enable();
 	set_fs(old_fs);
 
@@ -94,11 +98,13 @@ long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
 
 	set_fs(KERNEL_DS);
 	pagefault_disable();
+	current->kernel_uaccess_faults_ok++;
 
 	do {
 		ret = __get_user(*dst++, (const char __user __force *)src++);
 	} while (dst[-1] && ret == 0 && src - unsafe_addr < count);
 
+	current->kernel_uaccess_faults_ok--;
 	dst[-1] = '\0';
 	pagefault_enable();
 	set_fs(old_fs);
-- 
cgit v1.2.3


From 1e6cb72399fd58b38a1c11055ef18fe01f535cda Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sat, 1 Sep 2018 10:41:11 +0300
Subject: fsnotify: add super block object type

Add the infrastructure to attach a mark to a super_block struct
and detach all attached marks when super block is destroyed.

This is going to be used by fanotify backend to setup super block
marks.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fdinfo.c               |  5 +++++
 fs/notify/fsnotify.c             |  8 +++++++-
 fs/notify/fsnotify.h             | 11 +++++++++++
 fs/notify/mark.c                 |  4 ++++
 fs/super.c                       |  2 +-
 include/linux/fs.h               |  5 +++++
 include/linux/fsnotify_backend.h | 17 ++++++++++++++---
 7 files changed, 47 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 86fcf5814279..25385e336ac7 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -131,6 +131,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
 
 		seq_printf(m, "fanotify mnt_id:%x mflags:%x mask:%x ignored_mask:%x\n",
 			   mnt->mnt_id, mflags, mark->mask, mark->ignored_mask);
+	} else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_SB) {
+		struct super_block *sb = fsnotify_conn_sb(mark->connector);
+
+		seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n",
+			   sb->s_dev, mflags, mark->mask, mark->ignored_mask);
 	}
 }
 
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index ababdbfab537..2971803d151c 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -48,7 +48,7 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
  * Called during unmount with no locks held, so needs to be safe against
  * concurrent modifiers. We temporarily drop sb->s_inode_list_lock and CAN block.
  */
-void fsnotify_unmount_inodes(struct super_block *sb)
+static void fsnotify_unmount_inodes(struct super_block *sb)
 {
 	struct inode *inode, *iput_inode = NULL;
 
@@ -98,6 +98,12 @@ void fsnotify_unmount_inodes(struct super_block *sb)
 		iput(iput_inode);
 }
 
+void fsnotify_sb_delete(struct super_block *sb)
+{
+	fsnotify_unmount_inodes(sb);
+	fsnotify_clear_marks_by_sb(sb);
+}
+
 /*
  * Given an inode, first check if we care what happens to our children.  Inotify
  * and dnotify both tell their parents about events.  If we care about any event
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 7902653dd577..5a00121fb219 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -21,6 +21,12 @@ static inline struct mount *fsnotify_conn_mount(
 	return container_of(conn->obj, struct mount, mnt_fsnotify_marks);
 }
 
+static inline struct super_block *fsnotify_conn_sb(
+				struct fsnotify_mark_connector *conn)
+{
+	return container_of(conn->obj, struct super_block, s_fsnotify_marks);
+}
+
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
 
@@ -43,6 +49,11 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
 {
 	fsnotify_destroy_marks(&real_mount(mnt)->mnt_fsnotify_marks);
 }
+/* run the list of all marks associated with sb and destroy them */
+static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
+{
+	fsnotify_destroy_marks(&sb->s_fsnotify_marks);
+}
 /* Wait until all marks queued for destruction are destroyed */
 extern void fsnotify_wait_marks_destroyed(void);
 
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 59cdb27826de..b5172ccb2e60 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -115,6 +115,8 @@ static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn)
 		return &fsnotify_conn_inode(conn)->i_fsnotify_mask;
 	else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT)
 		return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask;
+	else if (conn->type == FSNOTIFY_OBJ_TYPE_SB)
+		return &fsnotify_conn_sb(conn)->s_fsnotify_mask;
 	return NULL;
 }
 
@@ -192,6 +194,8 @@ static struct inode *fsnotify_detach_connector_from_object(
 		inode->i_fsnotify_mask = 0;
 	} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
 		fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
+	} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
+		fsnotify_conn_sb(conn)->s_fsnotify_mask = 0;
 	}
 
 	rcu_assign_pointer(*(conn->obj), NULL);
diff --git a/fs/super.c b/fs/super.c
index f3a8c008e164..ca53a08497ed 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -442,7 +442,7 @@ void generic_shutdown_super(struct super_block *sb)
 		sync_filesystem(sb);
 		sb->s_flags &= ~SB_ACTIVE;
 
-		fsnotify_unmount_inodes(sb);
+		fsnotify_sb_delete(sb);
 		cgroup_writeback_umount();
 
 		evict_inodes(sb);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 33322702c910..2c14801d0aa3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1464,6 +1464,11 @@ struct super_block {
 
 	spinlock_t		s_inode_wblist_lock;
 	struct list_head	s_inodes_wb;	/* writeback inodes */
+
+#ifdef CONFIG_FSNOTIFY
+	__u32			s_fsnotify_mask;
+	struct fsnotify_mark_connector __rcu	*s_fsnotify_marks;
+#endif
 } __randomize_layout;
 
 /* Helper functions so that in most cases filesystems will
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index b8f4182f42f1..81b88fc9df31 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -206,12 +206,14 @@ struct fsnotify_group {
 enum fsnotify_obj_type {
 	FSNOTIFY_OBJ_TYPE_INODE,
 	FSNOTIFY_OBJ_TYPE_VFSMOUNT,
+	FSNOTIFY_OBJ_TYPE_SB,
 	FSNOTIFY_OBJ_TYPE_COUNT,
 	FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT
 };
 
 #define FSNOTIFY_OBJ_TYPE_INODE_FL	(1U << FSNOTIFY_OBJ_TYPE_INODE)
 #define FSNOTIFY_OBJ_TYPE_VFSMOUNT_FL	(1U << FSNOTIFY_OBJ_TYPE_VFSMOUNT)
+#define FSNOTIFY_OBJ_TYPE_SB_FL		(1U << FSNOTIFY_OBJ_TYPE_SB)
 #define FSNOTIFY_OBJ_ALL_TYPES_MASK	((1U << FSNOTIFY_OBJ_TYPE_COUNT) - 1)
 
 static inline bool fsnotify_valid_obj_type(unsigned int type)
@@ -255,6 +257,7 @@ static inline struct fsnotify_mark *fsnotify_iter_##name##_mark( \
 
 FSNOTIFY_ITER_FUNCS(inode, INODE)
 FSNOTIFY_ITER_FUNCS(vfsmount, VFSMOUNT)
+FSNOTIFY_ITER_FUNCS(sb, SB)
 
 #define fsnotify_foreach_obj_type(type) \
 	for (type = 0; type < FSNOTIFY_OBJ_TYPE_COUNT; type++)
@@ -267,8 +270,8 @@ struct fsnotify_mark_connector;
 typedef struct fsnotify_mark_connector __rcu *fsnotify_connp_t;
 
 /*
- * Inode / vfsmount point to this structure which tracks all marks attached to
- * the inode / vfsmount. The reference to inode / vfsmount is held by this
+ * Inode/vfsmount/sb point to this structure which tracks all marks attached to
+ * the inode/vfsmount/sb. The reference to inode/vfsmount/sb is held by this
  * structure. We destroy this structure when there are no more marks attached
  * to it. The structure is protected by fsnotify_mark_srcu.
  */
@@ -335,6 +338,7 @@ extern int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int dat
 extern int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask);
 extern void __fsnotify_inode_delete(struct inode *inode);
 extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt);
+extern void fsnotify_sb_delete(struct super_block *sb);
 extern u32 fsnotify_get_cookie(void);
 
 static inline int fsnotify_inode_watches_children(struct inode *inode)
@@ -455,9 +459,13 @@ static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *gr
 {
 	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE_FL);
 }
+/* run all the marks in a group, and clear all of the sn marks */
+static inline void fsnotify_clear_sb_marks_by_group(struct fsnotify_group *group)
+{
+	fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_SB_FL);
+}
 extern void fsnotify_get_mark(struct fsnotify_mark *mark);
 extern void fsnotify_put_mark(struct fsnotify_mark *mark);
-extern void fsnotify_unmount_inodes(struct super_block *sb);
 extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info);
 extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info);
 
@@ -484,6 +492,9 @@ static inline void __fsnotify_inode_delete(struct inode *inode)
 static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt)
 {}
 
+static inline void fsnotify_sb_delete(struct super_block *sb)
+{}
+
 static inline void fsnotify_update_flags(struct dentry *dentry)
 {}
 
-- 
cgit v1.2.3


From 3edd79cf5a44b12dbb13bc320f5788aed6562b36 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Mon, 3 Sep 2018 16:49:37 +0200
Subject: regulator: Fix 'do-nothing' value for regulators without suspend
 state

Some regulators don't have all states defined and in such cases regulator
core should not assume anything. However in current implementation
of of_get_regulation_constraints() DO_NOTHING_IN_SUSPEND enable value was
set only for regulators which had suspend node defined, otherwise the
default 0 value was used, what means DISABLE_IN_SUSPEND. This lead to
broken system suspend/resume on boards, which had simple regulator
constraints definition (without suspend state nodes).

To avoid further mismatches between the default and uninitialized values
of the suspend enabled/disabled states, change the values of the them,
so default '0' means DO_NOTHING_IN_SUSPEND.

Fixes: 72069f9957a1: regulator: leave one item to record whether regulator is enabled
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: stable@vger.kernel.org
---
 drivers/regulator/core.c          | 2 +-
 drivers/regulator/of_regulator.c  | 2 --
 include/linux/regulator/machine.h | 6 +++---
 3 files changed, 4 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index bb1324f93143..90215f57270f 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -3161,7 +3161,7 @@ static inline int regulator_suspend_toggle(struct regulator_dev *rdev,
 	if (!rstate->changeable)
 		return -EPERM;
 
-	rstate->enabled = en;
+	rstate->enabled = (en) ? ENABLE_IN_SUSPEND : DISABLE_IN_SUSPEND;
 
 	return 0;
 }
diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c
index 638f17d4c848..210fc20f7de7 100644
--- a/drivers/regulator/of_regulator.c
+++ b/drivers/regulator/of_regulator.c
@@ -213,8 +213,6 @@ static void of_get_regulation_constraints(struct device_node *np,
 		else if (of_property_read_bool(suspend_np,
 					"regulator-off-in-suspend"))
 			suspend_state->enabled = DISABLE_IN_SUSPEND;
-		else
-			suspend_state->enabled = DO_NOTHING_IN_SUSPEND;
 
 		if (!of_property_read_u32(np, "regulator-suspend-min-microvolt",
 					  &pval))
diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h
index 3468703d663a..a459a5e973a7 100644
--- a/include/linux/regulator/machine.h
+++ b/include/linux/regulator/machine.h
@@ -48,9 +48,9 @@ struct regulator;
  * DISABLE_IN_SUSPEND	- turn off regulator in suspend states
  * ENABLE_IN_SUSPEND	- keep regulator on in suspend states
  */
-#define DO_NOTHING_IN_SUSPEND	(-1)
-#define DISABLE_IN_SUSPEND	0
-#define ENABLE_IN_SUSPEND	1
+#define DO_NOTHING_IN_SUSPEND	0
+#define DISABLE_IN_SUSPEND	1
+#define ENABLE_IN_SUSPEND	2
 
 /* Regulator active discharge flags */
 enum regulator_active_discharge {
-- 
cgit v1.2.3


From 4d18975c78f2d5c91792356501cf369e67594241 Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Sat, 1 Sep 2018 16:08:45 +0200
Subject: fbdev: add remove_conflicting_pci_framebuffers()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Almost all PCI drivers using remove_conflicting_framebuffers() wrap it
with the same code.

v2: add kerneldoc for DRM helper
v3: propagate remove_conflicting_framebuffers() return value
  + move kerneldoc to where function is implemented

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Acked-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/7db1c278276de420eb45a1b71d06b5eb6bbd49ef.1535810304.git.mirq-linux@rere.qmqm.pl
---
 drivers/video/fbdev/core/fbmem.c | 35 +++++++++++++++++++++++++++++++++++
 include/drm/drm_fb_helper.h      | 12 ++++++++++++
 include/linux/fb.h               |  2 ++
 3 files changed, 49 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 6e9961b71442..5ffadc8e681d 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -34,6 +34,7 @@
 #include <linux/fb.h>
 #include <linux/fbcon.h>
 #include <linux/mem_encrypt.h>
+#include <linux/pci.h>
 
 #include <asm/fb.h>
 
@@ -1830,6 +1831,40 @@ int remove_conflicting_framebuffers(struct apertures_struct *a,
 }
 EXPORT_SYMBOL(remove_conflicting_framebuffers);
 
+/**
+ * remove_conflicting_pci_framebuffers - remove firmware-configured framebuffers for PCI devices
+ * @pdev: PCI device
+ * @resource_id: index of PCI BAR configuring framebuffer memory
+ * @name: requesting driver name
+ *
+ * This function removes framebuffer devices (eg. initialized by firmware)
+ * using memory range configured for @pdev's BAR @resource_id.
+ *
+ * The function assumes that PCI device with shadowed ROM drives a primary
+ * display and so kicks out vga16fb.
+ */
+int remove_conflicting_pci_framebuffers(struct pci_dev *pdev, int res_id, const char *name)
+{
+	struct apertures_struct *ap;
+	bool primary = false;
+	int err;
+
+	ap = alloc_apertures(1);
+	if (!ap)
+		return -ENOMEM;
+
+	ap->ranges[0].base = pci_resource_start(pdev, res_id);
+	ap->ranges[0].size = pci_resource_len(pdev, res_id);
+#ifdef CONFIG_X86
+	primary = pdev->resource[PCI_ROM_RESOURCE].flags &
+					IORESOURCE_ROM_SHADOW;
+#endif
+	err = remove_conflicting_framebuffers(ap, name, primary);
+	kfree(ap);
+	return err;
+}
+EXPORT_SYMBOL(remove_conflicting_pci_framebuffers);
+
 /**
  *	register_framebuffer - registers a frame buffer device
  *	@fb_info: frame buffer info structure
diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index 5db08c8f1d25..8b6ab3200a2c 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -615,4 +615,16 @@ drm_fb_helper_remove_conflicting_framebuffers(struct apertures_struct *a,
 #endif
 }
 
+static inline int
+drm_fb_helper_remove_conflicting_pci_framebuffers(struct pci_dev *pdev,
+						  int resource_id,
+						  const char *name)
+{
+#if IS_REACHABLE(CONFIG_FB)
+	return remove_conflicting_pci_framebuffers(pdev, resource_id, name);
+#else
+	return 0;
+#endif
+}
+
 #endif
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 3e7e75383d32..3cd375dafd0e 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -632,6 +632,8 @@ extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf,
 extern int register_framebuffer(struct fb_info *fb_info);
 extern int unregister_framebuffer(struct fb_info *fb_info);
 extern int unlink_framebuffer(struct fb_info *fb_info);
+extern int remove_conflicting_pci_framebuffers(struct pci_dev *pdev, int res_id,
+					       const char *name);
 extern int remove_conflicting_framebuffers(struct apertures_struct *a,
 					   const char *name, bool primary);
 extern int fb_prepare_logo(struct fb_info *fb_info, int rotate);
-- 
cgit v1.2.3


From 9fd0e09a4e86499639653243edfcb417a05c5c46 Mon Sep 17 00:00:00 2001
From: Anthony Wong <anthony.wong@ubuntu.com>
Date: Fri, 31 Aug 2018 20:06:42 +0800
Subject: r8169: add support for NCube 8168 network card

This card identifies itself as:
  Ethernet controller [0200]: NCube Device [10ff:8168] (rev 06)
  Subsystem: TP-LINK Technologies Co., Ltd. Device [7470:3468]

Adding a new entry to rtl8169_pci_tbl makes the card work.

Link: http://launchpad.net/bugs/1788730
Signed-off-by: Anthony Wong <anthony.wong@ubuntu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 1 +
 include/linux/pci_ids.h              | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index ac306797590e..b08d51bf7a20 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -218,6 +218,7 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8161), 0, 0, RTL_CFG_1 },
 	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8167), 0, 0, RTL_CFG_0 },
 	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8168), 0, 0, RTL_CFG_1 },
+	{ PCI_DEVICE(PCI_VENDOR_ID_NCUBE,	0x8168), 0, 0, RTL_CFG_1 },
 	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8169), 0, 0, RTL_CFG_0 },
 	{ PCI_VENDOR_ID_DLINK,			0x4300,
 		PCI_VENDOR_ID_DLINK, 0x4b10,		 0, 0, RTL_CFG_1 },
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 99d366cb0e9f..d157983b84cf 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -3084,4 +3084,6 @@
 
 #define PCI_VENDOR_ID_OCZ		0x1b85
 
+#define PCI_VENDOR_ID_NCUBE		0x10ff
+
 #endif /* _LINUX_PCI_IDS_H */
-- 
cgit v1.2.3


From f3569fd613f669c95ad187208ad281995f30cc2a Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 7 Aug 2018 14:18:42 -0700
Subject: crypto: shash - Remove VLA usage in unaligned hashing

In the quest to remove all stack VLA usage from the kernel[1], this uses
the newly defined max alignment to perform unaligned hashing to avoid
VLAs, and drops the helper function while adding sanity checks on the
resulting buffer sizes. Additionally, the __aligned_largest macro is
removed since this helper was the only user.

[1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/shash.c                 | 27 ++++++++++++++++-----------
 include/linux/compiler_types.h |  1 -
 2 files changed, 16 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/crypto/shash.c b/crypto/shash.c
index 86d76b5c626c..d21f04d70dce 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -73,13 +73,6 @@ int crypto_shash_setkey(struct crypto_shash *tfm, const u8 *key,
 }
 EXPORT_SYMBOL_GPL(crypto_shash_setkey);
 
-static inline unsigned int shash_align_buffer_size(unsigned len,
-						   unsigned long mask)
-{
-	typedef u8 __aligned_largest u8_aligned;
-	return len + (mask & ~(__alignof__(u8_aligned) - 1));
-}
-
 static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
 				  unsigned int len)
 {
@@ -88,11 +81,17 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
 	unsigned long alignmask = crypto_shash_alignmask(tfm);
 	unsigned int unaligned_len = alignmask + 1 -
 				     ((unsigned long)data & alignmask);
-	u8 ubuf[shash_align_buffer_size(unaligned_len, alignmask)]
-		__aligned_largest;
+	/*
+	 * We cannot count on __aligned() working for large values:
+	 * https://patchwork.kernel.org/patch/9507697/
+	 */
+	u8 ubuf[MAX_ALGAPI_ALIGNMASK * 2];
 	u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
 	int err;
 
+	if (WARN_ON(buf + unaligned_len > ubuf + sizeof(ubuf)))
+		return -EINVAL;
+
 	if (unaligned_len > len)
 		unaligned_len = len;
 
@@ -124,11 +123,17 @@ static int shash_final_unaligned(struct shash_desc *desc, u8 *out)
 	unsigned long alignmask = crypto_shash_alignmask(tfm);
 	struct shash_alg *shash = crypto_shash_alg(tfm);
 	unsigned int ds = crypto_shash_digestsize(tfm);
-	u8 ubuf[shash_align_buffer_size(ds, alignmask)]
-		__aligned_largest;
+	/*
+	 * We cannot count on __aligned() working for large values:
+	 * https://patchwork.kernel.org/patch/9507697/
+	 */
+	u8 ubuf[MAX_ALGAPI_ALIGNMASK + HASH_MAX_DIGESTSIZE];
 	u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
 	int err;
 
+	if (WARN_ON(buf + ds > ubuf + sizeof(ubuf)))
+		return -EINVAL;
+
 	err = shash->final(desc, buf);
 	if (err)
 		goto out;
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 3525c179698c..13fee5fe734b 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -198,7 +198,6 @@ struct ftrace_likely_data {
  */
 #define __pure			__attribute__((pure))
 #define __aligned(x)		__attribute__((aligned(x)))
-#define __aligned_largest	__attribute__((aligned))
 #define __printf(a, b)		__attribute__((format(printf, a, b)))
 #define __scanf(a, b)		__attribute__((format(scanf, a, b)))
 #define __maybe_unused		__attribute__((unused))
-- 
cgit v1.2.3


From b76377543b738a6b58b0a7b0a42dd9e16436fee1 Mon Sep 17 00:00:00 2001
From: "Martin K. Petersen" <martin.petersen@oracle.com>
Date: Thu, 30 Aug 2018 11:00:15 -0400
Subject: crc-t10dif: Pick better transform if one becomes available

T10 CRC library is linked into the kernel thanks to block and SCSI. The
crypto accelerators are typically loaded later as modules and are
therefore not available when the T10 CRC library is initialized.

Use the crypto notifier facility to trigger a switch to a better algorithm
if one becomes available after the initial hash has been registered. Use
RCU to protect the original transform while the new one is being set up.

Suggested-by: Ard Biesheuvel <ard.biesheuvel@linaro.org
Suggested-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/crc-t10dif.h |  1 +
 lib/crc-t10dif.c           | 46 ++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 45 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h
index 1fe0cfcdea30..6bb0c0bf357b 100644
--- a/include/linux/crc-t10dif.h
+++ b/include/linux/crc-t10dif.h
@@ -6,6 +6,7 @@
 
 #define CRC_T10DIF_DIGEST_SIZE 2
 #define CRC_T10DIF_BLOCK_SIZE 1
+#define CRC_T10DIF_STRING "crct10dif"
 
 extern __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer,
 				size_t len);
diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c
index 1ad33e555805..52f577a3868d 100644
--- a/lib/crc-t10dif.c
+++ b/lib/crc-t10dif.c
@@ -14,10 +14,47 @@
 #include <linux/err.h>
 #include <linux/init.h>
 #include <crypto/hash.h>
+#include <crypto/algapi.h>
 #include <linux/static_key.h>
+#include <linux/notifier.h>
 
-static struct crypto_shash *crct10dif_tfm;
+static struct crypto_shash __rcu *crct10dif_tfm;
 static struct static_key crct10dif_fallback __read_mostly;
+DEFINE_MUTEX(crc_t10dif_mutex);
+
+static int crc_t10dif_rehash(struct notifier_block *self, unsigned long val, void *data)
+{
+	struct crypto_alg *alg = data;
+	struct crypto_shash *new, *old;
+
+	if (val != CRYPTO_MSG_ALG_LOADED ||
+	    static_key_false(&crct10dif_fallback) ||
+	    strncmp(alg->cra_name, CRC_T10DIF_STRING, strlen(CRC_T10DIF_STRING)))
+		return 0;
+
+	mutex_lock(&crc_t10dif_mutex);
+	old = rcu_dereference_protected(crct10dif_tfm,
+					lockdep_is_held(&crc_t10dif_mutex));
+	if (!old) {
+		mutex_unlock(&crc_t10dif_mutex);
+		return 0;
+	}
+	new = crypto_alloc_shash("crct10dif", 0, 0);
+	if (IS_ERR(new)) {
+		mutex_unlock(&crc_t10dif_mutex);
+		return 0;
+	}
+	rcu_assign_pointer(crct10dif_tfm, new);
+	mutex_unlock(&crc_t10dif_mutex);
+
+	synchronize_rcu();
+	crypto_free_shash(old);
+	return 0;
+}
+
+static struct notifier_block crc_t10dif_nb = {
+	.notifier_call = crc_t10dif_rehash,
+};
 
 __u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len)
 {
@@ -30,11 +67,14 @@ __u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len)
 	if (static_key_false(&crct10dif_fallback))
 		return crc_t10dif_generic(crc, buffer, len);
 
-	desc.shash.tfm = crct10dif_tfm;
+	rcu_read_lock();
+	desc.shash.tfm = rcu_dereference(crct10dif_tfm);
 	desc.shash.flags = 0;
 	*(__u16 *)desc.ctx = crc;
 
 	err = crypto_shash_update(&desc.shash, buffer, len);
+	rcu_read_unlock();
+
 	BUG_ON(err);
 
 	return *(__u16 *)desc.ctx;
@@ -49,6 +89,7 @@ EXPORT_SYMBOL(crc_t10dif);
 
 static int __init crc_t10dif_mod_init(void)
 {
+	crypto_register_notifier(&crc_t10dif_nb);
 	crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0);
 	if (IS_ERR(crct10dif_tfm)) {
 		static_key_slow_inc(&crct10dif_fallback);
@@ -59,6 +100,7 @@ static int __init crc_t10dif_mod_init(void)
 
 static void __exit crc_t10dif_mod_fini(void)
 {
+	crypto_unregister_notifier(&crc_t10dif_nb);
 	crypto_free_shash(crct10dif_tfm);
 }
 
-- 
cgit v1.2.3


From bf97279079be21fc843d90489846d2c73c77ed09 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 29 Aug 2018 17:08:53 +0200
Subject: gpio: ts5500: Use SPDX header

Cut some boilerplate, use the SPDX license identifier.

Cc: kernel@savoirfairelinux.com
Cc: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Cc: Jerome Oufella <jerome.oufella@savoirfairelinux.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-ts5500.c                | 5 +----
 include/linux/platform_data/gpio-ts5500.h | 5 +----
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpio-ts5500.c b/drivers/gpio/gpio-ts5500.c
index dd378813f412..d4ea3ee23b9c 100644
--- a/drivers/gpio/gpio-ts5500.c
+++ b/drivers/gpio/gpio-ts5500.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Digital I/O driver for Technologic Systems TS-5500
  *
@@ -16,10 +17,6 @@
  * TS-5600:
  *   Documentation: http://wiki.embeddedarm.com/wiki/TS-5600
  *   Blocks: LCD port (identical to TS-5500 LCD).
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/bitops.h>
diff --git a/include/linux/platform_data/gpio-ts5500.h b/include/linux/platform_data/gpio-ts5500.h
index b10d11c9bb49..94346d4504cb 100644
--- a/include/linux/platform_data/gpio-ts5500.h
+++ b/include/linux/platform_data/gpio-ts5500.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * GPIO (DIO) header for Technologic Systems TS-5500
  *
  * Copyright (c) 2012 Savoir-faire Linux Inc.
  *	Vivien Didelot <vivien.didelot@savoirfairelinux.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef _PDATA_GPIO_TS5500_H
-- 
cgit v1.2.3


From 97feacc05dfb601273fd680d7b805b8e09c1bec7 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 29 Aug 2018 17:16:18 +0200
Subject: gpio: ts5500: Delete platform data handling

The TS5500 GPIO driver apparently supports platform data
without making any use of it whatsoever. Delete this code,
last chance to speak up if you think it is needed.

Cc: kernel@savoirfairelinux.com
Cc: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
Cc: Jerome Oufella <jerome.oufella@savoirfairelinux.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/x86/platform/ts5500/ts5500.c         |  1 -
 drivers/gpio/gpio-ts5500.c                |  6 ------
 include/linux/platform_data/gpio-ts5500.h | 24 ------------------------
 3 files changed, 31 deletions(-)
 delete mode 100644 include/linux/platform_data/gpio-ts5500.h

(limited to 'include/linux')

diff --git a/arch/x86/platform/ts5500/ts5500.c b/arch/x86/platform/ts5500/ts5500.c
index fd39301f25ac..7e56fc74093c 100644
--- a/arch/x86/platform/ts5500/ts5500.c
+++ b/arch/x86/platform/ts5500/ts5500.c
@@ -24,7 +24,6 @@
 #include <linux/kernel.h>
 #include <linux/leds.h>
 #include <linux/init.h>
-#include <linux/platform_data/gpio-ts5500.h>
 #include <linux/platform_data/max197.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
diff --git a/drivers/gpio/gpio-ts5500.c b/drivers/gpio/gpio-ts5500.c
index d4ea3ee23b9c..c91890488402 100644
--- a/drivers/gpio/gpio-ts5500.c
+++ b/drivers/gpio/gpio-ts5500.c
@@ -23,7 +23,6 @@
 #include <linux/gpio/driver.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/platform_data/gpio-ts5500.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
@@ -315,7 +314,6 @@ static void ts5500_disable_irq(struct ts5500_priv *priv)
 static int ts5500_dio_probe(struct platform_device *pdev)
 {
 	enum ts5500_blocks block = platform_get_device_id(pdev)->driver_data;
-	struct ts5500_dio_platform_data *pdata = dev_get_platdata(&pdev->dev);
 	struct device *dev = &pdev->dev;
 	const char *name = dev_name(dev);
 	struct ts5500_priv *priv;
@@ -346,10 +344,6 @@ static int ts5500_dio_probe(struct platform_device *pdev)
 	priv->gpio_chip.set = ts5500_gpio_set;
 	priv->gpio_chip.to_irq = ts5500_gpio_to_irq;
 	priv->gpio_chip.base = -1;
-	if (pdata) {
-		priv->gpio_chip.base = pdata->base;
-		priv->strap = pdata->strap;
-	}
 
 	switch (block) {
 	case TS5500_DIO1:
diff --git a/include/linux/platform_data/gpio-ts5500.h b/include/linux/platform_data/gpio-ts5500.h
deleted file mode 100644
index 94346d4504cb..000000000000
--- a/include/linux/platform_data/gpio-ts5500.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * GPIO (DIO) header for Technologic Systems TS-5500
- *
- * Copyright (c) 2012 Savoir-faire Linux Inc.
- *	Vivien Didelot <vivien.didelot@savoirfairelinux.com>
- */
-
-#ifndef _PDATA_GPIO_TS5500_H
-#define _PDATA_GPIO_TS5500_H
-
-/**
- * struct ts5500_dio_platform_data - TS-5500 pin block configuration
- * @base:	The GPIO base number to use.
- * @strap:	The only pin connected to an interrupt in a block is input-only.
- *		If you need a bidirectional line which can trigger an IRQ, you
- *		may strap it with an in/out pin. This flag indicates this case.
- */
-struct ts5500_dio_platform_data {
-	int base;
-	bool strap;
-};
-
-#endif /* _PDATA_GPIO_TS5500_H */
-- 
cgit v1.2.3


From aa7e80b220f3a543eefbe4b7e2c5d2b73e2e2ef7 Mon Sep 17 00:00:00 2001
From: Moni Shoua <monis@mellanox.com>
Date: Mon, 3 Sep 2018 20:19:28 +0300
Subject: net/mlx5: Fix atomic_mode enum values

The field atomic_mode is 4 bits wide and therefore can hold values
from 0x0 to 0xf. Remove the unnecessary 20 bit shift that made the values
be incorrect. While that, remove unused enum values.

Fixes: 57cda166bbe0 ("net/mlx5: Add DCT command interface")
Signed-off-by: Moni Shoua <monis@mellanox.com>
Reviewed-by: Artemy Kovalyov <artemyko@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/driver.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 7a452716de4b..d885e9f0e054 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -163,10 +163,7 @@ enum mlx5_dcbx_oper_mode {
 };
 
 enum mlx5_dct_atomic_mode {
-	MLX5_ATOMIC_MODE_DCT_OFF        = 20,
-	MLX5_ATOMIC_MODE_DCT_NONE       = 0 << MLX5_ATOMIC_MODE_DCT_OFF,
-	MLX5_ATOMIC_MODE_DCT_IB_COMP    = 1 << MLX5_ATOMIC_MODE_DCT_OFF,
-	MLX5_ATOMIC_MODE_DCT_CX         = 2 << MLX5_ATOMIC_MODE_DCT_OFF,
+	MLX5_ATOMIC_MODE_DCT_CX         = 2,
 };
 
 enum {
-- 
cgit v1.2.3


From afaef01c001537fa97a25092d7f54d764dc7d8c1 Mon Sep 17 00:00:00 2001
From: Alexander Popov <alex.popov@linux.com>
Date: Fri, 17 Aug 2018 01:16:58 +0300
Subject: x86/entry: Add STACKLEAK erasing the kernel stack at the end of
 syscalls

The STACKLEAK feature (initially developed by PaX Team) has the following
benefits:

1. Reduces the information that can be revealed through kernel stack leak
   bugs. The idea of erasing the thread stack at the end of syscalls is
   similar to CONFIG_PAGE_POISONING and memzero_explicit() in kernel
   crypto, which all comply with FDP_RIP.2 (Full Residual Information
   Protection) of the Common Criteria standard.

2. Blocks some uninitialized stack variable attacks (e.g. CVE-2017-17712,
   CVE-2010-2963). That kind of bugs should be killed by improving C
   compilers in future, which might take a long time.

This commit introduces the code filling the used part of the kernel
stack with a poison value before returning to userspace. Full
STACKLEAK feature also contains the gcc plugin which comes in a
separate commit.

The STACKLEAK feature is ported from grsecurity/PaX. More information at:
  https://grsecurity.net/
  https://pax.grsecurity.net/

This code is modified from Brad Spengler/PaX Team's code in the last
public patch of grsecurity/PaX based on our understanding of the code.
Changes or omissions from the original code are ours and don't reflect
the original grsecurity/PaX code.

Performance impact:

Hardware: Intel Core i7-4770, 16 GB RAM

Test #1: building the Linux kernel on a single core
        0.91% slowdown

Test #2: hackbench -s 4096 -l 2000 -g 15 -f 25 -P
        4.2% slowdown

So the STACKLEAK description in Kconfig includes: "The tradeoff is the
performance impact: on a single CPU system kernel compilation sees a 1%
slowdown, other systems and workloads may vary and you are advised to
test this feature on your expected workload before deploying it".

Signed-off-by: Alexander Popov <alex.popov@linux.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/x86/x86_64/mm.txt  |  2 ++
 arch/Kconfig                     |  7 +++++
 arch/x86/Kconfig                 |  1 +
 arch/x86/entry/calling.h         | 14 +++++++++
 arch/x86/entry/entry_32.S        |  7 +++++
 arch/x86/entry/entry_64.S        |  3 ++
 arch/x86/entry/entry_64_compat.S |  5 ++++
 include/linux/sched.h            |  4 +++
 include/linux/stackleak.h        | 26 +++++++++++++++++
 kernel/Makefile                  |  4 +++
 kernel/fork.c                    |  3 ++
 kernel/stackleak.c               | 62 ++++++++++++++++++++++++++++++++++++++++
 scripts/gcc-plugins/Kconfig      | 19 ++++++++++++
 13 files changed, 157 insertions(+)
 create mode 100644 include/linux/stackleak.h
 create mode 100644 kernel/stackleak.c

(limited to 'include/linux')

diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 5432a96d31ff..600bc2afa27d 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -24,6 +24,7 @@ ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
 [fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
 ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+STACKLEAK_POISON value in this last hole: ffffffffffff4111
 
 Virtual memory map with 5 level page tables:
 
@@ -50,6 +51,7 @@ ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
 [fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
 ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+STACKLEAK_POISON value in this last hole: ffffffffffff4111
 
 Architecture defines a 64-bit virtual address. Implementations can support
 less. Currently supported are 48- and 57-bit virtual addresses. Bits 63
diff --git a/arch/Kconfig b/arch/Kconfig
index 6801123932a5..ee79ff56faab 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -419,6 +419,13 @@ config SECCOMP_FILTER
 
 	  See Documentation/userspace-api/seccomp_filter.rst for details.
 
+config HAVE_ARCH_STACKLEAK
+	bool
+	help
+	  An architecture should select this if it has the code which
+	  fills the used part of the kernel stack with the STACKLEAK_POISON
+	  value before returning from system calls.
+
 config HAVE_STACKPROTECTOR
 	bool
 	help
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1a0be022f91d..662cb2cc9630 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -127,6 +127,7 @@ config X86
 	select HAVE_ARCH_PREL32_RELOCATIONS
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_THREAD_STRUCT_WHITELIST
+	select HAVE_ARCH_STACKLEAK
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 352e70cd33e8..20d0885b00fb 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -329,8 +329,22 @@ For 32-bit we have the following conventions - kernel is built with
 
 #endif
 
+.macro STACKLEAK_ERASE_NOCLOBBER
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+	PUSH_AND_CLEAR_REGS
+	call stackleak_erase
+	POP_REGS
+#endif
+.endm
+
 #endif /* CONFIG_X86_64 */
 
+.macro STACKLEAK_ERASE
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+	call stackleak_erase
+#endif
+.endm
+
 /*
  * This does 'call enter_from_user_mode' unless we can avoid it based on
  * kernel config or using the static jump infrastructure.
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 2767c625a52c..dfb975b4c981 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -46,6 +46,8 @@
 #include <asm/frame.h>
 #include <asm/nospec-branch.h>
 
+#include "calling.h"
+
 	.section .entry.text, "ax"
 
 /*
@@ -711,6 +713,7 @@ ENTRY(ret_from_fork)
 	/* When we fork, we trace the syscall return in the child, too. */
 	movl    %esp, %eax
 	call    syscall_return_slowpath
+	STACKLEAK_ERASE
 	jmp     restore_all
 
 	/* kernel thread */
@@ -885,6 +888,8 @@ ENTRY(entry_SYSENTER_32)
 	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
 		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV
 
+	STACKLEAK_ERASE
+
 /* Opportunistic SYSEXIT */
 	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
 
@@ -996,6 +1001,8 @@ ENTRY(entry_INT80_32)
 	call	do_int80_syscall_32
 .Lsyscall_32_done:
 
+	STACKLEAK_ERASE
+
 restore_all:
 	TRACE_IRQS_IRET
 	SWITCH_TO_ENTRY_STACK
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 957dfb693ecc..a5dd28093020 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -329,6 +329,8 @@ syscall_return_via_sysret:
 	 * We are on the trampoline stack.  All regs except RDI are live.
 	 * We can do future final exit work right here.
 	 */
+	STACKLEAK_ERASE_NOCLOBBER
+
 	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
 
 	popq	%rdi
@@ -688,6 +690,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
 	 * We are on the trampoline stack.  All regs except RDI are live.
 	 * We can do future final exit work right here.
 	 */
+	STACKLEAK_ERASE_NOCLOBBER
 
 	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
 
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 7d0df78db727..8eaf8952c408 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -261,6 +261,11 @@ GLOBAL(entry_SYSCALL_compat_after_hwframe)
 
 	/* Opportunistic SYSRET */
 sysret32_from_system_call:
+	/*
+	 * We are not going to return to userspace from the trampoline
+	 * stack. So let's erase the thread stack right now.
+	 */
+	STACKLEAK_ERASE
 	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
 	movq	RBX(%rsp), %rbx		/* pt_regs->rbx */
 	movq	RBP(%rsp), %rbp		/* pt_regs->rbp */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 977cb57d7bc9..c1a23acd24e7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1192,6 +1192,10 @@ struct task_struct {
 	void				*security;
 #endif
 
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+	unsigned long			lowest_stack;
+#endif
+
 	/*
 	 * New fields for task_struct should be added above here, so that
 	 * they are included in the randomized portion of task_struct.
diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h
new file mode 100644
index 000000000000..628c2b947b89
--- /dev/null
+++ b/include/linux/stackleak.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_STACKLEAK_H
+#define _LINUX_STACKLEAK_H
+
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+
+/*
+ * Check that the poison value points to the unused hole in the
+ * virtual memory map for your platform.
+ */
+#define STACKLEAK_POISON -0xBEEF
+#define STACKLEAK_SEARCH_DEPTH 128
+
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+#include <asm/stacktrace.h>
+
+static inline void stackleak_task_init(struct task_struct *t)
+{
+	t->lowest_stack = (unsigned long)end_of_stack(t) + sizeof(unsigned long);
+}
+#else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
+static inline void stackleak_task_init(struct task_struct *t) { }
+#endif
+
+#endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 7a63d567fdb5..7343b3a9bff0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -117,6 +117,10 @@ obj-$(CONFIG_HAS_IOMEM) += iomem.o
 obj-$(CONFIG_ZONE_DEVICE) += memremap.o
 obj-$(CONFIG_RSEQ) += rseq.o
 
+obj-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak.o
+KASAN_SANITIZE_stackleak.o := n
+KCOV_INSTRUMENT_stackleak.o := n
+
 $(obj)/configs.o: $(obj)/config_data.h
 
 targets += config_data.gz
diff --git a/kernel/fork.c b/kernel/fork.c
index d896e9ca38b0..47911e49c2b1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -91,6 +91,7 @@
 #include <linux/kcov.h>
 #include <linux/livepatch.h>
 #include <linux/thread_info.h>
+#include <linux/stackleak.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -1880,6 +1881,8 @@ static __latent_entropy struct task_struct *copy_process(
 	if (retval)
 		goto bad_fork_cleanup_io;
 
+	stackleak_task_init(p);
+
 	if (pid != &init_struct_pid) {
 		pid = alloc_pid(p->nsproxy->pid_ns_for_children);
 		if (IS_ERR(pid)) {
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
new file mode 100644
index 000000000000..deba0d8992f9
--- /dev/null
+++ b/kernel/stackleak.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This code fills the used part of the kernel stack with a poison value
+ * before returning to userspace. It's part of the STACKLEAK feature
+ * ported from grsecurity/PaX.
+ *
+ * Author: Alexander Popov <alex.popov@linux.com>
+ *
+ * STACKLEAK reduces the information which kernel stack leak bugs can
+ * reveal and blocks some uninitialized stack variable attacks.
+ */
+
+#include <linux/stackleak.h>
+
+asmlinkage void stackleak_erase(void)
+{
+	/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
+	unsigned long kstack_ptr = current->lowest_stack;
+	unsigned long boundary = (unsigned long)end_of_stack(current);
+	unsigned int poison_count = 0;
+	const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
+
+	/* Check that 'lowest_stack' value is sane */
+	if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))
+		kstack_ptr = boundary;
+
+	/* Search for the poison value in the kernel stack */
+	while (kstack_ptr > boundary && poison_count <= depth) {
+		if (*(unsigned long *)kstack_ptr == STACKLEAK_POISON)
+			poison_count++;
+		else
+			poison_count = 0;
+
+		kstack_ptr -= sizeof(unsigned long);
+	}
+
+	/*
+	 * One 'long int' at the bottom of the thread stack is reserved and
+	 * should not be poisoned (see CONFIG_SCHED_STACK_END_CHECK=y).
+	 */
+	if (kstack_ptr == boundary)
+		kstack_ptr += sizeof(unsigned long);
+
+	/*
+	 * Now write the poison value to the kernel stack. Start from
+	 * 'kstack_ptr' and move up till the new 'boundary'. We assume that
+	 * the stack pointer doesn't change when we write poison.
+	 */
+	if (on_thread_stack())
+		boundary = current_stack_pointer;
+	else
+		boundary = current_top_of_stack();
+
+	while (kstack_ptr < boundary) {
+		*(unsigned long *)kstack_ptr = STACKLEAK_POISON;
+		kstack_ptr += sizeof(unsigned long);
+	}
+
+	/* Reset the 'lowest_stack' value for the next syscall */
+	current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
+}
+
diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
index cb0c889e13aa..977b84e69787 100644
--- a/scripts/gcc-plugins/Kconfig
+++ b/scripts/gcc-plugins/Kconfig
@@ -139,4 +139,23 @@ config GCC_PLUGIN_RANDSTRUCT_PERFORMANCE
 	  in structures.  This reduces the performance hit of RANDSTRUCT
 	  at the cost of weakened randomization.
 
+config GCC_PLUGIN_STACKLEAK
+	bool "Erase the kernel stack before returning from syscalls"
+	depends on GCC_PLUGINS
+	depends on HAVE_ARCH_STACKLEAK
+	help
+	  This option makes the kernel erase the kernel stack before
+	  returning from system calls. That reduces the information which
+	  kernel stack leak bugs can reveal and blocks some uninitialized
+	  stack variable attacks.
+
+	  The tradeoff is the performance impact: on a single CPU system kernel
+	  compilation sees a 1% slowdown, other systems and workloads may vary
+	  and you are advised to test this feature on your expected workload
+	  before deploying it.
+
+	  This plugin was ported from grsecurity/PaX. More information at:
+	   * https://grsecurity.net/
+	   * https://pax.grsecurity.net/
+
 endif
-- 
cgit v1.2.3


From c8d126275a5fa59394fe17109bdb9812fed296b8 Mon Sep 17 00:00:00 2001
From: Alexander Popov <alex.popov@linux.com>
Date: Fri, 17 Aug 2018 01:17:01 +0300
Subject: fs/proc: Show STACKLEAK metrics in the /proc file system

Introduce CONFIG_STACKLEAK_METRICS providing STACKLEAK information about
tasks via the /proc file system. In particular, /proc/<pid>/stack_depth
shows the maximum kernel stack consumption for the current and previous
syscalls. Although this information is not precise, it can be useful for
estimating the STACKLEAK performance impact for your workloads.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Alexander Popov <alex.popov@linux.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/proc/base.c              | 18 ++++++++++++++++++
 include/linux/sched.h       |  1 +
 include/linux/stackleak.h   |  3 +++
 kernel/stackleak.c          |  4 ++++
 scripts/gcc-plugins/Kconfig | 12 ++++++++++++
 5 files changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index ccf86f16d9f0..2a238d68610e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2891,6 +2891,21 @@ static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns,
 }
 #endif /* CONFIG_LIVEPATCH */
 
+#ifdef CONFIG_STACKLEAK_METRICS
+static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
+				struct pid *pid, struct task_struct *task)
+{
+	unsigned long prev_depth = THREAD_SIZE -
+				(task->prev_lowest_stack & (THREAD_SIZE - 1));
+	unsigned long depth = THREAD_SIZE -
+				(task->lowest_stack & (THREAD_SIZE - 1));
+
+	seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n",
+							prev_depth, depth);
+	return 0;
+}
+#endif /* CONFIG_STACKLEAK_METRICS */
+
 /*
  * Thread groups
  */
@@ -2992,6 +3007,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_LIVEPATCH
 	ONE("patch_state",  S_IRUSR, proc_pid_patch_state),
 #endif
+#ifdef CONFIG_STACKLEAK_METRICS
+	ONE("stack_depth", S_IRUGO, proc_stack_depth),
+#endif
 };
 
 static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c1a23acd24e7..ae9d10e14b82 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1194,6 +1194,7 @@ struct task_struct {
 
 #ifdef CONFIG_GCC_PLUGIN_STACKLEAK
 	unsigned long			lowest_stack;
+	unsigned long			prev_lowest_stack;
 #endif
 
 	/*
diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h
index 628c2b947b89..b911b973d328 100644
--- a/include/linux/stackleak.h
+++ b/include/linux/stackleak.h
@@ -18,6 +18,9 @@
 static inline void stackleak_task_init(struct task_struct *t)
 {
 	t->lowest_stack = (unsigned long)end_of_stack(t) + sizeof(unsigned long);
+# ifdef CONFIG_STACKLEAK_METRICS
+	t->prev_lowest_stack = t->lowest_stack;
+# endif
 }
 #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
 static inline void stackleak_task_init(struct task_struct *t) { }
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index 628485db37ba..f66239572c89 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -41,6 +41,10 @@ asmlinkage void stackleak_erase(void)
 	if (kstack_ptr == boundary)
 		kstack_ptr += sizeof(unsigned long);
 
+#ifdef CONFIG_STACKLEAK_METRICS
+	current->prev_lowest_stack = kstack_ptr;
+#endif
+
 	/*
 	 * Now write the poison value to the kernel stack. Start from
 	 * 'kstack_ptr' and move up till the new 'boundary'. We assume that
diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
index c65fdd823591..b0a015ef5268 100644
--- a/scripts/gcc-plugins/Kconfig
+++ b/scripts/gcc-plugins/Kconfig
@@ -170,4 +170,16 @@ config STACKLEAK_TRACK_MIN_SIZE
 	  a stack frame size greater than or equal to this parameter.
 	  If unsure, leave the default value 100.
 
+config STACKLEAK_METRICS
+	bool "Show STACKLEAK metrics in the /proc file system"
+	depends on GCC_PLUGIN_STACKLEAK
+	depends on PROC_FS
+	help
+	  If this is set, STACKLEAK metrics for every task are available in
+	  the /proc file system. In particular, /proc/<pid>/stack_depth
+	  shows the maximum kernel stack consumption for the current and
+	  previous syscalls. Although this information is not precise, it
+	  can be useful for estimating the STACKLEAK performance impact for
+	  your workloads.
+
 endif
-- 
cgit v1.2.3


From 964c9dff0091893a9a74a88edf984c6da0b779f7 Mon Sep 17 00:00:00 2001
From: Alexander Popov <alex.popov@linux.com>
Date: Fri, 17 Aug 2018 01:17:03 +0300
Subject: stackleak: Allow runtime disabling of kernel stack erasing

Introduce CONFIG_STACKLEAK_RUNTIME_DISABLE option, which provides
'stack_erasing' sysctl. It can be used in runtime to control kernel
stack erasing for kernels built with CONFIG_GCC_PLUGIN_STACKLEAK.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Alexander Popov <alex.popov@linux.com>
Tested-by: Laura Abbott <labbott@redhat.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/sysctl/kernel.txt | 18 ++++++++++++++++++
 include/linux/stackleak.h       |  6 ++++++
 kernel/stackleak.c              | 38 ++++++++++++++++++++++++++++++++++++++
 kernel/sysctl.c                 | 15 ++++++++++++++-
 scripts/gcc-plugins/Kconfig     |  8 ++++++++
 5 files changed, 84 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 37a679501ddc..1b8775298cf7 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -89,6 +89,7 @@ show up in /proc/sys/kernel:
 - shmmni
 - softlockup_all_cpu_backtrace
 - soft_watchdog
+- stack_erasing
 - stop-a                      [ SPARC only ]
 - sysrq                       ==> Documentation/admin-guide/sysrq.rst
 - sysctl_writes_strict
@@ -987,6 +988,23 @@ detect a hard lockup condition.
 
 ==============================================================
 
+stack_erasing
+
+This parameter can be used to control kernel stack erasing at the end
+of syscalls for kernels built with CONFIG_GCC_PLUGIN_STACKLEAK.
+
+That erasing reduces the information which kernel stack leak bugs
+can reveal and blocks some uninitialized stack variable attacks.
+The tradeoff is the performance impact: on a single CPU system kernel
+compilation sees a 1% slowdown, other systems and workloads may vary.
+
+  0: kernel stack erasing is disabled, STACKLEAK_METRICS are not updated.
+
+  1: kernel stack erasing is enabled (default), it is performed before
+     returning to the userspace at the end of syscalls.
+
+==============================================================
+
 tainted:
 
 Non-zero if the kernel has been tainted. Numeric values, which can be
diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h
index b911b973d328..3d5c3271a9a8 100644
--- a/include/linux/stackleak.h
+++ b/include/linux/stackleak.h
@@ -22,6 +22,12 @@ static inline void stackleak_task_init(struct task_struct *t)
 	t->prev_lowest_stack = t->lowest_stack;
 # endif
 }
+
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+int stack_erasing_sysctl(struct ctl_table *table, int write,
+			void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
+
 #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */
 static inline void stackleak_task_init(struct task_struct *t) { }
 #endif
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index f66239572c89..e42892926244 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -12,6 +12,41 @@
 
 #include <linux/stackleak.h>
 
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+#include <linux/jump_label.h>
+#include <linux/sysctl.h>
+
+static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);
+
+int stack_erasing_sysctl(struct ctl_table *table, int write,
+			void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int ret = 0;
+	int state = !static_branch_unlikely(&stack_erasing_bypass);
+	int prev_state = state;
+
+	table->data = &state;
+	table->maxlen = sizeof(int);
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	state = !!state;
+	if (ret || !write || state == prev_state)
+		return ret;
+
+	if (state)
+		static_branch_disable(&stack_erasing_bypass);
+	else
+		static_branch_enable(&stack_erasing_bypass);
+
+	pr_warn("stackleak: kernel stack erasing is %s\n",
+					state ? "enabled" : "disabled");
+	return ret;
+}
+
+#define skip_erasing()	static_branch_unlikely(&stack_erasing_bypass)
+#else
+#define skip_erasing()	false
+#endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
+
 asmlinkage void stackleak_erase(void)
 {
 	/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
@@ -20,6 +55,9 @@ asmlinkage void stackleak_erase(void)
 	unsigned int poison_count = 0;
 	const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
 
+	if (skip_erasing())
+		return;
+
 	/* Check that 'lowest_stack' value is sane */
 	if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))
 		kstack_ptr = boundary;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index cc02050fd0c4..3ae223f7b5df 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -91,7 +91,9 @@
 #ifdef CONFIG_CHR_DEV_SG
 #include <scsi/sg.h>
 #endif
-
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+#include <linux/stackleak.h>
+#endif
 #ifdef CONFIG_LOCKUP_DETECTOR
 #include <linux/nmi.h>
 #endif
@@ -1232,6 +1234,17 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
+#endif
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+	{
+		.procname	= "stack_erasing",
+		.data		= NULL,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= stack_erasing_sysctl,
+		.extra1		= &zero,
+		.extra2		= &one,
+	},
 #endif
 	{ }
 };
diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
index b0a015ef5268..0d5c799688f0 100644
--- a/scripts/gcc-plugins/Kconfig
+++ b/scripts/gcc-plugins/Kconfig
@@ -182,4 +182,12 @@ config STACKLEAK_METRICS
 	  can be useful for estimating the STACKLEAK performance impact for
 	  your workloads.
 
+config STACKLEAK_RUNTIME_DISABLE
+	bool "Allow runtime disabling of kernel stack erasing"
+	depends on GCC_PLUGIN_STACKLEAK
+	help
+	  This option provides 'stack_erasing' sysctl, which can be used in
+	  runtime to control kernel stack erasing for kernels built with
+	  CONFIG_GCC_PLUGIN_STACKLEAK.
+
 endif
-- 
cgit v1.2.3


From 0d6c3011409135ea84e2a231b013a22017ff999a Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Tue, 4 Sep 2018 15:31:14 +0200
Subject: HID: core: fix grouping by application

commit f07b3c1da92d ("HID: generic: create one input report per
application type") was effectively the same as MULTI_INPUT:
hidinput->report was never set, so hidinput_match_application()
always returned null.

Fix that by testing against the real application.

Note that this breaks some old eGalax touchscreens that expect MULTI_INPUT
instead of HID_QUIRK_INPUT_PER_APP. Enable this quirk for backward
compatibility on all non-Win8 touchscreens.

link: https://bugzilla.kernel.org/show_bug.cgi?id=200847
link: https://bugzilla.kernel.org/show_bug.cgi?id=200849
link: https://bugs.archlinux.org/task/59699
link: https://github.com/NixOS/nixpkgs/issues/45165

Cc: stable@vger.kernel.org # v4.18+
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c      | 4 ++--
 drivers/hid/hid-multitouch.c | 3 +++
 include/linux/hid.h          | 1 +
 3 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index ac201817a2dd..a481eaf39e88 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -1582,6 +1582,7 @@ static struct hid_input *hidinput_allocate(struct hid_device *hid,
 	input_dev->dev.parent = &hid->dev;
 
 	hidinput->input = input_dev;
+	hidinput->application = application;
 	list_add_tail(&hidinput->list, &hid->inputs);
 
 	INIT_LIST_HEAD(&hidinput->reports);
@@ -1677,8 +1678,7 @@ static struct hid_input *hidinput_match_application(struct hid_report *report)
 	struct hid_input *hidinput;
 
 	list_for_each_entry(hidinput, &hid->inputs, list) {
-		if (hidinput->report &&
-		    hidinput->report->application == report->application)
+		if (hidinput->application == report->application)
 			return hidinput;
 	}
 
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 88da991ef256..da954f3f4da7 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -1697,6 +1697,9 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
 	 */
 	hdev->quirks |= HID_QUIRK_INPUT_PER_APP;
 
+	if (id->group != HID_GROUP_MULTITOUCH_WIN_8)
+		hdev->quirks |= HID_QUIRK_MULTI_INPUT;
+
 	timer_setup(&td->release_timer, mt_expired_timeout, 0);
 
 	ret = hid_parse(hdev);
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 834e6461a690..d44a78362942 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -526,6 +526,7 @@ struct hid_input {
 	const char *name;
 	bool registered;
 	struct list_head reports;	/* the list of reports */
+	unsigned int application;	/* application usage for this input */
 };
 
 enum hid_type {
-- 
cgit v1.2.3


From 7525c9518ea6feabc8154956df0891a59a69d289 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 27 Jul 2018 23:05:42 +0200
Subject: mtd: rawnand: Get rid of the ->read_word() hook

Commit c120e75e0e7d ("mtd: nand: use read_oob() instead of cmdfunc()
for bad block check") removed this only user of the ->read_word()
method but kept the hook in place. Remove it now.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/atmel/nand-controller.c |  9 ---------
 drivers/mtd/nand/raw/au1550nd.c              | 15 ---------------
 drivers/mtd/nand/raw/denali.c                | 10 ----------
 drivers/mtd/nand/raw/hisi504_nand.c          | 10 ----------
 drivers/mtd/nand/raw/mpc5121_nfc.c           | 11 -----------
 drivers/mtd/nand/raw/mxc_nand.c              | 13 -------------
 drivers/mtd/nand/raw/nand_base.c             | 14 --------------
 drivers/mtd/nand/raw/nandsim.c               | 10 ----------
 drivers/mtd/nand/raw/sh_flctl.c              | 10 ----------
 drivers/mtd/nand/raw/socrates_nand.c         | 12 ------------
 include/linux/mtd/rawnand.h                  |  2 --
 11 files changed, 116 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index a068b214ebaa..525e1325841d 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -418,14 +418,6 @@ static u8 atmel_nand_read_byte(struct mtd_info *mtd)
 	return ioread8(nand->activecs->io.virt);
 }
 
-static u16 atmel_nand_read_word(struct mtd_info *mtd)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	struct atmel_nand *nand = to_atmel_nand(chip);
-
-	return ioread16(nand->activecs->io.virt);
-}
-
 static void atmel_nand_write_byte(struct mtd_info *mtd, u8 byte)
 {
 	struct nand_chip *chip = mtd_to_nand(mtd);
@@ -1500,7 +1492,6 @@ static void atmel_nand_init(struct atmel_nand_controller *nc,
 
 	chip->cmd_ctrl = atmel_nand_cmd_ctrl;
 	chip->read_byte = atmel_nand_read_byte;
-	chip->read_word = atmel_nand_read_word;
 	chip->write_byte = atmel_nand_write_byte;
 	chip->read_buf = atmel_nand_read_buf;
 	chip->write_buf = atmel_nand_write_buf;
diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c
index 35f5c84cd331..32c0440141fb 100644
--- a/drivers/mtd/nand/raw/au1550nd.c
+++ b/drivers/mtd/nand/raw/au1550nd.c
@@ -83,20 +83,6 @@ static void au_write_byte16(struct mtd_info *mtd, u_char byte)
 	wmb(); /* drain writebuffer */
 }
 
-/**
- * au_read_word -  read one word from the chip
- * @mtd:	MTD device structure
- *
- * read function for 16bit buswidth without endianness conversion
- */
-static u16 au_read_word(struct mtd_info *mtd)
-{
-	struct nand_chip *this = mtd_to_nand(mtd);
-	u16 ret = readw(this->IO_ADDR_R);
-	wmb(); /* drain writebuffer */
-	return ret;
-}
-
 /**
  * au_write_buf -  write buffer to chip
  * @mtd:	MTD device structure
@@ -462,7 +448,6 @@ static int au1550nd_probe(struct platform_device *pdev)
 
 	this->read_byte = (pd->devwidth) ? au_read_byte16 : au_read_byte;
 	ctx->write_byte = (pd->devwidth) ? au_write_byte16 : au_write_byte;
-	this->read_word = au_read_word;
 	this->write_buf = (pd->devwidth) ? au_write_buf16 : au_write_buf;
 	this->read_buf = (pd->devwidth) ? au_read_buf16 : au_read_buf;
 
diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index 67b2065e7a19..5ff2ca163884 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -279,15 +279,6 @@ static void denali_write_byte(struct mtd_info *mtd, uint8_t byte)
 	denali_write_buf(mtd, &byte, 1);
 }
 
-static uint16_t denali_read_word(struct mtd_info *mtd)
-{
-	uint16_t word;
-
-	denali_read_buf16(mtd, (uint8_t *)&word, 2);
-
-	return word;
-}
-
 static void denali_cmd_ctrl(struct mtd_info *mtd, int dat, unsigned int ctrl)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
@@ -1354,7 +1345,6 @@ int denali_init(struct denali_nand_info *denali)
 	chip->select_chip = denali_select_chip;
 	chip->read_byte = denali_read_byte;
 	chip->write_byte = denali_write_byte;
-	chip->read_word = denali_read_word;
 	chip->cmd_ctrl = denali_cmd_ctrl;
 	chip->dev_ready = denali_dev_ready;
 	chip->waitfunc = denali_waitfunc;
diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c
index 950dc7789296..0f5c48aa5673 100644
--- a/drivers/mtd/nand/raw/hisi504_nand.c
+++ b/drivers/mtd/nand/raw/hisi504_nand.c
@@ -380,15 +380,6 @@ static uint8_t hisi_nfc_read_byte(struct mtd_info *mtd)
 	return *(uint8_t *)(host->buffer + host->offset - 1);
 }
 
-static u16 hisi_nfc_read_word(struct mtd_info *mtd)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	struct hinfc_host *host = nand_get_controller_data(chip);
-
-	host->offset += 2;
-	return *(u16 *)(host->buffer + host->offset - 2);
-}
-
 static void
 hisi_nfc_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 {
@@ -795,7 +786,6 @@ static int hisi_nfc_probe(struct platform_device *pdev)
 	chip->cmdfunc		= hisi_nfc_cmdfunc;
 	chip->select_chip	= hisi_nfc_select_chip;
 	chip->read_byte		= hisi_nfc_read_byte;
-	chip->read_word		= hisi_nfc_read_word;
 	chip->write_buf		= hisi_nfc_write_buf;
 	chip->read_buf		= hisi_nfc_read_buf;
 	chip->chip_delay	= HINFC504_CHIP_DELAY;
diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c
index 6d1740d54e0d..c0be9c30b4cf 100644
--- a/drivers/mtd/nand/raw/mpc5121_nfc.c
+++ b/drivers/mtd/nand/raw/mpc5121_nfc.c
@@ -515,16 +515,6 @@ static u8 mpc5121_nfc_read_byte(struct mtd_info *mtd)
 	return tmp;
 }
 
-/* Read word from NFC buffers */
-static u16 mpc5121_nfc_read_word(struct mtd_info *mtd)
-{
-	u16 tmp;
-
-	mpc5121_nfc_read_buf(mtd, (u_char *)&tmp, sizeof(tmp));
-
-	return tmp;
-}
-
 /*
  * Read NFC configuration from Reset Config Word
  *
@@ -703,7 +693,6 @@ static int mpc5121_nfc_probe(struct platform_device *op)
 	chip->dev_ready = mpc5121_nfc_dev_ready;
 	chip->cmdfunc = mpc5121_nfc_command;
 	chip->read_byte = mpc5121_nfc_read_byte;
-	chip->read_word = mpc5121_nfc_read_word;
 	chip->read_buf = mpc5121_nfc_read_buf;
 	chip->write_buf = mpc5121_nfc_write_buf;
 	chip->select_chip = mpc5121_nfc_select_chip;
diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index 4c9214dea424..a0884a621053 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -921,18 +921,6 @@ static u_char mxc_nand_read_byte(struct mtd_info *mtd)
 	return ret;
 }
 
-static uint16_t mxc_nand_read_word(struct mtd_info *mtd)
-{
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct mxc_nand_host *host = nand_get_controller_data(nand_chip);
-	uint16_t ret;
-
-	ret = *(uint16_t *)(host->data_buf + host->buf_start);
-	host->buf_start += 2;
-
-	return ret;
-}
-
 /* Write data of length len to buffer buf. The data to be
  * written on NAND Flash is first copied to RAMbuffer. After the Data Input
  * Operation by the NFC, the data is written to NAND Flash */
@@ -1793,7 +1781,6 @@ static int mxcnd_probe(struct platform_device *pdev)
 	this->dev_ready = mxc_nand_dev_ready;
 	this->cmdfunc = mxc_nand_command;
 	this->read_byte = mxc_nand_read_byte;
-	this->read_word = mxc_nand_read_word;
 	this->write_buf = mxc_nand_write_buf;
 	this->read_buf = mxc_nand_read_buf;
 	this->set_features = mxc_nand_set_features;
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index e591410cddc9..306ef6ec41db 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -277,18 +277,6 @@ static uint8_t nand_read_byte16(struct mtd_info *mtd)
 	return (uint8_t) cpu_to_le16(readw(chip->IO_ADDR_R));
 }
 
-/**
- * nand_read_word - [DEFAULT] read one word from the chip
- * @mtd: MTD device structure
- *
- * Default read function for 16bit buswidth without endianness conversion.
- */
-static u16 nand_read_word(struct mtd_info *mtd)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	return readw(chip->IO_ADDR_R);
-}
-
 /**
  * nand_select_chip - [DEFAULT] control CE line
  * @mtd: MTD device structure
@@ -5007,8 +4995,6 @@ static void nand_set_defaults(struct nand_chip *chip)
 	/* If called twice, pointers that depend on busw may need to be reset */
 	if (!chip->read_byte || chip->read_byte == nand_read_byte)
 		chip->read_byte = busw ? nand_read_byte16 : nand_read_byte;
-	if (!chip->read_word)
-		chip->read_word = nand_read_word;
 	if (!chip->block_bad)
 		chip->block_bad = nand_block_bad;
 	if (!chip->block_markbad)
diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c
index 71ac034aee9c..47a81d1b1397 100644
--- a/drivers/mtd/nand/raw/nandsim.c
+++ b/drivers/mtd/nand/raw/nandsim.c
@@ -2108,15 +2108,6 @@ static int ns_device_ready(struct mtd_info *mtd)
 	return 1;
 }
 
-static uint16_t ns_nand_read_word(struct mtd_info *mtd)
-{
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
-	NS_DBG("read_word\n");
-
-	return chip->read_byte(mtd) | (chip->read_byte(mtd) << 8);
-}
-
 static void ns_nand_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
 {
 	struct nand_chip *chip = mtd_to_nand(mtd);
@@ -2267,7 +2258,6 @@ static int __init ns_init_module(void)
 	chip->dev_ready  = ns_device_ready;
 	chip->write_buf  = ns_nand_write_buf;
 	chip->read_buf   = ns_nand_read_buf;
-	chip->read_word  = ns_nand_read_word;
 	chip->ecc.mode   = NAND_ECC_SOFT;
 	chip->ecc.algo   = NAND_ECC_HAMMING;
 	/* The NAND_SKIP_BBTSCAN option is necessary for 'overridesize' */
diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c
index bb8866e05ff7..ef3036d9bf15 100644
--- a/drivers/mtd/nand/raw/sh_flctl.c
+++ b/drivers/mtd/nand/raw/sh_flctl.c
@@ -985,15 +985,6 @@ static uint8_t flctl_read_byte(struct mtd_info *mtd)
 	return data;
 }
 
-static uint16_t flctl_read_word(struct mtd_info *mtd)
-{
-	struct sh_flctl *flctl = mtd_to_flctl(mtd);
-	uint16_t *buf = (uint16_t *)&flctl->done_buff[flctl->index];
-
-	flctl->index += 2;
-	return *buf;
-}
-
 static void flctl_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
 {
 	struct sh_flctl *flctl = mtd_to_flctl(mtd);
@@ -1186,7 +1177,6 @@ static int flctl_probe(struct platform_device *pdev)
 	nand->chip_delay = 20;
 
 	nand->read_byte = flctl_read_byte;
-	nand->read_word = flctl_read_word;
 	nand->write_buf = flctl_write_buf;
 	nand->read_buf = flctl_read_buf;
 	nand->select_chip = flctl_select_chip;
diff --git a/drivers/mtd/nand/raw/socrates_nand.c b/drivers/mtd/nand/raw/socrates_nand.c
index 9824a9923583..36c45aa21f66 100644
--- a/drivers/mtd/nand/raw/socrates_nand.c
+++ b/drivers/mtd/nand/raw/socrates_nand.c
@@ -85,17 +85,6 @@ static uint8_t socrates_nand_read_byte(struct mtd_info *mtd)
 	return byte;
 }
 
-/**
- * socrates_nand_read_word -  read one word from the chip
- * @mtd:	MTD device structure
- */
-static uint16_t socrates_nand_read_word(struct mtd_info *mtd)
-{
-	uint16_t word;
-	socrates_nand_read_buf(mtd, (uint8_t *)&word, sizeof(word));
-	return word;
-}
-
 /*
  * Hardware specific access to control-lines
  */
@@ -172,7 +161,6 @@ static int socrates_nand_probe(struct platform_device *ofdev)
 
 	nand_chip->cmd_ctrl = socrates_nand_cmd_ctrl;
 	nand_chip->read_byte = socrates_nand_read_byte;
-	nand_chip->read_word = socrates_nand_read_word;
 	nand_chip->write_buf = socrates_nand_write_buf;
 	nand_chip->read_buf = socrates_nand_read_buf;
 	nand_chip->dev_ready = socrates_nand_device_ready;
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index efb2345359bb..d155470f53c8 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1182,7 +1182,6 @@ int nand_op_parser_exec_op(struct nand_chip *chip,
  * @IO_ADDR_W:		[BOARDSPECIFIC] address to write the 8 I/O lines of the
  *			flash device.
  * @read_byte:		[REPLACEABLE] read one byte from the chip
- * @read_word:		[REPLACEABLE] read one word from the chip
  * @write_byte:		[REPLACEABLE] write a single byte to the chip on the
  *			low 8 I/O lines
  * @write_buf:		[REPLACEABLE] write data from the buffer to the chip
@@ -1287,7 +1286,6 @@ struct nand_chip {
 	void __iomem *IO_ADDR_W;
 
 	uint8_t (*read_byte)(struct mtd_info *mtd);
-	u16 (*read_word)(struct mtd_info *mtd);
 	void (*write_byte)(struct mtd_info *mtd, uint8_t byte);
 	void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len);
 	void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len);
-- 
cgit v1.2.3


From ff8648f29fe58c2d94d32a076d2de7b92be4b485 Mon Sep 17 00:00:00 2001
From: Kurt Kanzenbach <kurt@linutronix.de>
Date: Mon, 13 Aug 2018 09:18:46 +0200
Subject: mtd: rawnand: fsl_ifc: fixup SRAM init for newer ctrl versions

Newer versions of the IFC controller use a different method of initializing the
internal SRAM: Instead of reading from flash, a bit in the NAND configuration
register has to be set in order to trigger the self-initializing process.

Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/fsl_ifc_nand.c | 33 ++++++++++++++++++++++++++-------
 include/linux/fsl_ifc.h             |  2 ++
 2 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index e4f5792dc589..7e7729df7827 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -30,6 +30,7 @@
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/nand_ecc.h>
 #include <linux/fsl_ifc.h>
+#include <linux/iopoll.h>
 
 #define ERR_BYTE		0xFF /* Value returned for read
 					bytes when read failed	*/
@@ -769,6 +770,27 @@ static int fsl_ifc_sram_init(struct fsl_ifc_mtd *priv)
 	uint32_t csor = 0, csor_8k = 0, csor_ext = 0;
 	uint32_t cs = priv->bank;
 
+	if (ctrl->version < FSL_IFC_VERSION_1_1_0)
+		return 0;
+
+	if (ctrl->version > FSL_IFC_VERSION_1_1_0) {
+		u32 ncfgr, status;
+		int ret;
+
+		/* Trigger auto initialization */
+		ncfgr = ifc_in32(&ifc_runtime->ifc_nand.ncfgr);
+		ifc_out32(ncfgr | IFC_NAND_NCFGR_SRAM_INIT_EN, &ifc_runtime->ifc_nand.ncfgr);
+
+		/* Wait until done */
+		ret = readx_poll_timeout(ifc_in32, &ifc_runtime->ifc_nand.ncfgr,
+					 status, !(status & IFC_NAND_NCFGR_SRAM_INIT_EN),
+					 10, IFC_TIMEOUT_MSECS * 1000);
+		if (ret)
+			dev_err(priv->dev, "Failed to initialize SRAM!\n");
+
+		return ret;
+	}
+
 	/* Save CSOR and CSOR_ext */
 	csor = ifc_in32(&ifc_global->csor_cs[cs].csor);
 	csor_ext = ifc_in32(&ifc_global->csor_cs[cs].csor_ext);
@@ -825,6 +847,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 	struct nand_chip *chip = &priv->chip;
 	struct mtd_info *mtd = nand_to_mtd(&priv->chip);
 	u32 csor;
+	int ret;
 
 	/* Fill in fsl_ifc_mtd structure */
 	mtd->dev.parent = priv->dev;
@@ -918,13 +941,9 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 		chip->ecc.algo = NAND_ECC_HAMMING;
 	}
 
-	if (ctrl->version >= FSL_IFC_VERSION_1_1_0) {
-		int ret;
-
-		ret = fsl_ifc_sram_init(priv);
-		if (ret)
-			return ret;
-	}
+	ret = fsl_ifc_sram_init(priv);
+	if (ret)
+		return ret;
 
 	/*
 	 * As IFC version 2.0.0 has 16KB of internal SRAM as compared to older
diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h
index 3fdfede2f0f3..5f343b796ad9 100644
--- a/include/linux/fsl_ifc.h
+++ b/include/linux/fsl_ifc.h
@@ -274,6 +274,8 @@
  */
 /* Auto Boot Mode */
 #define IFC_NAND_NCFGR_BOOT		0x80000000
+/* SRAM Initialization */
+#define IFC_NAND_NCFGR_SRAM_INIT_EN	0x20000000
 /* Addressing Mode-ROW0+n/COL0 */
 #define IFC_NAND_NCFGR_ADDR_MODE_RC0	0x00000000
 /* Addressing Mode-ROW0+n/COL0+n */
-- 
cgit v1.2.3


From 8ce78257965e6cd49720e653867e766ecd38883f Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 28 Aug 2018 14:18:41 +0300
Subject: net/mlx5: Add proper NIC TX steering flow tables support

Extend the ability to add steering rules to NIC TX flow tables.
For now, we are only adding TX bypass (egress) which is used by the RDMA
side. This will allow to shape outgoing traffic and tweak it if needed, for
example performing encapsulation or rewriting headers.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c  |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 51 +++++++++++++++++------
 include/linux/mlx5/device.h                       |  6 +++
 3 files changed, 46 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 8e01f818021b..28c7301e08f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -760,8 +760,8 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ
 	case FS_FT_FDB:
 	case FS_FT_SNIFFER_RX:
 	case FS_FT_SNIFFER_TX:
-		return mlx5_fs_cmd_get_fw_cmds();
 	case FS_FT_NIC_TX:
+		return mlx5_fs_cmd_get_fw_cmds();
 	default:
 		return mlx5_fs_cmd_get_stub_cmds();
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 5624030d2ed4..b7e7eb3535c7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -76,6 +76,14 @@
 					   FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \
 					   FS_CAP(flow_table_properties_nic_receive.flow_table_modify))
 
+#define FS_CHAINING_CAPS_EGRESS                                                \
+	FS_REQUIRED_CAPS(                                                      \
+		FS_CAP(flow_table_properties_nic_transmit.flow_modify_en),     \
+		FS_CAP(flow_table_properties_nic_transmit.modify_root),        \
+		FS_CAP(flow_table_properties_nic_transmit                      \
+			       .identified_miss_table_mode),                   \
+		FS_CAP(flow_table_properties_nic_transmit.flow_table_modify))
+
 #define LEFTOVERS_NUM_LEVELS 1
 #define LEFTOVERS_NUM_PRIOS 1
 
@@ -151,6 +159,17 @@ static struct init_tree_node {
 	}
 };
 
+static struct init_tree_node egress_root_fs = {
+	.type = FS_TYPE_NAMESPACE,
+	.ar_size = 1,
+	.children = (struct init_tree_node[]) {
+		ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
+			 FS_CHAINING_CAPS_EGRESS,
+			 ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
+						  BY_PASS_PRIO_NUM_LEVELS))),
+	}
+};
+
 enum fs_i_lock_class {
 	FS_LOCK_GRANDPARENT,
 	FS_LOCK_PARENT,
@@ -1978,7 +1997,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 {
 	struct mlx5_flow_steering *steering = dev->priv.steering;
 	struct mlx5_flow_root_namespace *root_ns;
-	int prio;
+	int prio = 0;
 	struct fs_prio *fs_prio;
 	struct mlx5_flow_namespace *ns;
 
@@ -1998,16 +2017,17 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 		if (steering->sniffer_tx_root_ns)
 			return &steering->sniffer_tx_root_ns->ns;
 		return NULL;
-	case MLX5_FLOW_NAMESPACE_EGRESS:
-		if (steering->egress_root_ns)
-			return &steering->egress_root_ns->ns;
-		return NULL;
 	default:
 		break;
 	}
 
-	root_ns = steering->root_ns;
-	prio = type;
+	if (type == MLX5_FLOW_NAMESPACE_EGRESS) {
+		root_ns = steering->egress_root_ns;
+	} else { /* Must be NIC RX */
+		root_ns = steering->root_ns;
+		prio = type;
+	}
+
 	if (!root_ns)
 		return NULL;
 
@@ -2523,16 +2543,23 @@ cleanup_root_ns:
 
 static int init_egress_root_ns(struct mlx5_flow_steering *steering)
 {
-	struct fs_prio *prio;
+	int err;
 
 	steering->egress_root_ns = create_root_ns(steering,
 						  FS_FT_NIC_TX);
 	if (!steering->egress_root_ns)
 		return -ENOMEM;
 
-	/* create 1 prio*/
-	prio = fs_create_prio(&steering->egress_root_ns->ns, 0, 1);
-	return PTR_ERR_OR_ZERO(prio);
+	err = init_root_tree(steering, &egress_root_fs,
+			     &steering->egress_root_ns->ns.node);
+	if (err)
+		goto cleanup;
+	set_prio_attrs(steering->egress_root_ns);
+	return 0;
+cleanup:
+	cleanup_root_ns(steering->egress_root_ns);
+	steering->egress_root_ns = NULL;
+	return err;
 }
 
 int mlx5_init_fs(struct mlx5_core_dev *dev)
@@ -2600,7 +2627,7 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 			goto err;
 	}
 
-	if (MLX5_IPSEC_DEV(dev)) {
+	if (MLX5_IPSEC_DEV(dev) || MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
 		err = init_egress_root_ns(steering);
 		if (err)
 			goto err;
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 11fa4e66afc5..f2281e69ab39 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1120,6 +1120,12 @@ enum mlx5_qcam_feature_groups {
 #define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \
 	MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap)
 
+#define MLX5_CAP_FLOWTABLE_NIC_TX(mdev, cap) \
+		MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit.cap)
+
+#define MLX5_CAP_FLOWTABLE_NIC_TX_MAX(mdev, cap) \
+	MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit.cap)
+
 #define MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) \
 	MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_sniffer.cap)
 
-- 
cgit v1.2.3


From 90c1d1b8da67330b09893d749401a45328b51704 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 28 Aug 2018 14:18:42 +0300
Subject: net/mlx5: Export modify header alloc/dealloc functions

Those functions will be used by the RDMA side to create modify header
actions to be attached to flow steering rules via verbs.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c    | 2 ++
 drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 5 -----
 include/linux/mlx5/fs.h                             | 6 ++++++
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 28c7301e08f4..37bea30b68ac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -702,6 +702,7 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
 	kfree(in);
 	return err;
 }
+EXPORT_SYMBOL(mlx5_modify_header_alloc);
 
 void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id)
 {
@@ -716,6 +717,7 @@ void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id)
 
 	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
+EXPORT_SYMBOL(mlx5_modify_header_dealloc);
 
 static const struct mlx5_flow_cmds mlx5_flow_cmds = {
 	.create_flow_table = mlx5_cmd_create_flow_table,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index b4134fa0bba3..649d1bd83a1a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -176,11 +176,6 @@ int mlx5_encap_alloc(struct mlx5_core_dev *dev,
 		     u32 *encap_id);
 void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id);
 
-int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
-			     u8 namespace, u8 num_actions,
-			     void *modify_actions, u32 *modify_header_id);
-void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id);
-
 bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv);
 
 int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size);
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 804516e4f483..0cbf4d5cb1ab 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -196,4 +196,10 @@ int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
 int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
 int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
 
+int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
+			     u8 namespace, u8 num_actions,
+			     void *modify_actions, u32 *modify_header_id);
+void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
+				u32 modify_header_id);
+
 #endif
-- 
cgit v1.2.3


From 61444b458b01c95e55003d6f0b4d4c936fde51cb Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 28 Aug 2018 14:18:44 +0300
Subject: net/mlx5: Break encap/decap into two separated flow table creation
 flags

Today we are able to attach encap and decap actions only to the FDB. In
preparation to enable those actions on the NIC flow tables, break the
single flag into two. Those flags control whatever a decap or encap
operations can be attached to the flow table created. For FDB, if
encapsulation is required, we set both of them.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c           | 7 ++++---
 include/linux/mlx5/fs.h                                    | 3 ++-
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index f72b5c9dcfe9..ff21807a0c4b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -529,7 +529,8 @@ static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
 		esw_size >>= 1;
 
 	if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
-		flags |= MLX5_FLOW_TABLE_TUNNEL_EN;
+		flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_ENCAP |
+			  MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
 
 	fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
 						  esw_size,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 9ae777e56529..1698f325a21e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -152,7 +152,8 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
 				      struct mlx5_flow_table *next_ft,
 				      unsigned int *table_id, u32 flags)
 {
-	int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN);
+	int en_encap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_ENCAP);
+	int en_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
 	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]   = {0};
 	int err;
@@ -169,9 +170,9 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
 	}
 
 	MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en,
-		 en_encap_decap);
+		 en_decap);
 	MLX5_SET(create_flow_table_in, in, flow_table_context.encap_en,
-		 en_encap_decap);
+		 en_encap);
 
 	switch (op_mod) {
 	case FS_FT_OP_MOD_NORMAL:
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 0cbf4d5cb1ab..0194e62ad66a 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -45,7 +45,8 @@ enum {
 };
 
 enum {
-	MLX5_FLOW_TABLE_TUNNEL_EN = BIT(0),
+	MLX5_FLOW_TABLE_TUNNEL_EN_ENCAP = BIT(0),
+	MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1),
 };
 
 #define LEFTOVERS_RULE_NUM	 2
-- 
cgit v1.2.3


From e0e7a3861b6c6b673dc93e291ef11cf5e746b0c2 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 28 Aug 2018 14:18:45 +0300
Subject: net/mlx5: Move header encap type to IFC header file

Those bits are hardware specification and should be defined in the
IFC header file.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 -----
 include/linux/mlx5/mlx5_ifc.h                   | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 9131a1376e7d..240a6fe1587e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -100,11 +100,6 @@ struct mlx5e_tc_flow_parse_attr {
 	int mirred_ifindex;
 };
 
-enum {
-	MLX5_HEADER_TYPE_VXLAN = 0x0,
-	MLX5_HEADER_TYPE_NVGRE = 0x1,
-};
-
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f043d65b9bac..bd725e0924e5 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -4848,6 +4848,11 @@ struct mlx5_ifc_alloc_encap_header_out_bits {
 	u8         reserved_at_60[0x20];
 };
 
+enum {
+	MLX5_HEADER_TYPE_VXLAN = 0x0,
+	MLX5_HEADER_TYPE_NVGRE = 0x1,
+};
+
 struct mlx5_ifc_alloc_encap_header_in_bits {
 	u8         opcode[0x10];
 	u8         reserved_at_10[0x10];
-- 
cgit v1.2.3


From 60786f0987c0d9354e5330ee11615b16cdb448fe Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 28 Aug 2018 14:18:46 +0300
Subject: {net, RDMA}/mlx5: Rename encap to reformat packet

Renames all encap mlx5_{core,ib} code to use the new naming of packet
reformat. This change doesn't introduce any function change and is
needed to properly reflect the operation being done by this action.
For example not only can we encapsulate a packet, but also decapsulate it.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/devx.c                  |  6 +--
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c      |  8 +--
 .../mellanox/mlx5/core/diag/fs_tracepoint.h        |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    | 43 ++++++++-------
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  2 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  8 +--
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c   | 63 ++++++++++++----------
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  |  2 +-
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h    | 13 ++---
 include/linux/mlx5/fs.h                            |  4 +-
 include/linux/mlx5/mlx5_ifc.h                      | 50 ++++++++---------
 11 files changed, 107 insertions(+), 94 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index ac116d63e466..25dafa4ff6ca 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -284,7 +284,7 @@ static bool devx_is_obj_create_cmd(const void *in)
 	case MLX5_CMD_OP_CREATE_FLOW_TABLE:
 	case MLX5_CMD_OP_CREATE_FLOW_GROUP:
 	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
-	case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+	case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
 	case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
 	case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
 	case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
@@ -627,9 +627,9 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
 			 MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
 		break;
-	case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+	case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
-			 MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
+			 MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
 		break;
 	case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
 		MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 6f589b4d33d9..39750fca371d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -308,7 +308,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
 	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
 	case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
-	case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
+	case MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT:
 	case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT:
 	case MLX5_CMD_OP_FPGA_DESTROY_QP:
 	case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
@@ -427,7 +427,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
 	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
 	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
-	case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+	case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
 	case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
 	case MLX5_CMD_OP_FPGA_CREATE_QP:
 	case MLX5_CMD_OP_FPGA_MODIFY_QP:
@@ -601,8 +601,8 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER);
 	MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER);
 	MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
-	MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER);
-	MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
+	MLX5_COMMAND_STR_CASE(ALLOC_PACKET_REFORMAT_CONTEXT);
+	MLX5_COMMAND_STR_CASE(DEALLOC_PACKET_REFORMAT_CONTEXT);
 	MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT);
 	MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT);
 	MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
index 0240aee9189e..e83dda441a81 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -133,7 +133,7 @@ TRACE_EVENT(mlx5_fs_del_fg,
 	{MLX5_FLOW_CONTEXT_ACTION_DROP,		 "DROP"},\
 	{MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,	 "FWD"},\
 	{MLX5_FLOW_CONTEXT_ACTION_COUNT,	 "CNT"},\
-	{MLX5_FLOW_CONTEXT_ACTION_ENCAP,	 "ENCAP"},\
+	{MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT, "REFORMAT"},\
 	{MLX5_FLOW_CONTEXT_ACTION_DECAP,	 "DECAP"},\
 	{MLX5_FLOW_CONTEXT_ACTION_MOD_HDR,	 "MOD_HDR"},\
 	{MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH,	 "VLAN_PUSH"},\
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 240a6fe1587e..3df8f2b90908 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -681,7 +681,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 		.action = attr->action,
 		.has_flow_tag = true,
 		.flow_tag = attr->flow_tag,
-		.encap_id = 0,
+		.reformat_id = 0,
 	};
 	struct mlx5_fc *counter = NULL;
 	struct mlx5_flow_handle *rule;
@@ -829,7 +829,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 	struct mlx5e_priv *out_priv;
 	int err;
 
-	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
+	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) {
 		out_dev = __dev_get_by_index(dev_net(priv->netdev),
 					     attr->parse_attr->mirred_ifindex);
 		err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
@@ -885,7 +885,7 @@ err_add_rule:
 err_mod_hdr:
 	mlx5_eswitch_del_vlan_action(esw, attr);
 err_add_vlan:
-	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
 		mlx5e_detach_encap(priv, flow);
 err_attach_encap:
 	return rule;
@@ -906,7 +906,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
 
 	mlx5_eswitch_del_vlan_action(esw, attr);
 
-	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
+	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) {
 		mlx5e_detach_encap(priv, flow);
 		kvfree(attr->parse_attr);
 	}
@@ -923,9 +923,9 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
 	struct mlx5e_tc_flow *flow;
 	int err;
 
-	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
-			       e->encap_size, e->encap_header,
-			       &e->encap_id);
+	err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
+					 e->encap_size, e->encap_header,
+					 &e->encap_id);
 	if (err) {
 		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n",
 			       err);
@@ -979,7 +979,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
 
 	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
 		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
-		mlx5_encap_dealloc(priv->mdev, e->encap_id);
+		mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
 	}
 }
 
@@ -1048,7 +1048,7 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
 
 		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
-			mlx5_encap_dealloc(priv->mdev, e->encap_id);
+			mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
 
 		hash_del_rcu(&e->encap_hlist);
 		kfree(e->encap_header);
@@ -2323,7 +2323,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 		return -ENOMEM;
 
 	switch (e->tunnel_type) {
-	case MLX5_HEADER_TYPE_VXLAN:
+	case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
 		fl4.flowi4_proto = IPPROTO_UDP;
 		fl4.fl4_dport = tun_key->tp_dst;
 		break;
@@ -2367,7 +2367,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 	read_unlock_bh(&n->lock);
 
 	switch (e->tunnel_type) {
-	case MLX5_HEADER_TYPE_VXLAN:
+	case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
 		gen_vxlan_header_ipv4(out_dev, encap_header,
 				      ipv4_encap_size, e->h_dest, tos, ttl,
 				      fl4.daddr,
@@ -2387,8 +2387,9 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
 		goto out;
 	}
 
-	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
-			       ipv4_encap_size, encap_header, &e->encap_id);
+	err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
+					 ipv4_encap_size, encap_header,
+					 &e->encap_id);
 	if (err)
 		goto destroy_neigh_entry;
 
@@ -2432,7 +2433,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
 		return -ENOMEM;
 
 	switch (e->tunnel_type) {
-	case MLX5_HEADER_TYPE_VXLAN:
+	case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
 		fl6.flowi6_proto = IPPROTO_UDP;
 		fl6.fl6_dport = tun_key->tp_dst;
 		break;
@@ -2476,7 +2477,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
 	read_unlock_bh(&n->lock);
 
 	switch (e->tunnel_type) {
-	case MLX5_HEADER_TYPE_VXLAN:
+	case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
 		gen_vxlan_header_ipv6(out_dev, encap_header,
 				      ipv6_encap_size, e->h_dest, tos, ttl,
 				      &fl6.daddr,
@@ -2497,8 +2498,9 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
 		goto out;
 	}
 
-	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
-			       ipv6_encap_size, encap_header, &e->encap_id);
+	err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
+					 ipv6_encap_size, encap_header,
+					 &e->encap_id);
 	if (err)
 		goto destroy_neigh_entry;
 
@@ -2546,7 +2548,7 @@ vxlan_encap_offload_err:
 
 	if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) &&
 	    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
-		tunnel_type = MLX5_HEADER_TYPE_VXLAN;
+		tunnel_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
 	} else {
 		netdev_warn(priv->netdev,
 			    "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
@@ -2721,7 +2723,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 				parse_attr->mirred_ifindex = out_dev->ifindex;
 				parse_attr->tun_info = *info;
 				attr->parse_attr = parse_attr;
-				action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
+				action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
 					  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
 					  MLX5_FLOW_CONTEXT_ACTION_COUNT;
 				/* attr->out_rep is resolved when we handle encap */
@@ -2867,7 +2869,8 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
 		flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
 
 	if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
-	    !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP))
+	    !(flow->esw_attr->action &
+	      MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT))
 		kvfree(parse_attr);
 
 	err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 2b252cde5cc2..525b7e43b298 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1746,7 +1746,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	esw->enabled_vports = 0;
 	esw->mode = SRIOV_NONE;
 	esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
-	if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) &&
+	if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) &&
 	    MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
 		esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
 	else
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index ff21807a0c4b..00ec6dd72080 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -127,8 +127,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 		flow_act.modify_id = attr->mod_hdr_id;
 
-	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
-		flow_act.encap_id = attr->encap_id;
+	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
+		flow_act.reformat_id = attr->encap_id;
 
 	rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, i);
 	if (IS_ERR(rule))
@@ -529,7 +529,7 @@ static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
 		esw_size >>= 1;
 
 	if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
-		flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_ENCAP |
+		flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
 			  MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
 
 	fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
@@ -1245,7 +1245,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap)
 		return err;
 
 	if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
-	    (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) ||
+	    (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) ||
 	     !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)))
 		return -EOPNOTSUPP;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 1698f325a21e..4539b709db20 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -152,7 +152,7 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
 				      struct mlx5_flow_table *next_ft,
 				      unsigned int *table_id, u32 flags)
 {
-	int en_encap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_ENCAP);
+	int en_encap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT);
 	int en_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
 	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]   = {0};
@@ -171,7 +171,7 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
 
 	MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en,
 		 en_decap);
-	MLX5_SET(create_flow_table_in, in, flow_table_context.encap_en,
+	MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en,
 		 en_encap);
 
 	switch (op_mod) {
@@ -344,7 +344,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 
 	MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
 	MLX5_SET(flow_context, in_flow_context, action, fte->action.action);
-	MLX5_SET(flow_context, in_flow_context, encap_id, fte->action.encap_id);
+	MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
+		 fte->action.reformat_id);
 	MLX5_SET(flow_context, in_flow_context, modify_header_id,
 		 fte->action.modify_id);
 
@@ -595,16 +596,16 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
 	*bytes = MLX5_GET64(traffic_counter, stats, octets);
 }
 
-int mlx5_encap_alloc(struct mlx5_core_dev *dev,
-		     int header_type,
-		     size_t size,
-		     void *encap_header,
-		     u32 *encap_id)
+int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
+			       int reformat_type,
+			       size_t size,
+			       void *reformat_data,
+			       u32 *packet_reformat_id)
 {
 	int max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size);
-	u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)];
-	void *encap_header_in;
-	void *header;
+	u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)];
+	void *packet_reformat_context_in;
+	void *reformat;
 	int inlen;
 	int err;
 	u32 *in;
@@ -615,39 +616,47 @@ int mlx5_encap_alloc(struct mlx5_core_dev *dev,
 		return -EINVAL;
 	}
 
-	in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + size,
+	in = kzalloc(MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in) + size,
 		     GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
 
-	encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, encap_header);
-	header = MLX5_ADDR_OF(encap_header_in, encap_header_in, encap_header);
-	inlen = header - (void *)in  + size;
+	packet_reformat_context_in = MLX5_ADDR_OF(alloc_packet_reformat_context_in,
+						  in, packet_reformat_context);
+	reformat = MLX5_ADDR_OF(packet_reformat_context_in,
+				packet_reformat_context_in,
+				reformat_data);
+	inlen = reformat - (void *)in  + size;
 
 	memset(in, 0, inlen);
-	MLX5_SET(alloc_encap_header_in, in, opcode,
-		 MLX5_CMD_OP_ALLOC_ENCAP_HEADER);
-	MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size);
-	MLX5_SET(encap_header_in, encap_header_in, header_type, header_type);
-	memcpy(header, encap_header, size);
+	MLX5_SET(alloc_packet_reformat_context_in, in, opcode,
+		 MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT);
+	MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
+		 reformat_data_size, size);
+	MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
+		 reformat_type, reformat_type);
+	memcpy(reformat, reformat_data, size);
 
 	memset(out, 0, sizeof(out));
 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 
-	*encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id);
+	*packet_reformat_id = MLX5_GET(alloc_packet_reformat_context_out,
+				       out, packet_reformat_id);
 	kfree(in);
 	return err;
 }
 
-void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id)
+void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
+				  u32 packet_reformat_id)
 {
-	u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)];
-	u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)];
+	u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)];
+	u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)];
 
 	memset(in, 0, sizeof(in));
-	MLX5_SET(dealloc_encap_header_in, in, opcode,
-		 MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
-	MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id);
+	MLX5_SET(dealloc_packet_reformat_context_in, in, opcode,
+		 MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
+	MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id,
+		 packet_reformat_id);
 
 	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index b7e7eb3535c7..d2b162cfe86b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1407,7 +1407,7 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
 		return false;
 
 	if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP  |
-			     MLX5_FLOW_CONTEXT_ACTION_ENCAP |
+			     MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
 			     MLX5_FLOW_CONTEXT_ACTION_DECAP |
 			     MLX5_FLOW_CONTEXT_ACTION_MOD_HDR  |
 			     MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 649d1bd83a1a..f3c8f51cc9c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -169,12 +169,13 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
 void mlx5_dev_list_lock(void);
 void mlx5_dev_list_unlock(void);
 int mlx5_dev_list_trylock(void);
-int mlx5_encap_alloc(struct mlx5_core_dev *dev,
-		     int header_type,
-		     size_t size,
-		     void *encap_header,
-		     u32 *encap_id);
-void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id);
+int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
+			       int reformat_type,
+			       size_t size,
+			       void *reformat_data,
+			       u32 *packet_reformat_id);
+void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
+				  u32 packet_reformat_id);
 
 bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv);
 
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 0194e62ad66a..37d0c08d0966 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -45,7 +45,7 @@ enum {
 };
 
 enum {
-	MLX5_FLOW_TABLE_TUNNEL_EN_ENCAP = BIT(0),
+	MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT = BIT(0),
 	MLX5_FLOW_TABLE_TUNNEL_EN_DECAP = BIT(1),
 };
 
@@ -160,7 +160,7 @@ struct mlx5_flow_act {
 	u32 action;
 	bool has_flow_tag;
 	u32 flow_tag;
-	u32 encap_id;
+	u32 reformat_id;
 	u32 modify_id;
 	uintptr_t esp_id;
 	struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH];
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index bd725e0924e5..c79eaae28e59 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -243,8 +243,8 @@ enum {
 	MLX5_CMD_OP_DEALLOC_FLOW_COUNTER          = 0x93a,
 	MLX5_CMD_OP_QUERY_FLOW_COUNTER            = 0x93b,
 	MLX5_CMD_OP_MODIFY_FLOW_TABLE             = 0x93c,
-	MLX5_CMD_OP_ALLOC_ENCAP_HEADER            = 0x93d,
-	MLX5_CMD_OP_DEALLOC_ENCAP_HEADER          = 0x93e,
+	MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT = 0x93d,
+	MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT = 0x93e,
 	MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT   = 0x940,
 	MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
 	MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT   = 0x942,
@@ -336,7 +336,7 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
 	u8         modify_root[0x1];
 	u8         identified_miss_table_mode[0x1];
 	u8         flow_table_modify[0x1];
-	u8         encap[0x1];
+	u8         reformat[0x1];
 	u8         decap[0x1];
 	u8         reserved_at_9[0x1];
 	u8         pop_vlan[0x1];
@@ -599,7 +599,7 @@ struct mlx5_ifc_e_switch_cap_bits {
 	u8         vxlan_encap_decap[0x1];
 	u8         nvgre_encap_decap[0x1];
 	u8         reserved_at_22[0x9];
-	u8         log_max_encap_headers[0x5];
+	u8         log_max_packet_reformat_context[0x5];
 	u8         reserved_2b[0x6];
 	u8         max_encap_header_size[0xa];
 
@@ -2394,7 +2394,7 @@ enum {
 	MLX5_FLOW_CONTEXT_ACTION_DROP      = 0x2,
 	MLX5_FLOW_CONTEXT_ACTION_FWD_DEST  = 0x4,
 	MLX5_FLOW_CONTEXT_ACTION_COUNT     = 0x8,
-	MLX5_FLOW_CONTEXT_ACTION_ENCAP     = 0x10,
+	MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT = 0x10,
 	MLX5_FLOW_CONTEXT_ACTION_DECAP     = 0x20,
 	MLX5_FLOW_CONTEXT_ACTION_MOD_HDR   = 0x40,
 	MLX5_FLOW_CONTEXT_ACTION_VLAN_POP  = 0x80,
@@ -2427,7 +2427,7 @@ struct mlx5_ifc_flow_context_bits {
 	u8         reserved_at_a0[0x8];
 	u8         flow_counter_list_size[0x18];
 
-	u8         encap_id[0x20];
+	u8         packet_reformat_id[0x20];
 
 	u8         modify_header_id[0x20];
 
@@ -4802,19 +4802,19 @@ struct mlx5_ifc_query_eq_in_bits {
 	u8         reserved_at_60[0x20];
 };
 
-struct mlx5_ifc_encap_header_in_bits {
+struct mlx5_ifc_packet_reformat_context_in_bits {
 	u8         reserved_at_0[0x5];
-	u8         header_type[0x3];
+	u8         reformat_type[0x3];
 	u8         reserved_at_8[0xe];
-	u8         encap_header_size[0xa];
+	u8         reformat_data_size[0xa];
 
 	u8         reserved_at_20[0x10];
-	u8         encap_header[2][0x8];
+	u8         reformat_data[2][0x8];
 
-	u8         more_encap_header[0][0x8];
+	u8         more_reformat_data[0][0x8];
 };
 
-struct mlx5_ifc_query_encap_header_out_bits {
+struct mlx5_ifc_query_packet_reformat_context_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
 
@@ -4822,38 +4822,38 @@ struct mlx5_ifc_query_encap_header_out_bits {
 
 	u8         reserved_at_40[0xa0];
 
-	struct mlx5_ifc_encap_header_in_bits encap_header[0];
+	struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context[0];
 };
 
-struct mlx5_ifc_query_encap_header_in_bits {
+struct mlx5_ifc_query_packet_reformat_context_in_bits {
 	u8         opcode[0x10];
 	u8         reserved_at_10[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         encap_id[0x20];
+	u8         packet_reformat_id[0x20];
 
 	u8         reserved_at_60[0xa0];
 };
 
-struct mlx5_ifc_alloc_encap_header_out_bits {
+struct mlx5_ifc_alloc_packet_reformat_context_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
 
 	u8         syndrome[0x20];
 
-	u8         encap_id[0x20];
+	u8         packet_reformat_id[0x20];
 
 	u8         reserved_at_60[0x20];
 };
 
 enum {
-	MLX5_HEADER_TYPE_VXLAN = 0x0,
-	MLX5_HEADER_TYPE_NVGRE = 0x1,
+	MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0,
+	MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1,
 };
 
-struct mlx5_ifc_alloc_encap_header_in_bits {
+struct mlx5_ifc_alloc_packet_reformat_context_in_bits {
 	u8         opcode[0x10];
 	u8         reserved_at_10[0x10];
 
@@ -4862,10 +4862,10 @@ struct mlx5_ifc_alloc_encap_header_in_bits {
 
 	u8         reserved_at_40[0xa0];
 
-	struct mlx5_ifc_encap_header_in_bits encap_header;
+	struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context;
 };
 
-struct mlx5_ifc_dealloc_encap_header_out_bits {
+struct mlx5_ifc_dealloc_packet_reformat_context_out_bits {
 	u8         status[0x8];
 	u8         reserved_at_8[0x18];
 
@@ -4874,14 +4874,14 @@ struct mlx5_ifc_dealloc_encap_header_out_bits {
 	u8         reserved_at_40[0x40];
 };
 
-struct mlx5_ifc_dealloc_encap_header_in_bits {
+struct mlx5_ifc_dealloc_packet_reformat_context_in_bits {
 	u8         opcode[0x10];
 	u8         reserved_at_10[0x10];
 
 	u8         reserved_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         encap_id[0x20];
+	u8         packet_reformat_id[0x20];
 
 	u8         reserved_60[0x20];
 };
@@ -6983,7 +6983,7 @@ struct mlx5_ifc_create_flow_table_out_bits {
 };
 
 struct mlx5_ifc_flow_table_context_bits {
-	u8         encap_en[0x1];
+	u8         reformat_en[0x1];
 	u8         decap_en[0x1];
 	u8         reserved_at_2[0x2];
 	u8         table_miss_action[0x4];
-- 
cgit v1.2.3


From bea4e1f6c6c5744d467ebf8b0699f5e391835130 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 28 Aug 2018 14:18:47 +0300
Subject: net/mlx5: Expose new packet reformat capabilities

Expose new abilities when creating a packet reformat context.

The new types which can be created are:
MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: Ability to create generic encap
operation to be done by the HW.

MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2: Ability to create generic decap
operation where the inner packet doesn't contain L2.

MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: Ability to create generic encap
operation to be done by the HW. The L2 of the original packet
is dropped.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index c79eaae28e59..3a4a2e0567e9 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -344,8 +344,12 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
 	u8         reserved_at_c[0x1];
 	u8         pop_vlan_2[0x1];
 	u8         push_vlan_2[0x1];
-	u8         reserved_at_f[0x11];
-
+	u8	   reformat_and_vlan_action[0x1];
+	u8	   reserved_at_10[0x2];
+	u8	   reformat_l3_tunnel_to_l2[0x1];
+	u8	   reformat_l2_to_l3_tunnel[0x1];
+	u8	   reformat_and_modify_action[0x1];
+	u8         reserved_at_14[0xb];
 	u8         reserved_at_20[0x2];
 	u8         log_max_ft_size[0x6];
 	u8         log_max_modify_header_context[0x8];
@@ -554,7 +558,13 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
 	u8         nic_rx_multi_path_tirs[0x1];
 	u8         nic_rx_multi_path_tirs_fts[0x1];
 	u8         allow_sniffer_and_nic_rx_shared_tir[0x1];
-	u8         reserved_at_3[0x1fd];
+	u8	   reserved_at_3[0x1d];
+	u8	   encap_general_header[0x1];
+	u8	   reserved_at_21[0xa];
+	u8	   log_max_packet_reformat_context[0x5];
+	u8	   reserved_at_30[0x6];
+	u8	   max_encap_header_size[0xa];
+	u8	   reserved_at_40[0x1c0];
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive;
 
@@ -4851,6 +4861,9 @@ struct mlx5_ifc_alloc_packet_reformat_context_out_bits {
 enum {
 	MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0,
 	MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1,
+	MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2,
+	MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3,
+	MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4,
 };
 
 struct mlx5_ifc_alloc_packet_reformat_context_in_bits {
-- 
cgit v1.2.3


From 50acec06f3928fc29647aecf1270e54cae583afb Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 28 Aug 2018 14:18:49 +0300
Subject: net/mlx5: Export packet reformat alloc/dealloc functions

This will allow for the RDMA side to allocate packet reformat context.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c    | 2 ++
 drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 8 --------
 include/linux/mlx5/fs.h                             | 9 +++++++++
 3 files changed, 11 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index cc9537891e39..dc8d7f6b52c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -651,6 +651,7 @@ int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
 	kfree(in);
 	return err;
 }
+EXPORT_SYMBOL(mlx5_packet_reformat_alloc);
 
 void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
 				  u32 packet_reformat_id)
@@ -666,6 +667,7 @@ void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
 
 	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
+EXPORT_SYMBOL(mlx5_packet_reformat_dealloc);
 
 int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
 			     u8 namespace, u8 num_actions,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 66a5dd5a6cbe..61a014e3f688 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -170,14 +170,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
 void mlx5_dev_list_lock(void);
 void mlx5_dev_list_unlock(void);
 int mlx5_dev_list_trylock(void);
-int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
-			       int reformat_type,
-			       size_t size,
-			       void *reformat_data,
-			       enum mlx5_flow_namespace_type namespace,
-			       u32 *packet_reformat_id);
-void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
-				  u32 packet_reformat_id);
 
 bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv);
 
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 37d0c08d0966..b1c026f1c8ba 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -203,4 +203,13 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
 void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
 				u32 modify_header_id);
 
+int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
+			       int reformat_type,
+			       size_t size,
+			       void *reformat_data,
+			       enum mlx5_flow_namespace_type namespace,
+			       u32 *packet_reformat_id);
+void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
+				  u32 packet_reformat_id);
+
 #endif
-- 
cgit v1.2.3


From 03512ceb60ae4be71ed3129dabb8625224c8ec40 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Fri, 31 Aug 2018 11:31:09 +0300
Subject: ieee80211: remove redundant leading zeroes

The defines of IEEE80211_HE_OPERATION_VHT_OPER_INFO and
IEEE80211_HE_OPERATION_MULTI_BSSID_AP have leading zeroes
that makes the number look like it is bigger than 32 bit.
This is misleading, remove it.

Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 9c03a7d5e400..17ea51d088ae 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1963,8 +1963,8 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info)
 #define IEEE80211_HE_OPERATION_TWT_REQUIRED			0x00000200
 #define IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK		0x000ffc00
 #define IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET		10
-#define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR		0x000100000
-#define IEEE80211_HE_OPERATION_VHT_OPER_INFO			0x000200000
+#define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR		0x00100000
+#define IEEE80211_HE_OPERATION_VHT_OPER_INFO			0x00200000
 #define IEEE80211_HE_OPERATION_MULTI_BSSID_AP			0x10000000
 #define IEEE80211_HE_OPERATION_TX_BSSID_INDICATOR		0x20000000
 #define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED		0x40000000
-- 
cgit v1.2.3


From b0aa75f0b1b2e6bc77128fab36c8ed87e84917cc Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 31 Aug 2018 11:31:16 +0300
Subject: ieee80211: add new VHT capability fields/parsing

IEEE 802.11-2016 extended the VHT capability fields to allow
indicating the number of spatial streams depending on the
actually used bandwidth, add support for decoding this.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h |  35 ++++++++++++++-
 net/wireless/util.c       | 109 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 142 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 17ea51d088ae..280600a10111 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1460,13 +1460,16 @@ struct ieee80211_ht_operation {
  *	STA can receive. Rate expressed in units of 1 Mbps.
  *	If this field is 0 this value should not be used to
  *	consider the highest RX data rate supported.
- *	The top 3 bits of this field are reserved.
+ *	The top 3 bits of this field indicate the Maximum NSTS,total
+ *	(a beamformee capability.)
  * @tx_mcs_map: TX MCS map 2 bits for each stream, total 8 streams
  * @tx_highest: Indicates highest long GI VHT PPDU data rate
  *	STA can transmit. Rate expressed in units of 1 Mbps.
  *	If this field is 0 this value should not be used to
  *	consider the highest TX data rate supported.
- *	The top 3 bits of this field are reserved.
+ *	The top 2 bits of this field are reserved, the
+ *	3rd bit from the top indiciates VHT Extended NSS BW
+ *	Capability.
  */
 struct ieee80211_vht_mcs_info {
 	__le16 rx_mcs_map;
@@ -1475,6 +1478,13 @@ struct ieee80211_vht_mcs_info {
 	__le16 tx_highest;
 } __packed;
 
+/* for rx_highest */
+#define IEEE80211_VHT_MAX_NSTS_TOTAL_SHIFT	13
+#define IEEE80211_VHT_MAX_NSTS_TOTAL_MASK	(7 << IEEE80211_VHT_MAX_NSTS_TOTAL_SHIFT)
+
+/* for tx_highest */
+#define IEEE80211_VHT_EXT_NSS_BW_CAPABLE	(1 << 13)
+
 /**
  * enum ieee80211_vht_mcs_support - VHT MCS support definitions
  * @IEEE80211_VHT_MCS_SUPPORT_0_7: MCSes 0-7 are supported for the
@@ -1650,6 +1660,7 @@ struct ieee80211_mu_edca_param_set {
 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ		0x00000004
 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ	0x00000008
 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK			0x0000000C
+#define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_SHIFT			2
 #define IEEE80211_VHT_CAP_RXLDPC				0x00000010
 #define IEEE80211_VHT_CAP_SHORT_GI_80				0x00000020
 #define IEEE80211_VHT_CAP_SHORT_GI_160				0x00000040
@@ -1678,6 +1689,26 @@ struct ieee80211_mu_edca_param_set {
 #define IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB	0x0c000000
 #define IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN			0x10000000
 #define IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN			0x20000000
+#define IEEE80211_VHT_CAP_EXT_NSS_BW_SHIFT			30
+#define IEEE80211_VHT_CAP_EXT_NSS_BW_MASK			0xc0000000
+
+/**
+ * ieee80211_get_vht_max_nss - return max NSS for a given bandwidth/MCS
+ * @cap: VHT capabilities of the peer
+ * @bw: bandwidth to use
+ * @mcs: MCS index to use
+ * @ext_nss_bw_capable: indicates whether or not the local transmitter
+ *	(rate scaling algorithm) can deal with the new logic
+ *	(dot11VHTExtendedNSSBWCapable)
+ *
+ * Due to the VHT Extended NSS Bandwidth Support, the maximum NSS can
+ * vary for a given BW/MCS. This function parses the data.
+ *
+ * Note: This function is exported by cfg80211.
+ */
+int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
+			      enum ieee80211_vht_chanwidth bw,
+			      int mcs, bool ext_nss_bw_capable);
 
 /* 802.11ax HE MAC capabilities */
 #define IEEE80211_HE_MAC_CAP0_HTC_HE				0x01
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 4293f980e9c4..ef14d80ca03e 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -5,17 +5,20 @@
  * Copyright 2007-2009	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright 2017	Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
  */
 #include <linux/export.h>
 #include <linux/bitops.h>
 #include <linux/etherdevice.h>
 #include <linux/slab.h>
+#include <linux/ieee80211.h>
 #include <net/cfg80211.h>
 #include <net/ip.h>
 #include <net/dsfield.h>
 #include <linux/if_vlan.h>
 #include <linux/mpls.h>
 #include <linux/gcd.h>
+#include <linux/bitfield.h>
 #include "core.h"
 #include "rdev-ops.h"
 
@@ -1938,3 +1941,109 @@ void cfg80211_send_layer2_update(struct net_device *dev, const u8 *addr)
 	netif_rx_ni(skb);
 }
 EXPORT_SYMBOL(cfg80211_send_layer2_update);
+
+int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
+			      enum ieee80211_vht_chanwidth bw,
+			      int mcs, bool ext_nss_bw_capable)
+{
+	u16 map = le16_to_cpu(cap->supp_mcs.rx_mcs_map);
+	int max_vht_nss = 0;
+	int ext_nss_bw;
+	int supp_width;
+	int i, mcs_encoding;
+
+	if (map == 0xffff)
+		return 0;
+
+	if (WARN_ON(mcs > 9))
+		return 0;
+	if (mcs <= 7)
+		mcs_encoding = 0;
+	else if (mcs == 8)
+		mcs_encoding = 1;
+	else
+		mcs_encoding = 2;
+
+	/* find max_vht_nss for the given MCS */
+	for (i = 7; i >= 0; i--) {
+		int supp = (map >> (2 * i)) & 3;
+
+		if (supp == 3)
+			continue;
+
+		if (supp >= mcs_encoding) {
+			max_vht_nss = i;
+			break;
+		}
+	}
+
+	if (!(cap->supp_mcs.tx_mcs_map &
+			cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE)))
+		return max_vht_nss;
+
+	ext_nss_bw = le32_get_bits(cap->vht_cap_info,
+				   IEEE80211_VHT_CAP_EXT_NSS_BW_MASK);
+	supp_width = le32_get_bits(cap->vht_cap_info,
+				   IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK);
+
+	/* if not capable, treat ext_nss_bw as 0 */
+	if (!ext_nss_bw_capable)
+		ext_nss_bw = 0;
+
+	/* This is invalid */
+	if (supp_width == 3)
+		return 0;
+
+	/* This is an invalid combination so pretend nothing is supported */
+	if (supp_width == 2 && (ext_nss_bw == 1 || ext_nss_bw == 2))
+		return 0;
+
+	/*
+	 * Cover all the special cases according to IEEE 802.11-2016
+	 * Table 9-250. All other cases are either factor of 1 or not
+	 * valid/supported.
+	 */
+	switch (bw) {
+	case IEEE80211_VHT_CHANWIDTH_USE_HT:
+	case IEEE80211_VHT_CHANWIDTH_80MHZ:
+		if ((supp_width == 1 || supp_width == 2) &&
+		    ext_nss_bw == 3)
+			return 2 * max_vht_nss;
+		break;
+	case IEEE80211_VHT_CHANWIDTH_160MHZ:
+		if (supp_width == 0 &&
+		    (ext_nss_bw == 1 || ext_nss_bw == 2))
+			return DIV_ROUND_UP(max_vht_nss, 2);
+		if (supp_width == 0 &&
+		    ext_nss_bw == 3)
+			return DIV_ROUND_UP(3 * max_vht_nss, 4);
+		if (supp_width == 1 &&
+		    ext_nss_bw == 3)
+			return 2 * max_vht_nss;
+		break;
+	case IEEE80211_VHT_CHANWIDTH_80P80MHZ:
+		if (supp_width == 0 &&
+		    (ext_nss_bw == 1 || ext_nss_bw == 2))
+			return 0; /* not possible */
+		if (supp_width == 0 &&
+		    ext_nss_bw == 2)
+			return DIV_ROUND_UP(max_vht_nss, 2);
+		if (supp_width == 0 &&
+		    ext_nss_bw == 3)
+			return DIV_ROUND_UP(3 * max_vht_nss, 4);
+		if (supp_width == 1 &&
+		    ext_nss_bw == 0)
+			return 0; /* not possible */
+		if (supp_width == 1 &&
+		    ext_nss_bw == 1)
+			return DIV_ROUND_UP(max_vht_nss, 2);
+		if (supp_width == 1 &&
+		    ext_nss_bw == 2)
+			return DIV_ROUND_UP(3 * max_vht_nss, 4);
+		break;
+	}
+
+	/* not covered or invalid combination received */
+	return max_vht_nss;
+}
+EXPORT_SYMBOL(ieee80211_get_vht_max_nss);
-- 
cgit v1.2.3


From add7453ad62f05c8f1a48675bb4dfed52e6ac878 Mon Sep 17 00:00:00 2001
From: Shaul Triebitz <shaul.triebitz@intel.com>
Date: Wed, 5 Sep 2018 08:06:08 +0300
Subject: wireless: align to draft 11ax D3.0

Align to new 11ax draft D3.0.  Change/add new MAC and PHY capabilities
and update drivers' 11ax capabilities and mac80211's debugfs
accordingly.

Signed-off-by: Shaul Triebitz <shaul.triebitz@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 51 ++++++++++----
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c  |  4 --
 drivers/net/wireless/mac80211_hwsim.c              | 18 ++---
 include/linux/ieee80211.h                          | 72 ++++++++++++--------
 net/mac80211/debugfs_sta.c                         | 77 +++++++++++++++-------
 5 files changed, 146 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index 73969dbeb5c5..27db4a3ba1f8 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -476,30 +476,40 @@ static struct ieee80211_sband_iftype_data iwl_he_capa = {
 		.has_he = true,
 		.he_cap_elem = {
 			.mac_cap_info[0] =
-				IEEE80211_HE_MAC_CAP0_HTC_HE,
+				IEEE80211_HE_MAC_CAP0_HTC_HE |
+				IEEE80211_HE_MAC_CAP0_TWT_REQ,
 			.mac_cap_info[1] =
 				IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US |
-				IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_8,
+				IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8,
 			.mac_cap_info[2] =
 				IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP |
+				IEEE80211_HE_MAC_CAP2_MU_CASCADING |
 				IEEE80211_HE_MAC_CAP2_ACK_EN,
 			.mac_cap_info[3] =
-				IEEE80211_HE_MAC_CAP3_GRP_ADDR_MULTI_STA_BA_DL_MU |
-				IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_2,
-			.mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU,
+				IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
+				IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2,
+			.mac_cap_info[4] =
+				IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU |
+				IEEE80211_HE_MAC_CAP4_MULTI_TID_AGG_TX_QOS_B39,
+			.mac_cap_info[5] =
+				IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B40 |
+				IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B41 |
+				IEEE80211_HE_MAC_CAP5_UL_2x996_TONE_RU,
 			.phy_cap_info[0] =
-				IEEE80211_HE_PHY_CAP0_DUAL_BAND |
 				IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G |
 				IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
 				IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G,
 			.phy_cap_info[1] =
+				IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK |
 				IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A |
 				IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD |
-				IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_MAX_NSTS,
+				IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_TX_MAX_NSTS,
 			.phy_cap_info[2] =
 				IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US |
 				IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ |
-				IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ,
+				IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ |
+				IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO |
+				IEEE80211_HE_PHY_CAP2_UL_MU_PARTIAL_MU_MIMO,
 			.phy_cap_info[3] =
 				IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_BPSK |
 				IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_1 |
@@ -511,18 +521,31 @@ static struct ieee80211_sband_iftype_data iwl_he_capa = {
 				IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_8,
 			.phy_cap_info[5] =
 				IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_2 |
-				IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2,
+				IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2 |
+				IEEE80211_HE_PHY_CAP5_NG16_SU_FEEDBACK |
+				IEEE80211_HE_PHY_CAP5_NG16_MU_FEEDBACK,
 			.phy_cap_info[6] =
+				IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_42_SU |
+				IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_75_MU |
+				IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMER_FB |
+				IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMER_FB |
+				IEEE80211_HE_PHY_CAP6_TRIG_CQI_FB |
+				IEEE80211_HE_PHY_CAP6_PARTIAL_BANDWIDTH_DL_MUMIMO |
 				IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT,
 			.phy_cap_info[7] =
 				IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_AR |
 				IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI |
-				IEEE80211_HE_PHY_CAP7_MAX_NC_7,
+				IEEE80211_HE_PHY_CAP7_MAX_NC_1,
 			.phy_cap_info[8] =
 				IEEE80211_HE_PHY_CAP8_HE_ER_SU_PPDU_4XLTF_AND_08_US_GI |
 				IEEE80211_HE_PHY_CAP8_20MHZ_IN_40MHZ_HE_PPDU_IN_2G |
 				IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU |
-				IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU,
+				IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU |
+				IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_160_OR_80P80_MHZ,
+			.phy_cap_info[9] =
+				IEEE80211_HE_PHY_CAP9_NON_TRIGGERED_CQI_FEEDBACK |
+				IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB |
+				IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB,
 		},
 		/*
 		 * Set default Tx/Rx HE MCS NSS Support field. Indicate support
@@ -559,9 +582,11 @@ static void iwl_init_he_hw_capab(struct ieee80211_supported_band *sband,
 	/* If not 2x2, we need to indicate 1x1 in the Midamble RX Max NSTS */
 	if ((tx_chains & rx_chains) != ANT_AB) {
 		iwl_he_capa.he_cap.he_cap_elem.phy_cap_info[1] &=
-			~IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_MAX_NSTS;
+			~IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_TX_MAX_NSTS;
 		iwl_he_capa.he_cap.he_cap_elem.phy_cap_info[2] &=
-			~IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_MAX_NSTS;
+			~IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_TX_MAX_NSTS;
+		iwl_he_capa.he_cap.he_cap_elem.phy_cap_info[7] &=
+			~IEEE80211_HE_PHY_CAP7_MAX_NC_MASK;
 	}
 }
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index b15b0d84bb7e..d46f3fbea46e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -1978,10 +1978,6 @@ static void iwl_mvm_cfg_he_sta(struct iwl_mvm *mvm,
 			sta_ctxt_cmd.htc_flags |=
 				cpu_to_le32(IWL_HE_HTC_LINK_ADAP_BOTH);
 	}
-	if (sta->he_cap.he_cap_elem.mac_cap_info[2] &
-	    IEEE80211_HE_MAC_CAP2_UL_MU_RESP_SCHED)
-		sta_ctxt_cmd.htc_flags |=
-			cpu_to_le32(IWL_HE_HTC_UL_MU_RESP_SCHED);
 	if (sta->he_cap.he_cap_elem.mac_cap_info[2] & IEEE80211_HE_MAC_CAP2_BSR)
 		sta_ctxt_cmd.htc_flags |= cpu_to_le32(IWL_HE_HTC_BSR_SUPP);
 	if (sta->he_cap.he_cap_elem.mac_cap_info[3] &
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 1068757ec42e..f3863101af78 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2008, Jouni Malinen <j@w1.fi>
  * Copyright (c) 2011, Javier Lopez <jlopex@gmail.com>
  * Copyright (c) 2016 - 2017 Intel Deutschland GmbH
+ * Copyright (C) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -2529,23 +2530,20 @@ static const struct ieee80211_sband_iftype_data he_capa_2ghz = {
 				IEEE80211_HE_MAC_CAP0_HTC_HE,
 			.mac_cap_info[1] =
 				IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US |
-				IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_8,
+				IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8,
 			.mac_cap_info[2] =
 				IEEE80211_HE_MAC_CAP2_BSR |
 				IEEE80211_HE_MAC_CAP2_MU_CASCADING |
 				IEEE80211_HE_MAC_CAP2_ACK_EN,
 			.mac_cap_info[3] =
-				IEEE80211_HE_MAC_CAP3_GRP_ADDR_MULTI_STA_BA_DL_MU |
 				IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
-				IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_2,
+				IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2,
 			.mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU,
-			.phy_cap_info[0] =
-				IEEE80211_HE_PHY_CAP0_DUAL_BAND,
 			.phy_cap_info[1] =
 				IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK |
 				IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A |
 				IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD |
-				IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_MAX_NSTS,
+				IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_TX_MAX_NSTS,
 			.phy_cap_info[2] =
 				IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US |
 				IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ |
@@ -2579,18 +2577,16 @@ static const struct ieee80211_sband_iftype_data he_capa_5ghz = {
 				IEEE80211_HE_MAC_CAP0_HTC_HE,
 			.mac_cap_info[1] =
 				IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US |
-				IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_8,
+				IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8,
 			.mac_cap_info[2] =
 				IEEE80211_HE_MAC_CAP2_BSR |
 				IEEE80211_HE_MAC_CAP2_MU_CASCADING |
 				IEEE80211_HE_MAC_CAP2_ACK_EN,
 			.mac_cap_info[3] =
-				IEEE80211_HE_MAC_CAP3_GRP_ADDR_MULTI_STA_BA_DL_MU |
 				IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
-				IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_2,
+				IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2,
 			.mac_cap_info[4] = IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU,
 			.phy_cap_info[0] =
-				IEEE80211_HE_PHY_CAP0_DUAL_BAND |
 				IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
 				IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G |
 				IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G,
@@ -2598,7 +2594,7 @@ static const struct ieee80211_sband_iftype_data he_capa_5ghz = {
 				IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK |
 				IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A |
 				IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD |
-				IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_MAX_NSTS,
+				IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_TX_MAX_NSTS,
 			.phy_cap_info[2] =
 				IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US |
 				IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ |
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 280600a10111..c4809ad8ab46 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1555,11 +1555,11 @@ struct ieee80211_vht_operation {
  * struct ieee80211_he_cap_elem - HE capabilities element
  *
  * This structure is the "HE capabilities element" fixed fields as
- * described in P802.11ax_D2.0 section 9.4.2.237.2 and 9.4.2.237.3
+ * described in P802.11ax_D3.0 section 9.4.2.237.2 and 9.4.2.237.3
  */
 struct ieee80211_he_cap_elem {
-	u8 mac_cap_info[5];
-	u8 phy_cap_info[9];
+	u8 mac_cap_info[6];
+	u8 phy_cap_info[11];
 } __packed;
 
 #define IEEE80211_TX_RX_MCS_NSS_DESC_MAX_LEN	5
@@ -1738,15 +1738,15 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 #define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_8US		0x04
 #define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US		0x08
 #define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_MASK		0x0c
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_1		0x00
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_2		0x10
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_3		0x20
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_4		0x30
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_5		0x40
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_6		0x50
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_7		0x60
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_8		0x70
-#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_QOS_MASK		0x70
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_1		0x00
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_2		0x10
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_3		0x20
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_4		0x30
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_5		0x40
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_6		0x50
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_7		0x60
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8		0x70
+#define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_MASK		0x70
 
 /* Link adaptation is split between byte HE_MAC_CAP1 and
  * HE_MAC_CAP2. It should be set only if IEEE80211_HE_MAC_CAP0_HTC_HE
@@ -1760,14 +1760,13 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 
 #define IEEE80211_HE_MAC_CAP2_LINK_ADAPTATION			0x01
 #define IEEE80211_HE_MAC_CAP2_ALL_ACK				0x02
-#define IEEE80211_HE_MAC_CAP2_UL_MU_RESP_SCHED			0x04
+#define IEEE80211_HE_MAC_CAP2_TRS				0x04
 #define IEEE80211_HE_MAC_CAP2_BSR				0x08
 #define IEEE80211_HE_MAC_CAP2_BCAST_TWT				0x10
 #define IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP			0x20
 #define IEEE80211_HE_MAC_CAP2_MU_CASCADING			0x40
 #define IEEE80211_HE_MAC_CAP2_ACK_EN				0x80
 
-#define IEEE80211_HE_MAC_CAP3_GRP_ADDR_MULTI_STA_BA_DL_MU	0x01
 #define IEEE80211_HE_MAC_CAP3_OMI_CONTROL			0x02
 #define IEEE80211_HE_MAC_CAP3_OFDMA_RA				0x04
 
@@ -1775,25 +1774,34 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
  * A-MDPU Length Exponent field in the HT capabilities, VHT capabilities and the
  * same field in the HE capabilities.
  */
-#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_USE_VHT	0x00
-#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_1		0x08
-#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_2		0x10
-#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_RESERVED	0x18
-#define IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_MASK		0x18
-#define IEEE80211_HE_MAC_CAP3_A_AMSDU_FRAG			0x20
+#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_USE_VHT	0x00
+#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_1		0x08
+#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2		0x10
+#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_RESERVED	0x18
+#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK		0x18
+#define IEEE80211_HE_MAC_CAP3_AMSDU_FRAG			0x20
 #define IEEE80211_HE_MAC_CAP3_FLEX_TWT_SCHED			0x40
 #define IEEE80211_HE_MAC_CAP3_RX_CTRL_FRAME_TO_MULTIBSS		0x80
 
 #define IEEE80211_HE_MAC_CAP4_BSRP_BQRP_A_MPDU_AGG		0x01
 #define IEEE80211_HE_MAC_CAP4_QTP				0x02
 #define IEEE80211_HE_MAC_CAP4_BQR				0x04
-#define IEEE80211_HE_MAC_CAP4_SR_RESP				0x08
+#define IEEE80211_HE_MAC_CAP4_SRP_RESP				0x08
 #define IEEE80211_HE_MAC_CAP4_NDP_FB_REP			0x10
 #define IEEE80211_HE_MAC_CAP4_OPS				0x20
 #define IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU			0x40
+/* Multi TID agg TX is split between byte #4 and #5
+ * The value is a combination of B39,B40,B41
+ */
+#define IEEE80211_HE_MAC_CAP4_MULTI_TID_AGG_TX_QOS_B39		0x80
+
+#define IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B40		0x01
+#define IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B41		0x02
+#define IEEE80211_HE_MAC_CAP5_SUBCHAN_SELECVITE_TRANSMISSION	0x04
+#define IEEE80211_HE_MAC_CAP5_UL_2x996_TONE_RU			0x08
+#define IEEE80211_HE_MAC_CAP5_OM_CTRL_UL_MU_DATA_DIS_RX		0x10
 
 /* 802.11ax HE PHY capabilities */
-#define IEEE80211_HE_PHY_CAP0_DUAL_BAND					0x01
 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G		0x02
 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G	0x04
 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G		0x08
@@ -1810,10 +1818,10 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 #define IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A				0x10
 #define IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD			0x20
 #define IEEE80211_HE_PHY_CAP1_HE_LTF_AND_GI_FOR_HE_PPDUS_0_8US		0x40
-/* Midamble RX Max NSTS is split between byte #2 and byte #3 */
-#define IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_MAX_NSTS			0x80
+/* Midamble RX/TX Max NSTS is split between byte #2 and byte #3 */
+#define IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_TX_MAX_NSTS			0x80
 
-#define IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_MAX_NSTS			0x01
+#define IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_TX_MAX_NSTS			0x01
 #define IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US			0x02
 #define IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ			0x04
 #define IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ			0x08
@@ -1914,7 +1922,19 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 #define IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU			0x04
 #define IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU			0x08
 #define IEEE80211_HE_PHY_CAP8_HE_ER_SU_1XLTF_AND_08_US_GI		0x10
-#define IEEE80211_HE_PHY_CAP8_MIDAMBLE_RX_2X_AND_1XLTF			0x20
+#define IEEE80211_HE_PHY_CAP8_MIDAMBLE_RX_TX_2X_AND_1XLTF		0x20
+#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_20MHZ				0x00
+#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_40MHZ				0x40
+#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_80MHZ				0x80
+#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_160_OR_80P80_MHZ		0xc0
+#define IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_MASK				0xc0
+
+#define IEEE80211_HE_PHY_CAP9_LONGER_THAN_16_SIGB_OFDM_SYM		0x01
+#define IEEE80211_HE_PHY_CAP9_NON_TRIGGERED_CQI_FEEDBACK		0x02
+#define IEEE80211_HE_PHY_CAP9_TX_1024_QAM_LESS_THAN_242_TONE_RU		0x04
+#define IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU		0x08
+#define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB	0x10
+#define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB	0x20
 
 /* 802.11ax HE TX/RX MCS NSS Support  */
 #define IEEE80211_TX_RX_MCS_NSS_SUPP_HIGHEST_MCS_POS			(3)
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 95124978947f..af5185a836e5 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -522,8 +522,8 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf,
 
 	cap = hec->he_cap_elem.mac_cap_info;
 	p += scnprintf(p, buf_sz + buf - p,
-		       "MAC-CAP: %#.2x %#.2x %#.2x %#.2x %#.2x\n",
-		       cap[0], cap[1], cap[2], cap[3], cap[4]);
+		       "MAC-CAP: %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x\n",
+		       cap[0], cap[1], cap[2], cap[3], cap[4], cap[5]);
 
 #define PRINT(fmt, ...)							\
 	p += scnprintf(p, buf_sz + buf - p, "\t\t" fmt "\n",		\
@@ -563,7 +563,8 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf,
 			    "MIN-FRAG-SIZE-%d", UNLIMITED, "UNLIMITED");
 	PFLAG_RANGE_DEFAULT(MAC, 1, TF_MAC_PAD_DUR, 0, 8, 0,
 			    "TF-MAC-PAD-DUR-%dUS", MASK, "UNKNOWN");
-	PFLAG_RANGE(MAC, 1, MULTI_TID_AGG_QOS, 0, 1, 1, "MULTI-TID-AGG-QOS-%d");
+	PFLAG_RANGE(MAC, 1, MULTI_TID_AGG_RX_QOS, 0, 1, 1,
+		    "MULTI-TID-AGG-RX-QOS-%d");
 
 	if (cap[0] & IEEE80211_HE_MAC_CAP0_HTC_HE) {
 		switch (((cap[2] << 1) | (cap[1] >> 7)) & 0x3) {
@@ -583,52 +584,55 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf,
 	}
 
 	PFLAG(MAC, 2, ALL_ACK, "ALL-ACK");
-	PFLAG(MAC, 2, UL_MU_RESP_SCHED, "UL-MU-RESP-SCHED");
+	PFLAG(MAC, 2, TRS, "TRS");
 	PFLAG(MAC, 2, BSR, "BSR");
 	PFLAG(MAC, 2, BCAST_TWT, "BCAST-TWT");
 	PFLAG(MAC, 2, 32BIT_BA_BITMAP, "32BIT-BA-BITMAP");
 	PFLAG(MAC, 2, MU_CASCADING, "MU-CASCADING");
 	PFLAG(MAC, 2, ACK_EN, "ACK-EN");
 
-	PFLAG(MAC, 3, GRP_ADDR_MULTI_STA_BA_DL_MU,
-	      "GRP-ADDR-MULTI-STA-BA-DL-MU");
 	PFLAG(MAC, 3, OMI_CONTROL, "OMI-CONTROL");
 	PFLAG(MAC, 3, OFDMA_RA, "OFDMA-RA");
 
-	switch (cap[3] & IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_MASK) {
-	case IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_USE_VHT:
-		PRINT("MAX-A-AMPDU-LEN-EXP-USE-VHT");
+	switch (cap[3] & IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK) {
+	case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_USE_VHT:
+		PRINT("MAX-AMPDU-LEN-EXP-USE-VHT");
 		break;
-	case IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_1:
-		PRINT("MAX-A-AMPDU-LEN-EXP-VHT-1");
+	case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_1:
+		PRINT("MAX-AMPDU-LEN-EXP-VHT-1");
 		break;
-	case IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_VHT_2:
-		PRINT("MAX-A-AMPDU-LEN-EXP-VHT-2");
+	case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_VHT_2:
+		PRINT("MAX-AMPDU-LEN-EXP-VHT-2");
 		break;
-	case IEEE80211_HE_MAC_CAP3_MAX_A_AMPDU_LEN_EXP_RESERVED:
-		PRINT("MAX-A-AMPDU-LEN-EXP-RESERVED");
+	case IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_RESERVED:
+		PRINT("MAX-AMPDU-LEN-EXP-RESERVED");
 		break;
 	}
 
-	PFLAG(MAC, 3, A_AMSDU_FRAG, "A-AMSDU-FRAG");
+	PFLAG(MAC, 3, AMSDU_FRAG, "AMSDU-FRAG");
 	PFLAG(MAC, 3, FLEX_TWT_SCHED, "FLEX-TWT-SCHED");
 	PFLAG(MAC, 3, RX_CTRL_FRAME_TO_MULTIBSS, "RX-CTRL-FRAME-TO-MULTIBSS");
 
 	PFLAG(MAC, 4, BSRP_BQRP_A_MPDU_AGG, "BSRP-BQRP-A-MPDU-AGG");
 	PFLAG(MAC, 4, QTP, "QTP");
 	PFLAG(MAC, 4, BQR, "BQR");
-	PFLAG(MAC, 4, SR_RESP, "SR-RESP");
+	PFLAG(MAC, 4, SRP_RESP, "SRP-RESP");
 	PFLAG(MAC, 4, NDP_FB_REP, "NDP-FB-REP");
 	PFLAG(MAC, 4, OPS, "OPS");
 	PFLAG(MAC, 4, AMDSU_IN_AMPDU, "AMSDU-IN-AMPDU");
 
+	PRINT("MULTI-TID-AGG-TX-QOS-%d", ((cap[5] << 1) | (cap[4] >> 7)) & 0x7);
+
+	PFLAG(MAC, 5, SUBCHAN_SELECVITE_TRANSMISSION,
+	      "SUBCHAN-SELECVITE-TRANSMISSION");
+	PFLAG(MAC, 5, UL_2x996_TONE_RU, "UL-2x996-TONE-RU");
+	PFLAG(MAC, 5, OM_CTRL_UL_MU_DATA_DIS_RX, "OM-CTRL-UL-MU-DATA-DIS-RX");
+
 	cap = hec->he_cap_elem.phy_cap_info;
 	p += scnprintf(p, buf_sz + buf - p,
-		       "PHY CAP: %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x\n",
+		       "PHY CAP: %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x %#.2x\n",
 		       cap[0], cap[1], cap[2], cap[3], cap[4], cap[5], cap[6],
-		       cap[7], cap[8]);
-
-	PFLAG(PHY, 0, DUAL_BAND, "DUAL-BAND");
+		       cap[7], cap[8], cap[9], cap[10]);
 
 	PFLAG(PHY, 0, CHANNEL_WIDTH_SET_40MHZ_IN_2G,
 	      "CHANNEL-WIDTH-SET-40MHZ-IN-2G");
@@ -754,7 +758,36 @@ static ssize_t sta_he_capa_read(struct file *file, char __user *userbuf,
 	PFLAG(PHY, 8, 80MHZ_IN_160MHZ_HE_PPDU, "80MHZ-IN-160MHZ-HE-PPDU");
 	PFLAG(PHY, 8, HE_ER_SU_1XLTF_AND_08_US_GI,
 	      "HE-ER-SU-1XLTF-AND-08-US-GI");
-	PFLAG(PHY, 8, MIDAMBLE_RX_2X_AND_1XLTF, "MIDAMBLE-RX-2X-AND-1XLTF");
+	PFLAG(PHY, 8, MIDAMBLE_RX_TX_2X_AND_1XLTF,
+	      "MIDAMBLE-RX-TX-2X-AND-1XLTF");
+
+	switch (cap[8] & IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_MASK) {
+	case IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_20MHZ:
+		PRINT("DDCM-MAX-BW-20MHZ");
+		break;
+	case IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_40MHZ:
+		PRINT("DCM-MAX-BW-40MHZ");
+		break;
+	case IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_80MHZ:
+		PRINT("DCM-MAX-BW-80MHZ");
+		break;
+	case IEEE80211_HE_PHY_CAP8_DCM_MAX_BW_160_OR_80P80_MHZ:
+		PRINT("DCM-MAX-BW-160-OR-80P80-MHZ");
+		break;
+	}
+
+	PFLAG(PHY, 9, LONGER_THAN_16_SIGB_OFDM_SYM,
+	      "LONGER-THAN-16-SIGB-OFDM-SYM");
+	PFLAG(PHY, 9, NON_TRIGGERED_CQI_FEEDBACK,
+	      "NON-TRIGGERED-CQI-FEEDBACK");
+	PFLAG(PHY, 9, TX_1024_QAM_LESS_THAN_242_TONE_RU,
+	      "TX-1024-QAM-LESS-THAN-242-TONE-RU");
+	PFLAG(PHY, 9, RX_1024_QAM_LESS_THAN_242_TONE_RU,
+	      "RX-1024-QAM-LESS-THAN-242-TONE-RU");
+	PFLAG(PHY, 9, RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB,
+	      "RX-FULL-BW-SU-USING-MU-WITH-COMP-SIGB");
+	PFLAG(PHY, 9, RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB,
+	      "RX-FULL-BW-SU-USING-MU-WITH-NON-COMP-SIGB");
 
 #undef PFLAG_RANGE_DEFAULT
 #undef PFLAG_RANGE
-- 
cgit v1.2.3


From 1ff2e1a44e02d4bdbb9be67c7d9acc240a67141f Mon Sep 17 00:00:00 2001
From: Harry Cutts <hcutts@chromium.org>
Date: Thu, 30 Aug 2018 14:56:19 -0700
Subject: HID: input: Create a utility class for counting scroll events

To avoid code duplication, this class counts high-resolution scroll
movements and emits the legacy low-resolution events when appropriate.
Drivers should be able to create one instance for each scroll wheel that
they need to handle.

Signed-off-by: Harry Cutts <hcutts@chromium.org>
Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/hid.h     | 28 ++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 4e94ea3e280a..6e84e7b9afcb 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -1826,3 +1826,48 @@ void hidinput_disconnect(struct hid_device *hid)
 }
 EXPORT_SYMBOL_GPL(hidinput_disconnect);
 
+/**
+ * hid_scroll_counter_handle_scroll() - Send high- and low-resolution scroll
+ *                                      events given a high-resolution wheel
+ *                                      movement.
+ * @counter: a hid_scroll_counter struct describing the wheel.
+ * @hi_res_value: the movement of the wheel, in the mouse's high-resolution
+ *                units.
+ *
+ * Given a high-resolution movement, this function converts the movement into
+ * microns and emits high-resolution scroll events for the input device. It also
+ * uses the multiplier from &struct hid_scroll_counter to emit low-resolution
+ * scroll events when appropriate for backwards-compatibility with userspace
+ * input libraries.
+ */
+void hid_scroll_counter_handle_scroll(struct hid_scroll_counter *counter,
+				      int hi_res_value)
+{
+	int low_res_scroll_amount;
+	/* Some wheels will rest 7/8ths of a notch from the previous notch
+	 * after slow movement, so we want the threshold for low-res events to
+	 * be in the middle of the notches (e.g. after 4/8ths) as opposed to on
+	 * the notches themselves (8/8ths).
+	 */
+	int threshold = counter->resolution_multiplier / 2;
+
+	input_report_rel(counter->dev, REL_WHEEL_HI_RES,
+			 hi_res_value * counter->microns_per_hi_res_unit);
+
+	counter->remainder += hi_res_value;
+	if (abs(counter->remainder) >= threshold) {
+		/* Add (or subtract) 1 because we want to trigger when the wheel
+		 * is half-way to the next notch (i.e. scroll 1 notch after a
+		 * 1/2 notch movement, 2 notches after a 1 1/2 notch movement,
+		 * etc.).
+		 */
+		low_res_scroll_amount =
+			counter->remainder / counter->resolution_multiplier
+			+ (hi_res_value > 0 ? 1 : -1);
+		input_report_rel(counter->dev, REL_WHEEL,
+				 low_res_scroll_amount);
+		counter->remainder -=
+			low_res_scroll_amount * counter->resolution_multiplier;
+	}
+}
+EXPORT_SYMBOL_GPL(hid_scroll_counter_handle_scroll);
diff --git a/include/linux/hid.h b/include/linux/hid.h
index 834e6461a690..037e37b0b0e6 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -1138,6 +1138,34 @@ static inline u32 hid_report_len(struct hid_report *report)
 int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size,
 		int interrupt);
 
+
+/**
+ * struct hid_scroll_counter - Utility class for processing high-resolution
+ *                             scroll events.
+ * @dev: the input device for which events should be reported.
+ * @microns_per_hi_res_unit: the amount moved by the user's finger for each
+ *                           high-resolution unit reported by the mouse, in
+ *                           microns.
+ * @resolution_multiplier: the wheel's resolution in high-resolution mode as a
+ *                         multiple of its lower resolution. For example, if
+ *                         moving the wheel by one "notch" would result in a
+ *                         value of 1 in low-resolution mode but 8 in
+ *                         high-resolution, the multiplier is 8.
+ * @remainder: counts the number of high-resolution units moved since the last
+ *             low-resolution event (REL_WHEEL or REL_HWHEEL) was sent. Should
+ *             only be used by class methods.
+ */
+struct hid_scroll_counter {
+	struct input_dev *dev;
+	int microns_per_hi_res_unit;
+	int resolution_multiplier;
+
+	int remainder;
+};
+
+void hid_scroll_counter_handle_scroll(struct hid_scroll_counter *counter,
+				      int hi_res_value);
+
 /* HID quirks API */
 unsigned long hid_lookup_quirk(const struct hid_device *hdev);
 int hid_quirks_init(char **quirks_param, __u16 bus, int count);
-- 
cgit v1.2.3


From d23df2dc56325c72b51670b1fb400ddd23dc17cd Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 3 Sep 2018 12:51:59 -0700
Subject: linux/mod_devicetable.h: fix kernel-doc missing notation for
 typec_device_id

Fix kernel-doc warning for missing struct member description:

../include/linux/mod_devicetable.h:763: warning: Function parameter or member 'driver_data' not described in 'typec_device_id'

Fixes: 8a37d87d72f0c ("usb: typec: Bus type for alternate modes")

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/mod_devicetable.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 1298a7daa57d..01797cb4587e 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -754,6 +754,7 @@ struct tb_service_id {
  * struct typec_device_id - USB Type-C alternate mode identifiers
  * @svid: Standard or Vendor ID
  * @mode: Mode index
+ * @driver_data: Driver specific data
  */
 struct typec_device_id {
 	__u16 svid;
-- 
cgit v1.2.3


From 865e63b04e9b2a658d7f26bd13a71dcd964a9118 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 4 Sep 2018 16:26:11 -0400
Subject: tracing: Add back in rcu_irq_enter/exit_irqson() for rcuidle
 tracepoints

Borislav reported the following splat:

 =============================
 WARNING: suspicious RCU usage
 4.19.0-rc1+ #1 Not tainted
 -----------------------------
 ./include/linux/rcupdate.h:631 rcu_read_lock() used illegally while idle!
 other info that might help us debug this:

 RCU used illegally from idle CPU!
 rcu_scheduler_active = 2, debug_locks = 1
 RCU used illegally from extended quiescent state!
 1 lock held by swapper/0/0:
  #0: 000000004557ee0e (rcu_read_lock){....}, at: perf_event_output_forward+0x0/0x130

 stack backtrace:
 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.19.0-rc1+ #1
 Hardware name: LENOVO 2320CTO/2320CTO, BIOS G2ET86WW (2.06 ) 11/13/2012
 Call Trace:
  dump_stack+0x85/0xcb
  perf_event_output_forward+0xf6/0x130
  __perf_event_overflow+0x52/0xe0
  perf_swevent_overflow+0x91/0xb0
  perf_tp_event+0x11a/0x350
  ? find_held_lock+0x2d/0x90
  ? __lock_acquire+0x2ce/0x1350
  ? __lock_acquire+0x2ce/0x1350
  ? retint_kernel+0x2d/0x2d
  ? find_held_lock+0x2d/0x90
  ? tick_nohz_get_sleep_length+0x83/0xb0
  ? perf_trace_cpu+0xbb/0xd0
  ? perf_trace_buf_alloc+0x5a/0xa0
  perf_trace_cpu+0xbb/0xd0
  cpuidle_enter_state+0x185/0x340
  do_idle+0x1eb/0x260
  cpu_startup_entry+0x5f/0x70
  start_kernel+0x49b/0x4a6
  secondary_startup_64+0xa4/0xb0

This is due to the tracepoints moving to SRCU usage which does not require
RCU to be "watching". But perf uses these tracepoints with RCU and expects
it to be. Hence, we still need to add in the rcu_irq_enter/exit_irqson()
calls for "rcuidle" tracepoints. This is a temporary fix until we have SRCU
working in NMI context, and then perf can be converted to use that instead
of normal RCU.

Link: http://lkml.kernel.org/r/20180904162611.6a120068@gandalf.local.home

Cc: x86-ml <x86@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Reported-by: Borislav Petkov <bp@alien8.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Reviewed-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Fixes: e6753f23d961d ("tracepoint: Make rcuidle tracepoint callers use SRCU")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 7f2e16e76ac4..041f7e56a289 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -158,8 +158,10 @@ extern void syscall_unregfunc(void);
 		 * For rcuidle callers, use srcu since sched-rcu	\
 		 * doesn't work from the idle path.			\
 		 */							\
-		if (rcuidle)						\
+		if (rcuidle) {						\
 			idx = srcu_read_lock_notrace(&tracepoint_srcu);	\
+			rcu_irq_enter_irqson();				\
+		}							\
 									\
 		it_func_ptr = rcu_dereference_raw((tp)->funcs);		\
 									\
@@ -171,8 +173,10 @@ extern void syscall_unregfunc(void);
 			} while ((++it_func_ptr)->func);		\
 		}							\
 									\
-		if (rcuidle)						\
+		if (rcuidle) {						\
+			rcu_irq_exit_irqson();				\
 			srcu_read_unlock_notrace(&tracepoint_srcu, idx);\
+		}							\
 									\
 		preempt_enable_notrace();				\
 	} while (0)
-- 
cgit v1.2.3


From 76d5581c870454be5f1f1a106c57985902e7ea20 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Sun, 5 Aug 2018 09:19:33 +0300
Subject: net/mlx5: Fix use-after-free in self-healing flow

When the mlx5 health mechanism detects a problem while the driver
is in the middle of init_one or remove_one, the driver needs to prevent
the health mechanism from scheduling future work; if future work
is scheduled, there is a problem with use-after-free: the system WQ
tries to run the work item (which has been freed) at the scheduled
future time.

Prevent this by disabling work item scheduling in the health mechanism
when the driver is in the middle of init_one() or remove_one().

Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters")
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Reviewed-by: Feras Daoud <ferasda@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/health.c | 10 +++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/main.c   |  6 +++---
 include/linux/mlx5/driver.h                      |  2 +-
 3 files changed, 13 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index d39b0b7011b2..9f39aeca863f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -331,9 +331,17 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
 	add_timer(&health->timer);
 }
 
-void mlx5_stop_health_poll(struct mlx5_core_dev *dev)
+void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
 {
 	struct mlx5_core_health *health = &dev->priv.health;
+	unsigned long flags;
+
+	if (disable_health) {
+		spin_lock_irqsave(&health->wq_lock, flags);
+		set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
+		set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+		spin_unlock_irqrestore(&health->wq_lock, flags);
+	}
 
 	del_timer_sync(&health->timer);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index cf3e4a659052..739aad0a0b35 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1286,7 +1286,7 @@ err_cleanup_once:
 		mlx5_cleanup_once(dev);
 
 err_stop_poll:
-	mlx5_stop_health_poll(dev);
+	mlx5_stop_health_poll(dev, boot);
 	if (mlx5_cmd_teardown_hca(dev)) {
 		dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
 		goto out_err;
@@ -1346,7 +1346,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 	mlx5_free_irq_vectors(dev);
 	if (cleanup)
 		mlx5_cleanup_once(dev);
-	mlx5_stop_health_poll(dev);
+	mlx5_stop_health_poll(dev, cleanup);
 	err = mlx5_cmd_teardown_hca(dev);
 	if (err) {
 		dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
@@ -1608,7 +1608,7 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
 	 * with the HCA, so the health polll is no longer needed.
 	 */
 	mlx5_drain_health_wq(dev);
-	mlx5_stop_health_poll(dev);
+	mlx5_stop_health_poll(dev, false);
 
 	ret = mlx5_cmd_force_teardown_hca(dev);
 	if (ret) {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 7a452716de4b..aa65f58c6610 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1052,7 +1052,7 @@ int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn);
 void mlx5_health_cleanup(struct mlx5_core_dev *dev);
 int mlx5_health_init(struct mlx5_core_dev *dev);
 void mlx5_start_health_poll(struct mlx5_core_dev *dev);
-void mlx5_stop_health_poll(struct mlx5_core_dev *dev);
+void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health);
 void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
 void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
 void mlx5_drain_health_recovery(struct mlx5_core_dev *dev);
-- 
cgit v1.2.3


From 8d71e818506718e8d7032ce824b5c74a17d4f7a5 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Tue, 21 Aug 2018 16:04:41 +0300
Subject: net/mlx5: Use u16 for Work Queue buffer fragment size

Minimal stride size is 16.
Hence, the number of strides in a fragment (of PAGE_SIZE)
is <= PAGE_SIZE / 16 <= 4K.

u16 is sufficient to represent this.

Fixes: 388ca8be0037 ("IB/mlx5: Implement fragmented completion queue (CQ)")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/wq.c | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/wq.h | 2 +-
 include/linux/mlx5/driver.h                  | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index c8c315eb5128..d838af9539b1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -39,9 +39,9 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
 	return (u32)wq->fbc.sz_m1 + 1;
 }
 
-u32 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq)
+u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq)
 {
-	return (u32)wq->fbc.frag_sz_m1 + 1;
+	return wq->fbc.frag_sz_m1 + 1;
 }
 
 u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 2bd4c3184eba..3a1a170bb2d7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -80,7 +80,7 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		       void *wqc, struct mlx5_wq_cyc *wq,
 		       struct mlx5_wq_ctrl *wq_ctrl);
 u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
-u32 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq);
+u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq);
 
 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *qpc, struct mlx5_wq_qp *wq,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index aa65f58c6610..3a1258fd8ac3 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -362,7 +362,7 @@ struct mlx5_frag_buf {
 struct mlx5_frag_buf_ctrl {
 	struct mlx5_frag_buf	frag_buf;
 	u32			sz_m1;
-	u32			frag_sz_m1;
+	u16			frag_sz_m1;
 	u32			strides_offset;
 	u8			log_sz;
 	u8			log_stride;
-- 
cgit v1.2.3


From a09036221092989b88c55d24d1f12ceb1d7d361f Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Tue, 21 Aug 2018 16:07:58 +0300
Subject: net/mlx5: Use u16 for Work Queue buffer strides offset

Minimal stride size is 16.
Hence, the number of strides in a fragment (of PAGE_SIZE)
is <= PAGE_SIZE / 16 <= 4K.

u16 is sufficient to represent this.

Fixes: d7037ad73daa ("net/mlx5: Fix QP fragmented buffer allocation")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/wq.c | 2 +-
 include/linux/mlx5/driver.h                  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index d838af9539b1..68e7f8df2a6d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -138,7 +138,7 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *qpc, struct mlx5_wq_qp *wq,
 		      struct mlx5_wq_ctrl *wq_ctrl)
 {
-	u32 sq_strides_offset;
+	u16 sq_strides_offset;
 	u32 rq_pg_remainder;
 	int err;
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 3a1258fd8ac3..66d94b4557cf 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -363,7 +363,7 @@ struct mlx5_frag_buf_ctrl {
 	struct mlx5_frag_buf	frag_buf;
 	u32			sz_m1;
 	u16			frag_sz_m1;
-	u32			strides_offset;
+	u16			strides_offset;
 	u8			log_sz;
 	u8			log_stride;
 	u8			log_frag_strides;
@@ -995,7 +995,7 @@ static inline u32 mlx5_base_mkey(const u32 key)
 }
 
 static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz,
-					u32 strides_offset,
+					u16 strides_offset,
 					struct mlx5_frag_buf_ctrl *fbc)
 {
 	fbc->log_stride = log_stride;
-- 
cgit v1.2.3


From 83033688b7ade18d2dbbcefa810f02ff66ba549d Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 23 Jul 2018 10:55:39 +0300
Subject: net/mlx5: Change flow counters addlist type to single linked list

In order to prevent flow counters stats work function from traversing whole
flow counters tree while searching for deleted flow counters, new list to
store deleted flow counters will be added to struct mlx5_fc_stats. However,
the flow counter structure itself has no space left to store any more data
in first cache line. To free space that is needed to store additional list
node, convert current addlist double linked list (two pointers per node) to
atomic single linked list (one pointer per node).

Lockless NULL-terminated single linked list data type doesn't require any
additional external synchronization for operations used by flow counters
module (add single new element, remove all elements from list and traverse
them). Remove addlist_lock that is no longer needed.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Amir Vadai <amir@vadai.me>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  |  3 +-
 .../net/ethernet/mellanox/mlx5/core/fs_counters.c  | 45 ++++++++++------------
 include/linux/mlx5/driver.h                        |  4 +-
 3 files changed, 23 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 32070e5d993d..f68590291e0c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -36,6 +36,7 @@
 #include <linux/refcount.h>
 #include <linux/mlx5/fs.h>
 #include <linux/rhashtable.h>
+#include <linux/llist.h>
 
 enum fs_node_type {
 	FS_TYPE_NAMESPACE,
@@ -139,7 +140,7 @@ struct mlx5_fc_cache {
 
 struct mlx5_fc {
 	struct rb_node node;
-	struct list_head list;
+	struct llist_node addlist;
 
 	/* last{packets,bytes} members are used when calculating the delta since
 	 * last reading
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 58af6be13dfa..d996d6cf9e19 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -52,7 +52,9 @@
  * access to counter list:
  * - create (user context)
  *   - mlx5_fc_create() only adds to an addlist to be used by
- *     mlx5_fc_stats_query_work(). addlist is protected by a spinlock.
+ *     mlx5_fc_stats_query_work(). addlist is a lockless single linked list
+ *     that doesn't require any additional synchronization when adding single
+ *     node.
  *   - spawn thread to do the actual destroy
  *
  * - destroy (user context)
@@ -156,28 +158,29 @@ out:
 	return node;
 }
 
+static void mlx5_free_fc(struct mlx5_core_dev *dev,
+			 struct mlx5_fc *counter)
+{
+	mlx5_cmd_fc_free(dev, counter->id);
+	kfree(counter);
+}
+
 static void mlx5_fc_stats_work(struct work_struct *work)
 {
 	struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
 						 priv.fc_stats.work.work);
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	struct llist_node *tmplist = llist_del_all(&fc_stats->addlist);
 	unsigned long now = jiffies;
 	struct mlx5_fc *counter = NULL;
 	struct mlx5_fc *last = NULL;
 	struct rb_node *node;
-	LIST_HEAD(tmplist);
-
-	spin_lock(&fc_stats->addlist_lock);
 
-	list_splice_tail_init(&fc_stats->addlist, &tmplist);
-
-	if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters))
+	if (tmplist || !RB_EMPTY_ROOT(&fc_stats->counters))
 		queue_delayed_work(fc_stats->wq, &fc_stats->work,
 				   fc_stats->sampling_interval);
 
-	spin_unlock(&fc_stats->addlist_lock);
-
-	list_for_each_entry(counter, &tmplist, list)
+	llist_for_each_entry(counter, tmplist, addlist)
 		mlx5_fc_stats_insert(&fc_stats->counters, counter);
 
 	node = rb_first(&fc_stats->counters);
@@ -229,9 +232,7 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
 		counter->cache.lastuse = jiffies;
 		counter->aging = true;
 
-		spin_lock(&fc_stats->addlist_lock);
-		list_add(&counter->list, &fc_stats->addlist);
-		spin_unlock(&fc_stats->addlist_lock);
+		llist_add(&counter->addlist, &fc_stats->addlist);
 
 		mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
 	}
@@ -268,8 +269,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
 
 	fc_stats->counters = RB_ROOT;
-	INIT_LIST_HEAD(&fc_stats->addlist);
-	spin_lock_init(&fc_stats->addlist_lock);
+	init_llist_head(&fc_stats->addlist);
 
 	fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
 	if (!fc_stats->wq)
@@ -284,6 +284,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
 void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
 {
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	struct llist_node *tmplist;
 	struct mlx5_fc *counter;
 	struct mlx5_fc *tmp;
 	struct rb_node *node;
@@ -292,13 +293,9 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
 	destroy_workqueue(dev->priv.fc_stats.wq);
 	dev->priv.fc_stats.wq = NULL;
 
-	list_for_each_entry_safe(counter, tmp, &fc_stats->addlist, list) {
-		list_del(&counter->list);
-
-		mlx5_cmd_fc_free(dev, counter->id);
-
-		kfree(counter);
-	}
+	tmplist = llist_del_all(&fc_stats->addlist);
+	llist_for_each_entry_safe(counter, tmp, tmplist, addlist)
+		mlx5_free_fc(dev, counter);
 
 	node = rb_first(&fc_stats->counters);
 	while (node) {
@@ -308,9 +305,7 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
 
 		rb_erase(&counter->node, &fc_stats->counters);
 
-		mlx5_cmd_fc_free(dev, counter->id);
-
-		kfree(counter);
+		mlx5_free_fc(dev, counter);
 	}
 }
 
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 7a452716de4b..c00549293982 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -584,9 +584,7 @@ struct mlx5_irq_info {
 
 struct mlx5_fc_stats {
 	struct rb_root counters;
-	struct list_head addlist;
-	/* protect addlist add/splice operations */
-	spinlock_t addlist_lock;
+	struct llist_head addlist;
 
 	struct workqueue_struct *wq;
 	struct delayed_work work;
-- 
cgit v1.2.3


From 6e5e22839136fdb466af0aa46ff2404713dff974 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 23 Jul 2018 11:32:05 +0300
Subject: net/mlx5: Add new list to store deleted flow counters

In order to prevent flow counters stats work function from traversing whole
flow counters tree while searching for deleted flow counters, new list to
store deleted flow counters is added to struct mlx5_fc_stats. Lockless
NULL-terminated single linked list data type is used due to following
reasons:
 - This use case only needs to add single element to list and
 remove/iterate whole list. Lockless list doesn't require any additional
 synchronization for these operations.
 - First cache line of flow counter data structure only has space to store
 single additional pointer, which precludes usage of double linked list.

Remove flow counter 'deleted' flag that is no longer needed.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Amir Vadai <amir@vadai.me>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  |  2 +-
 .../net/ethernet/mellanox/mlx5/core/fs_counters.c  | 34 ++++++++--------------
 include/linux/mlx5/driver.h                        |  1 +
 3 files changed, 14 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index f68590291e0c..617d6239c5f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -141,6 +141,7 @@ struct mlx5_fc_cache {
 struct mlx5_fc {
 	struct rb_node node;
 	struct llist_node addlist;
+	struct llist_node dellist;
 
 	/* last{packets,bytes} members are used when calculating the delta since
 	 * last reading
@@ -149,7 +150,6 @@ struct mlx5_fc {
 	u64 lastbytes;
 
 	u32 id;
-	bool deleted;
 	bool aging;
 
 	struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index d996d6cf9e19..f1266f215a31 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -58,7 +58,7 @@
  *   - spawn thread to do the actual destroy
  *
  * - destroy (user context)
- *   - mark a counter as deleted
+ *   - add a counter to lockless dellist
  *   - spawn thread to do the actual del
  *
  * - dump (user context)
@@ -171,9 +171,8 @@ static void mlx5_fc_stats_work(struct work_struct *work)
 						 priv.fc_stats.work.work);
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
 	struct llist_node *tmplist = llist_del_all(&fc_stats->addlist);
+	struct mlx5_fc *counter = NULL, *last = NULL, *tmp;
 	unsigned long now = jiffies;
-	struct mlx5_fc *counter = NULL;
-	struct mlx5_fc *last = NULL;
 	struct rb_node *node;
 
 	if (tmplist || !RB_EMPTY_ROOT(&fc_stats->counters))
@@ -183,26 +182,17 @@ static void mlx5_fc_stats_work(struct work_struct *work)
 	llist_for_each_entry(counter, tmplist, addlist)
 		mlx5_fc_stats_insert(&fc_stats->counters, counter);
 
-	node = rb_first(&fc_stats->counters);
-	while (node) {
-		counter = rb_entry(node, struct mlx5_fc, node);
-
-		node = rb_next(node);
-
-		if (counter->deleted) {
-			rb_erase(&counter->node, &fc_stats->counters);
-
-			mlx5_cmd_fc_free(dev, counter->id);
-
-			kfree(counter);
-			continue;
-		}
+	tmplist = llist_del_all(&fc_stats->dellist);
+	llist_for_each_entry_safe(counter, tmp, tmplist, dellist) {
+		rb_erase(&counter->node, &fc_stats->counters);
 
-		last = counter;
+		mlx5_free_fc(dev, counter);
 	}
 
-	if (time_before(now, fc_stats->next_query) || !last)
+	node = rb_last(&fc_stats->counters);
+	if (time_before(now, fc_stats->next_query) || !node)
 		return;
+	last = rb_entry(node, struct mlx5_fc, node);
 
 	node = rb_first(&fc_stats->counters);
 	while (node) {
@@ -254,13 +244,12 @@ void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
 		return;
 
 	if (counter->aging) {
-		counter->deleted = true;
+		llist_add(&counter->dellist, &fc_stats->dellist);
 		mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
 		return;
 	}
 
-	mlx5_cmd_fc_free(dev, counter->id);
-	kfree(counter);
+	mlx5_free_fc(dev, counter);
 }
 EXPORT_SYMBOL(mlx5_fc_destroy);
 
@@ -270,6 +259,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
 
 	fc_stats->counters = RB_ROOT;
 	init_llist_head(&fc_stats->addlist);
+	init_llist_head(&fc_stats->dellist);
 
 	fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
 	if (!fc_stats->wq)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index c00549293982..4b53ac64004b 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -585,6 +585,7 @@ struct mlx5_irq_info {
 struct mlx5_fc_stats {
 	struct rb_root counters;
 	struct llist_head addlist;
+	struct llist_head dellist;
 
 	struct workqueue_struct *wq;
 	struct delayed_work work;
-- 
cgit v1.2.3


From 9aff93d7d0d4b3f3076d7bd12a4ad06ef1cf9804 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Tue, 24 Jul 2018 09:52:11 +0300
Subject: net/mlx5: Store flow counters in a list

In order to improve performance of flow counter stats query loop that
traverses all configured flow counters, replace rb_tree with double-linked
list. This change improves performance of traversing flow counters by
removing the tree traversal. (profiling data showed that call to rb_next
was most top CPU consumer)

However, lookup of flow flow counter in list becomes linear, instead of
logarithmic. This problem is fixed by next patch in series, which adds idr
for fast lookup. Idr is to be used because it is not an intrusive data
structure and doesn't require adding any new members to struct mlx5_fc,
which allows its control data part to stay <= 1 cache line in size.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Amir Vadai <amir@vadai.me>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h  |  2 +-
 .../net/ethernet/mellanox/mlx5/core/fs_counters.c  | 88 ++++++++++------------
 include/linux/mlx5/driver.h                        |  2 +-
 3 files changed, 42 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 617d6239c5f3..a06f83c0c2b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -139,7 +139,7 @@ struct mlx5_fc_cache {
 };
 
 struct mlx5_fc {
-	struct rb_node node;
+	struct list_head list;
 	struct llist_node addlist;
 	struct llist_node dellist;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index f1266f215a31..90ebfee37508 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -73,36 +73,38 @@
  *   elapsed, the thread will actually query the hardware.
  */
 
-static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter)
+static struct list_head *mlx5_fc_counters_lookup_next(struct mlx5_core_dev *dev,
+						      u32 id)
 {
-	struct rb_node **new = &root->rb_node;
-	struct rb_node *parent = NULL;
-
-	while (*new) {
-		struct mlx5_fc *this = rb_entry(*new, struct mlx5_fc, node);
-		int result = counter->id - this->id;
-
-		parent = *new;
-		if (result < 0)
-			new = &((*new)->rb_left);
-		else
-			new = &((*new)->rb_right);
-	}
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	struct mlx5_fc *counter;
+
+	list_for_each_entry(counter, &fc_stats->counters, list)
+		if (counter->id > id)
+			return &counter->list;
+
+	return &fc_stats->counters;
+}
+
+static void mlx5_fc_stats_insert(struct mlx5_core_dev *dev,
+				 struct mlx5_fc *counter)
+{
+	struct list_head *next = mlx5_fc_counters_lookup_next(dev, counter->id);
 
-	/* Add new node and rebalance tree. */
-	rb_link_node(&counter->node, parent, new);
-	rb_insert_color(&counter->node, root);
+	list_add_tail(&counter->list, next);
 }
 
-/* The function returns the last node that was queried so the caller
+/* The function returns the last counter that was queried so the caller
  * function can continue calling it till all counters are queried.
  */
-static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
+static struct mlx5_fc *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
 					   struct mlx5_fc *first,
 					   u32 last_id)
 {
+	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	struct mlx5_fc *counter = NULL;
 	struct mlx5_cmd_fc_bulk *b;
-	struct rb_node *node = NULL;
+	bool more = false;
 	u32 afirst_id;
 	int num;
 	int err;
@@ -132,14 +134,16 @@ static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
 		goto out;
 	}
 
-	for (node = &first->node; node; node = rb_next(node)) {
-		struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node);
+	counter = first;
+	list_for_each_entry_from(counter, &fc_stats->counters, list) {
 		struct mlx5_fc_cache *c = &counter->cache;
 		u64 packets;
 		u64 bytes;
 
-		if (counter->id > last_id)
+		if (counter->id > last_id) {
+			more = true;
 			break;
+		}
 
 		mlx5_cmd_fc_bulk_get(dev, b,
 				     counter->id, &packets, &bytes);
@@ -155,7 +159,7 @@ static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
 out:
 	mlx5_cmd_fc_bulk_free(b);
 
-	return node;
+	return more ? counter : NULL;
 }
 
 static void mlx5_free_fc(struct mlx5_core_dev *dev,
@@ -173,33 +177,30 @@ static void mlx5_fc_stats_work(struct work_struct *work)
 	struct llist_node *tmplist = llist_del_all(&fc_stats->addlist);
 	struct mlx5_fc *counter = NULL, *last = NULL, *tmp;
 	unsigned long now = jiffies;
-	struct rb_node *node;
 
-	if (tmplist || !RB_EMPTY_ROOT(&fc_stats->counters))
+	if (tmplist || !list_empty(&fc_stats->counters))
 		queue_delayed_work(fc_stats->wq, &fc_stats->work,
 				   fc_stats->sampling_interval);
 
 	llist_for_each_entry(counter, tmplist, addlist)
-		mlx5_fc_stats_insert(&fc_stats->counters, counter);
+		mlx5_fc_stats_insert(dev, counter);
 
 	tmplist = llist_del_all(&fc_stats->dellist);
 	llist_for_each_entry_safe(counter, tmp, tmplist, dellist) {
-		rb_erase(&counter->node, &fc_stats->counters);
+		list_del(&counter->list);
 
 		mlx5_free_fc(dev, counter);
 	}
 
-	node = rb_last(&fc_stats->counters);
-	if (time_before(now, fc_stats->next_query) || !node)
+	if (time_before(now, fc_stats->next_query) ||
+	    list_empty(&fc_stats->counters))
 		return;
-	last = rb_entry(node, struct mlx5_fc, node);
-
-	node = rb_first(&fc_stats->counters);
-	while (node) {
-		counter = rb_entry(node, struct mlx5_fc, node);
+	last = list_last_entry(&fc_stats->counters, struct mlx5_fc, list);
 
-		node = mlx5_fc_stats_query(dev, counter, last->id);
-	}
+	counter = list_first_entry(&fc_stats->counters, struct mlx5_fc,
+				   list);
+	while (counter)
+		counter = mlx5_fc_stats_query(dev, counter, last->id);
 
 	fc_stats->next_query = now + fc_stats->sampling_interval;
 }
@@ -257,7 +258,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
 {
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
 
-	fc_stats->counters = RB_ROOT;
+	INIT_LIST_HEAD(&fc_stats->counters);
 	init_llist_head(&fc_stats->addlist);
 	init_llist_head(&fc_stats->dellist);
 
@@ -277,7 +278,6 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
 	struct llist_node *tmplist;
 	struct mlx5_fc *counter;
 	struct mlx5_fc *tmp;
-	struct rb_node *node;
 
 	cancel_delayed_work_sync(&dev->priv.fc_stats.work);
 	destroy_workqueue(dev->priv.fc_stats.wq);
@@ -287,16 +287,8 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
 	llist_for_each_entry_safe(counter, tmp, tmplist, addlist)
 		mlx5_free_fc(dev, counter);
 
-	node = rb_first(&fc_stats->counters);
-	while (node) {
-		counter = rb_entry(node, struct mlx5_fc, node);
-
-		node = rb_next(node);
-
-		rb_erase(&counter->node, &fc_stats->counters);
-
+	list_for_each_entry_safe(counter, tmp, &fc_stats->counters, list)
 		mlx5_free_fc(dev, counter);
-	}
 }
 
 int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 4b53ac64004b..61bed33e6675 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -583,7 +583,7 @@ struct mlx5_irq_info {
 };
 
 struct mlx5_fc_stats {
-	struct rb_root counters;
+	struct list_head counters;
 	struct llist_head addlist;
 	struct llist_head dellist;
 
-- 
cgit v1.2.3


From 12d6066c3b29c5606c4a2466f964fbd9ede803c5 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Tue, 24 Jul 2018 16:37:40 +0300
Subject: net/mlx5: Add flow counters idr

Previous patch in series changed flow counter storage structure from
rb_tree to linked list in order to improve flow counter traversal
performance. The drawback of such solution is that flow counter lookup by
id becomes linear in complexity.

Store pointers to flow counters in idr in order to improve lookup
performance to logarithmic again. Idr is non-intrusive data structure and
doesn't require extending flow counter struct with new elements. This means
that idr can be used for lookup, while linked list from previous patch is
used for traversal, and struct mlx5_fc size is <= 2 cache lines.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Amir Vadai <amir@vadai.me>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/fs_counters.c  | 37 +++++++++++++++++++---
 include/linux/mlx5/driver.h                        |  2 ++
 2 files changed, 35 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 90ebfee37508..09206c4acd9a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -77,13 +77,18 @@ static struct list_head *mlx5_fc_counters_lookup_next(struct mlx5_core_dev *dev,
 						      u32 id)
 {
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+	unsigned long next_id = (unsigned long)id + 1;
 	struct mlx5_fc *counter;
 
-	list_for_each_entry(counter, &fc_stats->counters, list)
-		if (counter->id > id)
-			return &counter->list;
+	rcu_read_lock();
+	/* skip counters that are in idr, but not yet in counters list */
+	while ((counter = idr_get_next_ul(&fc_stats->counters_idr,
+					  &next_id)) != NULL &&
+	       list_empty(&counter->list))
+		next_id++;
+	rcu_read_unlock();
 
-	return &fc_stats->counters;
+	return counter ? &counter->list : &fc_stats->counters;
 }
 
 static void mlx5_fc_stats_insert(struct mlx5_core_dev *dev,
@@ -214,15 +219,29 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
 	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
 	if (!counter)
 		return ERR_PTR(-ENOMEM);
+	INIT_LIST_HEAD(&counter->list);
 
 	err = mlx5_cmd_fc_alloc(dev, &counter->id);
 	if (err)
 		goto err_out;
 
 	if (aging) {
+		u32 id = counter->id;
+
 		counter->cache.lastuse = jiffies;
 		counter->aging = true;
 
+		idr_preload(GFP_KERNEL);
+		spin_lock(&fc_stats->counters_idr_lock);
+
+		err = idr_alloc_u32(&fc_stats->counters_idr, counter, &id, id,
+				    GFP_NOWAIT);
+
+		spin_unlock(&fc_stats->counters_idr_lock);
+		idr_preload_end();
+		if (err)
+			goto err_out_alloc;
+
 		llist_add(&counter->addlist, &fc_stats->addlist);
 
 		mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
@@ -230,6 +249,8 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
 
 	return counter;
 
+err_out_alloc:
+	mlx5_cmd_fc_free(dev, counter->id);
 err_out:
 	kfree(counter);
 
@@ -245,6 +266,10 @@ void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
 		return;
 
 	if (counter->aging) {
+		spin_lock(&fc_stats->counters_idr_lock);
+		WARN_ON(!idr_remove(&fc_stats->counters_idr, counter->id));
+		spin_unlock(&fc_stats->counters_idr_lock);
+
 		llist_add(&counter->dellist, &fc_stats->dellist);
 		mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
 		return;
@@ -258,6 +283,8 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
 {
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
 
+	spin_lock_init(&fc_stats->counters_idr_lock);
+	idr_init(&fc_stats->counters_idr);
 	INIT_LIST_HEAD(&fc_stats->counters);
 	init_llist_head(&fc_stats->addlist);
 	init_llist_head(&fc_stats->dellist);
@@ -283,6 +310,8 @@ void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
 	destroy_workqueue(dev->priv.fc_stats.wq);
 	dev->priv.fc_stats.wq = NULL;
 
+	idr_destroy(&fc_stats->counters_idr);
+
 	tmplist = llist_del_all(&fc_stats->addlist);
 	llist_for_each_entry_safe(counter, tmp, tmplist, addlist)
 		mlx5_free_fc(dev, counter);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 61bed33e6675..2a0c845f6bdb 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -583,6 +583,8 @@ struct mlx5_irq_info {
 };
 
 struct mlx5_fc_stats {
+	spinlock_t counters_idr_lock; /* protects counters_idr */
+	struct idr counters_idr;
 	struct list_head counters;
 	struct llist_head addlist;
 	struct llist_head dellist;
-- 
cgit v1.2.3


From 64109f1dc41f25f4a9c6b114e04b6266bf4128ad Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayag@mellanox.com>
Date: Tue, 5 Jun 2018 09:22:18 +0300
Subject: net/mlx5e: Replace PTP clock lock from RW lock to seq lock

Changed "priv.clock.lock" lock from 'rw_lock' to 'seq_lock'
in order to improve packet rate performance.

Tested on Intel(R) Xeon(R) CPU E5-2660 v2 @ 2.20GHz.
Sent 64b packets between two peers connected by ConnectX-5,
and measured packet rate for the receiver in three modes:
	no time-stamping (base rate)
	time-stamping using rw_lock (old lock) for critical region
	time-stamping using seq_lock (new lock) for critical region
Only the receiver time stamped its packets.

The measured packet rate improvements are:

	Single flow (multiple TX rings to single RX ring):
		without timestamping:	  4.26 (M packets)/sec
		with rw-lock (old lock):  4.1  (M packets)/sec
		with seq-lock (new lock): 4.16 (M packets)/sec
		1.46% improvement

	Multiple flows (multiple TX rings to six RX rings):
		without timestamping: 	  22   (M packets)/sec
		with rw-lock (old lock):  11.7 (M packets)/sec
		with seq-lock (new lock): 21.3 (M packets)/sec
		82.05% improvement

The packet rate improvement is due to the lack of atomic operations
for the 'readers' by the seq-lock.
Since there are much more 'readers' than 'writers' contention
on this lock, almost all atomic operations are saved.
this results in a dramatic decrease in overall
cache misses.

Signed-off-by: Shay Agroskin <shayag@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/lib/clock.c    | 34 +++++++++++-----------
 .../net/ethernet/mellanox/mlx5/core/lib/clock.h    |  8 +++--
 include/linux/mlx5/driver.h                        |  2 +-
 3 files changed, 23 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 3f767cde4c1d..0d90b1b4a3d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -111,10 +111,10 @@ static void mlx5_pps_out(struct work_struct *work)
 	for (i = 0; i < clock->ptp_info.n_pins; i++) {
 		u64 tstart;
 
-		write_lock_irqsave(&clock->lock, flags);
+		write_seqlock_irqsave(&clock->lock, flags);
 		tstart = clock->pps_info.start[i];
 		clock->pps_info.start[i] = 0;
-		write_unlock_irqrestore(&clock->lock, flags);
+		write_sequnlock_irqrestore(&clock->lock, flags);
 		if (!tstart)
 			continue;
 
@@ -132,10 +132,10 @@ static void mlx5_timestamp_overflow(struct work_struct *work)
 						overflow_work);
 	unsigned long flags;
 
-	write_lock_irqsave(&clock->lock, flags);
+	write_seqlock_irqsave(&clock->lock, flags);
 	timecounter_read(&clock->tc);
 	mlx5_update_clock_info_page(clock->mdev);
-	write_unlock_irqrestore(&clock->lock, flags);
+	write_sequnlock_irqrestore(&clock->lock, flags);
 	schedule_delayed_work(&clock->overflow_work, clock->overflow_period);
 }
 
@@ -147,10 +147,10 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
 	u64 ns = timespec64_to_ns(ts);
 	unsigned long flags;
 
-	write_lock_irqsave(&clock->lock, flags);
+	write_seqlock_irqsave(&clock->lock, flags);
 	timecounter_init(&clock->tc, &clock->cycles, ns);
 	mlx5_update_clock_info_page(clock->mdev);
-	write_unlock_irqrestore(&clock->lock, flags);
+	write_sequnlock_irqrestore(&clock->lock, flags);
 
 	return 0;
 }
@@ -162,9 +162,9 @@ static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 	u64 ns;
 	unsigned long flags;
 
-	write_lock_irqsave(&clock->lock, flags);
+	write_seqlock_irqsave(&clock->lock, flags);
 	ns = timecounter_read(&clock->tc);
-	write_unlock_irqrestore(&clock->lock, flags);
+	write_sequnlock_irqrestore(&clock->lock, flags);
 
 	*ts = ns_to_timespec64(ns);
 
@@ -177,10 +177,10 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 						ptp_info);
 	unsigned long flags;
 
-	write_lock_irqsave(&clock->lock, flags);
+	write_seqlock_irqsave(&clock->lock, flags);
 	timecounter_adjtime(&clock->tc, delta);
 	mlx5_update_clock_info_page(clock->mdev);
-	write_unlock_irqrestore(&clock->lock, flags);
+	write_sequnlock_irqrestore(&clock->lock, flags);
 
 	return 0;
 }
@@ -203,12 +203,12 @@ static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
 	adj *= delta;
 	diff = div_u64(adj, 1000000000ULL);
 
-	write_lock_irqsave(&clock->lock, flags);
+	write_seqlock_irqsave(&clock->lock, flags);
 	timecounter_read(&clock->tc);
 	clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
 				       clock->nominal_c_mult + diff;
 	mlx5_update_clock_info_page(clock->mdev);
-	write_unlock_irqrestore(&clock->lock, flags);
+	write_sequnlock_irqrestore(&clock->lock, flags);
 
 	return 0;
 }
@@ -307,12 +307,12 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
 		ts.tv_nsec = rq->perout.start.nsec;
 		ns = timespec64_to_ns(&ts);
 		cycles_now = mlx5_read_internal_timer(mdev);
-		write_lock_irqsave(&clock->lock, flags);
+		write_seqlock_irqsave(&clock->lock, flags);
 		nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
 		nsec_delta = ns - nsec_now;
 		cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
 					 clock->cycles.mult);
-		write_unlock_irqrestore(&clock->lock, flags);
+		write_sequnlock_irqrestore(&clock->lock, flags);
 		time_stamp = cycles_now + cycles_delta;
 		field_select = MLX5_MTPPS_FS_PIN_MODE |
 			       MLX5_MTPPS_FS_PATTERN |
@@ -471,14 +471,14 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
 		ts.tv_sec += 1;
 		ts.tv_nsec = 0;
 		ns = timespec64_to_ns(&ts);
-		write_lock_irqsave(&clock->lock, flags);
+		write_seqlock_irqsave(&clock->lock, flags);
 		nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
 		nsec_delta = ns - nsec_now;
 		cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
 					 clock->cycles.mult);
 		clock->pps_info.start[pin] = cycles_now + cycles_delta;
 		schedule_work(&clock->pps_info.out_work);
-		write_unlock_irqrestore(&clock->lock, flags);
+		write_sequnlock_irqrestore(&clock->lock, flags);
 		break;
 	default:
 		mlx5_core_err(mdev, " Unhandled event\n");
@@ -498,7 +498,7 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
 		mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
 		return;
 	}
-	rwlock_init(&clock->lock);
+	seqlock_init(&clock->lock);
 	clock->cycles.read = read_internal_timer;
 	clock->cycles.shift = MLX5_CYCLES_SHIFT;
 	clock->cycles.mult = clocksource_khz2mult(dev_freq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
index 02e2e4575e4f..263cb6e2aeee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -46,11 +46,13 @@ static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
 static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
 						u64 timestamp)
 {
+	unsigned int seq;
 	u64 nsec;
 
-	read_lock(&clock->lock);
-	nsec = timecounter_cyc2time(&clock->tc, timestamp);
-	read_unlock(&clock->lock);
+	do {
+		seq = read_seqbegin(&clock->lock);
+		nsec = timecounter_cyc2time(&clock->tc, timestamp);
+	} while (read_seqretry(&clock->lock, seq));
 
 	return ns_to_ktime(nsec);
 }
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 2a0c845f6bdb..b7fce2c9443d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -805,7 +805,7 @@ struct mlx5_pps {
 };
 
 struct mlx5_clock {
-	rwlock_t                   lock;
+	seqlock_t                  lock;
 	struct cyclecounter        cycles;
 	struct timecounter         tc;
 	struct hwtstamp_config     hwtstamp_config;
-- 
cgit v1.2.3


From fa788d986a3aac5069378ed04697bd06f83d3488 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Mon, 3 Sep 2018 16:23:36 +0200
Subject: packet: add sockopt to ignore outgoing packets

Currently, the only way to ignore outgoing packets on a packet socket is
via the BPF filter.  With MSG_ZEROCOPY, packets that are looped into
AF_PACKET are copied in dev_queue_xmit_nit(), and this copy happens even
if the filter run from packet_rcv() would reject them.  So the presence
of a packet socket on the interface takes away the benefits of
MSG_ZEROCOPY, even if the packet socket is not interested in outgoing
packets.  (Even when MSG_ZEROCOPY is not used, the skb is unnecessarily
cloned, but the cost for that is much lower.)

Add a socket option to allow AF_PACKET sockets to ignore outgoing
packets to solve this.  Note that the *BSDs already have something
similar: BIOCSSEESENT/BIOCSDIRECTION and BIOCSDIRFILT.

The first intended user is lldpd.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h      |  1 +
 include/uapi/linux/if_packet.h |  1 +
 net/core/dev.c                 |  3 +++
 net/packet/af_packet.c         | 17 +++++++++++++++++
 4 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4271f6b4e419..e2b3bd750c98 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2343,6 +2343,7 @@ static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb,
 
 struct packet_type {
 	__be16			type;	/* This is really htons(ether_type). */
+	bool			ignore_outgoing;
 	struct net_device	*dev;	/* NULL is wildcarded here	     */
 	int			(*func) (struct sk_buff *,
 					 struct net_device *,
diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index 67b61d91d89b..467b654bd4c7 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -57,6 +57,7 @@ struct sockaddr_ll {
 #define PACKET_QDISC_BYPASS		20
 #define PACKET_ROLLOVER_STATS		21
 #define PACKET_FANOUT_DATA		22
+#define PACKET_IGNORE_OUTGOING		23
 
 #define PACKET_FANOUT_HASH		0
 #define PACKET_FANOUT_LB		1
diff --git a/net/core/dev.c b/net/core/dev.c
index 82114e1111e6..ca78dc5a79a3 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1969,6 +1969,9 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_lock();
 again:
 	list_for_each_entry_rcu(ptype, ptype_list, list) {
+		if (ptype->ignore_outgoing)
+			continue;
+
 		/* Never send packets back to the socket
 		 * they originated from - MvS (miquels@drinkel.ow.org)
 		 */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 75c92a87e7b2..f85f67b5c1f4 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3805,6 +3805,20 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
 		return fanout_set_data(po, optval, optlen);
 	}
+	case PACKET_IGNORE_OUTGOING:
+	{
+		int val;
+
+		if (optlen != sizeof(val))
+			return -EINVAL;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+		if (val < 0 || val > 1)
+			return -EINVAL;
+
+		po->prot_hook.ignore_outgoing = !!val;
+		return 0;
+	}
 	case PACKET_TX_HAS_OFF:
 	{
 		unsigned int val;
@@ -3928,6 +3942,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 			((u32)po->fanout->flags << 24)) :
 		       0);
 		break;
+	case PACKET_IGNORE_OUTGOING:
+		val = po->prot_hook.ignore_outgoing;
+		break;
 	case PACKET_ROLLOVER_STATS:
 		if (!po->rollover)
 			return -EINVAL;
-- 
cgit v1.2.3


From a3f723079df85eafc10c628dabdfcf374b8e1523 Mon Sep 17 00:00:00 2001
From: Denis Bolotin <denis.bolotin@cavium.com>
Date: Wed, 5 Sep 2018 18:35:55 +0300
Subject: qed*: Utilize FW 8.37.7.0

This patch adds a new qed firmware with fixes and support for new features.

Fixes:
- Fix a rare case of device crash with iWARP, iSCSI or FCoE offload.
- Fix GRE tunneled traffic when iWARP offload is enabled.
- Fix RoCE failure in ib_send_bw when using inline data.
- Fix latency optimization flow for inline WQEs.
- BigBear 100G fix

RDMA:
- Reduce task context size.
- Application page sizes above 2GB support.
- Performance improvements.

ETH:
- Tenant DCB support.
- Replace RSS indirection table update interface.

Misc:
- Debug Tools changes.

Signed-off-by: Denis Bolotin <denis.bolotin@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h       |   1 +
 drivers/net/ethernet/qlogic/qed/qed_debug.c | 248 +++++++++++++----------
 drivers/net/ethernet/qlogic/qed/qed_dev.c   |  11 ++
 drivers/net/ethernet/qlogic/qed/qed_hsi.h   | 297 +++++++++++++++++++---------
 include/linux/qed/common_hsi.h              |  10 +-
 include/linux/qed/iscsi_common.h            |   2 +-
 6 files changed, 367 insertions(+), 202 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index a60e1c8d470a..5f0962d353ce 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -623,6 +623,7 @@ struct qed_hwfn {
 	void				*unzip_buf;
 
 	struct dbg_tools_data		dbg_info;
+	void				*dbg_user_info;
 
 	/* PWM region specific data */
 	u16				wid_count;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index 1aa9fc1c5890..78a638ec7c0a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -3454,7 +3454,8 @@ static u32 qed_grc_dump_iors(struct qed_hwfn *p_hwfn,
 			addr = BYTES_TO_DWORDS(storm->sem_fast_mem_addr +
 					       SEM_FAST_REG_STORM_REG_FILE) +
 			       IOR_SET_OFFSET(set_id);
-			buf[strlen(buf) - 1] = '0' + set_id;
+			if (strlen(buf) > 0)
+				buf[strlen(buf) - 1] = '0' + set_id;
 			offset += qed_grc_dump_mem(p_hwfn,
 						   p_ptt,
 						   dump_buf + offset,
@@ -5563,35 +5564,6 @@ struct block_info {
 	enum block_id id;
 };
 
-struct mcp_trace_format {
-	u32 data;
-#define MCP_TRACE_FORMAT_MODULE_MASK	0x0000ffff
-#define MCP_TRACE_FORMAT_MODULE_SHIFT	0
-#define MCP_TRACE_FORMAT_LEVEL_MASK	0x00030000
-#define MCP_TRACE_FORMAT_LEVEL_SHIFT	16
-#define MCP_TRACE_FORMAT_P1_SIZE_MASK	0x000c0000
-#define MCP_TRACE_FORMAT_P1_SIZE_SHIFT	18
-#define MCP_TRACE_FORMAT_P2_SIZE_MASK	0x00300000
-#define MCP_TRACE_FORMAT_P2_SIZE_SHIFT	20
-#define MCP_TRACE_FORMAT_P3_SIZE_MASK	0x00c00000
-#define MCP_TRACE_FORMAT_P3_SIZE_SHIFT	22
-#define MCP_TRACE_FORMAT_LEN_MASK	0xff000000
-#define MCP_TRACE_FORMAT_LEN_SHIFT	24
-
-	char *format_str;
-};
-
-/* Meta data structure, generated by a perl script during MFW build. therefore,
- * the structs mcp_trace_meta and mcp_trace_format are duplicated in the perl
- * script.
- */
-struct mcp_trace_meta {
-	u32 modules_num;
-	char **modules;
-	u32 formats_num;
-	struct mcp_trace_format *formats;
-};
-
 /* REG fifo element */
 struct reg_fifo_element {
 	u64 data;
@@ -5714,6 +5686,20 @@ struct igu_fifo_addr_data {
 	enum igu_fifo_addr_types type;
 };
 
+struct mcp_trace_meta {
+	u32 modules_num;
+	char **modules;
+	u32 formats_num;
+	struct mcp_trace_format *formats;
+	bool is_allocated;
+};
+
+/* Debug Tools user data */
+struct dbg_tools_user_data {
+	struct mcp_trace_meta mcp_trace_meta;
+	const u32 *mcp_trace_user_meta_buf;
+};
+
 /******************************** Constants **********************************/
 
 #define MAX_MSG_LEN				1024
@@ -6137,15 +6123,6 @@ static const struct igu_fifo_addr_data s_igu_fifo_addr_data[] = {
 
 /******************************** Variables **********************************/
 
-/* MCP Trace meta data array - used in case the dump doesn't contain the
- * meta data (e.g. due to no NVRAM access).
- */
-static struct user_dbg_array s_mcp_trace_meta_arr = { NULL, 0 };
-
-/* Parsed MCP Trace meta data info, based on MCP trace meta array */
-static struct mcp_trace_meta s_mcp_trace_meta;
-static bool s_mcp_trace_meta_valid;
-
 /* Temporary buffer, used for print size calculations */
 static char s_temp_buf[MAX_MSG_LEN];
 
@@ -6311,6 +6288,12 @@ static u32 qed_print_section_params(u32 *dump_buf,
 	return dump_offset;
 }
 
+static struct dbg_tools_user_data *
+qed_dbg_get_user_data(struct qed_hwfn *p_hwfn)
+{
+	return (struct dbg_tools_user_data *)p_hwfn->dbg_user_info;
+}
+
 /* Parses the idle check rules and returns the number of characters printed.
  * In case of parsing error, returns 0.
  */
@@ -6570,43 +6553,26 @@ static enum dbg_status qed_parse_idle_chk_dump(u32 *dump_buf,
 	return DBG_STATUS_OK;
 }
 
-/* Frees the specified MCP Trace meta data */
-static void qed_mcp_trace_free_meta(struct qed_hwfn *p_hwfn,
-				    struct mcp_trace_meta *meta)
-{
-	u32 i;
-
-	s_mcp_trace_meta_valid = false;
-
-	/* Release modules */
-	if (meta->modules) {
-		for (i = 0; i < meta->modules_num; i++)
-			kfree(meta->modules[i]);
-		kfree(meta->modules);
-	}
-
-	/* Release formats */
-	if (meta->formats) {
-		for (i = 0; i < meta->formats_num; i++)
-			kfree(meta->formats[i].format_str);
-		kfree(meta->formats);
-	}
-}
-
 /* Allocates and fills MCP Trace meta data based on the specified meta data
  * dump buffer.
  * Returns debug status code.
  */
-static enum dbg_status qed_mcp_trace_alloc_meta(struct qed_hwfn *p_hwfn,
-						const u32 *meta_buf,
-						struct mcp_trace_meta *meta)
+static enum dbg_status
+qed_mcp_trace_alloc_meta_data(struct qed_hwfn *p_hwfn,
+			      const u32 *meta_buf)
 {
-	u8 *meta_buf_bytes = (u8 *)meta_buf;
+	struct dbg_tools_user_data *dev_user_data;
 	u32 offset = 0, signature, i;
+	struct mcp_trace_meta *meta;
+	u8 *meta_buf_bytes;
+
+	dev_user_data = qed_dbg_get_user_data(p_hwfn);
+	meta = &dev_user_data->mcp_trace_meta;
+	meta_buf_bytes = (u8 *)meta_buf;
 
 	/* Free the previous meta before loading a new one. */
-	if (s_mcp_trace_meta_valid)
-		qed_mcp_trace_free_meta(p_hwfn, meta);
+	if (meta->is_allocated)
+		qed_mcp_trace_free_meta_data(p_hwfn);
 
 	memset(meta, 0, sizeof(*meta));
 
@@ -6674,7 +6640,7 @@ static enum dbg_status qed_mcp_trace_alloc_meta(struct qed_hwfn *p_hwfn,
 				      format_len, format_ptr->format_str);
 	}
 
-	s_mcp_trace_meta_valid = true;
+	meta->is_allocated = true;
 	return DBG_STATUS_OK;
 }
 
@@ -6687,21 +6653,26 @@ static enum dbg_status qed_mcp_trace_alloc_meta(struct qed_hwfn *p_hwfn,
  *               buffer.
  * data_size - size in bytes of data to parse.
  * parsed_buf - destination buffer for parsed data.
- * parsed_bytes - size of parsed data in bytes.
+ * parsed_results_bytes - size of parsed data in bytes.
  */
-static enum dbg_status qed_parse_mcp_trace_buf(u8 *trace_buf,
+static enum dbg_status qed_parse_mcp_trace_buf(struct qed_hwfn *p_hwfn,
+					       u8 *trace_buf,
 					       u32 trace_buf_size,
 					       u32 data_offset,
 					       u32 data_size,
 					       char *parsed_buf,
-					       u32 *parsed_bytes)
+					       u32 *parsed_results_bytes)
 {
+	struct dbg_tools_user_data *dev_user_data;
+	struct mcp_trace_meta *meta;
 	u32 param_mask, param_shift;
 	enum dbg_status status;
 
-	*parsed_bytes = 0;
+	dev_user_data = qed_dbg_get_user_data(p_hwfn);
+	meta = &dev_user_data->mcp_trace_meta;
+	*parsed_results_bytes = 0;
 
-	if (!s_mcp_trace_meta_valid)
+	if (!meta->is_allocated)
 		return DBG_STATUS_MCP_TRACE_BAD_DATA;
 
 	status = DBG_STATUS_OK;
@@ -6723,7 +6694,7 @@ static enum dbg_status qed_parse_mcp_trace_buf(u8 *trace_buf,
 		format_idx = header & MFW_TRACE_EVENTID_MASK;
 
 		/* Skip message if its index doesn't exist in the meta data */
-		if (format_idx >= s_mcp_trace_meta.formats_num) {
+		if (format_idx >= meta->formats_num) {
 			u8 format_size =
 				(u8)((header & MFW_TRACE_PRM_SIZE_MASK) >>
 				     MFW_TRACE_PRM_SIZE_SHIFT);
@@ -6738,7 +6709,7 @@ static enum dbg_status qed_parse_mcp_trace_buf(u8 *trace_buf,
 			continue;
 		}
 
-		format_ptr = &s_mcp_trace_meta.formats[format_idx];
+		format_ptr = &meta->formats[format_idx];
 
 		for (i = 0,
 		     param_mask = MCP_TRACE_FORMAT_P1_SIZE_MASK,
@@ -6783,19 +6754,20 @@ static enum dbg_status qed_parse_mcp_trace_buf(u8 *trace_buf,
 			return DBG_STATUS_MCP_TRACE_BAD_DATA;
 
 		/* Print current message to results buffer */
-		*parsed_bytes +=
-			sprintf(qed_get_buf_ptr(parsed_buf, *parsed_bytes),
+		*parsed_results_bytes +=
+			sprintf(qed_get_buf_ptr(parsed_buf,
+						*parsed_results_bytes),
 				"%s %-8s: ",
 				s_mcp_trace_level_str[format_level],
-				s_mcp_trace_meta.modules[format_module]);
-		*parsed_bytes +=
-		    sprintf(qed_get_buf_ptr(parsed_buf, *parsed_bytes),
+				meta->modules[format_module]);
+		*parsed_results_bytes +=
+		    sprintf(qed_get_buf_ptr(parsed_buf, *parsed_results_bytes),
 			    format_ptr->format_str,
 			    params[0], params[1], params[2]);
 	}
 
 	/* Add string NULL terminator */
-	(*parsed_bytes)++;
+	(*parsed_results_bytes)++;
 
 	return status;
 }
@@ -6803,24 +6775,25 @@ static enum dbg_status qed_parse_mcp_trace_buf(u8 *trace_buf,
 /* Parses an MCP Trace dump buffer.
  * If result_buf is not NULL, the MCP Trace results are printed to it.
  * In any case, the required results buffer size is assigned to
- * parsed_bytes.
+ * parsed_results_bytes.
  * The parsing status is returned.
  */
 static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 						u32 *dump_buf,
-						char *parsed_buf,
-						u32 *parsed_bytes)
+						char *results_buf,
+						u32 *parsed_results_bytes,
+						bool free_meta_data)
 {
 	const char *section_name, *param_name, *param_str_val;
 	u32 data_size, trace_data_dwords, trace_meta_dwords;
-	u32 offset, results_offset, parsed_buf_bytes;
+	u32 offset, results_offset, results_buf_bytes;
 	u32 param_num_val, num_section_params;
 	struct mcp_trace *trace;
 	enum dbg_status status;
 	const u32 *meta_buf;
 	u8 *trace_buf;
 
-	*parsed_bytes = 0;
+	*parsed_results_bytes = 0;
 
 	/* Read global_params section */
 	dump_buf += qed_read_section_hdr(dump_buf,
@@ -6831,7 +6804,7 @@ static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 	/* Print global params */
 	dump_buf += qed_print_section_params(dump_buf,
 					     num_section_params,
-					     parsed_buf, &results_offset);
+					     results_buf, &results_offset);
 
 	/* Read trace_data section */
 	dump_buf += qed_read_section_hdr(dump_buf,
@@ -6846,6 +6819,9 @@ static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 
 	/* Prepare trace info */
 	trace = (struct mcp_trace *)dump_buf;
+	if (trace->signature != MFW_TRACE_SIGNATURE || !trace->size)
+		return DBG_STATUS_MCP_TRACE_BAD_DATA;
+
 	trace_buf = (u8 *)dump_buf + sizeof(*trace);
 	offset = trace->trace_oldest;
 	data_size = qed_cyclic_sub(trace->trace_prod, offset, trace->size);
@@ -6865,31 +6841,39 @@ static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn,
 	/* Choose meta data buffer */
 	if (!trace_meta_dwords) {
 		/* Dump doesn't include meta data */
-		if (!s_mcp_trace_meta_arr.ptr)
+		struct dbg_tools_user_data *dev_user_data =
+			qed_dbg_get_user_data(p_hwfn);
+
+		if (!dev_user_data->mcp_trace_user_meta_buf)
 			return DBG_STATUS_MCP_TRACE_NO_META;
-		meta_buf = s_mcp_trace_meta_arr.ptr;
+
+		meta_buf = dev_user_data->mcp_trace_user_meta_buf;
 	} else {
 		/* Dump includes meta data */
 		meta_buf = dump_buf;
 	}
 
 	/* Allocate meta data memory */
-	status = qed_mcp_trace_alloc_meta(p_hwfn, meta_buf, &s_mcp_trace_meta);
+	status = qed_mcp_trace_alloc_meta_data(p_hwfn, meta_buf);
 	if (status != DBG_STATUS_OK)
 		return status;
 
-	status = qed_parse_mcp_trace_buf(trace_buf,
+	status = qed_parse_mcp_trace_buf(p_hwfn,
+					 trace_buf,
 					 trace->size,
 					 offset,
 					 data_size,
-					 parsed_buf ?
-					 parsed_buf + results_offset :
+					 results_buf ?
+					 results_buf + results_offset :
 					 NULL,
-					 &parsed_buf_bytes);
+					 &results_buf_bytes);
 	if (status != DBG_STATUS_OK)
 		return status;
 
-	*parsed_bytes = results_offset + parsed_buf_bytes;
+	if (free_meta_data)
+		qed_mcp_trace_free_meta_data(p_hwfn);
+
+	*parsed_results_bytes = results_offset + results_buf_bytes;
 
 	return DBG_STATUS_OK;
 }
@@ -7361,6 +7345,16 @@ enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr)
 	return DBG_STATUS_OK;
 }
 
+enum dbg_status qed_dbg_alloc_user_data(struct qed_hwfn *p_hwfn)
+{
+	p_hwfn->dbg_user_info = kzalloc(sizeof(struct dbg_tools_user_data),
+					GFP_KERNEL);
+	if (!p_hwfn->dbg_user_info)
+		return DBG_STATUS_VIRT_MEM_ALLOC_FAILED;
+
+	return DBG_STATUS_OK;
+}
+
 const char *qed_dbg_get_status_str(enum dbg_status status)
 {
 	return (status <
@@ -7397,10 +7391,13 @@ enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn,
 				       num_errors, num_warnings);
 }
 
-void qed_dbg_mcp_trace_set_meta_data(u32 *data, u32 size)
+void qed_dbg_mcp_trace_set_meta_data(struct qed_hwfn *p_hwfn,
+				     const u32 *meta_buf)
 {
-	s_mcp_trace_meta_arr.ptr = data;
-	s_mcp_trace_meta_arr.size_in_dwords = size;
+	struct dbg_tools_user_data *dev_user_data =
+		qed_dbg_get_user_data(p_hwfn);
+
+	dev_user_data->mcp_trace_user_meta_buf = meta_buf;
 }
 
 enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn,
@@ -7409,7 +7406,7 @@ enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn,
 						   u32 *results_buf_size)
 {
 	return qed_parse_mcp_trace_dump(p_hwfn,
-					dump_buf, NULL, results_buf_size);
+					dump_buf, NULL, results_buf_size, true);
 }
 
 enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
@@ -7421,20 +7418,61 @@ enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
 
 	return qed_parse_mcp_trace_dump(p_hwfn,
 					dump_buf,
-					results_buf, &parsed_buf_size);
+					results_buf, &parsed_buf_size, true);
+}
+
+enum dbg_status qed_print_mcp_trace_results_cont(struct qed_hwfn *p_hwfn,
+						 u32 *dump_buf,
+						 char *results_buf)
+{
+	u32 parsed_buf_size;
+
+	return qed_parse_mcp_trace_dump(p_hwfn, dump_buf, results_buf,
+					&parsed_buf_size, false);
 }
 
-enum dbg_status qed_print_mcp_trace_line(u8 *dump_buf,
+enum dbg_status qed_print_mcp_trace_line(struct qed_hwfn *p_hwfn,
+					 u8 *dump_buf,
 					 u32 num_dumped_bytes,
 					 char *results_buf)
 {
-	u32 parsed_bytes;
+	u32 parsed_results_bytes;
 
-	return qed_parse_mcp_trace_buf(dump_buf,
+	return qed_parse_mcp_trace_buf(p_hwfn,
+				       dump_buf,
 				       num_dumped_bytes,
 				       0,
 				       num_dumped_bytes,
-				       results_buf, &parsed_bytes);
+				       results_buf, &parsed_results_bytes);
+}
+
+/* Frees the specified MCP Trace meta data */
+void qed_mcp_trace_free_meta_data(struct qed_hwfn *p_hwfn)
+{
+	struct dbg_tools_user_data *dev_user_data;
+	struct mcp_trace_meta *meta;
+	u32 i;
+
+	dev_user_data = qed_dbg_get_user_data(p_hwfn);
+	meta = &dev_user_data->mcp_trace_meta;
+	if (!meta->is_allocated)
+		return;
+
+	/* Release modules */
+	if (meta->modules) {
+		for (i = 0; i < meta->modules_num; i++)
+			kfree(meta->modules[i]);
+		kfree(meta->modules);
+	}
+
+	/* Release formats */
+	if (meta->formats) {
+		for (i = 0; i < meta->formats_num; i++)
+			kfree(meta->formats[i].format_str);
+		kfree(meta->formats);
+	}
+
+	meta->is_allocated = false;
 }
 
 enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 016ca8a7ec8a..128eb63ca54a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -144,6 +144,12 @@ static void qed_qm_info_free(struct qed_hwfn *p_hwfn)
 	qm_info->wfq_data = NULL;
 }
 
+static void qed_dbg_user_data_free(struct qed_hwfn *p_hwfn)
+{
+	kfree(p_hwfn->dbg_user_info);
+	p_hwfn->dbg_user_info = NULL;
+}
+
 void qed_resc_free(struct qed_dev *cdev)
 {
 	int i;
@@ -183,6 +189,7 @@ void qed_resc_free(struct qed_dev *cdev)
 		qed_l2_free(p_hwfn);
 		qed_dmae_info_free(p_hwfn);
 		qed_dcbx_info_free(p_hwfn);
+		qed_dbg_user_data_free(p_hwfn);
 	}
 }
 
@@ -1083,6 +1090,10 @@ int qed_resc_alloc(struct qed_dev *cdev)
 		rc = qed_dcbx_info_alloc(p_hwfn);
 		if (rc)
 			goto alloc_err;
+
+		rc = qed_dbg_alloc_user_data(p_hwfn);
+		if (rc)
+			goto alloc_err;
 	}
 
 	cdev->reset_stats = kzalloc(sizeof(*cdev->reset_stats), GFP_KERNEL);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 8faceb691657..21ec8091a24a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -274,7 +274,8 @@ struct core_rx_start_ramrod_data {
 	u8 mf_si_mcast_accept_all;
 	struct core_rx_action_on_error action_on_error;
 	u8 gsi_offload_flag;
-	u8 reserved[6];
+	u8 wipe_inner_vlan_pri_en;
+	u8 reserved[5];
 };
 
 /* Ramrod data for rx queue stop ramrod */
@@ -351,7 +352,8 @@ struct core_tx_start_ramrod_data {
 	__le16 pbl_size;
 	__le16 qm_pq_id;
 	u8 gsi_offload_flag;
-	u8 resrved[3];
+	u8 vport_id;
+	u8 resrved[2];
 };
 
 /* Ramrod data for tx queue stop ramrod */
@@ -914,6 +916,16 @@ struct eth_rx_rate_limit {
 	__le16 reserved1;
 };
 
+/* Update RSS indirection table entry command */
+struct eth_tstorm_rss_update_data {
+	u8 valid;
+	u8 vport_id;
+	u8 ind_table_index;
+	u8 reserved;
+	__le16 ind_table_value;
+	__le16 reserved1;
+};
+
 struct eth_ustorm_per_pf_stat {
 	struct regpair rcv_lb_ucast_bytes;
 	struct regpair rcv_lb_mcast_bytes;
@@ -1241,6 +1253,10 @@ struct rl_update_ramrod_data {
 	u8 rl_id_first;
 	u8 rl_id_last;
 	u8 rl_dc_qcn_flg;
+	u8 dcqcn_reset_alpha_on_idle;
+	u8 rl_bc_stage_th;
+	u8 rl_timer_stage_th;
+	u8 reserved1;
 	__le32 rl_bc_rate;
 	__le16 rl_max_rate;
 	__le16 rl_r_ai;
@@ -1249,7 +1265,7 @@ struct rl_update_ramrod_data {
 	__le32 dcqcn_k_us;
 	__le32 dcqcn_timeuot_us;
 	__le32 qcn_timeuot_us;
-	__le32 reserved[2];
+	__le32 reserved2;
 };
 
 /* Slowpath Element (SPQE) */
@@ -3322,6 +3338,25 @@ enum dbg_status qed_dbg_read_attn(struct qed_hwfn *p_hwfn,
 enum dbg_status qed_dbg_print_attn(struct qed_hwfn *p_hwfn,
 				   struct dbg_attn_block_result *results);
 
+/******************************* Data Types **********************************/
+
+struct mcp_trace_format {
+	u32 data;
+#define MCP_TRACE_FORMAT_MODULE_MASK	0x0000ffff
+#define MCP_TRACE_FORMAT_MODULE_SHIFT	0
+#define MCP_TRACE_FORMAT_LEVEL_MASK	0x00030000
+#define MCP_TRACE_FORMAT_LEVEL_SHIFT	16
+#define MCP_TRACE_FORMAT_P1_SIZE_MASK	0x000c0000
+#define MCP_TRACE_FORMAT_P1_SIZE_SHIFT	18
+#define MCP_TRACE_FORMAT_P2_SIZE_MASK	0x00300000
+#define MCP_TRACE_FORMAT_P2_SIZE_SHIFT	20
+#define MCP_TRACE_FORMAT_P3_SIZE_MASK	0x00c00000
+#define MCP_TRACE_FORMAT_P3_SIZE_SHIFT	22
+#define MCP_TRACE_FORMAT_LEN_MASK	0xff000000
+#define MCP_TRACE_FORMAT_LEN_SHIFT	24
+	char *format_str;
+};
+
 /******************************** Constants **********************************/
 
 #define MAX_NAME_LEN	16
@@ -3336,6 +3371,13 @@ enum dbg_status qed_dbg_print_attn(struct qed_hwfn *p_hwfn,
  */
 enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr);
 
+/**
+ * @brief qed_dbg_alloc_user_data - Allocates user debug data.
+ *
+ * @param p_hwfn -		 HW device data
+ */
+enum dbg_status qed_dbg_alloc_user_data(struct qed_hwfn *p_hwfn);
+
 /**
  * @brief qed_dbg_get_status_str - Returns a string for the specified status.
  *
@@ -3381,8 +3423,7 @@ enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn,
 					   u32 *num_warnings);
 
 /**
- * @brief qed_dbg_mcp_trace_set_meta_data - Sets a pointer to the MCP Trace
- *	meta data.
+ * @brief qed_dbg_mcp_trace_set_meta_data - Sets the MCP Trace meta data.
  *
  * Needed in case the MCP Trace dump doesn't contain the meta data (e.g. due to
  * no NVRAM access).
@@ -3390,7 +3431,8 @@ enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn,
  * @param data - pointer to MCP Trace meta data
  * @param size - size of MCP Trace meta data in dwords
  */
-void qed_dbg_mcp_trace_set_meta_data(u32 *data, u32 size);
+void qed_dbg_mcp_trace_set_meta_data(struct qed_hwfn *p_hwfn,
+				     const u32 *meta_buf);
 
 /**
  * @brief qed_get_mcp_trace_results_buf_size - Returns the required buffer size
@@ -3424,19 +3466,45 @@ enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
 					    u32 num_dumped_dwords,
 					    char *results_buf);
 
+/**
+ * @brief qed_print_mcp_trace_results_cont - Prints MCP Trace results, and
+ * keeps the MCP trace meta data allocated, to support continuous MCP Trace
+ * parsing. After the continuous parsing ends, mcp_trace_free_meta_data should
+ * be called to free the meta data.
+ *
+ * @param p_hwfn -	      HW device data
+ * @param dump_buf -	      mcp trace dump buffer, starting from the header.
+ * @param results_buf -	      buffer for printing the mcp trace results.
+ *
+ * @return error if the parsing fails, ok otherwise.
+ */
+enum dbg_status qed_print_mcp_trace_results_cont(struct qed_hwfn *p_hwfn,
+						 u32 *dump_buf,
+						 char *results_buf);
+
 /**
  * @brief print_mcp_trace_line - Prints MCP Trace results for a single line
  *
+ * @param p_hwfn -	      HW device data
  * @param dump_buf -	      mcp trace dump buffer, starting from the header.
  * @param num_dumped_bytes -  number of bytes that were dumped.
  * @param results_buf -	      buffer for printing the mcp trace results.
  *
  * @return error if the parsing fails, ok otherwise.
  */
-enum dbg_status qed_print_mcp_trace_line(u8 *dump_buf,
+enum dbg_status qed_print_mcp_trace_line(struct qed_hwfn *p_hwfn,
+					 u8 *dump_buf,
 					 u32 num_dumped_bytes,
 					 char *results_buf);
 
+/**
+ * @brief mcp_trace_free_meta_data - Frees the MCP Trace meta data.
+ * Should be called after continuous MCP Trace parsing.
+ *
+ * @param p_hwfn - HW device data
+ */
+void qed_mcp_trace_free_meta_data(struct qed_hwfn *p_hwfn);
+
 /**
  * @brief qed_get_reg_fifo_results_buf_size - Returns the required buffer size
  *	for reg_fifo results (in bytes).
@@ -4303,154 +4371,161 @@ void qed_set_rdma_error_level(struct qed_hwfn *p_hwfn,
 	(IRO[29].base + ((pf_id) * IRO[29].m1))
 #define ETH_RX_RATE_LIMIT_SIZE				(IRO[29].size)
 
+/* RSS indirection table entry update command per PF offset in TSTORM PF BAR0.
+ * Use eth_tstorm_rss_update_data for update.
+ */
+#define TSTORM_ETH_RSS_UPDATE_OFFSET(pf_id) \
+	(IRO[30].base + ((pf_id) * IRO[30].m1))
+#define TSTORM_ETH_RSS_UPDATE_SIZE			(IRO[30].size)
+
 /* Xstorm queue zone */
 #define XSTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) \
-	(IRO[30].base + ((queue_id) * IRO[30].m1))
-#define XSTORM_ETH_QUEUE_ZONE_SIZE			(IRO[30].size)
+	(IRO[31].base + ((queue_id) * IRO[31].m1))
+#define XSTORM_ETH_QUEUE_ZONE_SIZE			(IRO[31].size)
 
 /* Ystorm cqe producer */
 #define YSTORM_TOE_CQ_PROD_OFFSET(rss_id) \
-	(IRO[31].base + ((rss_id) * IRO[31].m1))
-#define YSTORM_TOE_CQ_PROD_SIZE				(IRO[31].size)
+	(IRO[32].base + ((rss_id) * IRO[32].m1))
+#define YSTORM_TOE_CQ_PROD_SIZE				(IRO[32].size)
 
 /* Ustorm cqe producer */
 #define USTORM_TOE_CQ_PROD_OFFSET(rss_id) \
-	(IRO[32].base + ((rss_id) * IRO[32].m1))
-#define USTORM_TOE_CQ_PROD_SIZE				(IRO[32].size)
+	(IRO[33].base + ((rss_id) * IRO[33].m1))
+#define USTORM_TOE_CQ_PROD_SIZE				(IRO[33].size)
 
 /* Ustorm grq producer */
 #define USTORM_TOE_GRQ_PROD_OFFSET(pf_id) \
-	(IRO[33].base + ((pf_id) * IRO[33].m1))
-#define USTORM_TOE_GRQ_PROD_SIZE			(IRO[33].size)
+	(IRO[34].base + ((pf_id) * IRO[34].m1))
+#define USTORM_TOE_GRQ_PROD_SIZE			(IRO[34].size)
 
 /* Tstorm cmdq-cons of given command queue-id */
 #define TSTORM_SCSI_CMDQ_CONS_OFFSET(cmdq_queue_id) \
-	(IRO[34].base + ((cmdq_queue_id) * IRO[34].m1))
-#define TSTORM_SCSI_CMDQ_CONS_SIZE			(IRO[34].size)
+	(IRO[35].base + ((cmdq_queue_id) * IRO[35].m1))
+#define TSTORM_SCSI_CMDQ_CONS_SIZE			(IRO[35].size)
 
 /* Tstorm (reflects M-Storm) bdq-external-producer of given function ID,
  * BDqueue-id.
  */
 #define TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \
-	(IRO[35].base + ((func_id) * IRO[35].m1) + ((bdq_id) * IRO[35].m2))
-#define TSTORM_SCSI_BDQ_EXT_PROD_SIZE			(IRO[35].size)
+	(IRO[36].base + ((func_id) * IRO[36].m1) + ((bdq_id) * IRO[36].m2))
+#define TSTORM_SCSI_BDQ_EXT_PROD_SIZE			(IRO[36].size)
 
 /* Mstorm bdq-external-producer of given BDQ resource ID, BDqueue-id */
 #define MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \
-	(IRO[36].base + ((func_id) * IRO[36].m1) + ((bdq_id) * IRO[36].m2))
-#define MSTORM_SCSI_BDQ_EXT_PROD_SIZE			(IRO[36].size)
+	(IRO[37].base + ((func_id) * IRO[37].m1) + ((bdq_id) * IRO[37].m2))
+#define MSTORM_SCSI_BDQ_EXT_PROD_SIZE			(IRO[37].size)
 
 /* Tstorm iSCSI RX stats */
 #define TSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \
-	(IRO[37].base + ((pf_id) * IRO[37].m1))
-#define TSTORM_ISCSI_RX_STATS_SIZE			(IRO[37].size)
+	(IRO[38].base + ((pf_id) * IRO[38].m1))
+#define TSTORM_ISCSI_RX_STATS_SIZE			(IRO[38].size)
 
 /* Mstorm iSCSI RX stats */
 #define MSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \
-	(IRO[38].base + ((pf_id) * IRO[38].m1))
-#define MSTORM_ISCSI_RX_STATS_SIZE			(IRO[38].size)
+	(IRO[39].base + ((pf_id) * IRO[39].m1))
+#define MSTORM_ISCSI_RX_STATS_SIZE			(IRO[39].size)
 
 /* Ustorm iSCSI RX stats */
 #define USTORM_ISCSI_RX_STATS_OFFSET(pf_id) \
-	(IRO[39].base + ((pf_id) * IRO[39].m1))
-#define USTORM_ISCSI_RX_STATS_SIZE			(IRO[39].size)
+	(IRO[40].base + ((pf_id) * IRO[40].m1))
+#define USTORM_ISCSI_RX_STATS_SIZE			(IRO[40].size)
 
 /* Xstorm iSCSI TX stats */
 #define XSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \
-	(IRO[40].base + ((pf_id) * IRO[40].m1))
-#define XSTORM_ISCSI_TX_STATS_SIZE			(IRO[40].size)
+	(IRO[41].base + ((pf_id) * IRO[41].m1))
+#define XSTORM_ISCSI_TX_STATS_SIZE			(IRO[41].size)
 
 /* Ystorm iSCSI TX stats */
 #define YSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \
-	(IRO[41].base + ((pf_id) * IRO[41].m1))
-#define YSTORM_ISCSI_TX_STATS_SIZE			(IRO[41].size)
+	(IRO[42].base + ((pf_id) * IRO[42].m1))
+#define YSTORM_ISCSI_TX_STATS_SIZE			(IRO[42].size)
 
 /* Pstorm iSCSI TX stats */
 #define PSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \
-	(IRO[42].base + ((pf_id) * IRO[42].m1))
-#define PSTORM_ISCSI_TX_STATS_SIZE			(IRO[42].size)
+	(IRO[43].base + ((pf_id) * IRO[43].m1))
+#define PSTORM_ISCSI_TX_STATS_SIZE			(IRO[43].size)
 
 /* Tstorm FCoE RX stats */
 #define TSTORM_FCOE_RX_STATS_OFFSET(pf_id) \
-	(IRO[43].base + ((pf_id) * IRO[43].m1))
-#define TSTORM_FCOE_RX_STATS_SIZE			(IRO[43].size)
+	(IRO[44].base + ((pf_id) * IRO[44].m1))
+#define TSTORM_FCOE_RX_STATS_SIZE			(IRO[44].size)
 
 /* Pstorm FCoE TX stats */
 #define PSTORM_FCOE_TX_STATS_OFFSET(pf_id) \
-	(IRO[44].base + ((pf_id) * IRO[44].m1))
-#define PSTORM_FCOE_TX_STATS_SIZE			(IRO[44].size)
+	(IRO[45].base + ((pf_id) * IRO[45].m1))
+#define PSTORM_FCOE_TX_STATS_SIZE			(IRO[45].size)
 
 /* Pstorm RDMA queue statistics */
 #define PSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) \
-	(IRO[45].base + ((rdma_stat_counter_id) * IRO[45].m1))
-#define PSTORM_RDMA_QUEUE_STAT_SIZE			(IRO[45].size)
+	(IRO[46].base + ((rdma_stat_counter_id) * IRO[46].m1))
+#define PSTORM_RDMA_QUEUE_STAT_SIZE			(IRO[46].size)
 
 /* Tstorm RDMA queue statistics */
 #define TSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) \
-	(IRO[46].base + ((rdma_stat_counter_id) * IRO[46].m1))
-#define TSTORM_RDMA_QUEUE_STAT_SIZE			(IRO[46].size)
+	(IRO[47].base + ((rdma_stat_counter_id) * IRO[47].m1))
+#define TSTORM_RDMA_QUEUE_STAT_SIZE			(IRO[47].size)
 
 /* Xstorm error level for assert */
 #define XSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \
-	(IRO[47].base +	((pf_id) * IRO[47].m1))
-#define XSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[47].size)
+	(IRO[48].base +	((pf_id) * IRO[48].m1))
+#define XSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[48].size)
 
 /* Ystorm error level for assert */
 #define YSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \
-	(IRO[48].base + ((pf_id) * IRO[48].m1))
-#define YSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[48].size)
+	(IRO[49].base + ((pf_id) * IRO[49].m1))
+#define YSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[49].size)
 
 /* Pstorm error level for assert */
 #define PSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \
-	(IRO[49].base +	((pf_id) * IRO[49].m1))
-#define PSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[49].size)
+	(IRO[50].base +	((pf_id) * IRO[50].m1))
+#define PSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[50].size)
 
 /* Tstorm error level for assert */
 #define TSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \
-	(IRO[50].base +	((pf_id) * IRO[50].m1))
-#define TSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[50].size)
+	(IRO[51].base +	((pf_id) * IRO[51].m1))
+#define TSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[51].size)
 
 /* Mstorm error level for assert */
 #define MSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \
-	(IRO[51].base + ((pf_id) * IRO[51].m1))
-#define MSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[51].size)
+	(IRO[52].base + ((pf_id) * IRO[52].m1))
+#define MSTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[52].size)
 
 /* Ustorm error level for assert */
 #define USTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) \
-	(IRO[52].base + ((pf_id) * IRO[52].m1))
-#define USTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[52].size)
+	(IRO[53].base + ((pf_id) * IRO[53].m1))
+#define USTORM_RDMA_ASSERT_LEVEL_SIZE			(IRO[53].size)
 
 /* Xstorm iWARP rxmit stats */
 #define XSTORM_IWARP_RXMIT_STATS_OFFSET(pf_id) \
-	(IRO[53].base +	((pf_id) * IRO[53].m1))
-#define XSTORM_IWARP_RXMIT_STATS_SIZE			(IRO[53].size)
+	(IRO[54].base +	((pf_id) * IRO[54].m1))
+#define XSTORM_IWARP_RXMIT_STATS_SIZE			(IRO[54].size)
 
 /* Tstorm RoCE Event Statistics */
 #define TSTORM_ROCE_EVENTS_STAT_OFFSET(roce_pf_id) \
-	(IRO[54].base + ((roce_pf_id) * IRO[54].m1))
-#define TSTORM_ROCE_EVENTS_STAT_SIZE			(IRO[54].size)
+	(IRO[55].base + ((roce_pf_id) * IRO[55].m1))
+#define TSTORM_ROCE_EVENTS_STAT_SIZE			(IRO[55].size)
 
 /* DCQCN Received Statistics */
 #define YSTORM_ROCE_DCQCN_RECEIVED_STATS_OFFSET(roce_pf_id) \
-	(IRO[55].base + ((roce_pf_id) * IRO[55].m1))
-#define YSTORM_ROCE_DCQCN_RECEIVED_STATS_SIZE		(IRO[55].size)
+	(IRO[56].base + ((roce_pf_id) * IRO[56].m1))
+#define YSTORM_ROCE_DCQCN_RECEIVED_STATS_SIZE		(IRO[56].size)
 
 /* RoCE Error Statistics */
 #define YSTORM_ROCE_ERROR_STATS_OFFSET(roce_pf_id) \
-	(IRO[56].base + ((roce_pf_id) * IRO[56].m1))
-#define YSTORM_ROCE_ERROR_STATS_SIZE			(IRO[56].size)
+	(IRO[57].base + ((roce_pf_id) * IRO[57].m1))
+#define YSTORM_ROCE_ERROR_STATS_SIZE			(IRO[57].size)
 
 /* DCQCN Sent Statistics */
 #define PSTORM_ROCE_DCQCN_SENT_STATS_OFFSET(roce_pf_id) \
-	(IRO[57].base + ((roce_pf_id) * IRO[57].m1))
-#define PSTORM_ROCE_DCQCN_SENT_STATS_SIZE		(IRO[57].size)
+	(IRO[58].base + ((roce_pf_id) * IRO[58].m1))
+#define PSTORM_ROCE_DCQCN_SENT_STATS_SIZE		(IRO[58].size)
 
 /* RoCE CQEs Statistics */
 #define USTORM_ROCE_CQE_STATS_OFFSET(roce_pf_id) \
-	(IRO[58].base + ((roce_pf_id) * IRO[58].m1))
-#define USTORM_ROCE_CQE_STATS_SIZE			(IRO[58].size)
+	(IRO[59].base + ((roce_pf_id) * IRO[59].m1))
+#define USTORM_ROCE_CQE_STATS_SIZE			(IRO[59].size)
 
-static const struct iro iro_arr[59] = {
+static const struct iro iro_arr[60] = {
 	{0x0, 0x0, 0x0, 0x0, 0x8},
 	{0x4cb8, 0x88, 0x0, 0x0, 0x88},
 	{0x6530, 0x20, 0x0, 0x0, 0x20},
@@ -4461,14 +4536,14 @@ static const struct iro iro_arr[59] = {
 	{0x84, 0x8, 0x0, 0x0, 0x2},
 	{0x4c48, 0x0, 0x0, 0x0, 0x78},
 	{0x3e38, 0x0, 0x0, 0x0, 0x78},
-	{0x2b78, 0x0, 0x0, 0x0, 0x78},
+	{0x3ef8, 0x0, 0x0, 0x0, 0x78},
 	{0x4c40, 0x0, 0x0, 0x0, 0x78},
 	{0x4998, 0x0, 0x0, 0x0, 0x78},
 	{0x7f50, 0x0, 0x0, 0x0, 0x78},
 	{0xa28, 0x8, 0x0, 0x0, 0x8},
 	{0x6210, 0x10, 0x0, 0x0, 0x10},
 	{0xb820, 0x30, 0x0, 0x0, 0x30},
-	{0x96c0, 0x30, 0x0, 0x0, 0x30},
+	{0xa990, 0x30, 0x0, 0x0, 0x30},
 	{0x4b68, 0x80, 0x0, 0x0, 0x40},
 	{0x1f8, 0x4, 0x0, 0x0, 0x4},
 	{0x53a8, 0x80, 0x4, 0x0, 0x4},
@@ -4476,11 +4551,12 @@ static const struct iro iro_arr[59] = {
 	{0x4ba8, 0x80, 0x0, 0x0, 0x20},
 	{0x8158, 0x40, 0x0, 0x0, 0x30},
 	{0xe770, 0x60, 0x0, 0x0, 0x60},
-	{0x2d10, 0x80, 0x0, 0x0, 0x38},
-	{0xf2b8, 0x78, 0x0, 0x0, 0x78},
+	{0x4090, 0x80, 0x0, 0x0, 0x38},
+	{0xfea8, 0x78, 0x0, 0x0, 0x78},
 	{0x1f8, 0x4, 0x0, 0x0, 0x4},
 	{0xaf20, 0x0, 0x0, 0x0, 0xf0},
 	{0xb010, 0x8, 0x0, 0x0, 0x8},
+	{0xc00, 0x8, 0x0, 0x0, 0x8},
 	{0x1f8, 0x8, 0x0, 0x0, 0x8},
 	{0xac0, 0x8, 0x0, 0x0, 0x8},
 	{0x2578, 0x8, 0x0, 0x0, 0x8},
@@ -4492,23 +4568,23 @@ static const struct iro iro_arr[59] = {
 	{0x12908, 0x18, 0x0, 0x0, 0x10},
 	{0x11aa8, 0x40, 0x0, 0x0, 0x18},
 	{0xa588, 0x50, 0x0, 0x0, 0x20},
-	{0x8700, 0x40, 0x0, 0x0, 0x28},
-	{0x10300, 0x18, 0x0, 0x0, 0x10},
+	{0x8f00, 0x40, 0x0, 0x0, 0x28},
+	{0x10e30, 0x18, 0x0, 0x0, 0x10},
 	{0xde48, 0x48, 0x0, 0x0, 0x38},
-	{0x10768, 0x20, 0x0, 0x0, 0x20},
-	{0x2d48, 0x80, 0x0, 0x0, 0x10},
+	{0x11298, 0x20, 0x0, 0x0, 0x20},
+	{0x40c8, 0x80, 0x0, 0x0, 0x10},
 	{0x5048, 0x10, 0x0, 0x0, 0x10},
 	{0xc748, 0x8, 0x0, 0x0, 0x1},
-	{0xa128, 0x8, 0x0, 0x0, 0x1},
-	{0x10f00, 0x8, 0x0, 0x0, 0x1},
+	{0xa928, 0x8, 0x0, 0x0, 0x1},
+	{0x11a30, 0x8, 0x0, 0x0, 0x1},
 	{0xf030, 0x8, 0x0, 0x0, 0x1},
 	{0x13028, 0x8, 0x0, 0x0, 0x1},
 	{0x12c58, 0x8, 0x0, 0x0, 0x1},
 	{0xc9b8, 0x30, 0x0, 0x0, 0x10},
 	{0xed90, 0x28, 0x0, 0x0, 0x28},
-	{0xa520, 0x18, 0x0, 0x0, 0x18},
-	{0xa6a0, 0x8, 0x0, 0x0, 0x8},
-	{0x13108, 0x8, 0x0, 0x0, 0x8},
+	{0xad20, 0x18, 0x0, 0x0, 0x18},
+	{0xaea0, 0x8, 0x0, 0x0, 0x8},
+	{0x13c38, 0x8, 0x0, 0x0, 0x8},
 	{0x13c50, 0x18, 0x0, 0x0, 0x18},
 };
 
@@ -5661,6 +5737,14 @@ enum eth_filter_type {
 	MAX_ETH_FILTER_TYPE
 };
 
+/* inner to inner vlan priority translation configurations */
+struct eth_in_to_in_pri_map_cfg {
+	u8 inner_vlan_pri_remap_en;
+	u8 reserved[7];
+	u8 non_rdma_in_to_in_pri_map[8];
+	u8 rdma_in_to_in_pri_map[8];
+};
+
 /* Eth IPv4 Fragment Type */
 enum eth_ipv4_frag_type {
 	ETH_IPV4_NOT_FRAG,
@@ -6018,6 +6102,14 @@ struct tx_queue_update_ramrod_data {
 	struct regpair reserved1[5];
 };
 
+/* Inner to Inner VLAN priority map update mode */
+enum update_in_to_in_pri_map_mode_enum {
+	ETH_IN_TO_IN_PRI_MAP_UPDATE_DISABLED,
+	ETH_IN_TO_IN_PRI_MAP_UPDATE_NON_RDMA_TBL,
+	ETH_IN_TO_IN_PRI_MAP_UPDATE_RDMA_TBL,
+	MAX_UPDATE_IN_TO_IN_PRI_MAP_MODE_ENUM
+};
+
 /* Ramrod data for vport update ramrod */
 struct vport_filter_update_ramrod_data {
 	struct eth_filter_cmd_header filter_cmd_hdr;
@@ -6048,7 +6140,8 @@ struct vport_start_ramrod_data {
 	u8 zero_placement_offset;
 	u8 ctl_frame_mac_check_en;
 	u8 ctl_frame_ethtype_check_en;
-	u8 reserved[1];
+	u8 wipe_inner_vlan_pri_en;
+	struct eth_in_to_in_pri_map_cfg in_to_in_vlan_pri_map_cfg;
 };
 
 /* Ramrod data for vport stop ramrod */
@@ -6100,7 +6193,9 @@ struct vport_update_ramrod_data_cmn {
 	u8 update_ctl_frame_checks_en_flg;
 	u8 ctl_frame_mac_check_en;
 	u8 ctl_frame_ethtype_check_en;
-	u8 reserved[15];
+	u8 update_in_to_in_pri_map_mode;
+	u8 in_to_in_pri_map[8];
+	u8 reserved[6];
 };
 
 struct vport_update_ramrod_mcast {
@@ -6929,11 +7024,6 @@ struct mstorm_rdma_task_st_ctx {
 	struct regpair temp[4];
 };
 
-/* The roce task context of Ustorm */
-struct ustorm_rdma_task_st_ctx {
-	struct regpair temp[2];
-};
-
 struct e4_ustorm_rdma_task_ag_ctx {
 	u8 reserved;
 	u8 state;
@@ -7007,8 +7097,6 @@ struct e4_rdma_task_context {
 	struct e4_mstorm_rdma_task_ag_ctx mstorm_ag_context;
 	struct mstorm_rdma_task_st_ctx mstorm_st_context;
 	struct rdif_task_context rdif_context;
-	struct ustorm_rdma_task_st_ctx ustorm_st_context;
-	struct regpair ustorm_st_padding[2];
 	struct e4_ustorm_rdma_task_ag_ctx ustorm_ag_context;
 };
 
@@ -7388,7 +7476,7 @@ struct e4_ustorm_rdma_conn_ag_ctx {
 #define E4_USTORM_RDMA_CONN_AG_CTX_RULE8EN_MASK		0x1
 #define E4_USTORM_RDMA_CONN_AG_CTX_RULE8EN_SHIFT	7
 	u8 byte2;
-	u8 byte3;
+	u8 nvmf_only;
 	__le16 conn_dpi;
 	__le16 word1;
 	__le32 cq_cons;
@@ -7831,7 +7919,12 @@ struct roce_create_qp_req_ramrod_data {
 	struct regpair qp_handle_for_cqe;
 	struct regpair qp_handle_for_async;
 	u8 stats_counter_id;
-	u8 reserved3[7];
+	u8 reserved3[6];
+	u8 flags2;
+#define ROCE_CREATE_QP_REQ_RAMROD_DATA_EDPM_MODE_MASK			0x1
+#define ROCE_CREATE_QP_REQ_RAMROD_DATA_EDPM_MODE_SHIFT			0
+#define ROCE_CREATE_QP_REQ_RAMROD_DATA_RESERVED_MASK			0x7F
+#define ROCE_CREATE_QP_REQ_RAMROD_DATA_RESERVED_SHIFT			1
 	__le16 regular_latency_phy_queue;
 	__le16 dpi;
 };
@@ -7954,6 +8047,7 @@ enum roce_event_opcode {
 	ROCE_EVENT_DESTROY_QP,
 	ROCE_EVENT_CREATE_UD_QP,
 	ROCE_EVENT_DESTROY_UD_QP,
+	ROCE_EVENT_FUNC_UPDATE,
 	MAX_ROCE_EVENT_OPCODE
 };
 
@@ -7962,7 +8056,13 @@ struct roce_init_func_params {
 	u8 ll2_queue_id;
 	u8 cnp_vlan_priority;
 	u8 cnp_dscp;
-	u8 reserved;
+	u8 flags;
+#define ROCE_INIT_FUNC_PARAMS_DCQCN_NP_EN_MASK		0x1
+#define ROCE_INIT_FUNC_PARAMS_DCQCN_NP_EN_SHIFT		0
+#define ROCE_INIT_FUNC_PARAMS_DCQCN_RP_EN_MASK		0x1
+#define ROCE_INIT_FUNC_PARAMS_DCQCN_RP_EN_SHIFT		1
+#define ROCE_INIT_FUNC_PARAMS_RESERVED0_MASK		0x3F
+#define ROCE_INIT_FUNC_PARAMS_RESERVED0_SHIFT		2
 	__le32 cnp_send_timeout;
 	__le16 rl_offset;
 	u8 rl_count_log;
@@ -8109,9 +8209,24 @@ enum roce_ramrod_cmd_id {
 	ROCE_RAMROD_DESTROY_QP,
 	ROCE_RAMROD_CREATE_UD_QP,
 	ROCE_RAMROD_DESTROY_UD_QP,
+	ROCE_RAMROD_FUNC_UPDATE,
 	MAX_ROCE_RAMROD_CMD_ID
 };
 
+/* RoCE func init ramrod data */
+struct roce_update_func_params {
+	u8 cnp_vlan_priority;
+	u8 cnp_dscp;
+	__le16 flags;
+#define ROCE_UPDATE_FUNC_PARAMS_DCQCN_NP_EN_MASK	0x1
+#define ROCE_UPDATE_FUNC_PARAMS_DCQCN_NP_EN_SHIFT	0
+#define ROCE_UPDATE_FUNC_PARAMS_DCQCN_RP_EN_MASK	0x1
+#define ROCE_UPDATE_FUNC_PARAMS_DCQCN_RP_EN_SHIFT	1
+#define ROCE_UPDATE_FUNC_PARAMS_RESERVED0_MASK		0x3FFF
+#define ROCE_UPDATE_FUNC_PARAMS_RESERVED0_SHIFT		2
+	__le32 cnp_send_timeout;
+};
+
 struct e4_xstorm_roce_conn_ag_ctx_dq_ext_ld_part {
 	u8 reserved0;
 	u8 state;
diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h
index 0081fa6d1268..03f59a28fefd 100644
--- a/include/linux/qed/common_hsi.h
+++ b/include/linux/qed/common_hsi.h
@@ -110,7 +110,7 @@
 
 #define FW_MAJOR_VERSION	8
 #define FW_MINOR_VERSION        37
-#define FW_REVISION_VERSION     2
+#define FW_REVISION_VERSION     7
 #define FW_ENGINEERING_VERSION	0
 
 /***********************/
@@ -931,12 +931,12 @@ struct db_rdma_dpm_params {
 #define DB_RDMA_DPM_PARAMS_WQE_SIZE_SHIFT		16
 #define DB_RDMA_DPM_PARAMS_RESERVED0_MASK		0x1
 #define DB_RDMA_DPM_PARAMS_RESERVED0_SHIFT		27
-#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_MASK		0x1
-#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_SHIFT		28
+#define DB_RDMA_DPM_PARAMS_ACK_REQUEST_MASK		0x1
+#define DB_RDMA_DPM_PARAMS_ACK_REQUEST_SHIFT		28
 #define DB_RDMA_DPM_PARAMS_S_FLG_MASK			0x1
 #define DB_RDMA_DPM_PARAMS_S_FLG_SHIFT			29
-#define DB_RDMA_DPM_PARAMS_RESERVED1_MASK		0x1
-#define DB_RDMA_DPM_PARAMS_RESERVED1_SHIFT		30
+#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_MASK		0x1
+#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_SHIFT		30
 #define DB_RDMA_DPM_PARAMS_CONN_TYPE_IS_IWARP_MASK	0x1
 #define DB_RDMA_DPM_PARAMS_CONN_TYPE_IS_IWARP_SHIFT	31
 };
diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h
index b34c573f2b30..66aba505ec56 100644
--- a/include/linux/qed/iscsi_common.h
+++ b/include/linux/qed/iscsi_common.h
@@ -896,7 +896,7 @@ struct e4_ustorm_iscsi_task_ag_ctx {
 	__le32 exp_cont_len;
 	__le32 total_data_acked;
 	__le32 exp_data_acked;
-	u8 next_tid_valid;
+	u8 byte2;
 	u8 byte3;
 	__le16 word1;
 	__le16 next_tid;
-- 
cgit v1.2.3


From 7759eb23fd9808a2e4498cf36a798ed65cde78ae Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Wed, 5 Sep 2018 15:45:54 -0600
Subject: block: remove bio_rewind_iter()

It is pointed that bio_rewind_iter() is one very bad API[1]:

1) bio size may not be restored after rewinding

2) it causes some bogus change, such as 5151842b9d8732 (block: reset
bi_iter.bi_done after splitting bio)

3) rewinding really makes things complicated wrt. bio splitting

4) unnecessary updating of .bi_done in fast path

[1] https://marc.info/?t=153549924200005&r=1&w=2

So this patch takes Kent's suggestion to restore one bio into its original
state via saving bio iterator(struct bvec_iter) in bio_integrity_prep(),
given now bio_rewind_iter() is only used by bio integrity code.

Cc: Dmitry Monakhov <dmonakhov@openvz.org>
Cc: Hannes Reinecke <hare@suse.com>
Suggested-by: Kent Overstreet <kent.overstreet@gmail.com>
Acked-by: Kent Overstreet <kent.overstreet@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio-integrity.c | 12 ++++--------
 block/bio.c           |  1 -
 include/linux/bio.h   | 22 ++++------------------
 include/linux/bvec.h  |  3 ---
 4 files changed, 8 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 67b5fb861a51..290af497997b 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -306,6 +306,8 @@ bool bio_integrity_prep(struct bio *bio)
 	if (bio_data_dir(bio) == WRITE) {
 		bio_integrity_process(bio, &bio->bi_iter,
 				      bi->profile->generate_fn);
+	} else {
+		bip->bio_iter = bio->bi_iter;
 	}
 	return true;
 
@@ -331,20 +333,14 @@ static void bio_integrity_verify_fn(struct work_struct *work)
 		container_of(work, struct bio_integrity_payload, bip_work);
 	struct bio *bio = bip->bip_bio;
 	struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
-	struct bvec_iter iter = bio->bi_iter;
 
 	/*
 	 * At the moment verify is called bio's iterator was advanced
 	 * during split and completion, we need to rewind iterator to
 	 * it's original position.
 	 */
-	if (bio_rewind_iter(bio, &iter, iter.bi_done)) {
-		bio->bi_status = bio_integrity_process(bio, &iter,
-						       bi->profile->verify_fn);
-	} else {
-		bio->bi_status = BLK_STS_IOERR;
-	}
-
+	bio->bi_status = bio_integrity_process(bio, &bip->bio_iter,
+						bi->profile->verify_fn);
 	bio_integrity_free(bio);
 	bio_endio(bio);
 }
diff --git a/block/bio.c b/block/bio.c
index 8c680a776171..f685e762809d 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1807,7 +1807,6 @@ struct bio *bio_split(struct bio *bio, int sectors,
 		bio_integrity_trim(split);
 
 	bio_advance(bio, split->bi_iter.bi_size);
-	bio->bi_iter.bi_done = 0;
 
 	if (bio_flagged(bio, BIO_TRACE_COMPLETION))
 		bio_set_flag(split, BIO_TRACE_COMPLETION);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 51371740d2a8..14b4fa266357 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -170,27 +170,11 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 {
 	iter->bi_sector += bytes >> 9;
 
-	if (bio_no_advance_iter(bio)) {
+	if (bio_no_advance_iter(bio))
 		iter->bi_size -= bytes;
-		iter->bi_done += bytes;
-	} else {
+	else
 		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
 		/* TODO: It is reasonable to complete bio with error here. */
-	}
-}
-
-static inline bool bio_rewind_iter(struct bio *bio, struct bvec_iter *iter,
-		unsigned int bytes)
-{
-	iter->bi_sector -= bytes >> 9;
-
-	if (bio_no_advance_iter(bio)) {
-		iter->bi_size += bytes;
-		iter->bi_done -= bytes;
-		return true;
-	}
-
-	return bvec_iter_rewind(bio->bi_io_vec, iter, bytes);
 }
 
 #define __bio_for_each_segment(bvl, bio, iter, start)			\
@@ -353,6 +337,8 @@ struct bio_integrity_payload {
 	unsigned short		bip_max_vcnt;	/* integrity bio_vec slots */
 	unsigned short		bip_flags;	/* control flags */
 
+	struct bvec_iter	bio_iter;	/* for rewinding parent bio */
+
 	struct work_struct	bip_work;	/* I/O completion */
 
 	struct bio_vec		*bip_vec;
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index fe7a22dd133b..02c73c6aa805 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -40,8 +40,6 @@ struct bvec_iter {
 
 	unsigned int		bi_idx;		/* current index into bvl_vec */
 
-	unsigned int            bi_done;	/* number of bytes completed */
-
 	unsigned int            bi_bvec_done;	/* number of bytes completed in
 						   current bvec */
 };
@@ -85,7 +83,6 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 		bytes -= len;
 		iter->bi_size -= len;
 		iter->bi_bvec_done += len;
-		iter->bi_done += len;
 
 		if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
 			iter->bi_bvec_done = 0;
-- 
cgit v1.2.3


From cc8a4ea182efac95ad4582053f8a51271fab734d Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Wed, 18 Jul 2018 18:09:55 +0200
Subject: platform/chrome: Move mfd/cros_ec_lpc* includes to drivers/platform.

The cros-ec-lpc driver lives in drivers/platform because is platform
specific, however there are two includes (cros_ec_lpc_mec.h and
cros_ec_lpc_reg.h) that lives in include/linux/mfd. These two includes
are only used for the platform driver and are not really related to the
MFD subsystem, so move the includes from include/linux/mfd to
drivers/platform/chrome.

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/platform/chrome/cros_ec_lpc.c     |  3 +-
 drivers/platform/chrome/cros_ec_lpc_mec.c |  3 +-
 drivers/platform/chrome/cros_ec_lpc_mec.h | 90 +++++++++++++++++++++++++++++++
 drivers/platform/chrome/cros_ec_lpc_reg.c |  3 +-
 drivers/platform/chrome/cros_ec_lpc_reg.h | 61 +++++++++++++++++++++
 include/linux/mfd/cros_ec_lpc_mec.h       | 90 -------------------------------
 include/linux/mfd/cros_ec_lpc_reg.h       | 61 ---------------------
 7 files changed, 157 insertions(+), 154 deletions(-)
 create mode 100644 drivers/platform/chrome/cros_ec_lpc_mec.h
 create mode 100644 drivers/platform/chrome/cros_ec_lpc_reg.h
 delete mode 100644 include/linux/mfd/cros_ec_lpc_mec.h
 delete mode 100644 include/linux/mfd/cros_ec_lpc_reg.h

(limited to 'include/linux')

diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index 31c8b8c49e45..7ec8789bf161 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -27,12 +27,13 @@
 #include <linux/io.h>
 #include <linux/mfd/cros_ec.h>
 #include <linux/mfd/cros_ec_commands.h>
-#include <linux/mfd/cros_ec_lpc_reg.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/printk.h>
 #include <linux/suspend.h>
 
+#include "cros_ec_lpc_reg.h"
+
 #define DRV_NAME "cros_ec_lpcs"
 #define ACPI_DRV_NAME "GOOG0004"
 
diff --git a/drivers/platform/chrome/cros_ec_lpc_mec.c b/drivers/platform/chrome/cros_ec_lpc_mec.c
index 2eda2c2fc210..c4edfa83e493 100644
--- a/drivers/platform/chrome/cros_ec_lpc_mec.c
+++ b/drivers/platform/chrome/cros_ec_lpc_mec.c
@@ -24,10 +24,11 @@
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/mfd/cros_ec_commands.h>
-#include <linux/mfd/cros_ec_lpc_mec.h>
 #include <linux/mutex.h>
 #include <linux/types.h>
 
+#include "cros_ec_lpc_mec.h"
+
 /*
  * This mutex must be held while accessing the EMI unit. We can't rely on the
  * EC mutex because memmap data may be accessed without it being held.
diff --git a/drivers/platform/chrome/cros_ec_lpc_mec.h b/drivers/platform/chrome/cros_ec_lpc_mec.h
new file mode 100644
index 000000000000..105068c0e919
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_lpc_mec.h
@@ -0,0 +1,90 @@
+/*
+ * cros_ec_lpc_mec - LPC variant I/O for Microchip EC
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
+ * but everything else (including deghosting) is done here.  The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#ifndef __CROS_EC_LPC_MEC_H
+#define __CROS_EC_LPC_MEC_H
+
+#include <linux/mfd/cros_ec_commands.h>
+
+enum cros_ec_lpc_mec_emi_access_mode {
+	/* 8-bit access */
+	ACCESS_TYPE_BYTE = 0x0,
+	/* 16-bit access */
+	ACCESS_TYPE_WORD = 0x1,
+	/* 32-bit access */
+	ACCESS_TYPE_LONG = 0x2,
+	/*
+	 * 32-bit access, read or write of MEC_EMI_EC_DATA_B3 causes the
+	 * EC data register to be incremented.
+	 */
+	ACCESS_TYPE_LONG_AUTO_INCREMENT = 0x3,
+};
+
+enum cros_ec_lpc_mec_io_type {
+	MEC_IO_READ,
+	MEC_IO_WRITE,
+};
+
+/* Access IO ranges 0x800 thru 0x9ff using EMI interface instead of LPC */
+#define MEC_EMI_RANGE_START EC_HOST_CMD_REGION0
+#define MEC_EMI_RANGE_END   (EC_LPC_ADDR_MEMMAP + EC_MEMMAP_SIZE)
+
+/* EMI registers are relative to base */
+#define MEC_EMI_BASE 0x800
+#define MEC_EMI_HOST_TO_EC (MEC_EMI_BASE + 0)
+#define MEC_EMI_EC_TO_HOST (MEC_EMI_BASE + 1)
+#define MEC_EMI_EC_ADDRESS_B0 (MEC_EMI_BASE + 2)
+#define MEC_EMI_EC_ADDRESS_B1 (MEC_EMI_BASE + 3)
+#define MEC_EMI_EC_DATA_B0 (MEC_EMI_BASE + 4)
+#define MEC_EMI_EC_DATA_B1 (MEC_EMI_BASE + 5)
+#define MEC_EMI_EC_DATA_B2 (MEC_EMI_BASE + 6)
+#define MEC_EMI_EC_DATA_B3 (MEC_EMI_BASE + 7)
+
+/*
+ * cros_ec_lpc_mec_init
+ *
+ * Initialize MEC I/O.
+ */
+void cros_ec_lpc_mec_init(void);
+
+/*
+ * cros_ec_lpc_mec_destroy
+ *
+ * Cleanup MEC I/O.
+ */
+void cros_ec_lpc_mec_destroy(void);
+
+/**
+ * cros_ec_lpc_io_bytes_mec - Read / write bytes to MEC EMI port
+ *
+ * @io_type: MEC_IO_READ or MEC_IO_WRITE, depending on request
+ * @offset:  Base read / write address
+ * @length:  Number of bytes to read / write
+ * @buf:     Destination / source buffer
+ *
+ * @return 8-bit checksum of all bytes read / written
+ */
+u8 cros_ec_lpc_io_bytes_mec(enum cros_ec_lpc_mec_io_type io_type,
+			    unsigned int offset, unsigned int length, u8 *buf);
+
+#endif /* __CROS_EC_LPC_MEC_H */
diff --git a/drivers/platform/chrome/cros_ec_lpc_reg.c b/drivers/platform/chrome/cros_ec_lpc_reg.c
index dcc7a3e30604..fc23d535c404 100644
--- a/drivers/platform/chrome/cros_ec_lpc_reg.c
+++ b/drivers/platform/chrome/cros_ec_lpc_reg.c
@@ -24,7 +24,8 @@
 #include <linux/io.h>
 #include <linux/mfd/cros_ec.h>
 #include <linux/mfd/cros_ec_commands.h>
-#include <linux/mfd/cros_ec_lpc_mec.h>
+
+#include "cros_ec_lpc_mec.h"
 
 static u8 lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest)
 {
diff --git a/drivers/platform/chrome/cros_ec_lpc_reg.h b/drivers/platform/chrome/cros_ec_lpc_reg.h
new file mode 100644
index 000000000000..1c12c38b306a
--- /dev/null
+++ b/drivers/platform/chrome/cros_ec_lpc_reg.h
@@ -0,0 +1,61 @@
+/*
+ * cros_ec_lpc_reg - LPC access to the Chrome OS Embedded Controller
+ *
+ * Copyright (C) 2016 Google, Inc
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * This driver uses the Chrome OS EC byte-level message-based protocol for
+ * communicating the keyboard state (which keys are pressed) from a keyboard EC
+ * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
+ * but everything else (including deghosting) is done here.  The main
+ * motivation for this is to keep the EC firmware as simple as possible, since
+ * it cannot be easily upgraded and EC flash/IRAM space is relatively
+ * expensive.
+ */
+
+#ifndef __CROS_EC_LPC_REG_H
+#define __CROS_EC_LPC_REG_H
+
+/**
+ * cros_ec_lpc_read_bytes - Read bytes from a given LPC-mapped address.
+ * Returns 8-bit checksum of all bytes read.
+ *
+ * @offset: Base read address
+ * @length: Number of bytes to read
+ * @dest: Destination buffer
+ */
+u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest);
+
+/**
+ * cros_ec_lpc_write_bytes - Write bytes to a given LPC-mapped address.
+ * Returns 8-bit checksum of all bytes written.
+ *
+ * @offset: Base write address
+ * @length: Number of bytes to write
+ * @msg: Write data buffer
+ */
+u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg);
+
+/**
+ * cros_ec_lpc_reg_init
+ *
+ * Initialize register I/O.
+ */
+void cros_ec_lpc_reg_init(void);
+
+/**
+ * cros_ec_lpc_reg_destroy
+ *
+ * Cleanup reg I/O.
+ */
+void cros_ec_lpc_reg_destroy(void);
+
+#endif /* __CROS_EC_LPC_REG_H */
diff --git a/include/linux/mfd/cros_ec_lpc_mec.h b/include/linux/mfd/cros_ec_lpc_mec.h
deleted file mode 100644
index 176496ddc66c..000000000000
--- a/include/linux/mfd/cros_ec_lpc_mec.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * cros_ec_lpc_mec - LPC variant I/O for Microchip EC
- *
- * Copyright (C) 2016 Google, Inc
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * This driver uses the Chrome OS EC byte-level message-based protocol for
- * communicating the keyboard state (which keys are pressed) from a keyboard EC
- * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
- * but everything else (including deghosting) is done here.  The main
- * motivation for this is to keep the EC firmware as simple as possible, since
- * it cannot be easily upgraded and EC flash/IRAM space is relatively
- * expensive.
- */
-
-#ifndef __LINUX_MFD_CROS_EC_MEC_H
-#define __LINUX_MFD_CROS_EC_MEC_H
-
-#include <linux/mfd/cros_ec_commands.h>
-
-enum cros_ec_lpc_mec_emi_access_mode {
-	/* 8-bit access */
-	ACCESS_TYPE_BYTE = 0x0,
-	/* 16-bit access */
-	ACCESS_TYPE_WORD = 0x1,
-	/* 32-bit access */
-	ACCESS_TYPE_LONG = 0x2,
-	/*
-	 * 32-bit access, read or write of MEC_EMI_EC_DATA_B3 causes the
-	 * EC data register to be incremented.
-	 */
-	ACCESS_TYPE_LONG_AUTO_INCREMENT = 0x3,
-};
-
-enum cros_ec_lpc_mec_io_type {
-	MEC_IO_READ,
-	MEC_IO_WRITE,
-};
-
-/* Access IO ranges 0x800 thru 0x9ff using EMI interface instead of LPC */
-#define MEC_EMI_RANGE_START EC_HOST_CMD_REGION0
-#define MEC_EMI_RANGE_END   (EC_LPC_ADDR_MEMMAP + EC_MEMMAP_SIZE)
-
-/* EMI registers are relative to base */
-#define MEC_EMI_BASE 0x800
-#define MEC_EMI_HOST_TO_EC (MEC_EMI_BASE + 0)
-#define MEC_EMI_EC_TO_HOST (MEC_EMI_BASE + 1)
-#define MEC_EMI_EC_ADDRESS_B0 (MEC_EMI_BASE + 2)
-#define MEC_EMI_EC_ADDRESS_B1 (MEC_EMI_BASE + 3)
-#define MEC_EMI_EC_DATA_B0 (MEC_EMI_BASE + 4)
-#define MEC_EMI_EC_DATA_B1 (MEC_EMI_BASE + 5)
-#define MEC_EMI_EC_DATA_B2 (MEC_EMI_BASE + 6)
-#define MEC_EMI_EC_DATA_B3 (MEC_EMI_BASE + 7)
-
-/*
- * cros_ec_lpc_mec_init
- *
- * Initialize MEC I/O.
- */
-void cros_ec_lpc_mec_init(void);
-
-/*
- * cros_ec_lpc_mec_destroy
- *
- * Cleanup MEC I/O.
- */
-void cros_ec_lpc_mec_destroy(void);
-
-/**
- * cros_ec_lpc_io_bytes_mec - Read / write bytes to MEC EMI port
- *
- * @io_type: MEC_IO_READ or MEC_IO_WRITE, depending on request
- * @offset:  Base read / write address
- * @length:  Number of bytes to read / write
- * @buf:     Destination / source buffer
- *
- * @return 8-bit checksum of all bytes read / written
- */
-u8 cros_ec_lpc_io_bytes_mec(enum cros_ec_lpc_mec_io_type io_type,
-			    unsigned int offset, unsigned int length, u8 *buf);
-
-#endif /* __LINUX_MFD_CROS_EC_MEC_H */
diff --git a/include/linux/mfd/cros_ec_lpc_reg.h b/include/linux/mfd/cros_ec_lpc_reg.h
deleted file mode 100644
index 5560bef63c2b..000000000000
--- a/include/linux/mfd/cros_ec_lpc_reg.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * cros_ec_lpc_reg - LPC access to the Chrome OS Embedded Controller
- *
- * Copyright (C) 2016 Google, Inc
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * This driver uses the Chrome OS EC byte-level message-based protocol for
- * communicating the keyboard state (which keys are pressed) from a keyboard EC
- * to the AP over some bus (such as i2c, lpc, spi).  The EC does debouncing,
- * but everything else (including deghosting) is done here.  The main
- * motivation for this is to keep the EC firmware as simple as possible, since
- * it cannot be easily upgraded and EC flash/IRAM space is relatively
- * expensive.
- */
-
-#ifndef __LINUX_MFD_CROS_EC_REG_H
-#define __LINUX_MFD_CROS_EC_REG_H
-
-/**
- * cros_ec_lpc_read_bytes - Read bytes from a given LPC-mapped address.
- * Returns 8-bit checksum of all bytes read.
- *
- * @offset: Base read address
- * @length: Number of bytes to read
- * @dest: Destination buffer
- */
-u8 cros_ec_lpc_read_bytes(unsigned int offset, unsigned int length, u8 *dest);
-
-/**
- * cros_ec_lpc_write_bytes - Write bytes to a given LPC-mapped address.
- * Returns 8-bit checksum of all bytes written.
- *
- * @offset: Base write address
- * @length: Number of bytes to write
- * @msg: Write data buffer
- */
-u8 cros_ec_lpc_write_bytes(unsigned int offset, unsigned int length, u8 *msg);
-
-/**
- * cros_ec_lpc_reg_init
- *
- * Initialize register I/O.
- */
-void cros_ec_lpc_reg_init(void);
-
-/**
- * cros_ec_lpc_reg_destroy
- *
- * Cleanup reg I/O.
- */
-void cros_ec_lpc_reg_destroy(void);
-
-#endif /* __LINUX_MFD_CROS_EC_REG_H */
-- 
cgit v1.2.3


From e2bbf91cad09118d7500f1fdaaa83d7741d30395 Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Wed, 18 Jul 2018 18:09:56 +0200
Subject: mfd: cros_ec: Fix and improve kerneldoc comments.

cros-ec includes inside the MFD subsystem, specially the file
cros_ec_commands.h, has been modified several times and it has grown a
lot, unfortunately, we didn't have care too much about the documentation.
This patch tries to improve the documentation and also fixes all the
issues reported by kerneldoc script.

Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Benson Leung <bleung@chromium.org>
---
 drivers/mfd/cros_ec_dev.h            |  13 +-
 include/linux/mfd/cros_ec.h          | 214 +++++++++++++------------
 include/linux/mfd/cros_ec_commands.h | 295 ++++++++++++++++++++++-------------
 3 files changed, 310 insertions(+), 212 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/cros_ec_dev.h b/drivers/mfd/cros_ec_dev.h
index 45e9453608c5..978d836a0248 100644
--- a/drivers/mfd/cros_ec_dev.h
+++ b/drivers/mfd/cros_ec_dev.h
@@ -26,12 +26,13 @@
 
 #define CROS_EC_DEV_VERSION "1.0.0"
 
-/*
- * @offset: within EC_LPC_ADDR_MEMMAP region
- * @bytes: number of bytes to read. zero means "read a string" (including '\0')
- *         (at most only EC_MEMMAP_SIZE bytes can be read)
- * @buffer: where to store the result
- * ioctl returns the number of bytes read, negative on error
+/**
+ * struct cros_ec_readmem - Struct used to read mapped memory.
+ * @offset: Within EC_LPC_ADDR_MEMMAP region.
+ * @bytes: Number of bytes to read. Zero means "read a string" (including '\0')
+ *         At most only EC_MEMMAP_SIZE bytes can be read.
+ * @buffer: Where to store the result. The ioctl returns the number of bytes
+ *         read or negative on error.
  */
 struct cros_ec_readmem {
 	uint32_t offset;
diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h
index 20949dde35cd..e44e3ec8a9c7 100644
--- a/include/linux/mfd/cros_ec.h
+++ b/include/linux/mfd/cros_ec.h
@@ -36,7 +36,7 @@
  * I2C requires 1 additional byte for requests.
  * I2C requires 2 additional bytes for responses.
  * SPI requires up to 32 additional bytes for responses.
- * */
+ */
 #define EC_PROTO_VERSION_UNKNOWN	0
 #define EC_MAX_REQUEST_OVERHEAD		1
 #define EC_MAX_RESPONSE_OVERHEAD	32
@@ -58,13 +58,14 @@ enum {
 	EC_MAX_MSG_BYTES		= 64 * 1024,
 };
 
-/*
- * @version: Command version number (often 0)
- * @command: Command to send (EC_CMD_...)
- * @outsize: Outgoing length in bytes
- * @insize: Max number of bytes to accept from EC
- * @result: EC's response to the command (separate from communication failure)
- * @data: Where to put the incoming data from EC and outgoing data to EC
+/**
+ * struct cros_ec_command - Information about a ChromeOS EC command.
+ * @version: Command version number (often 0).
+ * @command: Command to send (EC_CMD_...).
+ * @outsize: Outgoing length in bytes.
+ * @insize: Max number of bytes to accept from the EC.
+ * @result: EC's response to the command (separate from communication failure).
+ * @data: Where to put the incoming data from EC and outgoing data to EC.
  */
 struct cros_ec_command {
 	uint32_t version;
@@ -76,48 +77,55 @@ struct cros_ec_command {
 };
 
 /**
- * struct cros_ec_device - Information about a ChromeOS EC device
- *
- * @phys_name: name of physical comms layer (e.g. 'i2c-4')
+ * struct cros_ec_device - Information about a ChromeOS EC device.
+ * @phys_name: Name of physical comms layer (e.g. 'i2c-4').
  * @dev: Device pointer for physical comms device
- * @was_wake_device: true if this device was set to wake the system from
- * sleep at the last suspend
- * @cmd_readmem: direct read of the EC memory-mapped region, if supported
- *     @offset is within EC_LPC_ADDR_MEMMAP region.
- *     @bytes: number of bytes to read. zero means "read a string" (including
- *     the trailing '\0'). At most only EC_MEMMAP_SIZE bytes can be read.
- *     Caller must ensure that the buffer is large enough for the result when
- *     reading a string.
- *
- * @priv: Private data
- * @irq: Interrupt to use
- * @id: Device id
- * @din: input buffer (for data from EC)
- * @dout: output buffer (for data to EC)
- * \note
- * These two buffers will always be dword-aligned and include enough
- * space for up to 7 word-alignment bytes also, so we can ensure that
- * the body of the message is always dword-aligned (64-bit).
- * We use this alignment to keep ARM and x86 happy. Probably word
- * alignment would be OK, there might be a small performance advantage
- * to using dword.
- * @din_size: size of din buffer to allocate (zero to use static din)
- * @dout_size: size of dout buffer to allocate (zero to use static dout)
- * @wake_enabled: true if this device can wake the system from sleep
- * @suspended: true if this device had been suspended
- * @cmd_xfer: send command to EC and get response
- *     Returns the number of bytes received if the communication succeeded, but
- *     that doesn't mean the EC was happy with the command. The caller
- *     should check msg.result for the EC's result code.
- * @pkt_xfer: send packet to EC and get response
- * @lock: one transaction at a time
- * @mkbp_event_supported: true if this EC supports the MKBP event protocol.
- * @event_notifier: interrupt event notifier for transport devices.
- * @event_data: raw payload transferred with the MKBP event.
- * @event_size: size in bytes of the event data.
+ * @was_wake_device: True if this device was set to wake the system from
+ *                   sleep at the last suspend.
+ * @cros_class: The class structure for this device.
+ * @cmd_readmem: Direct read of the EC memory-mapped region, if supported.
+ *     @offset: Is within EC_LPC_ADDR_MEMMAP region.
+ *     @bytes: Number of bytes to read. zero means "read a string" (including
+ *             the trailing '\0'). At most only EC_MEMMAP_SIZE bytes can be
+ *             read. Caller must ensure that the buffer is large enough for the
+ *             result when reading a string.
+ * @max_request: Max size of message requested.
+ * @max_response: Max size of message response.
+ * @max_passthru: Max sice of passthru message.
+ * @proto_version: The protocol version used for this device.
+ * @priv: Private data.
+ * @irq: Interrupt to use.
+ * @id: Device id.
+ * @din: Input buffer (for data from EC). This buffer will always be
+ *       dword-aligned and include enough space for up to 7 word-alignment
+ *       bytes also, so we can ensure that the body of the message is always
+ *       dword-aligned (64-bit). We use this alignment to keep ARM and x86
+ *       happy. Probably word alignment would be OK, there might be a small
+ *       performance advantage to using dword.
+ * @dout: Output buffer (for data to EC). This buffer will always be
+ *        dword-aligned and include enough space for up to 7 word-alignment
+ *        bytes also, so we can ensure that the body of the message is always
+ *        dword-aligned (64-bit). We use this alignment to keep ARM and x86
+ *        happy. Probably word alignment would be OK, there might be a small
+ *        performance advantage to using dword.
+ * @din_size: Size of din buffer to allocate (zero to use static din).
+ * @dout_size: Size of dout buffer to allocate (zero to use static dout).
+ * @wake_enabled: True if this device can wake the system from sleep.
+ * @suspended: True if this device had been suspended.
+ * @cmd_xfer: Send command to EC and get response.
+ *            Returns the number of bytes received if the communication
+ *            succeeded, but that doesn't mean the EC was happy with the
+ *            command. The caller should check msg.result for the EC's result
+ *            code.
+ * @pkt_xfer: Send packet to EC and get response.
+ * @lock: One transaction at a time.
+ * @mkbp_event_supported: True if this EC supports the MKBP event protocol.
+ * @event_notifier: Interrupt event notifier for transport devices.
+ * @event_data: Raw payload transferred with the MKBP event.
+ * @event_size: Size in bytes of the event data.
+ * @host_event_wake_mask: Mask of host events that cause wake from suspend.
  */
 struct cros_ec_device {
-
 	/* These are used by other drivers that want to talk to the EC */
 	const char *phys_name;
 	struct device *dev;
@@ -153,20 +161,19 @@ struct cros_ec_device {
 };
 
 /**
- * struct cros_ec_sensor_platform - ChromeOS EC sensor platform information
- *
+ * struct cros_ec_sensor_platform - ChromeOS EC sensor platform information.
  * @sensor_num: Id of the sensor, as reported by the EC.
  */
 struct cros_ec_sensor_platform {
 	u8 sensor_num;
 };
 
-/* struct cros_ec_platform - ChromeOS EC platform information
- *
- * @ec_name: name of EC device (e.g. 'cros-ec', 'cros-pd', ...)
- * used in /dev/ and sysfs.
- * @cmd_offset: offset to apply for each command. Set when
- * registering a devicde behind another one.
+/**
+ * struct cros_ec_platform - ChromeOS EC platform information.
+ * @ec_name: Name of EC device (e.g. 'cros-ec', 'cros-pd', ...)
+ *           used in /dev/ and sysfs.
+ * @cmd_offset: Offset to apply for each command. Set when
+ *              registering a device behind another one.
  */
 struct cros_ec_platform {
 	const char *ec_name;
@@ -175,16 +182,16 @@ struct cros_ec_platform {
 
 struct cros_ec_debugfs;
 
-/*
- * struct cros_ec_dev - ChromeOS EC device entry point
- *
- * @class_dev: Device structure used in sysfs
- * @cdev: Character device structure in /dev
- * @ec_dev: cros_ec_device structure to talk to the physical device
- * @dev: pointer to the platform device
- * @debug_info: cros_ec_debugfs structure for debugging information
- * @has_kb_wake_angle: true if at least 2 accelerometer are connected to the EC.
- * @cmd_offset: offset to apply for each command.
+/**
+ * struct cros_ec_dev - ChromeOS EC device entry point.
+ * @class_dev: Device structure used in sysfs.
+ * @cdev: Character device structure in /dev.
+ * @ec_dev: cros_ec_device structure to talk to the physical device.
+ * @dev: Pointer to the platform device.
+ * @debug_info: cros_ec_debugfs structure for debugging information.
+ * @has_kb_wake_angle: True if at least 2 accelerometer are connected to the EC.
+ * @cmd_offset: Offset to apply for each command.
+ * @features: Features supported by the EC.
  */
 struct cros_ec_dev {
 	struct device class_dev;
@@ -200,124 +207,129 @@ struct cros_ec_dev {
 #define to_cros_ec_dev(dev)  container_of(dev, struct cros_ec_dev, class_dev)
 
 /**
- * cros_ec_suspend - Handle a suspend operation for the ChromeOS EC device
+ * cros_ec_suspend() - Handle a suspend operation for the ChromeOS EC device.
+ * @ec_dev: Device to suspend.
  *
  * This can be called by drivers to handle a suspend event.
  *
- * ec_dev: Device to suspend
- * @return 0 if ok, -ve on error
+ * Return: 0 on success or negative error code.
  */
 int cros_ec_suspend(struct cros_ec_device *ec_dev);
 
 /**
- * cros_ec_resume - Handle a resume operation for the ChromeOS EC device
+ * cros_ec_resume() - Handle a resume operation for the ChromeOS EC device.
+ * @ec_dev: Device to resume.
  *
  * This can be called by drivers to handle a resume event.
  *
- * @ec_dev: Device to resume
- * @return 0 if ok, -ve on error
+ * Return: 0 on success or negative error code.
  */
 int cros_ec_resume(struct cros_ec_device *ec_dev);
 
 /**
- * cros_ec_prepare_tx - Prepare an outgoing message in the output buffer
+ * cros_ec_prepare_tx() - Prepare an outgoing message in the output buffer.
+ * @ec_dev: Device to register.
+ * @msg: Message to write.
  *
  * This is intended to be used by all ChromeOS EC drivers, but at present
  * only SPI uses it. Once LPC uses the same protocol it can start using it.
  * I2C could use it now, with a refactor of the existing code.
  *
- * @ec_dev: Device to register
- * @msg: Message to write
+ * Return: 0 on success or negative error code.
  */
 int cros_ec_prepare_tx(struct cros_ec_device *ec_dev,
 		       struct cros_ec_command *msg);
 
 /**
- * cros_ec_check_result - Check ec_msg->result
+ * cros_ec_check_result() - Check ec_msg->result.
+ * @ec_dev: EC device.
+ * @msg: Message to check.
  *
  * This is used by ChromeOS EC drivers to check the ec_msg->result for
  * errors and to warn about them.
  *
- * @ec_dev: EC device
- * @msg: Message to check
+ * Return: 0 on success or negative error code.
  */
 int cros_ec_check_result(struct cros_ec_device *ec_dev,
 			 struct cros_ec_command *msg);
 
 /**
- * cros_ec_cmd_xfer - Send a command to the ChromeOS EC
+ * cros_ec_cmd_xfer() - Send a command to the ChromeOS EC.
+ * @ec_dev: EC device.
+ * @msg: Message to write.
  *
  * Call this to send a command to the ChromeOS EC.  This should be used
  * instead of calling the EC's cmd_xfer() callback directly.
  *
- * @ec_dev: EC device
- * @msg: Message to write
+ * Return: 0 on success or negative error code.
  */
 int cros_ec_cmd_xfer(struct cros_ec_device *ec_dev,
 		     struct cros_ec_command *msg);
 
 /**
- * cros_ec_cmd_xfer_status - Send a command to the ChromeOS EC
+ * cros_ec_cmd_xfer_status() - Send a command to the ChromeOS EC.
+ * @ec_dev: EC device.
+ * @msg: Message to write.
  *
  * This function is identical to cros_ec_cmd_xfer, except it returns success
  * status only if both the command was transmitted successfully and the EC
  * replied with success status. It's not necessary to check msg->result when
  * using this function.
  *
- * @ec_dev: EC device
- * @msg: Message to write
- * @return: Num. of bytes transferred on success, <0 on failure
+ * Return: The number of bytes transferred on success or negative error code.
  */
 int cros_ec_cmd_xfer_status(struct cros_ec_device *ec_dev,
 			    struct cros_ec_command *msg);
 
 /**
- * cros_ec_remove - Remove a ChromeOS EC
+ * cros_ec_remove() - Remove a ChromeOS EC.
+ * @ec_dev: Device to register.
  *
  * Call this to deregister a ChromeOS EC, then clean up any private data.
  *
- * @ec_dev: Device to register
- * @return 0 if ok, -ve on error
+ * Return: 0 on success or negative error code.
  */
 int cros_ec_remove(struct cros_ec_device *ec_dev);
 
 /**
- * cros_ec_register - Register a new ChromeOS EC, using the provided info
+ * cros_ec_register() - Register a new ChromeOS EC, using the provided info.
+ * @ec_dev: Device to register.
  *
  * Before calling this, allocate a pointer to a new device and then fill
  * in all the fields up to the --private-- marker.
  *
- * @ec_dev: Device to register
- * @return 0 if ok, -ve on error
+ * Return: 0 on success or negative error code.
  */
 int cros_ec_register(struct cros_ec_device *ec_dev);
 
 /**
- * cros_ec_query_all -  Query the protocol version supported by the ChromeOS EC
+ * cros_ec_query_all() -  Query the protocol version supported by the
+ *         ChromeOS EC.
+ * @ec_dev: Device to register.
  *
- * @ec_dev: Device to register
- * @return 0 if ok, -ve on error
+ * Return: 0 on success or negative error code.
  */
 int cros_ec_query_all(struct cros_ec_device *ec_dev);
 
 /**
- * cros_ec_get_next_event -  Fetch next event from the ChromeOS EC
- *
- * @ec_dev: Device to fetch event from
+ * cros_ec_get_next_event() - Fetch next event from the ChromeOS EC.
+ * @ec_dev: Device to fetch event from.
  * @wake_event: Pointer to a bool set to true upon return if the event might be
  *              treated as a wake event. Ignored if null.
  *
- * Returns: 0 on success, Linux error number on failure
+ * Return: 0 on success or negative error code.
  */
 int cros_ec_get_next_event(struct cros_ec_device *ec_dev, bool *wake_event);
 
 /**
- * cros_ec_get_host_event - Return a mask of event set by the EC.
+ * cros_ec_get_host_event() - Return a mask of event set by the ChromeOS EC.
+ * @ec_dev: Device to fetch event from.
  *
- * When MKBP is supported, when the EC raises an interrupt,
- * We collect the events raised and call the functions in the ec notifier.
+ * When MKBP is supported, when the EC raises an interrupt, we collect the
+ * events raised and call the functions in the ec notifier. This function
+ * is a helper to know which events are raised.
  *
- * This function is a helper to know which events are raised.
+ * Return: 0 on success or negative error code.
  */
 u32 cros_ec_get_host_event(struct cros_ec_device *ec_dev);
 
diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index 6e1ab9bead28..88100bddf1ab 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -306,15 +306,18 @@ enum host_event_code {
 /* Host event mask */
 #define EC_HOST_EVENT_MASK(event_code) (1UL << ((event_code) - 1))
 
-/* Arguments at EC_LPC_ADDR_HOST_ARGS */
+/**
+ * struct ec_lpc_host_args - Arguments at EC_LPC_ADDR_HOST_ARGS
+ * @flags: The host argument flags.
+ * @command_version: Command version.
+ * @data_size: The length of data.
+ * @checksum: Checksum; sum of command + flags + command_version + data_size +
+ *            all params/response data bytes.
+ */
 struct ec_lpc_host_args {
 	uint8_t flags;
 	uint8_t command_version;
 	uint8_t data_size;
-	/*
-	 * Checksum; sum of command + flags + command_version + data_size +
-	 * all params/response data bytes.
-	 */
 	uint8_t checksum;
 } __packed;
 
@@ -468,54 +471,43 @@ struct ec_lpc_host_args {
 
 #define EC_HOST_REQUEST_VERSION 3
 
-/* Version 3 request from host */
+/**
+ * struct ec_host_request - Version 3 request from host.
+ * @struct_version: Should be 3. The EC will return EC_RES_INVALID_HEADER if it
+ *                  receives a header with a version it doesn't know how to
+ *                  parse.
+ * @checksum: Checksum of request and data; sum of all bytes including checksum
+ *            should total to 0.
+ * @command: Command to send (EC_CMD_...)
+ * @command_version: Command version.
+ * @reserved: Unused byte in current protocol version; set to 0.
+ * @data_len: Length of data which follows this header.
+ */
 struct ec_host_request {
-	/* Struct version (=3)
-	 *
-	 * EC will return EC_RES_INVALID_HEADER if it receives a header with a
-	 * version it doesn't know how to parse.
-	 */
 	uint8_t struct_version;
-
-	/*
-	 * Checksum of request and data; sum of all bytes including checksum
-	 * should total to 0.
-	 */
 	uint8_t checksum;
-
-	/* Command code */
 	uint16_t command;
-
-	/* Command version */
 	uint8_t command_version;
-
-	/* Unused byte in current protocol version; set to 0 */
 	uint8_t reserved;
-
-	/* Length of data which follows this header */
 	uint16_t data_len;
 } __packed;
 
 #define EC_HOST_RESPONSE_VERSION 3
 
-/* Version 3 response from EC */
+/**
+ * struct ec_host_response - Version 3 response from EC.
+ * @struct_version: Struct version (=3).
+ * @checksum: Checksum of response and data; sum of all bytes including
+ *            checksum should total to 0.
+ * @result: EC's response to the command (separate from communication failure)
+ * @data_len: Length of data which follows this header.
+ * @reserved: Unused bytes in current protocol version; set to 0.
+ */
 struct ec_host_response {
-	/* Struct version (=3) */
 	uint8_t struct_version;
-
-	/*
-	 * Checksum of response and data; sum of all bytes including checksum
-	 * should total to 0.
-	 */
 	uint8_t checksum;
-
-	/* Result code (EC_RES_*) */
 	uint16_t result;
-
-	/* Length of data which follows this header */
 	uint16_t data_len;
-
-	/* Unused bytes in current protocol version; set to 0 */
 	uint16_t reserved;
 } __packed;
 
@@ -540,6 +532,10 @@ struct ec_host_response {
  */
 #define EC_CMD_PROTO_VERSION 0x00
 
+/**
+ * struct ec_response_proto_version - Response to the proto version command.
+ * @version: The protocol version.
+ */
 struct ec_response_proto_version {
 	uint32_t version;
 } __packed;
@@ -550,12 +546,20 @@ struct ec_response_proto_version {
  */
 #define EC_CMD_HELLO 0x01
 
+/**
+ * struct ec_params_hello - Parameters to the hello command.
+ * @in_data: Pass anything here.
+ */
 struct ec_params_hello {
-	uint32_t in_data;  /* Pass anything here */
+	uint32_t in_data;
 } __packed;
 
+/**
+ * struct ec_response_hello - Response to the hello command.
+ * @out_data: Output will be in_data + 0x01020304.
+ */
 struct ec_response_hello {
-	uint32_t out_data;  /* Output will be in_data + 0x01020304 */
+	uint32_t out_data;
 } __packed;
 
 /* Get version number */
@@ -567,22 +571,37 @@ enum ec_current_image {
 	EC_IMAGE_RW
 };
 
+/**
+ * struct ec_response_get_version - Response to the get version command.
+ * @version_string_ro: Null-terminated RO firmware version string.
+ * @version_string_rw: Null-terminated RW firmware version string.
+ * @reserved: Unused bytes; was previously RW-B firmware version string.
+ * @current_image: One of ec_current_image.
+ */
 struct ec_response_get_version {
-	/* Null-terminated version strings for RO, RW */
 	char version_string_ro[32];
 	char version_string_rw[32];
-	char reserved[32];       /* Was previously RW-B string */
-	uint32_t current_image;  /* One of ec_current_image */
+	char reserved[32];
+	uint32_t current_image;
 } __packed;
 
 /* Read test */
 #define EC_CMD_READ_TEST 0x03
 
+/**
+ * struct ec_params_read_test - Parameters for the read test command.
+ * @offset: Starting value for read buffer.
+ * @size: Size to read in bytes.
+ */
 struct ec_params_read_test {
-	uint32_t offset;   /* Starting value for read buffer */
-	uint32_t size;     /* Size to read in bytes */
+	uint32_t offset;
+	uint32_t size;
 } __packed;
 
+/**
+ * struct ec_response_read_test - Response to the read test command.
+ * @data: Data returned by the read test command.
+ */
 struct ec_response_read_test {
 	uint32_t data[32];
 } __packed;
@@ -597,18 +616,27 @@ struct ec_response_read_test {
 /* Get chip info */
 #define EC_CMD_GET_CHIP_INFO 0x05
 
+/**
+ * struct ec_response_get_chip_info - Response to the get chip info command.
+ * @vendor: Null-terminated string for chip vendor.
+ * @name: Null-terminated string for chip name.
+ * @revision: Null-terminated string for chip mask version.
+ */
 struct ec_response_get_chip_info {
-	/* Null-terminated strings */
 	char vendor[32];
 	char name[32];
-	char revision[32];  /* Mask version */
+	char revision[32];
 } __packed;
 
 /* Get board HW version */
 #define EC_CMD_GET_BOARD_VERSION 0x06
 
+/**
+ * struct ec_response_board_version - Response to the board version command.
+ * @board_version: A monotonously incrementing number.
+ */
 struct ec_response_board_version {
-	uint16_t board_version;  /* A monotonously incrementing number. */
+	uint16_t board_version;
 } __packed;
 
 /*
@@ -621,27 +649,42 @@ struct ec_response_board_version {
  */
 #define EC_CMD_READ_MEMMAP 0x07
 
+/**
+ * struct ec_params_read_memmap - Parameters for the read memory map command.
+ * @offset: Offset in memmap (EC_MEMMAP_*).
+ * @size: Size to read in bytes.
+ */
 struct ec_params_read_memmap {
-	uint8_t offset;   /* Offset in memmap (EC_MEMMAP_*) */
-	uint8_t size;     /* Size to read in bytes */
+	uint8_t offset;
+	uint8_t size;
 } __packed;
 
 /* Read versions supported for a command */
 #define EC_CMD_GET_CMD_VERSIONS 0x08
 
+/**
+ * struct ec_params_get_cmd_versions - Parameters for the get command versions.
+ * @cmd: Command to check.
+ */
 struct ec_params_get_cmd_versions {
-	uint8_t cmd;      /* Command to check */
+	uint8_t cmd;
 } __packed;
 
+/**
+ * struct ec_params_get_cmd_versions_v1 - Parameters for the get command
+ *         versions (v1)
+ * @cmd: Command to check.
+ */
 struct ec_params_get_cmd_versions_v1 {
-	uint16_t cmd;     /* Command to check */
+	uint16_t cmd;
 } __packed;
 
+/**
+ * struct ec_response_get_cmd_version - Response to the get command versions.
+ * @version_mask: Mask of supported versions; use EC_VER_MASK() to compare with
+ *                a desired version.
+ */
 struct ec_response_get_cmd_versions {
-	/*
-	 * Mask of supported versions; use EC_VER_MASK() to compare with a
-	 * desired version.
-	 */
 	uint32_t version_mask;
 } __packed;
 
@@ -659,6 +702,11 @@ enum ec_comms_status {
 	EC_COMMS_STATUS_PROCESSING	= 1 << 0,	/* Processing cmd */
 };
 
+/**
+ * struct ec_response_get_comms_status - Response to the get comms status
+ *         command.
+ * @flags: Mask of enum ec_comms_status.
+ */
 struct ec_response_get_comms_status {
 	uint32_t flags;		/* Mask of enum ec_comms_status */
 } __packed;
@@ -685,19 +733,19 @@ struct ec_response_test_protocol {
 /* EC_RES_IN_PROGRESS may be returned if a command is slow */
 #define EC_PROTOCOL_INFO_IN_PROGRESS_SUPPORTED (1 << 0)
 
+/**
+ * struct ec_response_get_protocol_info - Response to the get protocol info.
+ * @protocol_versions: Bitmask of protocol versions supported (1 << n means
+ *                     version n).
+ * @max_request_packet_size: Maximum request packet size in bytes.
+ * @max_response_packet_size: Maximum response packet size in bytes.
+ * @flags: see EC_PROTOCOL_INFO_*
+ */
 struct ec_response_get_protocol_info {
 	/* Fields which exist if at least protocol version 3 supported */
-
-	/* Bitmask of protocol versions supported (1 << n means version n)*/
 	uint32_t protocol_versions;
-
-	/* Maximum request packet size, in bytes */
 	uint16_t max_request_packet_size;
-
-	/* Maximum response packet size, in bytes */
 	uint16_t max_response_packet_size;
-
-	/* Flags; see EC_PROTOCOL_INFO_* */
 	uint32_t flags;
 } __packed;
 
@@ -708,8 +756,10 @@ struct ec_response_get_protocol_info {
 /* The upper byte of .flags tells what to do (nothing means "get") */
 #define EC_GSV_SET        0x80000000
 
-/* The lower three bytes of .flags identifies the parameter, if that has
-   meaning for an individual command. */
+/*
+ * The lower three bytes of .flags identifies the parameter, if that has
+ * meaning for an individual command.
+ */
 #define EC_GSV_PARAM_MASK 0x00ffffff
 
 struct ec_params_get_set_value {
@@ -810,6 +860,7 @@ enum ec_feature_code {
 
 #define EC_FEATURE_MASK_0(event_code) (1UL << (event_code % 32))
 #define EC_FEATURE_MASK_1(event_code) (1UL << (event_code - 32))
+
 struct ec_response_get_features {
 	uint32_t flags[2];
 } __packed;
@@ -820,24 +871,22 @@ struct ec_response_get_features {
 /* Get flash info */
 #define EC_CMD_FLASH_INFO 0x10
 
-/* Version 0 returns these fields */
+/**
+ * struct ec_response_flash_info - Response to the flash info command.
+ * @flash_size: Usable flash size in bytes.
+ * @write_block_size: Write block size. Write offset and size must be a
+ *                    multiple of this.
+ * @erase_block_size: Erase block size. Erase offset and size must be a
+ *                    multiple of this.
+ * @protect_block_size: Protection block size. Protection offset and size
+ *                      must be a multiple of this.
+ *
+ * Version 0 returns these fields.
+ */
 struct ec_response_flash_info {
-	/* Usable flash size, in bytes */
 	uint32_t flash_size;
-	/*
-	 * Write block size.  Write offset and size must be a multiple
-	 * of this.
-	 */
 	uint32_t write_block_size;
-	/*
-	 * Erase block size.  Erase offset and size must be a multiple
-	 * of this.
-	 */
 	uint32_t erase_block_size;
-	/*
-	 * Protection block size.  Protection offset and size must be a
-	 * multiple of this.
-	 */
 	uint32_t protect_block_size;
 } __packed;
 
@@ -845,7 +894,22 @@ struct ec_response_flash_info {
 /* EC flash erases bits to 0 instead of 1 */
 #define EC_FLASH_INFO_ERASE_TO_0 (1 << 0)
 
-/*
+/**
+ * struct ec_response_flash_info_1 - Response to the flash info v1 command.
+ * @flash_size: Usable flash size in bytes.
+ * @write_block_size: Write block size. Write offset and size must be a
+ *                    multiple of this.
+ * @erase_block_size: Erase block size. Erase offset and size must be a
+ *                    multiple of this.
+ * @protect_block_size: Protection block size. Protection offset and size
+ *                      must be a multiple of this.
+ * @write_ideal_size: Ideal write size in bytes.  Writes will be fastest if
+ *                    size is exactly this and offset is a multiple of this.
+ *                    For example, an EC may have a write buffer which can do
+ *                    half-page operations if data is aligned, and a slower
+ *                    word-at-a-time write mode.
+ * @flags: Flags; see EC_FLASH_INFO_*
+ *
  * Version 1 returns the same initial fields as version 0, with additional
  * fields following.
  *
@@ -860,15 +924,7 @@ struct ec_response_flash_info_1 {
 	uint32_t protect_block_size;
 
 	/* Version 1 adds these fields: */
-	/*
-	 * Ideal write size in bytes.  Writes will be fastest if size is
-	 * exactly this and offset is a multiple of this.  For example, an EC
-	 * may have a write buffer which can do half-page operations if data is
-	 * aligned, and a slower word-at-a-time write mode.
-	 */
 	uint32_t write_ideal_size;
-
-	/* Flags; see EC_FLASH_INFO_* */
 	uint32_t flags;
 } __packed;
 
@@ -879,9 +935,14 @@ struct ec_response_flash_info_1 {
  */
 #define EC_CMD_FLASH_READ 0x11
 
+/**
+ * struct ec_params_flash_read - Parameters for the flash read command.
+ * @offset: Byte offset to read.
+ * @size: Size to read in bytes.
+ */
 struct ec_params_flash_read {
-	uint32_t offset;   /* Byte offset to read */
-	uint32_t size;     /* Size to read in bytes */
+	uint32_t offset;
+	uint32_t size;
 } __packed;
 
 /* Write flash */
@@ -891,18 +952,28 @@ struct ec_params_flash_read {
 /* Version 0 of the flash command supported only 64 bytes of data */
 #define EC_FLASH_WRITE_VER0_SIZE 64
 
+/**
+ * struct ec_params_flash_write - Parameters for the flash write command.
+ * @offset: Byte offset to write.
+ * @size: Size to write in bytes.
+ */
 struct ec_params_flash_write {
-	uint32_t offset;   /* Byte offset to write */
-	uint32_t size;     /* Size to write in bytes */
+	uint32_t offset;
+	uint32_t size;
 	/* Followed by data to write */
 } __packed;
 
 /* Erase flash */
 #define EC_CMD_FLASH_ERASE 0x13
 
+/**
+ * struct ec_params_flash_erase - Parameters for the flash erase command.
+ * @offset: Byte offset to erase.
+ * @size: Size to erase in bytes.
+ */
 struct ec_params_flash_erase {
-	uint32_t offset;   /* Byte offset to erase */
-	uint32_t size;     /* Size to erase in bytes */
+	uint32_t offset;
+	uint32_t size;
 } __packed;
 
 /*
@@ -941,21 +1012,28 @@ struct ec_params_flash_erase {
 /* Entile flash code protected when the EC boots */
 #define EC_FLASH_PROTECT_ALL_AT_BOOT        (1 << 6)
 
+/**
+ * struct ec_params_flash_protect - Parameters for the flash protect command.
+ * @mask: Bits in flags to apply.
+ * @flags: New flags to apply.
+ */
 struct ec_params_flash_protect {
-	uint32_t mask;   /* Bits in flags to apply */
-	uint32_t flags;  /* New flags to apply */
+	uint32_t mask;
+	uint32_t flags;
 } __packed;
 
+/**
+ * struct ec_response_flash_protect - Response to the flash protect command.
+ * @flags: Current value of flash protect flags.
+ * @valid_flags: Flags which are valid on this platform. This allows the
+ *               caller to distinguish between flags which aren't set vs. flags
+ *               which can't be set on this platform.
+ * @writable_flags: Flags which can be changed given the current protection
+ *                  state.
+ */
 struct ec_response_flash_protect {
-	/* Current value of flash protect flags */
 	uint32_t flags;
-	/*
-	 * Flags which are valid on this platform.  This allows the caller
-	 * to distinguish between flags which aren't set vs. flags which can't
-	 * be set on this platform.
-	 */
 	uint32_t valid_flags;
-	/* Flags which can be changed given the current protection state */
 	uint32_t writable_flags;
 } __packed;
 
@@ -982,8 +1060,13 @@ enum ec_flash_region {
 	EC_FLASH_REGION_COUNT,
 };
 
+/**
+ * struct ec_params_flash_region_info - Parameters for the flash region info
+ *         command.
+ * @region: Flash region; see EC_FLASH_REGION_*
+ */
 struct ec_params_flash_region_info {
-	uint32_t region;  /* enum ec_flash_region */
+	uint32_t region;
 } __packed;
 
 struct ec_response_flash_region_info {
@@ -1094,7 +1177,9 @@ struct rgb_s {
 };
 
 #define LB_BATTERY_LEVELS 4
-/* List of tweakable parameters. NOTE: It's __packed so it can be sent in a
+
+/*
+ * List of tweakable parameters. NOTE: It's __packed so it can be sent in a
  * host command, but the alignment is the same regardless. Keep it that way.
  */
 struct lightbar_params_v0 {
-- 
cgit v1.2.3


From 1c96a2f67cd9b617b013f0a7580d76aae7dcd0d7 Mon Sep 17 00:00:00 2001
From: David Frey <dpfrey@gmail.com>
Date: Sat, 1 Sep 2018 09:50:41 -0700
Subject: regmap: split up regmap_config.use_single_rw

Split regmap_config.use_single_rw into use_single_read and
use_single_write. This change enables drivers of devices which only
support bulk operations in one direction to use the regmap_bulk_*()
functions for both directions and have their bulk operation split into
single operations only when necessary.

Update all struct regmap_config instances where use_single_rw==true to
instead set both use_single_read and use_single_write. No attempt was
made to evaluate whether it is possible to set only one of
use_single_read or use_single_write.

Signed-off-by: David Frey <dpfrey@gmail.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/regmap.c             |  4 ++--
 drivers/edac/altera_edac.c               |  3 ++-
 drivers/hwmon/lm75.c                     |  3 ++-
 drivers/hwmon/lm95245.c                  |  3 ++-
 drivers/hwmon/tmp102.c                   |  3 ++-
 drivers/hwmon/tmp108.c                   |  3 ++-
 drivers/iio/light/apds9960.c             |  3 ++-
 drivers/iio/light/max44000.c             | 23 ++++++++++++-----------
 drivers/iio/temperature/mlx90632.c       |  3 ++-
 drivers/input/touchscreen/tsc200x-core.c |  3 ++-
 drivers/mfd/altera-a10sr.c               |  3 ++-
 drivers/mfd/da9052-spi.c                 |  3 ++-
 drivers/mfd/mc13xxx-spi.c                |  3 ++-
 drivers/mfd/twl6040.c                    |  3 ++-
 drivers/regulator/ltc3589.c              |  3 ++-
 drivers/regulator/ltc3676.c              |  3 ++-
 include/linux/regmap.h                   | 12 ++++++++----
 sound/hda/hdac_regmap.c                  |  3 ++-
 sound/soc/codecs/cs35l33.c               |  3 ++-
 sound/soc/codecs/cs35l35.c               |  3 ++-
 sound/soc/codecs/cs43130.c               |  4 +++-
 sound/soc/codecs/es8328.c                |  3 ++-
 sound/soc/codecs/rt1305.c                |  3 ++-
 sound/soc/codecs/rt5514.c                |  3 ++-
 sound/soc/codecs/rt5616.c                |  3 ++-
 sound/soc/codecs/rt5640.c                |  3 ++-
 sound/soc/codecs/rt5645.c                |  9 ++++++---
 sound/soc/codecs/rt5651.c                |  3 ++-
 sound/soc/codecs/rt5660.c                |  3 ++-
 sound/soc/codecs/rt5663.c                |  9 ++++++---
 sound/soc/codecs/rt5665.c                |  3 ++-
 sound/soc/codecs/rt5668.c                |  3 ++-
 sound/soc/codecs/rt5670.c                |  3 ++-
 sound/soc/codecs/rt5682.c                |  3 ++-
 34 files changed, 93 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 0360a90ad6b6..78a778c08f92 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -762,8 +762,8 @@ struct regmap *__regmap_init(struct device *dev,
 		map->reg_stride_order = ilog2(map->reg_stride);
 	else
 		map->reg_stride_order = -1;
-	map->use_single_read = config->use_single_rw || !bus || !bus->read;
-	map->use_single_write = config->use_single_rw || !bus || !bus->write;
+	map->use_single_read = config->use_single_read || !bus || !bus->read;
+	map->use_single_write = config->use_single_write || !bus || !bus->write;
 	map->can_multi_write = config->can_multi_write && bus && bus->write;
 	if (bus) {
 		map->max_raw_read = bus->max_raw_read;
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 5762c3c383f2..ab7c5a937ab0 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -599,7 +599,8 @@ static const struct regmap_config s10_sdram_regmap_cfg = {
 	.volatile_reg = s10_sdram_volatile_reg,
 	.reg_read = s10_protected_reg_read,
 	.reg_write = s10_protected_reg_write,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static int altr_s10_sdram_probe(struct platform_device *pdev)
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index 49f4b33a5685..542dc4058831 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -254,7 +254,8 @@ static const struct regmap_config lm75_regmap_config = {
 	.volatile_reg = lm75_is_volatile_reg,
 	.val_format_endian = REGMAP_ENDIAN_BIG,
 	.cache_type = REGCACHE_RBTREE,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static void lm75_remove(void *data)
diff --git a/drivers/hwmon/lm95245.c b/drivers/hwmon/lm95245.c
index 27cb06d65594..996b50246175 100644
--- a/drivers/hwmon/lm95245.c
+++ b/drivers/hwmon/lm95245.c
@@ -541,7 +541,8 @@ static const struct regmap_config lm95245_regmap_config = {
 	.writeable_reg = lm95245_is_writeable_reg,
 	.volatile_reg = lm95245_is_volatile_reg,
 	.cache_type = REGCACHE_RBTREE,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static const u32 lm95245_chip_config[] = {
diff --git a/drivers/hwmon/tmp102.c b/drivers/hwmon/tmp102.c
index dfc40c740d07..6778283e36f9 100644
--- a/drivers/hwmon/tmp102.c
+++ b/drivers/hwmon/tmp102.c
@@ -212,7 +212,8 @@ static const struct regmap_config tmp102_regmap_config = {
 	.volatile_reg = tmp102_is_volatile_reg,
 	.val_format_endian = REGMAP_ENDIAN_BIG,
 	.cache_type = REGCACHE_RBTREE,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static int tmp102_probe(struct i2c_client *client,
diff --git a/drivers/hwmon/tmp108.c b/drivers/hwmon/tmp108.c
index 91bb94639286..429bfeae4ca8 100644
--- a/drivers/hwmon/tmp108.c
+++ b/drivers/hwmon/tmp108.c
@@ -345,7 +345,8 @@ static const struct regmap_config tmp108_regmap_config = {
 	.volatile_reg = tmp108_is_volatile_reg,
 	.val_format_endian = REGMAP_ENDIAN_BIG,
 	.cache_type = REGCACHE_RBTREE,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static int tmp108_probe(struct i2c_client *client,
diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c
index 1f112ae15f3c..b09b8b60bd83 100644
--- a/drivers/iio/light/apds9960.c
+++ b/drivers/iio/light/apds9960.c
@@ -206,7 +206,8 @@ static const struct regmap_config apds9960_regmap_config = {
 	.name = APDS9960_REGMAP_NAME,
 	.reg_bits = 8,
 	.val_bits = 8,
-	.use_single_rw = 1,
+	.use_single_read = true,
+	.use_single_write = true,
 
 	.volatile_table = &apds9960_volatile_table,
 	.precious_table = &apds9960_precious_table,
diff --git a/drivers/iio/light/max44000.c b/drivers/iio/light/max44000.c
index bcdb0eb9e537..4067dff2ff6a 100644
--- a/drivers/iio/light/max44000.c
+++ b/drivers/iio/light/max44000.c
@@ -473,17 +473,18 @@ static bool max44000_precious_reg(struct device *dev, unsigned int reg)
 }
 
 static const struct regmap_config max44000_regmap_config = {
-	.reg_bits	= 8,
-	.val_bits	= 8,
-
-	.max_register	= MAX44000_REG_PRX_DATA,
-	.readable_reg	= max44000_readable_reg,
-	.writeable_reg	= max44000_writeable_reg,
-	.volatile_reg	= max44000_volatile_reg,
-	.precious_reg	= max44000_precious_reg,
-
-	.use_single_rw	= 1,
-	.cache_type	= REGCACHE_RBTREE,
+	.reg_bits		= 8,
+	.val_bits		= 8,
+
+	.max_register		= MAX44000_REG_PRX_DATA,
+	.readable_reg		= max44000_readable_reg,
+	.writeable_reg		= max44000_writeable_reg,
+	.volatile_reg		= max44000_volatile_reg,
+	.precious_reg		= max44000_precious_reg,
+
+	.use_single_read	= true,
+	.use_single_write	= true,
+	.cache_type		= REGCACHE_RBTREE,
 };
 
 static irqreturn_t max44000_trigger_handler(int irq, void *p)
diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c
index 9851311aa3fd..be03be719efe 100644
--- a/drivers/iio/temperature/mlx90632.c
+++ b/drivers/iio/temperature/mlx90632.c
@@ -140,7 +140,8 @@ static const struct regmap_config mlx90632_regmap = {
 	.rd_table = &mlx90632_readable_regs_tbl,
 	.wr_table = &mlx90632_writeable_regs_tbl,
 
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 	.reg_format_endian = REGMAP_ENDIAN_BIG,
 	.val_format_endian = REGMAP_ENDIAN_BIG,
 	.cache_type = REGCACHE_RBTREE,
diff --git a/drivers/input/touchscreen/tsc200x-core.c b/drivers/input/touchscreen/tsc200x-core.c
index e0fde590df8e..62973ac01381 100644
--- a/drivers/input/touchscreen/tsc200x-core.c
+++ b/drivers/input/touchscreen/tsc200x-core.c
@@ -68,7 +68,8 @@ const struct regmap_config tsc200x_regmap_config = {
 	.read_flag_mask = TSC200X_REG_READ,
 	.write_flag_mask = TSC200X_REG_PND0,
 	.wr_table = &tsc200x_writable_table,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 EXPORT_SYMBOL_GPL(tsc200x_regmap_config);
 
diff --git a/drivers/mfd/altera-a10sr.c b/drivers/mfd/altera-a10sr.c
index 96e7d2cb7b89..400e0b51844b 100644
--- a/drivers/mfd/altera-a10sr.c
+++ b/drivers/mfd/altera-a10sr.c
@@ -108,7 +108,8 @@ static const struct regmap_config altr_a10sr_regmap_config = {
 
 	.cache_type = REGCACHE_NONE,
 
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 	.read_flag_mask = 1,
 	.write_flag_mask = 0,
 
diff --git a/drivers/mfd/da9052-spi.c b/drivers/mfd/da9052-spi.c
index abfb11818fdc..fdae1288bc6d 100644
--- a/drivers/mfd/da9052-spi.c
+++ b/drivers/mfd/da9052-spi.c
@@ -46,7 +46,8 @@ static int da9052_spi_probe(struct spi_device *spi)
 	config.reg_bits = 7;
 	config.pad_bits = 1;
 	config.val_bits = 8;
-	config.use_single_rw = 1;
+	config.use_single_read = true;
+	config.use_single_write = true;
 
 	da9052->regmap = devm_regmap_init_spi(spi, &config);
 	if (IS_ERR(da9052->regmap)) {
diff --git a/drivers/mfd/mc13xxx-spi.c b/drivers/mfd/mc13xxx-spi.c
index cbc1e5ed599c..ee3411cc5ce4 100644
--- a/drivers/mfd/mc13xxx-spi.c
+++ b/drivers/mfd/mc13xxx-spi.c
@@ -57,7 +57,8 @@ static const struct regmap_config mc13xxx_regmap_spi_config = {
 	.max_register = MC13XXX_NUMREGS,
 
 	.cache_type = REGCACHE_NONE,
-	.use_single_rw = 1,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static int mc13xxx_spi_read(void *context, const void *reg, size_t reg_size,
diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c
index dd19f17a1b63..7c3c5fd5fcd0 100644
--- a/drivers/mfd/twl6040.c
+++ b/drivers/mfd/twl6040.c
@@ -613,7 +613,8 @@ static const struct regmap_config twl6040_regmap_config = {
 	.writeable_reg = twl6040_writeable_reg,
 
 	.cache_type = REGCACHE_RBTREE,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static const struct regmap_irq twl6040_irqs[] = {
diff --git a/drivers/regulator/ltc3589.c b/drivers/regulator/ltc3589.c
index 18d5b01ddcb2..63f724f260ef 100644
--- a/drivers/regulator/ltc3589.c
+++ b/drivers/regulator/ltc3589.c
@@ -404,7 +404,8 @@ static const struct regmap_config ltc3589_regmap_config = {
 	.max_register = LTC3589_L2DTV2,
 	.reg_defaults = ltc3589_reg_defaults,
 	.num_reg_defaults = ARRAY_SIZE(ltc3589_reg_defaults),
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 	.cache_type = REGCACHE_RBTREE,
 };
 
diff --git a/drivers/regulator/ltc3676.c b/drivers/regulator/ltc3676.c
index 9dec1609ff66..71fd0f2a4b76 100644
--- a/drivers/regulator/ltc3676.c
+++ b/drivers/regulator/ltc3676.c
@@ -321,7 +321,8 @@ static const struct regmap_config ltc3676_regmap_config = {
 	.readable_reg = ltc3676_readable_reg,
 	.volatile_reg = ltc3676_volatile_reg,
 	.max_register = LTC3676_CLIRQ,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 	.cache_type = REGCACHE_RBTREE,
 };
 
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 379505a53722..6ea9bf9377cb 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -315,9 +315,12 @@ typedef void (*regmap_unlock)(void *);
  *                   masks are used.
  * @zero_flag_mask: If set, read_flag_mask and write_flag_mask are used even
  *                   if they are both empty.
- * @use_single_rw: If set, converts the bulk read and write operations into
- *		    a series of single read and write operations. This is useful
- *		    for device that does not support bulk read and write.
+ * @use_single_read: If set, converts the bulk read operation into a series of
+ *                   single read operations. This is useful for a device that
+ *                   does not support  bulk read.
+ * @use_single_write: If set, converts the bulk write operation into a series of
+ *                    single write operations. This is useful for a device that
+ *                    does not support bulk write.
  * @can_multi_write: If set, the device supports the multi write mode of bulk
  *                   write operations, if clear multi write requests will be
  *                   split into individual write operations
@@ -380,7 +383,8 @@ struct regmap_config {
 	unsigned long write_flag_mask;
 	bool zero_flag_mask;
 
-	bool use_single_rw;
+	bool use_single_read;
+	bool use_single_write;
 	bool can_multi_write;
 
 	enum regmap_endian reg_format_endian;
diff --git a/sound/hda/hdac_regmap.c b/sound/hda/hdac_regmap.c
index 419e285e0226..996dbc850224 100644
--- a/sound/hda/hdac_regmap.c
+++ b/sound/hda/hdac_regmap.c
@@ -359,7 +359,8 @@ static const struct regmap_config hda_regmap_cfg = {
 	.cache_type = REGCACHE_RBTREE,
 	.reg_read = hda_reg_read,
 	.reg_write = hda_reg_write,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 /**
diff --git a/sound/soc/codecs/cs35l33.c b/sound/soc/codecs/cs35l33.c
index 668cd3754209..e9b7f72d880b 100644
--- a/sound/soc/codecs/cs35l33.c
+++ b/sound/soc/codecs/cs35l33.c
@@ -857,7 +857,8 @@ static const struct regmap_config cs35l33_regmap = {
 	.readable_reg = cs35l33_readable_register,
 	.writeable_reg = cs35l33_writeable_register,
 	.cache_type = REGCACHE_RBTREE,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static int __maybe_unused cs35l33_runtime_resume(struct device *dev)
diff --git a/sound/soc/codecs/cs35l35.c b/sound/soc/codecs/cs35l35.c
index bd6226bde45f..9f4a59871cee 100644
--- a/sound/soc/codecs/cs35l35.c
+++ b/sound/soc/codecs/cs35l35.c
@@ -1105,7 +1105,8 @@ static struct regmap_config cs35l35_regmap = {
 	.readable_reg = cs35l35_readable_register,
 	.precious_reg = cs35l35_precious_register,
 	.cache_type = REGCACHE_RBTREE,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static irqreturn_t cs35l35_irq(int irq, void *data)
diff --git a/sound/soc/codecs/cs43130.c b/sound/soc/codecs/cs43130.c
index 80dc42197154..3f7b255587e6 100644
--- a/sound/soc/codecs/cs43130.c
+++ b/sound/soc/codecs/cs43130.c
@@ -2362,7 +2362,9 @@ static const struct regmap_config cs43130_regmap = {
 	.precious_reg		= cs43130_precious_register,
 	.volatile_reg		= cs43130_volatile_register,
 	.cache_type		= REGCACHE_RBTREE,
-	.use_single_rw		= true, /* needed for regcache_sync */
+	/* needed for regcache_sync */
+	.use_single_read	= true,
+	.use_single_write	= true,
 };
 
 static u16 const cs43130_dc_threshold[CS43130_DC_THRESHOLD] = {
diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c
index e9fc2fd97d2f..4b5827dc23aa 100644
--- a/sound/soc/codecs/es8328.c
+++ b/sound/soc/codecs/es8328.c
@@ -824,7 +824,8 @@ const struct regmap_config es8328_regmap_config = {
 	.val_bits	= 8,
 	.max_register	= ES8328_REG_MAX,
 	.cache_type	= REGCACHE_RBTREE,
-	.use_single_rw	= true,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 EXPORT_SYMBOL_GPL(es8328_regmap_config);
 
diff --git a/sound/soc/codecs/rt1305.c b/sound/soc/codecs/rt1305.c
index c4452efc7970..c2c8a68cec97 100644
--- a/sound/soc/codecs/rt1305.c
+++ b/sound/soc/codecs/rt1305.c
@@ -963,7 +963,8 @@ static const struct regmap_config rt1305_regmap = {
 	.num_reg_defaults = ARRAY_SIZE(rt1305_reg),
 	.ranges = rt1305_ranges,
 	.num_ranges = ARRAY_SIZE(rt1305_ranges),
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 #if defined(CONFIG_OF)
diff --git a/sound/soc/codecs/rt5514.c b/sound/soc/codecs/rt5514.c
index dca82dd6e3bf..f78183a6e80d 100644
--- a/sound/soc/codecs/rt5514.c
+++ b/sound/soc/codecs/rt5514.c
@@ -1201,7 +1201,8 @@ static const struct regmap_config rt5514_regmap = {
 	.cache_type = REGCACHE_RBTREE,
 	.reg_defaults = rt5514_reg,
 	.num_reg_defaults = ARRAY_SIZE(rt5514_reg),
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static const struct i2c_device_id rt5514_i2c_id[] = {
diff --git a/sound/soc/codecs/rt5616.c b/sound/soc/codecs/rt5616.c
index 3dc795f444ce..36a9f1c56c8d 100644
--- a/sound/soc/codecs/rt5616.c
+++ b/sound/soc/codecs/rt5616.c
@@ -1313,7 +1313,8 @@ static const struct snd_soc_component_driver soc_component_dev_rt5616 = {
 static const struct regmap_config rt5616_regmap = {
 	.reg_bits = 8,
 	.val_bits = 16,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 	.max_register = RT5616_DEVICE_ID + 1 + (ARRAY_SIZE(rt5616_ranges) *
 					       RT5616_PR_SPACING),
 	.volatile_reg = rt5616_volatile_register,
diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c
index 27770143ae8f..fc530481a6e4 100644
--- a/sound/soc/codecs/rt5640.c
+++ b/sound/soc/codecs/rt5640.c
@@ -2704,7 +2704,8 @@ static const struct snd_soc_component_driver soc_component_dev_rt5640 = {
 static const struct regmap_config rt5640_regmap = {
 	.reg_bits = 8,
 	.val_bits = 16,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 
 	.max_register = RT5640_VENDOR_ID2 + 1 + (ARRAY_SIZE(rt5640_ranges) *
 					       RT5640_PR_SPACING),
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index 1dc70f452c1b..be674688dc40 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3559,7 +3559,8 @@ static const struct snd_soc_component_driver soc_component_dev_rt5645 = {
 static const struct regmap_config rt5645_regmap = {
 	.reg_bits = 8,
 	.val_bits = 16,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 	.max_register = RT5645_VENDOR_ID2 + 1 + (ARRAY_SIZE(rt5645_ranges) *
 					       RT5645_PR_SPACING),
 	.volatile_reg = rt5645_volatile_register,
@@ -3575,7 +3576,8 @@ static const struct regmap_config rt5645_regmap = {
 static const struct regmap_config rt5650_regmap = {
 	.reg_bits = 8,
 	.val_bits = 16,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 	.max_register = RT5645_VENDOR_ID2 + 1 + (ARRAY_SIZE(rt5645_ranges) *
 					       RT5645_PR_SPACING),
 	.volatile_reg = rt5645_volatile_register,
@@ -3592,7 +3594,8 @@ static const struct regmap_config temp_regmap = {
 	.name="nocache",
 	.reg_bits = 8,
 	.val_bits = 16,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 	.max_register = RT5645_VENDOR_ID2 + 1,
 	.cache_type = REGCACHE_NONE,
 };
diff --git a/sound/soc/codecs/rt5651.c b/sound/soc/codecs/rt5651.c
index 985852fd9723..5bcedbc7eb4a 100644
--- a/sound/soc/codecs/rt5651.c
+++ b/sound/soc/codecs/rt5651.c
@@ -2124,7 +2124,8 @@ static const struct regmap_config rt5651_regmap = {
 	.num_reg_defaults = ARRAY_SIZE(rt5651_reg),
 	.ranges = rt5651_ranges,
 	.num_ranges = ARRAY_SIZE(rt5651_ranges),
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 #if defined(CONFIG_OF)
diff --git a/sound/soc/codecs/rt5660.c b/sound/soc/codecs/rt5660.c
index 20a755137e63..27f7445b2432 100644
--- a/sound/soc/codecs/rt5660.c
+++ b/sound/soc/codecs/rt5660.c
@@ -1217,7 +1217,8 @@ static const struct snd_soc_component_driver soc_component_dev_rt5660 = {
 static const struct regmap_config rt5660_regmap = {
 	.reg_bits = 8,
 	.val_bits = 16,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 
 	.max_register = RT5660_VENDOR_ID2 + 1 + (ARRAY_SIZE(rt5660_ranges) *
 					       RT5660_PR_SPACING),
diff --git a/sound/soc/codecs/rt5663.c b/sound/soc/codecs/rt5663.c
index 9bd24ad42240..70441661ea4a 100644
--- a/sound/soc/codecs/rt5663.c
+++ b/sound/soc/codecs/rt5663.c
@@ -3252,7 +3252,8 @@ static const struct snd_soc_component_driver soc_component_dev_rt5663 = {
 static const struct regmap_config rt5663_v2_regmap = {
 	.reg_bits = 16,
 	.val_bits = 16,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 	.max_register = 0x07fa,
 	.volatile_reg = rt5663_v2_volatile_register,
 	.readable_reg = rt5663_v2_readable_register,
@@ -3264,7 +3265,8 @@ static const struct regmap_config rt5663_v2_regmap = {
 static const struct regmap_config rt5663_regmap = {
 	.reg_bits = 16,
 	.val_bits = 16,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 	.max_register = 0x03f3,
 	.volatile_reg = rt5663_volatile_register,
 	.readable_reg = rt5663_readable_register,
@@ -3277,7 +3279,8 @@ static const struct regmap_config temp_regmap = {
 	.name = "nocache",
 	.reg_bits = 16,
 	.val_bits = 16,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 	.max_register = 0x03f3,
 	.cache_type = REGCACHE_NONE,
 };
diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c
index 6ba99f5ed3f4..f2ad3a4c3b7f 100644
--- a/sound/soc/codecs/rt5665.c
+++ b/sound/soc/codecs/rt5665.c
@@ -4633,7 +4633,8 @@ static const struct regmap_config rt5665_regmap = {
 	.cache_type = REGCACHE_RBTREE,
 	.reg_defaults = rt5665_reg,
 	.num_reg_defaults = ARRAY_SIZE(rt5665_reg),
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static const struct i2c_device_id rt5665_i2c_id[] = {
diff --git a/sound/soc/codecs/rt5668.c b/sound/soc/codecs/rt5668.c
index 3c19d03f2446..3f6046a66b56 100644
--- a/sound/soc/codecs/rt5668.c
+++ b/sound/soc/codecs/rt5668.c
@@ -2375,7 +2375,8 @@ static const struct regmap_config rt5668_regmap = {
 	.cache_type = REGCACHE_RBTREE,
 	.reg_defaults = rt5668_reg,
 	.num_reg_defaults = ARRAY_SIZE(rt5668_reg),
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static const struct i2c_device_id rt5668_i2c_id[] = {
diff --git a/sound/soc/codecs/rt5670.c b/sound/soc/codecs/rt5670.c
index 732ef928b25d..f0f8debc2829 100644
--- a/sound/soc/codecs/rt5670.c
+++ b/sound/soc/codecs/rt5670.c
@@ -2814,7 +2814,8 @@ static const struct snd_soc_component_driver soc_component_dev_rt5670 = {
 static const struct regmap_config rt5670_regmap = {
 	.reg_bits = 8,
 	.val_bits = 16,
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 	.max_register = RT5670_VENDOR_ID2 + 1 + (ARRAY_SIZE(rt5670_ranges) *
 					       RT5670_PR_SPACING),
 	.volatile_reg = rt5670_volatile_register,
diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c
index 640d400ca013..23515bfef257 100644
--- a/sound/soc/codecs/rt5682.c
+++ b/sound/soc/codecs/rt5682.c
@@ -2419,7 +2419,8 @@ static const struct regmap_config rt5682_regmap = {
 	.cache_type = REGCACHE_RBTREE,
 	.reg_defaults = rt5682_reg,
 	.num_reg_defaults = ARRAY_SIZE(rt5682_reg),
-	.use_single_rw = true,
+	.use_single_read = true,
+	.use_single_write = true,
 };
 
 static const struct i2c_device_id rt5682_i2c_id[] = {
-- 
cgit v1.2.3


From 00b7d1cf46676ad5e6338e80dc6230e1b6e71b86 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 29 Aug 2018 17:21:45 -0500
Subject: of: make default address and size cells sizes private

Only some old OpenFirmware implementations rely on default sizes. Any
FDT and modern implementation should have explicit properties. Make the
OF_ROOT_NODE_*_CELLS_DEFAULT defines private so we don't get any outside
users.

This also gets us one step closer to removing the asm/prom.h dependency on
Sparc.

Cc: "David S. Miller" <davem@davemloft.net>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: sparclinux@vger.kernel.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 arch/sparc/include/asm/prom.h | 3 ---
 drivers/of/of_private.h       | 8 ++++++++
 include/linux/of.h            | 6 ------
 3 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h
index d955c8df62d6..1902db27ff4b 100644
--- a/arch/sparc/include/asm/prom.h
+++ b/arch/sparc/include/asm/prom.h
@@ -24,9 +24,6 @@
 #include <linux/atomic.h>
 #include <linux/irqdomain.h>
 
-#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT	2
-#define OF_ROOT_NODE_SIZE_CELLS_DEFAULT	1
-
 #define of_compat_cmp(s1, s2, l)	strncmp((s1), (s2), (l))
 #define of_prop_cmp(s1, s2)		strcasecmp((s1), (s2))
 #define of_node_cmp(s1, s2)		strcmp((s1), (s2))
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index 216175d11d3d..5d1567025358 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -27,6 +27,14 @@ struct alias_prop {
 	char stem[0];
 };
 
+#if defined(CONFIG_SPARC)
+#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT 2
+#else
+#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT 1
+#endif
+
+#define OF_ROOT_NODE_SIZE_CELLS_DEFAULT 1
+
 extern struct mutex of_mutex;
 extern struct list_head aliases_lookup;
 extern struct kset *of_kset;
diff --git a/include/linux/of.h b/include/linux/of.h
index 99b0ebf49632..2141490b368f 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -247,12 +247,6 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
 #include <asm/prom.h>
 #endif
 
-/* Default #address and #size cells.  Allow arch asm/prom.h to override */
-#if !defined(OF_ROOT_NODE_ADDR_CELLS_DEFAULT)
-#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT 1
-#define OF_ROOT_NODE_SIZE_CELLS_DEFAULT 1
-#endif
-
 #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags)
 #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags)
 
-- 
cgit v1.2.3


From ccf640f4c9988653ef884672381b03b9be247bec Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Aug 2018 09:40:24 +0200
Subject: dma-mapping: remove dma_configure

There is no good reason for this indirection given that the method
always exists.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/base/dd.c           |  8 +++++---
 include/linux/dma-mapping.h |  6 ------
 kernel/dma/mapping.c        | 10 ----------
 3 files changed, 5 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index edfc9f0b1180..65128cf8427c 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -480,9 +480,11 @@ re_probe:
 	if (ret)
 		goto pinctrl_bind_failed;
 
-	ret = dma_configure(dev);
-	if (ret)
-		goto dma_failed;
+	if (dev->bus->dma_configure) {
+		ret = dev->bus->dma_configure(dev);
+		if (ret)
+			goto dma_failed;
+	}
 
 	if (driver_sysfs_add(dev)) {
 		printk(KERN_ERR "%s: driver_sysfs_add(%s) failed\n",
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 1db6a6b46d0d..1c6c7c09bcf2 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -754,14 +754,8 @@ dma_mark_declared_memory_occupied(struct device *dev,
 #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */
 
 #ifdef CONFIG_HAS_DMA
-int dma_configure(struct device *dev);
 void dma_deconfigure(struct device *dev);
 #else
-static inline int dma_configure(struct device *dev)
-{
-	return 0;
-}
-
 static inline void dma_deconfigure(struct device *dev) {}
 #endif
 
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index d2a92ddaac4d..25607ceb4a50 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -328,16 +328,6 @@ void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
 }
 #endif
 
-/*
- * enables DMA API use for a device
- */
-int dma_configure(struct device *dev)
-{
-	if (dev->bus->dma_configure)
-		return dev->bus->dma_configure(dev);
-	return 0;
-}
-
 void dma_deconfigure(struct device *dev)
 {
 	of_dma_deconfigure(dev);
-- 
cgit v1.2.3


From dc3c05504d38849f77149cb962caeaedd1efa127 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Aug 2018 10:28:18 +0200
Subject: dma-mapping: remove dma_deconfigure

This goes through a lot of hooks just to call arch_teardown_dma_ops.
Replace it with a direct call instead.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
---
 drivers/acpi/arm64/iort.c   |  2 +-
 drivers/acpi/scan.c         | 10 ----------
 drivers/base/dd.c           |  4 ++--
 drivers/of/device.c         | 12 ------------
 include/acpi/acpi_bus.h     |  1 -
 include/linux/acpi.h        |  2 --
 include/linux/dma-mapping.h |  6 ------
 include/linux/of_device.h   |  3 ---
 kernel/dma/mapping.c        |  6 ------
 9 files changed, 3 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 08f26db2da7e..2a361e22d38d 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -1428,7 +1428,7 @@ static int __init iort_add_platform_device(struct acpi_iort_node *node,
 	return 0;
 
 dma_deconfigure:
-	acpi_dma_deconfigure(&pdev->dev);
+	arch_teardown_dma_ops(&pdev->dev);
 dev_put:
 	platform_device_put(pdev);
 
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index e1b6231cfa1c..56676a56b3e3 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1469,16 +1469,6 @@ int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr)
 }
 EXPORT_SYMBOL_GPL(acpi_dma_configure);
 
-/**
- * acpi_dma_deconfigure - Tear-down DMA configuration for the device.
- * @dev: The pointer to the device
- */
-void acpi_dma_deconfigure(struct device *dev)
-{
-	arch_teardown_dma_ops(dev);
-}
-EXPORT_SYMBOL_GPL(acpi_dma_deconfigure);
-
 static void acpi_init_coherency(struct acpi_device *adev)
 {
 	unsigned long long cca = 0;
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 65128cf8427c..169412ee4ae8 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -539,7 +539,7 @@ re_probe:
 	goto done;
 
 probe_failed:
-	dma_deconfigure(dev);
+	arch_teardown_dma_ops(dev);
 dma_failed:
 	if (dev->bus)
 		blocking_notifier_call_chain(&dev->bus->p->bus_notifier,
@@ -968,7 +968,7 @@ static void __device_release_driver(struct device *dev, struct device *parent)
 			drv->remove(dev);
 
 		device_links_driver_cleanup(dev);
-		dma_deconfigure(dev);
+		arch_teardown_dma_ops(dev);
 
 		devres_release_all(dev);
 		dev->driver = NULL;
diff --git a/drivers/of/device.c b/drivers/of/device.c
index 5957cd4fa262..c7fa5a9697c9 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -170,18 +170,6 @@ int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma)
 }
 EXPORT_SYMBOL_GPL(of_dma_configure);
 
-/**
- * of_dma_deconfigure - Clean up DMA configuration
- * @dev:	Device for which to clean up DMA configuration
- *
- * Clean up all configuration performed by of_dma_configure_ops() and free all
- * resources that have been allocated.
- */
-void of_dma_deconfigure(struct device *dev)
-{
-	arch_teardown_dma_ops(dev);
-}
-
 int of_device_register(struct platform_device *pdev)
 {
 	device_initialize(&pdev->dev);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index ba4dd54f2c82..53600f527a70 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -595,7 +595,6 @@ enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev);
 int acpi_dma_get_range(struct device *dev, u64 *dma_addr, u64 *offset,
 		       u64 *size);
 int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr);
-void acpi_dma_deconfigure(struct device *dev);
 
 struct acpi_device *acpi_find_child_device(struct acpi_device *parent,
 					   u64 address, bool check_children);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index de8d3d3fa651..af4628979d13 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -831,8 +831,6 @@ static inline int acpi_dma_configure(struct device *dev,
 	return 0;
 }
 
-static inline void acpi_dma_deconfigure(struct device *dev) { }
-
 #define ACPI_PTR(_ptr)	(NULL)
 
 static inline void acpi_device_set_enumerated(struct acpi_device *adev)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 1c6c7c09bcf2..1423b69f3cc9 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -753,12 +753,6 @@ dma_mark_declared_memory_occupied(struct device *dev,
 }
 #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */
 
-#ifdef CONFIG_HAS_DMA
-void dma_deconfigure(struct device *dev);
-#else
-static inline void dma_deconfigure(struct device *dev) {}
-#endif
-
 /*
  * Managed DMA API
  */
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index 165fd302b442..8d31e39dd564 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -58,7 +58,6 @@ static inline struct device_node *of_cpu_device_node_get(int cpu)
 int of_dma_configure(struct device *dev,
 		     struct device_node *np,
 		     bool force_dma);
-void of_dma_deconfigure(struct device *dev);
 #else /* CONFIG_OF */
 
 static inline int of_driver_match_device(struct device *dev,
@@ -113,8 +112,6 @@ static inline int of_dma_configure(struct device *dev,
 {
 	return 0;
 }
-static inline void of_dma_deconfigure(struct device *dev)
-{}
 #endif /* CONFIG_OF */
 
 #endif /* _LINUX_OF_DEVICE_H */
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 25607ceb4a50..3540cb399bd2 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -327,9 +327,3 @@ void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
 	vunmap(cpu_addr);
 }
 #endif
-
-void dma_deconfigure(struct device *dev)
-{
-	of_dma_deconfigure(dev);
-	acpi_dma_deconfigure(dev);
-}
-- 
cgit v1.2.3


From 46053c73685411915d3de50c5a0045beef32806b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Aug 2018 10:29:02 +0200
Subject: dma-mapping: clear dev->dma_ops in arch_teardown_dma_ops

There is no reason to leave the per-device dma_ops around when
deconfiguring a device, so move this code from arm64 into the
common code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
---
 arch/arm64/include/asm/dma-mapping.h | 5 -----
 arch/arm64/mm/dma-mapping.c          | 5 -----
 include/linux/dma-mapping.h          | 5 ++++-
 3 files changed, 4 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index b7847eb8a7bb..0a2d13332545 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -39,11 +39,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 			const struct iommu_ops *iommu, bool coherent);
 #define arch_setup_dma_ops	arch_setup_dma_ops
 
-#ifdef CONFIG_IOMMU_DMA
-void arch_teardown_dma_ops(struct device *dev);
-#define arch_teardown_dma_ops	arch_teardown_dma_ops
-#endif
-
 /* do not use this function in a driver */
 static inline bool is_device_dma_coherent(struct device *dev)
 {
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 072c51fb07d7..cdcb73db9ea2 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -862,11 +862,6 @@ out_err:
 		 dev_name(dev));
 }
 
-void arch_teardown_dma_ops(struct device *dev)
-{
-	dev->dma_ops = NULL;
-}
-
 #else
 
 static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 1423b69f3cc9..eafd6f318e78 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -664,7 +664,10 @@ static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base,
 #endif
 
 #ifndef arch_teardown_dma_ops
-static inline void arch_teardown_dma_ops(struct device *dev) { }
+static inline void arch_teardown_dma_ops(struct device *dev)
+{
+	dev->dma_ops = NULL;
+}
 #endif
 
 static inline unsigned int dma_get_max_seg_size(struct device *dev)
-- 
cgit v1.2.3


From 02ad0437decf2e5dba975c23b1a89775f4b211e1 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 3 Sep 2018 12:55:30 -0700
Subject: gpio: fix kernel-doc notation warning for 'request_key'

Fix kernel-doc warning for missing struct member 'request_key':

../include/linux/gpio/driver.h:142: warning: Function parameter or member 'request_key' not described in 'gpio_irq_chip'

Fixes: 39c3fd58952d ("kernel/irq: Extend lockdep class for request mutex")

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: linux-gpio@vger.kernel.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/gpio/driver.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 0ea328e71ec9..4a4f410b5285 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -66,9 +66,15 @@ struct gpio_irq_chip {
 	/**
 	 * @lock_key:
 	 *
-	 * Per GPIO IRQ chip lockdep classes.
+	 * Per GPIO IRQ chip lockdep class for IRQ lock.
 	 */
 	struct lock_class_key *lock_key;
+
+	/**
+	 * @request_key:
+	 *
+	 * Per GPIO IRQ chip lockdep class for IRQ request.
+	 */
 	struct lock_class_key *request_key;
 
 	/**
-- 
cgit v1.2.3


From 4e6b823867e2b8afc2b33740ba930e50b1f92421 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Sat, 8 Sep 2018 11:23:14 +0200
Subject: gpiolib: export gpiochip_irq_reqres/relres()

GPIO drivers that do not use GPIOLIB_IRQCHIP can hook these into
the irq_request_resource and irq_release_resource callbacks of the
irq_chip so they correctly 'get' the module and lock the gpio line
for IRQ use.

This will simplify driver code.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c      | 55 +++++++++++++++++++++++++++------------------
 include/linux/gpio/driver.h |  2 ++
 2 files changed, 35 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index e8f8a1999393..cbab0e744de0 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1804,39 +1804,26 @@ static const struct irq_domain_ops gpiochip_domain_ops = {
 	.xlate	= irq_domain_xlate_twocell,
 };
 
-static int gpiochip_irq_reqres(struct irq_data *d)
+static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset)
 {
-	struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
-	int ret;
-
-	if (!try_module_get(chip->gpiodev->owner))
-		return -ENODEV;
+	if (!gpiochip_irqchip_irq_valid(chip, offset))
+		return -ENXIO;
 
-	ret = gpiochip_lock_as_irq(chip, d->hwirq);
-	if (ret) {
-		chip_err(chip,
-			"unable to lock HW IRQ %lu for IRQ\n",
-			d->hwirq);
-		module_put(chip->gpiodev->owner);
-		return ret;
-	}
-	return 0;
+	return irq_create_mapping(chip->irq.domain, offset);
 }
 
-static void gpiochip_irq_relres(struct irq_data *d)
+static int gpiochip_irq_reqres(struct irq_data *d)
 {
 	struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
 
-	gpiochip_unlock_as_irq(chip, d->hwirq);
-	module_put(chip->gpiodev->owner);
+	return gpiochip_reqres_irq(chip, d->hwirq);
 }
 
-static int gpiochip_to_irq(struct gpio_chip *chip, unsigned offset)
+static void gpiochip_irq_relres(struct irq_data *d)
 {
-	if (!gpiochip_irqchip_irq_valid(chip, offset))
-		return -ENXIO;
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
 
-	return irq_create_mapping(chip->irq.domain, offset);
+	gpiochip_relres_irq(chip, d->hwirq);
 }
 
 /**
@@ -3338,6 +3325,30 @@ bool gpiochip_line_is_irq(struct gpio_chip *chip, unsigned int offset)
 }
 EXPORT_SYMBOL_GPL(gpiochip_line_is_irq);
 
+int gpiochip_reqres_irq(struct gpio_chip *chip, unsigned int offset)
+{
+	int ret;
+
+	if (!try_module_get(chip->gpiodev->owner))
+		return -ENODEV;
+
+	ret = gpiochip_lock_as_irq(chip, offset);
+	if (ret) {
+		chip_err(chip, "unable to lock HW IRQ %u for IRQ\n", offset);
+		module_put(chip->gpiodev->owner);
+		return ret;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gpiochip_reqres_irq);
+
+void gpiochip_relres_irq(struct gpio_chip *chip, unsigned int offset)
+{
+	gpiochip_unlock_as_irq(chip, offset);
+	module_put(chip->gpiodev->owner);
+}
+EXPORT_SYMBOL_GPL(gpiochip_relres_irq);
+
 bool gpiochip_line_is_open_drain(struct gpio_chip *chip, unsigned int offset)
 {
 	if (offset >= chip->ngpio)
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 4a4f410b5285..479f1ccaaebd 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -401,6 +401,8 @@ extern struct gpio_chip *gpiochip_find(void *data,
 int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset);
 void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset);
 bool gpiochip_line_is_irq(struct gpio_chip *chip, unsigned int offset);
+int gpiochip_reqres_irq(struct gpio_chip *chip, unsigned int offset);
+void gpiochip_relres_irq(struct gpio_chip *chip, unsigned int offset);
 
 /* Line status inquiry for drivers */
 bool gpiochip_line_is_open_drain(struct gpio_chip *chip, unsigned int offset);
-- 
cgit v1.2.3


From 4e9439ddacea06f35acce4d374bf6bd0acf99bc8 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Sat, 8 Sep 2018 11:23:16 +0200
Subject: gpiolib: add flag to indicate if the irq is disabled

GPIO drivers call gpiochip_(un)lock_as_irq whenever they want to use a gpio
as an interrupt. This is done when the irq is requested and it marks the
gpio as in use by an interrupt.

This is problematic for cases where a gpio pin is used as an interrupt
pin, then, after the irq is disabled, is used as a regular gpio pin.
Currently it is not possible to do this other than by first freeing
the interrupt so gpiochip_unlock_as_irq is called, since an attempt to
switch the gpio direction for output will fail since gpiolib believes
that the gpio is in use for an interrupt and it does not know that it
the irq is actually disabled.

There are currently two drivers that would like to be able to do this:
the tda998x_drv.c driver where a regular gpio pin needs to be temporarily
reconfigured as an interrupt pin during CEC calibration, and the cec-gpio
driver where you want to configure the gpio pin as an interrupt while
waiting for traffic over the CEC bus, or as a regular pin when receiving or
transmitting a CEC message.

The solution is to add a new flag that is set when the irq is enabled,
and have gpiod_direction_output check for that flag.

We also add functions that drivers that do not use GPIOLIB_IRQCHIP
can call when they enable/disable the irq.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c      | 29 +++++++++++++++++++++++++++--
 drivers/gpio/gpiolib.h      |  1 +
 include/linux/gpio/driver.h |  2 ++
 3 files changed, 30 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index b17f1142c70b..e52fa72f13d7 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2588,8 +2588,9 @@ int gpiod_direction_output(struct gpio_desc *desc, int value)
 	else
 		value = !!value;
 
-	/* GPIOs used for IRQs shall not be set as output */
-	if (test_bit(FLAG_USED_AS_IRQ, &desc->flags)) {
+	/* GPIOs used for enabled IRQs shall not be set as output */
+	if (test_bit(FLAG_USED_AS_IRQ, &desc->flags) &&
+	    test_bit(FLAG_IRQ_IS_ENABLED, &desc->flags)) {
 		gpiod_err(desc,
 			  "%s: tried to set a GPIO tied to an IRQ as output\n",
 			  __func__);
@@ -3276,6 +3277,7 @@ int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset)
 	}
 
 	set_bit(FLAG_USED_AS_IRQ, &desc->flags);
+	set_bit(FLAG_IRQ_IS_ENABLED, &desc->flags);
 
 	/*
 	 * If the consumer has not set up a label (such as when the
@@ -3306,6 +3308,7 @@ void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset)
 		return;
 
 	clear_bit(FLAG_USED_AS_IRQ, &desc->flags);
+	clear_bit(FLAG_IRQ_IS_ENABLED, &desc->flags);
 
 	/* If we only had this marking, erase it */
 	if (desc->label && !strcmp(desc->label, "interrupt"))
@@ -3313,6 +3316,28 @@ void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset)
 }
 EXPORT_SYMBOL_GPL(gpiochip_unlock_as_irq);
 
+void gpiochip_disable_irq(struct gpio_chip *chip, unsigned int offset)
+{
+	struct gpio_desc *desc = gpiochip_get_desc(chip, offset);
+
+	if (!IS_ERR(desc) &&
+	    !WARN_ON(!test_bit(FLAG_USED_AS_IRQ, &desc->flags)))
+		clear_bit(FLAG_IRQ_IS_ENABLED, &desc->flags);
+}
+EXPORT_SYMBOL_GPL(gpiochip_disable_irq);
+
+void gpiochip_enable_irq(struct gpio_chip *chip, unsigned int offset)
+{
+	struct gpio_desc *desc = gpiochip_get_desc(chip, offset);
+
+	if (!IS_ERR(desc) &&
+	    !WARN_ON(!test_bit(FLAG_USED_AS_IRQ, &desc->flags))) {
+		WARN_ON(test_bit(FLAG_IS_OUT, &desc->flags));
+		set_bit(FLAG_IRQ_IS_ENABLED, &desc->flags);
+	}
+}
+EXPORT_SYMBOL_GPL(gpiochip_enable_irq);
+
 bool gpiochip_line_is_irq(struct gpio_chip *chip, unsigned int offset)
 {
 	if (offset >= chip->ngpio)
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index a7e49fef73d4..e9a86f4b00e3 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -214,6 +214,7 @@ struct gpio_desc {
 #define FLAG_OPEN_DRAIN	7	/* Gpio is open drain type */
 #define FLAG_OPEN_SOURCE 8	/* Gpio is open source type */
 #define FLAG_USED_AS_IRQ 9	/* GPIO is connected to an IRQ */
+#define FLAG_IRQ_IS_ENABLED 10	/* GPIO is connected to an enabled IRQ */
 #define FLAG_IS_HOGGED	11	/* GPIO is hogged */
 #define FLAG_TRANSITORY 12	/* GPIO may lose value in sleep or reset */
 
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 479f1ccaaebd..9052ccd399fd 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -403,6 +403,8 @@ void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset);
 bool gpiochip_line_is_irq(struct gpio_chip *chip, unsigned int offset);
 int gpiochip_reqres_irq(struct gpio_chip *chip, unsigned int offset);
 void gpiochip_relres_irq(struct gpio_chip *chip, unsigned int offset);
+void gpiochip_disable_irq(struct gpio_chip *chip, unsigned int offset);
+void gpiochip_enable_irq(struct gpio_chip *chip, unsigned int offset);
 
 /* Line status inquiry for drivers */
 bool gpiochip_line_is_open_drain(struct gpio_chip *chip, unsigned int offset);
-- 
cgit v1.2.3


From 461c1a7d4733d1dfd5c47b040cf32a5e7eefbc6c Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Sat, 8 Sep 2018 11:23:17 +0200
Subject: gpiolib: override irq_enable/disable

When using the gpiolib irqchip helpers install irq_enable/disable
hooks for the irqchip to ensure that gpiolib knows when the irq
is enabled or disabled, allowing drivers to disable the irq and then
use it as an output pin, and later switch the direction to input and
re-enable the irq.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c      | 43 +++++++++++++++++++++++++++++++++++++++----
 include/linux/gpio/driver.h | 14 ++++++++++++++
 2 files changed, 53 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index e52fa72f13d7..efce534a269b 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1826,6 +1826,28 @@ static void gpiochip_irq_relres(struct irq_data *d)
 	gpiochip_relres_irq(chip, d->hwirq);
 }
 
+static void gpiochip_irq_enable(struct irq_data *d)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+
+	gpiochip_enable_irq(chip, d->hwirq);
+	if (chip->irq.irq_enable)
+		chip->irq.irq_enable(d);
+	else
+		chip->irq.chip->irq_unmask(d);
+}
+
+static void gpiochip_irq_disable(struct irq_data *d)
+{
+	struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+
+	if (chip->irq.irq_disable)
+		chip->irq.irq_disable(d);
+	else
+		chip->irq.chip->irq_mask(d);
+	gpiochip_disable_irq(chip, d->hwirq);
+}
+
 static void gpiochip_set_irq_hooks(struct gpio_chip *gpiochip)
 {
 	struct irq_chip *irqchip = gpiochip->irq.chip;
@@ -1835,6 +1857,12 @@ static void gpiochip_set_irq_hooks(struct gpio_chip *gpiochip)
 		irqchip->irq_request_resources = gpiochip_irq_reqres;
 		irqchip->irq_release_resources = gpiochip_irq_relres;
 	}
+	if (WARN_ON(gpiochip->irq.irq_enable))
+		return;
+	gpiochip->irq.irq_enable = irqchip->irq_enable;
+	gpiochip->irq.irq_disable = irqchip->irq_disable;
+	irqchip->irq_enable = gpiochip_irq_enable;
+	irqchip->irq_disable = gpiochip_irq_disable;
 }
 
 /**
@@ -1954,11 +1982,18 @@ static void gpiochip_irqchip_remove(struct gpio_chip *gpiochip)
 		irq_domain_remove(gpiochip->irq.domain);
 	}
 
-	if (irqchip &&
-	    irqchip->irq_request_resources == gpiochip_irq_reqres) {
-		irqchip->irq_request_resources = NULL;
-		irqchip->irq_release_resources = NULL;
+	if (irqchip) {
+		if (irqchip->irq_request_resources == gpiochip_irq_reqres) {
+			irqchip->irq_request_resources = NULL;
+			irqchip->irq_release_resources = NULL;
+		}
+		if (irqchip->irq_enable == gpiochip_irq_enable) {
+			irqchip->irq_enable = gpiochip->irq.irq_enable;
+			irqchip->irq_disable = gpiochip->irq.irq_disable;
+		}
 	}
+	gpiochip->irq.irq_enable = NULL;
+	gpiochip->irq.irq_disable = NULL;
 	gpiochip->irq.chip = NULL;
 
 	gpiochip_irqchip_free_valid_mask(gpiochip);
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 9052ccd399fd..d8dcd0e44cab 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -144,6 +144,20 @@ struct gpio_irq_chip {
 	 * will allocate and map all IRQs during initialization.
 	 */
 	unsigned int first;
+
+	/**
+	 * @irq_enable:
+	 *
+	 * Store old irq_chip irq_enable callback
+	 */
+	void		(*irq_enable)(struct irq_data *data);
+
+	/**
+	 * @irq_disable:
+	 *
+	 * Store old irq_chip irq_disable callback
+	 */
+	void		(*irq_disable)(struct irq_data *data);
 };
 
 static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip)
-- 
cgit v1.2.3


From 05484e0984487d42e97c417cbb0697fa9d16e7e9 Mon Sep 17 00:00:00 2001
From: Morten Rasmussen <morten.rasmussen@arm.com>
Date: Fri, 20 Jul 2018 14:32:31 +0100
Subject: sched/topology: Add SD_ASYM_CPUCAPACITY flag detection

The SD_ASYM_CPUCAPACITY sched_domain flag is supposed to mark the
sched_domain in the hierarchy where all CPU capacities are visible for
any CPU's point of view on asymmetric CPU capacity systems. The
scheduler can then take to take capacity asymmetry into account when
balancing at this level. It also serves as an indicator for how wide
task placement heuristics have to search to consider all available CPU
capacities as asymmetric systems might often appear symmetric at
smallest level(s) of the sched_domain hierarchy.

The flag has been around for while but so far only been set by
out-of-tree code in Android kernels. One solution is to let each
architecture provide the flag through a custom sched_domain topology
array and associated mask and flag functions. However,
SD_ASYM_CPUCAPACITY is special in the sense that it depends on the
capacity and presence of all CPUs in the system, i.e. when hotplugging
all CPUs out except those with one particular CPU capacity the flag
should disappear even if the sched_domains don't collapse. Similarly,
the flag is affected by cpusets where load-balancing is turned off.
Detecting when the flags should be set therefore depends not only on
topology information but also the cpuset configuration and hotplug
state. The arch code doesn't have easy access to the cpuset
configuration.

Instead, this patch implements the flag detection in generic code where
cpusets and hotplug state is already taken care of. All the arch is
responsible for is to implement arch_scale_cpu_capacity() and force a
full rebuild of the sched_domain hierarchy if capacities are updated,
e.g. later in the boot process when cpufreq has initialized.

Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dietmar.eggemann@arm.com
Cc: valentin.schneider@arm.com
Cc: vincent.guittot@linaro.org
Link: http://lkml.kernel.org/r/1532093554-30504-2-git-send-email-morten.rasmussen@arm.com
[ Fixed 'CPU' capitalization. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched/topology.h |  6 ++--
 kernel/sched/topology.c        | 81 ++++++++++++++++++++++++++++++++++++++----
 2 files changed, 78 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 26347741ba50..6b9976180c1e 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -23,10 +23,10 @@
 #define SD_BALANCE_FORK		0x0008	/* Balance on fork, clone */
 #define SD_BALANCE_WAKE		0x0010  /* Balance on wakeup */
 #define SD_WAKE_AFFINE		0x0020	/* Wake task to waking CPU */
-#define SD_ASYM_CPUCAPACITY	0x0040  /* Groups have different max cpu capacities */
-#define SD_SHARE_CPUCAPACITY	0x0080	/* Domain members share cpu capacity */
+#define SD_ASYM_CPUCAPACITY	0x0040  /* Domain members have different CPU capacities */
+#define SD_SHARE_CPUCAPACITY	0x0080	/* Domain members share CPU capacity */
 #define SD_SHARE_POWERDOMAIN	0x0100	/* Domain members share power domain */
-#define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share cpu pkg resources */
+#define SD_SHARE_PKG_RESOURCES	0x0200	/* Domain members share CPU pkg resources */
 #define SD_SERIALIZE		0x0400	/* Only a single load balancing instance */
 #define SD_ASYM_PACKING		0x0800  /* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING	0x1000	/* Prefer to place tasks in a sibling domain */
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 505a41c42b96..5c4d583d53ee 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1061,7 +1061,6 @@ static struct cpumask		***sched_domains_numa_masks;
  *   SD_SHARE_PKG_RESOURCES - describes shared caches
  *   SD_NUMA                - describes NUMA topologies
  *   SD_SHARE_POWERDOMAIN   - describes shared power domain
- *   SD_ASYM_CPUCAPACITY    - describes mixed capacity topologies
  *
  * Odd one out, which beside describing the topology has a quirk also
  * prescribes the desired behaviour that goes along with it:
@@ -1073,13 +1072,12 @@ static struct cpumask		***sched_domains_numa_masks;
 	 SD_SHARE_PKG_RESOURCES |	\
 	 SD_NUMA		|	\
 	 SD_ASYM_PACKING	|	\
-	 SD_ASYM_CPUCAPACITY	|	\
 	 SD_SHARE_POWERDOMAIN)
 
 static struct sched_domain *
 sd_init(struct sched_domain_topology_level *tl,
 	const struct cpumask *cpu_map,
-	struct sched_domain *child, int cpu)
+	struct sched_domain *child, int dflags, int cpu)
 {
 	struct sd_data *sdd = &tl->data;
 	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
@@ -1100,6 +1098,9 @@ sd_init(struct sched_domain_topology_level *tl,
 			"wrong sd_flags in topology description\n"))
 		sd_flags &= ~TOPOLOGY_SD_FLAGS;
 
+	/* Apply detected topology flags */
+	sd_flags |= dflags;
+
 	*sd = (struct sched_domain){
 		.min_interval		= sd_weight,
 		.max_interval		= 2*sd_weight,
@@ -1604,9 +1605,9 @@ static void __sdt_free(const struct cpumask *cpu_map)
 
 static struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
 		const struct cpumask *cpu_map, struct sched_domain_attr *attr,
-		struct sched_domain *child, int cpu)
+		struct sched_domain *child, int dflags, int cpu)
 {
-	struct sched_domain *sd = sd_init(tl, cpu_map, child, cpu);
+	struct sched_domain *sd = sd_init(tl, cpu_map, child, dflags, cpu);
 
 	if (child) {
 		sd->level = child->level + 1;
@@ -1632,6 +1633,65 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve
 	return sd;
 }
 
+/*
+ * Find the sched_domain_topology_level where all CPU capacities are visible
+ * for all CPUs.
+ */
+static struct sched_domain_topology_level
+*asym_cpu_capacity_level(const struct cpumask *cpu_map)
+{
+	int i, j, asym_level = 0;
+	bool asym = false;
+	struct sched_domain_topology_level *tl, *asym_tl = NULL;
+	unsigned long cap;
+
+	/* Is there any asymmetry? */
+	cap = arch_scale_cpu_capacity(NULL, cpumask_first(cpu_map));
+
+	for_each_cpu(i, cpu_map) {
+		if (arch_scale_cpu_capacity(NULL, i) != cap) {
+			asym = true;
+			break;
+		}
+	}
+
+	if (!asym)
+		return NULL;
+
+	/*
+	 * Examine topology from all CPU's point of views to detect the lowest
+	 * sched_domain_topology_level where a highest capacity CPU is visible
+	 * to everyone.
+	 */
+	for_each_cpu(i, cpu_map) {
+		unsigned long max_capacity = arch_scale_cpu_capacity(NULL, i);
+		int tl_id = 0;
+
+		for_each_sd_topology(tl) {
+			if (tl_id < asym_level)
+				goto next_level;
+
+			for_each_cpu_and(j, tl->mask(i), cpu_map) {
+				unsigned long capacity;
+
+				capacity = arch_scale_cpu_capacity(NULL, j);
+
+				if (capacity <= max_capacity)
+					continue;
+
+				max_capacity = capacity;
+				asym_level = tl_id;
+				asym_tl = tl;
+			}
+next_level:
+			tl_id++;
+		}
+	}
+
+	return asym_tl;
+}
+
+
 /*
  * Build sched domains for a given set of CPUs and attach the sched domains
  * to the individual CPUs
@@ -1644,18 +1704,27 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
 	struct s_data d;
 	struct rq *rq = NULL;
 	int i, ret = -ENOMEM;
+	struct sched_domain_topology_level *tl_asym;
 
 	alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
 	if (alloc_state != sa_rootdomain)
 		goto error;
 
+	tl_asym = asym_cpu_capacity_level(cpu_map);
+
 	/* Set up domains for CPUs specified by the cpu_map: */
 	for_each_cpu(i, cpu_map) {
 		struct sched_domain_topology_level *tl;
 
 		sd = NULL;
 		for_each_sd_topology(tl) {
-			sd = build_sched_domain(tl, cpu_map, attr, sd, i);
+			int dflags = 0;
+
+			if (tl == tl_asym)
+				dflags |= SD_ASYM_CPUCAPACITY;
+
+			sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i);
+
 			if (tl == sched_domain_topology)
 				*per_cpu_ptr(d.sd, i) = sd;
 			if (tl->flags & SDTL_OVERLAP)
-- 
cgit v1.2.3


From bb1fbdd3c3fd12b612c7d8cdf13bd6bfeebdefa3 Mon Sep 17 00:00:00 2001
From: Morten Rasmussen <morten.rasmussen@arm.com>
Date: Fri, 20 Jul 2018 14:32:32 +0100
Subject: sched/topology, drivers/base/arch_topology: Rebuild the sched_domain
 hierarchy when capacities change

The setting of SD_ASYM_CPUCAPACITY depends on the per-CPU capacities.
These might not have their final values when the hierarchy is initially
built as the values depend on cpufreq to be initialized or the values
being set through sysfs. To ensure that the flags are set correctly we
need to rebuild the sched_domain hierarchy whenever the reported per-CPU
capacity (arch_scale_cpu_capacity()) changes.

This patch ensure that a full sched_domain rebuild happens when CPU
capacity changes occur.

Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dietmar.eggemann@arm.com
Cc: valentin.schneider@arm.com
Cc: vincent.guittot@linaro.org
Link: http://lkml.kernel.org/r/1532093554-30504-3-git-send-email-morten.rasmussen@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 drivers/base/arch_topology.c  | 26 ++++++++++++++++++++++++++
 include/linux/arch_topology.h |  1 +
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index e7cb0c6ade81..edfcf8d982e4 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/sched/topology.h>
+#include <linux/cpuset.h>
 
 DEFINE_PER_CPU(unsigned long, freq_scale) = SCHED_CAPACITY_SCALE;
 
@@ -47,6 +48,9 @@ static ssize_t cpu_capacity_show(struct device *dev,
 	return sprintf(buf, "%lu\n", topology_get_cpu_scale(NULL, cpu->dev.id));
 }
 
+static void update_topology_flags_workfn(struct work_struct *work);
+static DECLARE_WORK(update_topology_flags_work, update_topology_flags_workfn);
+
 static ssize_t cpu_capacity_store(struct device *dev,
 				  struct device_attribute *attr,
 				  const char *buf,
@@ -72,6 +76,8 @@ static ssize_t cpu_capacity_store(struct device *dev,
 		topology_set_cpu_scale(i, new_capacity);
 	mutex_unlock(&cpu_scale_mutex);
 
+	schedule_work(&update_topology_flags_work);
+
 	return count;
 }
 
@@ -96,6 +102,25 @@ static int register_cpu_capacity_sysctl(void)
 }
 subsys_initcall(register_cpu_capacity_sysctl);
 
+static int update_topology;
+
+int topology_update_cpu_topology(void)
+{
+	return update_topology;
+}
+
+/*
+ * Updating the sched_domains can't be done directly from cpufreq callbacks
+ * due to locking, so queue the work for later.
+ */
+static void update_topology_flags_workfn(struct work_struct *work)
+{
+	update_topology = 1;
+	rebuild_sched_domains();
+	pr_debug("sched_domain hierarchy rebuilt, flags updated\n");
+	update_topology = 0;
+}
+
 static u32 capacity_scale;
 static u32 *raw_capacity;
 
@@ -201,6 +226,7 @@ init_cpu_capacity_callback(struct notifier_block *nb,
 
 	if (cpumask_empty(cpus_to_visit)) {
 		topology_normalize_cpu_scale();
+		schedule_work(&update_topology_flags_work);
 		free_raw_capacity();
 		pr_debug("cpu_capacity: parsing done\n");
 		schedule_work(&parsing_done_work);
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index 2b709416de05..d9bdc1a7f4e7 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -9,6 +9,7 @@
 #include <linux/percpu.h>
 
 void topology_normalize_cpu_scale(void);
+int topology_update_cpu_topology(void);
 
 struct device_node;
 bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu);
-- 
cgit v1.2.3


From 925b9cd1b89a94b7124d128c80dfc48f78a63098 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Thu, 6 Sep 2018 16:18:34 -0400
Subject: locking/rwsem: Make owner store task pointer of last owning reader

Currently, when a reader acquires a lock, it only sets the
RWSEM_READER_OWNED bit in the owner field. The other bits are simply
not used. When debugging hanging cases involving rwsems and readers,
the owner value does not provide much useful information at all.

This patch modifies the current behavior to always store the task_struct
pointer of the last rwsem-acquiring reader in a reader-owned rwsem. This
may be useful in debugging rwsem hanging cases especially if only one
reader is involved. However, the task in the owner field may not the
real owner or one of the real owners at all when the owner value is
examined, for example, in a crash dump. So it is just an additional
hint about the past history.

If CONFIG_DEBUG_RWSEMS=y is enabled, the owner field will be checked at
unlock time too to make sure the task pointer value is valid. That does
have a slight performance cost and so is only enabled as part of that
debug option.

From the performance point of view, it is expected that the changes
shouldn't have any noticeable performance impact. A rwsem microbenchmark
(with 48 worker threads and 1:1 reader/writer ratio) was ran on a
2-socket 24-core 48-thread Haswell system.  The locking rates on a
4.19-rc1 based kernel were as follows:

  1) Unpatched kernel:				543.3 kops/s
  2) Patched kernel:				549.2 kops/s
  3) Patched kernel (CONFIG_DEBUG_RWSEMS on):	546.6 kops/s

There was actually a slight increase in performance (1.1%) in this
particular case. Maybe it was caused by the elimination of a branch or
just a testing noise. Turning on the CONFIG_DEBUG_RWSEMS option also
had less than the expected impact on performance.

The least significant 2 bits of the owner value are now used to designate
the rwsem is readers owned and the owners are anonymous.

Signed-off-by: Waiman Long <longman@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Link: http://lkml.kernel.org/r/1536265114-10842-1-git-send-email-longman@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/rwsem.h       |  4 +-
 kernel/locking/rwsem-xadd.c |  2 +-
 kernel/locking/rwsem.c      |  7 ++--
 kernel/locking/rwsem.h      | 95 +++++++++++++++++++++++++++++++++------------
 4 files changed, 78 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index ab93b6eae696..67dbb57508b1 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -45,10 +45,10 @@ struct rw_semaphore {
 };
 
 /*
- * Setting bit 0 of the owner field with other non-zero bits will indicate
+ * Setting bit 1 of the owner field but not bit 0 will indicate
  * that the rwsem is writer-owned with an unknown owner.
  */
-#define RWSEM_OWNER_UNKNOWN	((struct task_struct *)-1L)
+#define RWSEM_OWNER_UNKNOWN	((struct task_struct *)-2L)
 
 extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
 extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 01fcb807598c..09b180063ee1 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -180,7 +180,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 		 * but it gives the spinners an early indication that the
 		 * readers now have the lock.
 		 */
-		rwsem_set_reader_owned(sem);
+		__rwsem_set_reader_owned(sem, waiter->task);
 	}
 
 	/*
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 776308d2fa9e..e586f0d03ad3 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -117,8 +117,9 @@ EXPORT_SYMBOL(down_write_trylock);
 void up_read(struct rw_semaphore *sem)
 {
 	rwsem_release(&sem->dep_map, 1, _RET_IP_);
-	DEBUG_RWSEMS_WARN_ON(sem->owner != RWSEM_READER_OWNED);
+	DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED));
 
+	rwsem_clear_reader_owned(sem);
 	__up_read(sem);
 }
 
@@ -181,7 +182,7 @@ void down_read_non_owner(struct rw_semaphore *sem)
 	might_sleep();
 
 	__down_read(sem);
-	rwsem_set_reader_owned(sem);
+	__rwsem_set_reader_owned(sem, NULL);
 }
 
 EXPORT_SYMBOL(down_read_non_owner);
@@ -215,7 +216,7 @@ EXPORT_SYMBOL(down_write_killable_nested);
 
 void up_read_non_owner(struct rw_semaphore *sem)
 {
-	DEBUG_RWSEMS_WARN_ON(sem->owner != RWSEM_READER_OWNED);
+	DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED));
 	__up_read(sem);
 }
 
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index b9d0e72aa80f..bad2bca0268b 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -1,24 +1,30 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * The owner field of the rw_semaphore structure will be set to
- * RWSEM_READER_OWNED when a reader grabs the lock. A writer will clear
- * the owner field when it unlocks. A reader, on the other hand, will
- * not touch the owner field when it unlocks.
+ * The least significant 2 bits of the owner value has the following
+ * meanings when set.
+ *  - RWSEM_READER_OWNED (bit 0): The rwsem is owned by readers
+ *  - RWSEM_ANONYMOUSLY_OWNED (bit 1): The rwsem is anonymously owned,
+ *    i.e. the owner(s) cannot be readily determined. It can be reader
+ *    owned or the owning writer is indeterminate.
  *
- * In essence, the owner field now has the following 4 states:
- *  1) 0
- *     - lock is free or the owner hasn't set the field yet
- *  2) RWSEM_READER_OWNED
- *     - lock is currently or previously owned by readers (lock is free
- *       or not set by owner yet)
- *  3) RWSEM_ANONYMOUSLY_OWNED bit set with some other bits set as well
- *     - lock is owned by an anonymous writer, so spinning on the lock
- *       owner should be disabled.
- *  4) Other non-zero value
- *     - a writer owns the lock and other writers can spin on the lock owner.
+ * When a writer acquires a rwsem, it puts its task_struct pointer
+ * into the owner field. It is cleared after an unlock.
+ *
+ * When a reader acquires a rwsem, it will also puts its task_struct
+ * pointer into the owner field with both the RWSEM_READER_OWNED and
+ * RWSEM_ANONYMOUSLY_OWNED bits set. On unlock, the owner field will
+ * largely be left untouched. So for a free or reader-owned rwsem,
+ * the owner value may contain information about the last reader that
+ * acquires the rwsem. The anonymous bit is set because that particular
+ * reader may or may not still own the lock.
+ *
+ * That information may be helpful in debugging cases where the system
+ * seems to hang on a reader owned rwsem especially if only one reader
+ * is involved. Ideally we would like to track all the readers that own
+ * a rwsem, but the overhead is simply too big.
  */
-#define RWSEM_ANONYMOUSLY_OWNED	(1UL << 0)
-#define RWSEM_READER_OWNED	((struct task_struct *)RWSEM_ANONYMOUSLY_OWNED)
+#define RWSEM_READER_OWNED	(1UL << 0)
+#define RWSEM_ANONYMOUSLY_OWNED	(1UL << 1)
 
 #ifdef CONFIG_DEBUG_RWSEMS
 # define DEBUG_RWSEMS_WARN_ON(c)	DEBUG_LOCKS_WARN_ON(c)
@@ -44,15 +50,26 @@ static inline void rwsem_clear_owner(struct rw_semaphore *sem)
 	WRITE_ONCE(sem->owner, NULL);
 }
 
+/*
+ * The task_struct pointer of the last owning reader will be left in
+ * the owner field.
+ *
+ * Note that the owner value just indicates the task has owned the rwsem
+ * previously, it may not be the real owner or one of the real owners
+ * anymore when that field is examined, so take it with a grain of salt.
+ */
+static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
+					    struct task_struct *owner)
+{
+	unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED
+						 | RWSEM_ANONYMOUSLY_OWNED;
+
+	WRITE_ONCE(sem->owner, (struct task_struct *)val);
+}
+
 static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
 {
-	/*
-	 * We check the owner value first to make sure that we will only
-	 * do a write to the rwsem cacheline when it is really necessary
-	 * to minimize cacheline contention.
-	 */
-	if (READ_ONCE(sem->owner) != RWSEM_READER_OWNED)
-		WRITE_ONCE(sem->owner, RWSEM_READER_OWNED);
+	__rwsem_set_reader_owned(sem, current);
 }
 
 /*
@@ -72,6 +89,25 @@ static inline bool rwsem_has_anonymous_owner(struct task_struct *owner)
 {
 	return (unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED;
 }
+
+#ifdef CONFIG_DEBUG_RWSEMS
+/*
+ * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
+ * is a task pointer in owner of a reader-owned rwsem, it will be the
+ * real owner or one of the real owners. The only exception is when the
+ * unlock is done by up_read_non_owner().
+ */
+#define rwsem_clear_reader_owned rwsem_clear_reader_owned
+static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
+{
+	unsigned long val = (unsigned long)current | RWSEM_READER_OWNED
+						   | RWSEM_ANONYMOUSLY_OWNED;
+	if (READ_ONCE(sem->owner) == (struct task_struct *)val)
+		cmpxchg_relaxed((unsigned long *)&sem->owner, val,
+				RWSEM_READER_OWNED | RWSEM_ANONYMOUSLY_OWNED);
+}
+#endif
+
 #else
 static inline void rwsem_set_owner(struct rw_semaphore *sem)
 {
@@ -81,7 +117,18 @@ static inline void rwsem_clear_owner(struct rw_semaphore *sem)
 {
 }
 
+static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
+					   struct task_struct *owner)
+{
+}
+
 static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
 {
 }
 #endif
+
+#ifndef rwsem_clear_reader_owned
+static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
+{
+}
+#endif
-- 
cgit v1.2.3


From 719cf71cada1abc4f8727eac918b639a4a502a59 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 20 Aug 2018 16:45:41 +0300
Subject: ACPI / glue: Split dev_is_platform() out of module for wide use

There would be useful to have in future the similar API in platform
core, as we have, for example, for PCI subsystem, to check if device
belongs to it.

Thus, split out conditional to a macro dev_is_platform() for wide use.

No functional change intended.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/glue.c             | 2 +-
 include/linux/platform_device.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 3be1433853bf..12ba2bee8789 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -320,7 +320,7 @@ static int acpi_platform_notify(struct device *dev)
 	if (!adev)
 		goto out;
 
-	if (dev->bus == &platform_bus_type)
+	if (dev_is_platform(dev))
 		acpi_configure_pmsi_domain(dev);
 
 	if (type && type->setup)
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 1a9f38f27f65..c7c081dc6034 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -40,6 +40,7 @@ struct platform_device {
 
 #define platform_get_device_id(pdev)	((pdev)->id_entry)
 
+#define dev_is_platform(dev) ((dev)->bus == &platform_bus_type)
 #define to_platform_device(x) container_of((x), struct platform_device, dev)
 
 extern int platform_device_register(struct platform_device *);
-- 
cgit v1.2.3


From d31ca7e5d0fa066d59540859af32ab6f95adb519 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Thu, 30 Aug 2018 14:20:04 +0100
Subject: mfd: madera: Add irqchip data pointer into struct madera

Put the pointer to struct regmap_irq_chip_data into the parent
mfd structure so that the child irqchip driver does not need
a trivial private structure to store only this pointer. As
the irqchip child driver already has a pointer to the parent
struct madera it can use that to store the pointer. This also
means that the irqchip driver does not need a double-indirection
from its local struct to get at the parent struct madera.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/madera/core.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/madera/core.h b/include/linux/mfd/madera/core.h
index c332681848ef..fe69c0f4398f 100644
--- a/include/linux/mfd/madera/core.h
+++ b/include/linux/mfd/madera/core.h
@@ -148,6 +148,7 @@ struct snd_soc_dapm_context;
  * @internal_dcvdd:	true if DCVDD is supplied from the internal LDO1
  * @pdata:		our pdata
  * @irq_dev:		the irqchip child driver device
+ * @irq_data:		pointer to irqchip data for the child irqchip driver
  * @irq:		host irq number from SPI or I2C configuration
  * @out_clamp:		indicates output clamp state for each analogue output
  * @out_shorted:	indicates short circuit state for each analogue output
@@ -175,6 +176,7 @@ struct madera {
 	struct madera_pdata pdata;
 
 	struct device *irq_dev;
+	struct regmap_irq_chip_data *irq_data;
 	int irq;
 
 	unsigned int num_micbias;
-- 
cgit v1.2.3


From a318c2432218d3cd189ec8228b8a795666899c2a Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Fri, 31 Aug 2018 14:34:02 +0200
Subject: mfd: da9063: Fix DT probing with constraints

Commit 1c892e38ce59 ("regulator: da9063: Handle less LDOs on DA9063L")
reordered the da9063_regulator_info[] array, but not the DA9063_ID_*
regulator ids and not the da9063_matches[] array, because ids are used
as indices in the array initializer. This mismatch between regulator id
and da9063_regulator_info[] array index causes the driver probe to fail
because constraints from DT are not applied to the correct regulator:

  da9063 0-0058: Device detected (chip-ID: 0x61, var-ID: 0x50)
  DA9063_BMEM: Bringing 900000uV into 3300000-3300000uV
  DA9063_LDO9: Bringing 3300000uV into 2500000-2500000uV
  DA9063_LDO1: Bringing 900000uV into 3300000-3300000uV
  DA9063_LDO1: failed to apply 3300000-3300000uV constraint(-22)

This patch reorders the DA9063_ID_* as apparently intended, and with
them the entries in the da90630_matches[] array.

Fixes: 1c892e38ce59 ("regulator: da9063: Handle less LDOs on DA9063L")
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/da9063/pdata.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/da9063/pdata.h b/include/linux/mfd/da9063/pdata.h
index 8a125701ef7b..50bed4f89c1a 100644
--- a/include/linux/mfd/da9063/pdata.h
+++ b/include/linux/mfd/da9063/pdata.h
@@ -21,7 +21,7 @@
 /*
  * Regulator configuration
  */
-/* DA9063 regulator IDs */
+/* DA9063 and DA9063L regulator IDs */
 enum {
 	/* BUCKs */
 	DA9063_ID_BCORE1,
@@ -37,18 +37,20 @@ enum {
 	DA9063_ID_BMEM_BIO_MERGED,
 	/* When two BUCKs are merged, they cannot be reused separately */
 
-	/* LDOs */
+	/* LDOs on both DA9063 and DA9063L */
+	DA9063_ID_LDO3,
+	DA9063_ID_LDO7,
+	DA9063_ID_LDO8,
+	DA9063_ID_LDO9,
+	DA9063_ID_LDO11,
+
+	/* DA9063-only LDOs */
 	DA9063_ID_LDO1,
 	DA9063_ID_LDO2,
-	DA9063_ID_LDO3,
 	DA9063_ID_LDO4,
 	DA9063_ID_LDO5,
 	DA9063_ID_LDO6,
-	DA9063_ID_LDO7,
-	DA9063_ID_LDO8,
-	DA9063_ID_LDO9,
 	DA9063_ID_LDO10,
-	DA9063_ID_LDO11,
 };
 
 /* Regulators platform data */
-- 
cgit v1.2.3


From 1a63fe9a2b1f47af5b2b7436b41824b14999c17a Mon Sep 17 00:00:00 2001
From: Quentin Perret <quentin.perret@arm.com>
Date: Mon, 10 Sep 2018 17:28:10 +0100
Subject: firmware: arm_scmi: add a getter for power of performance states

The SCMI protocol can be used to get power estimates from firmware
corresponding to each performance state of a device. Although these power
costs are already managed by the SCMI firmware driver, they are not
exposed to any external subsystem yet.

Fix this by adding a new get_power() interface to the exisiting perf_ops
defined for the SCMI protocol.

Signed-off-by: Quentin Perret <quentin.perret@arm.com>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
---
 drivers/firmware/arm_scmi/perf.c | 28 ++++++++++++++++++++++++++++
 include/linux/scmi_protocol.h    |  4 ++++
 2 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c
index 87c99d296ecd..3c8ae7cc35de 100644
--- a/drivers/firmware/arm_scmi/perf.c
+++ b/drivers/firmware/arm_scmi/perf.c
@@ -427,6 +427,33 @@ static int scmi_dvfs_freq_get(const struct scmi_handle *handle, u32 domain,
 	return ret;
 }
 
+static int scmi_dvfs_est_power_get(const struct scmi_handle *handle, u32 domain,
+				   unsigned long *freq, unsigned long *power)
+{
+	struct scmi_perf_info *pi = handle->perf_priv;
+	struct perf_dom_info *dom;
+	unsigned long opp_freq;
+	int idx, ret = -EINVAL;
+	struct scmi_opp *opp;
+
+	dom = pi->dom_info + domain;
+	if (!dom)
+		return -EIO;
+
+	for (opp = dom->opp, idx = 0; idx < dom->opp_count; idx++, opp++) {
+		opp_freq = opp->perf * dom->mult_factor;
+		if (opp_freq < *freq)
+			continue;
+
+		*freq = opp_freq;
+		*power = opp->power;
+		ret = 0;
+		break;
+	}
+
+	return ret;
+}
+
 static struct scmi_perf_ops perf_ops = {
 	.limits_set = scmi_perf_limits_set,
 	.limits_get = scmi_perf_limits_get,
@@ -437,6 +464,7 @@ static struct scmi_perf_ops perf_ops = {
 	.device_opps_add = scmi_dvfs_device_opps_add,
 	.freq_set = scmi_dvfs_freq_set,
 	.freq_get = scmi_dvfs_freq_get,
+	.est_power_get = scmi_dvfs_est_power_get,
 };
 
 static int scmi_perf_protocol_init(struct scmi_handle *handle)
diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h
index f4c9fc0fc755..3105055c00a7 100644
--- a/include/linux/scmi_protocol.h
+++ b/include/linux/scmi_protocol.h
@@ -91,6 +91,8 @@ struct scmi_clk_ops {
  *	to sustained performance level mapping
  * @freq_get: gets the frequency for a given device using sustained frequency
  *	to sustained performance level mapping
+ * @est_power_get: gets the estimated power cost for a given performance domain
+ *	at a given frequency
  */
 struct scmi_perf_ops {
 	int (*limits_set)(const struct scmi_handle *handle, u32 domain,
@@ -110,6 +112,8 @@ struct scmi_perf_ops {
 			unsigned long rate, bool poll);
 	int (*freq_get)(const struct scmi_handle *handle, u32 domain,
 			unsigned long *rate, bool poll);
+	int (*est_power_get)(const struct scmi_handle *handle, u32 domain,
+			     unsigned long *rate, unsigned long *power);
 };
 
 /**
-- 
cgit v1.2.3


From 8b69bd7d8a8927d537f134c37bcca6cbfa58e1b2 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 11 Aug 2018 18:43:38 -0700
Subject: ppp: Remove direct skb_queue_head list pointer access.

Add a helper, __skb_peek(), and use it in ppp_mp_reconstruct().

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp/ppp_generic.c |  2 +-
 include/linux/skbuff.h        | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 02ad03a2fab7..500bc0027c1b 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -2400,7 +2400,7 @@ ppp_mp_reconstruct(struct ppp *ppp)
 
 	if (ppp->mrru == 0)	/* do nothing until mrru is set */
 		return NULL;
-	head = list->next;
+	head = __skb_peek(list);
 	tail = NULL;
 	skb_queue_walk_safe(list, p, tmp) {
 	again:
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 17a13e4785fc..89283b77294d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1592,6 +1592,17 @@ static inline struct sk_buff *skb_peek(const struct sk_buff_head *list_)
 	return skb;
 }
 
+/**
+ *	__skb_peek - peek at the head of a non-empty &sk_buff_head
+ *	@list_: list to peek at
+ *
+ *	Like skb_peek(), but the caller knows that the list is not empty.
+ */
+static inline struct sk_buff *__skb_peek(const struct sk_buff_head *list_)
+{
+	return list_->next;
+}
+
 /**
  *	skb_peek_next - peek skb following the given one from a queue
  *	@skb: skb to start from
-- 
cgit v1.2.3


From a8305bff685252e80b7c60f4f5e7dd2e63e38218 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 29 Jul 2018 20:42:53 -0700
Subject: net: Add and use skb_mark_not_on_list().

An SKB is not on a list if skb->next is NULL.

Codify this convention into a helper function and use it
where we are dequeueing an SKB and need to mark it as such.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h                  | 5 +++++
 net/core/dev.c                          | 8 ++++----
 net/core/sock.c                         | 2 +-
 net/ieee802154/6lowpan/reassembly.c     | 2 +-
 net/ipv4/ip_fragment.c                  | 2 +-
 net/ipv4/ip_input.c                     | 2 +-
 net/ipv4/ip_output.c                    | 4 ++--
 net/ipv6/ip6_output.c                   | 2 +-
 net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +-
 net/ipv6/reassembly.c                   | 2 +-
 net/netfilter/nfnetlink_queue.c         | 2 +-
 net/rxrpc/input.c                       | 2 +-
 net/sched/sch_cake.c                    | 6 +++---
 net/sched/sch_fq.c                      | 2 +-
 net/sched/sch_fq_codel.c                | 2 +-
 net/sched/sch_generic.c                 | 4 ++--
 net/sched/sch_hhf.c                     | 2 +-
 net/sched/sch_netem.c                   | 2 +-
 net/sched/sch_tbf.c                     | 2 +-
 net/tipc/bearer.c                       | 2 +-
 net/xfrm/xfrm_device.c                  | 2 +-
 net/xfrm/xfrm_output.c                  | 2 +-
 22 files changed, 33 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 89283b77294d..c4c9e3f5cd9a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1339,6 +1339,11 @@ static inline void skb_zcopy_abort(struct sk_buff *skb)
 	}
 }
 
+static inline void skb_mark_not_on_list(struct sk_buff *skb)
+{
+	skb->next = NULL;
+}
+
 /**
  *	skb_queue_empty - check if a queue is empty
  *	@list: queue head
diff --git a/net/core/dev.c b/net/core/dev.c
index ca78dc5a79a3..f76dd7e14dd6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3231,7 +3231,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *de
 	while (skb) {
 		struct sk_buff *next = skb->next;
 
-		skb->next = NULL;
+		skb_mark_not_on_list(skb);
 		rc = xmit_one(skb, dev, txq, next != NULL);
 		if (unlikely(!dev_xmit_complete(rc))) {
 			skb->next = next;
@@ -3331,7 +3331,7 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d
 
 	for (; skb != NULL; skb = next) {
 		next = skb->next;
-		skb->next = NULL;
+		skb_mark_not_on_list(skb);
 
 		/* in case skb wont be segmented, point to itself */
 		skb->prev = skb;
@@ -5296,7 +5296,7 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
 		if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
 			return;
 		list_del(&skb->list);
-		skb->next = NULL;
+		skb_mark_not_on_list(skb);
 		napi_gro_complete(skb);
 		napi->gro_hash[index].count--;
 	}
@@ -5482,7 +5482,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 
 	if (pp) {
 		list_del(&pp->list);
-		pp->next = NULL;
+		skb_mark_not_on_list(pp);
 		napi_gro_complete(pp);
 		napi->gro_hash[hash].count--;
 	}
diff --git a/net/core/sock.c b/net/core/sock.c
index 3730eb855095..8537b6ca72c5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2332,7 +2332,7 @@ static void __release_sock(struct sock *sk)
 			next = skb->next;
 			prefetch(next);
 			WARN_ON_ONCE(skb_dst_is_noref(skb));
-			skb->next = NULL;
+			skb_mark_not_on_list(skb);
 			sk_backlog_rcv(sk, skb);
 
 			cond_resched();
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index e7857a8ac86d..09ffbf5ce8fa 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -260,7 +260,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
 	}
 	sub_frag_mem_limit(fq->q.net, sum_truesize);
 
-	head->next = NULL;
+	skb_mark_not_on_list(head);
 	head->dev = ldev;
 	head->tstamp = fq->q.stamp;
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 330f62353b11..cab3e4a5124b 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -623,7 +623,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
 	sub_frag_mem_limit(qp->q.net, head->truesize);
 
 	*nextp = NULL;
-	head->next = NULL;
+	skb_mark_not_on_list(head);
 	head->prev = NULL;
 	head->dev = dev;
 	head->tstamp = qp->q.stamp;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 3196cf58f418..eba7f3883230 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -535,7 +535,7 @@ static void ip_sublist_rcv_finish(struct list_head *head)
 		/* Handle ip{6}_forward case, as sch_direct_xmit have
 		 * another kind of SKB-list usage (see validate_xmit_skb_list)
 		 */
-		skb->next = NULL;
+		skb_mark_not_on_list(skb);
 		dst_input(skb);
 	}
 }
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 9c4e72e9c60a..c09219e7f230 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -278,7 +278,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk,
 		struct sk_buff *nskb = segs->next;
 		int err;
 
-		segs->next = NULL;
+		skb_mark_not_on_list(segs);
 		err = ip_fragment(net, sk, segs, mtu, ip_finish_output2);
 
 		if (err && ret == 0)
@@ -684,7 +684,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 
 			skb = frag;
 			frag = skb->next;
-			skb->next = NULL;
+			skb_mark_not_on_list(skb);
 		}
 
 		if (err == 0) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 16f200f06500..9a8934ac053b 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -727,7 +727,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
 
 			skb = frag;
 			frag = skb->next;
-			skb->next = NULL;
+			skb_mark_not_on_list(skb);
 		}
 
 		kfree(tmp_hdr);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 2a14d8b65924..00e20004d241 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -449,7 +449,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev,  struct net_devic
 	sub_frag_mem_limit(fq->q.net, head->truesize);
 
 	head->ignore_df = 1;
-	head->next = NULL;
+	skb_mark_not_on_list(head);
 	head->dev = dev;
 	head->tstamp = fq->q.stamp;
 	ipv6_hdr(head)->payload_len = htons(payload_len);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 5c5b4f79296e..f1b1ff30fe5b 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -388,7 +388,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
 	}
 	sub_frag_mem_limit(fq->q.net, sum_truesize);
 
-	head->next = NULL;
+	skb_mark_not_on_list(head);
 	head->dev = dev;
 	head->tstamp = fq->q.stamp;
 	ipv6_hdr(head)->payload_len = htons(payload_len);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index ea4ba551abb2..5207eb8a5864 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -764,7 +764,7 @@ __nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue,
 		return ret;
 	}
 
-	skb->next = NULL;
+	skb_mark_not_on_list(skb);
 
 	entry_seg = nf_queue_entry_dup(entry);
 	if (entry_seg) {
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index cfdc199c6351..ee8e7e1d5c0f 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -259,7 +259,7 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
 	while (list) {
 		skb = list;
 		list = skb->next;
-		skb->next = NULL;
+		skb_mark_not_on_list(skb);
 		rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
 	}
 }
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index c07c30b916d5..dc539295ae65 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -812,7 +812,7 @@ static struct sk_buff *dequeue_head(struct cake_flow *flow)
 
 	if (skb) {
 		flow->head = skb->next;
-		skb->next = NULL;
+		skb_mark_not_on_list(skb);
 	}
 
 	return skb;
@@ -1252,7 +1252,7 @@ found:
 	else
 		flow->head = elig_ack->next;
 
-	elig_ack->next = NULL;
+	skb_mark_not_on_list(elig_ack);
 
 	return elig_ack;
 }
@@ -1675,7 +1675,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 
 		while (segs) {
 			nskb = segs->next;
-			segs->next = NULL;
+			skb_mark_not_on_list(segs);
 			qdisc_skb_cb(segs)->pkt_len = segs->len;
 			cobalt_set_enqueue_time(segs, now);
 			get_cobalt_cb(segs)->adjusted_len = cake_overhead(q,
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 4808713c73b9..b27ba36a269c 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -319,7 +319,7 @@ static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
 
 	if (skb) {
 		flow->head = skb->next;
-		skb->next = NULL;
+		skb_mark_not_on_list(skb);
 		flow->qlen--;
 		qdisc_qstats_backlog_dec(sch, skb);
 		sch->q.qlen--;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 6c0a9d5dbf94..cd04d40c30b6 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -124,7 +124,7 @@ static inline struct sk_buff *dequeue_head(struct fq_codel_flow *flow)
 	struct sk_buff *skb = flow->head;
 
 	flow->head = skb->next;
-	skb->next = NULL;
+	skb_mark_not_on_list(skb);
 	return skb;
 }
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 69078c82963e..a64132a5db36 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -184,7 +184,7 @@ static void try_bulk_dequeue_skb(struct Qdisc *q,
 		skb = nskb;
 		(*packets)++; /* GSO counts as one pkt */
 	}
-	skb->next = NULL;
+	skb_mark_not_on_list(skb);
 }
 
 /* This variant of try_bulk_dequeue_skb() makes sure
@@ -210,7 +210,7 @@ static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
 		skb = nskb;
 	} while (++cnt < 8);
 	(*packets) += cnt;
-	skb->next = NULL;
+	skb_mark_not_on_list(skb);
 }
 
 /* Note that dequeue_skb can possibly return a SKB list (via skb->next).
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index c3a8388dcdf6..9d6a47697406 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -330,7 +330,7 @@ static struct sk_buff *dequeue_head(struct wdrr_bucket *bucket)
 	struct sk_buff *skb = bucket->head;
 
 	bucket->head = skb->next;
-	skb->next = NULL;
+	skb_mark_not_on_list(skb);
 	return skb;
 }
 
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index b9541ce4d672..506e1960ed7f 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -568,7 +568,7 @@ finish_segs:
 	if (segs) {
 		while (segs) {
 			skb2 = segs->next;
-			segs->next = NULL;
+			skb_mark_not_on_list(segs);
 			qdisc_skb_cb(segs)->pkt_len = segs->len;
 			last_len = segs->len;
 			rc = qdisc_enqueue(segs, sch, to_free);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 6f74a426f159..a4530e85bd02 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -162,7 +162,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch,
 	nb = 0;
 	while (segs) {
 		nskb = segs->next;
-		segs->next = NULL;
+		skb_mark_not_on_list(segs);
 		qdisc_skb_cb(segs)->pkt_len = segs->len;
 		len += segs->len;
 		ret = qdisc_enqueue(segs, q->qdisc, to_free);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 418f03d0be90..91891041e5e1 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -577,7 +577,7 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev,
 		rcu_dereference_rtnl(orig_dev->tipc_ptr);
 	if (likely(b && test_bit(0, &b->up) &&
 		   (skb->pkt_type <= PACKET_MULTICAST))) {
-		skb->next = NULL;
+		skb_mark_not_on_list(skb);
 		tipc_rcv(dev_net(b->pt.dev), skb, b);
 		rcu_read_unlock();
 		return NET_RX_SUCCESS;
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 5611b7521020..260fbba4f03e 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -99,7 +99,7 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
 
 	do {
 		struct sk_buff *nskb = skb2->next;
-		skb2->next = NULL;
+		skb_mark_not_on_list(skb2);
 
 		xo = xfrm_offload(skb2);
 		xo->flags |= XFRM_DEV_RESUME;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 45ba07ab3e4f..2d42cb0c94b8 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -189,7 +189,7 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb
 		struct sk_buff *nskb = segs->next;
 		int err;
 
-		segs->next = NULL;
+		skb_mark_not_on_list(segs);
 		err = xfrm_output2(net, sk, segs);
 
 		if (unlikely(err)) {
-- 
cgit v1.2.3


From 992cba7e276d438ac8b0a8c17b147b37c8c286f7 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 31 Jul 2018 15:27:56 -0700
Subject: net: Add and use skb_list_del_init().

It documents what is happening, and eliminates the spurious list
pointer poisoning.

In the long term, in order to get proper list head debugging, we
might want to use the list poison value as the indicator that
an SKB is a singleton and not on a list.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 6 ++++++
 net/core/dev.c         | 6 ++----
 net/ipv4/ip_input.c    | 6 +-----
 3 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c4c9e3f5cd9a..e3a53ca4a9b5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1344,6 +1344,12 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb)
 	skb->next = NULL;
 }
 
+static inline void skb_list_del_init(struct sk_buff *skb)
+{
+	__list_del_entry(&skb->list);
+	skb_mark_not_on_list(skb);
+}
+
 /**
  *	skb_queue_empty - check if a queue is empty
  *	@list: queue head
diff --git a/net/core/dev.c b/net/core/dev.c
index f76dd7e14dd6..0b2d777e5b9e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5295,8 +5295,7 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
 	list_for_each_entry_safe_reverse(skb, p, head, list) {
 		if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
 			return;
-		list_del(&skb->list);
-		skb_mark_not_on_list(skb);
+		skb_list_del_init(skb);
 		napi_gro_complete(skb);
 		napi->gro_hash[index].count--;
 	}
@@ -5481,8 +5480,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
 
 	if (pp) {
-		list_del(&pp->list);
-		skb_mark_not_on_list(pp);
+		skb_list_del_init(pp);
 		napi_gro_complete(pp);
 		napi->gro_hash[hash].count--;
 	}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index eba7f3883230..35a786c0aaa0 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -531,11 +531,7 @@ static void ip_sublist_rcv_finish(struct list_head *head)
 	struct sk_buff *skb, *next;
 
 	list_for_each_entry_safe(skb, next, head, list) {
-		list_del(&skb->list);
-		/* Handle ip{6}_forward case, as sch_direct_xmit have
-		 * another kind of SKB-list usage (see validate_xmit_skb_list)
-		 */
-		skb_mark_not_on_list(skb);
+		skb_list_del_init(skb);
 		dst_input(skb);
 	}
 }
-- 
cgit v1.2.3


From 4e69817b106e4fb98f7af463d2f951e7b3603c12 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 6 Aug 2018 02:29:53 +0000
Subject: usb: ehci-sh: convert to SPDX identifiers

This patch updates license to use SPDX-License-Identifier
instead of verbose license text.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/platform_data/ehci-sh.h | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/ehci-sh.h b/include/linux/platform_data/ehci-sh.h
index 5c15a738e116..219bd79dabfc 100644
--- a/include/linux/platform_data/ehci-sh.h
+++ b/include/linux/platform_data/ehci-sh.h
@@ -1,21 +1,9 @@
-/*
+/* SPDX-License-Identifier: GPL-2.0
+ *
  * EHCI SuperH driver platform data
  *
  * Copyright (C) 2012  Nobuhiro Iwamatsu <nobuhiro.iwamatsu.yj@renesas.com>
  * Copyright (C) 2012  Renesas Solutions Corp.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 
 #ifndef __USB_EHCI_SH_H
-- 
cgit v1.2.3


From e7a3ff92eaf19eab14e8149758428e680c61706b Mon Sep 17 00:00:00 2001
From: Angelo Dureghello <angelo@sysam.it>
Date: Sun, 19 Aug 2018 19:27:16 +0200
Subject: dmaengine: fsl-edma: add ColdFire mcf5441x edma support

This patch adds support for ColdFire mcf5441x-family edma
module.

The ColdFire edma module is slightly different from fsl-edma,
so a new driver is added. But most of the code is common
between fsl-edma and mcf-edma so it has been collected into a
separate common module fsl-edma-common (patch 1/3).

Signed-off-by: Angelo Dureghello <angelo@sysam.it>
Tested-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/Kconfig                        |  11 +
 drivers/dma/Makefile                       |   1 +
 drivers/dma/fsl-edma-common.c              |  24 ++-
 drivers/dma/mcf-edma.c                     | 317 +++++++++++++++++++++++++++++
 include/linux/platform_data/dma-mcf-edma.h |  38 ++++
 5 files changed, 387 insertions(+), 4 deletions(-)
 create mode 100644 drivers/dma/mcf-edma.c
 create mode 100644 include/linux/platform_data/dma-mcf-edma.h

(limited to 'include/linux')

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index dacf3f42426d..05104325d685 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -321,6 +321,17 @@ config LPC18XX_DMAMUX
 	  Enable support for DMA on NXP LPC18xx/43xx platforms
 	  with PL080 and multiplexed DMA request lines.
 
+config MCF_EDMA
+	tristate "Freescale eDMA engine support, ColdFire mcf5441x SoCs"
+	depends on M5441x || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support the Freescale ColdFire eDMA engine, 64-channel
+	  implementation that performs complex data transfers with
+	  minimal intervention from a host processor.
+	  This module can be found on Freescale ColdFire mcf5441x SoCs.
+
 config MMP_PDMA
 	bool "MMP PDMA support"
 	depends on ARCH_MMP || ARCH_PXA || COMPILE_TEST
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 21db4e51b246..7fcc4d8e336d 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_DW_DMAC_CORE) += dw/
 obj-$(CONFIG_EP93XX_DMA) += ep93xx_dma.o
 obj-$(CONFIG_FSL_DMA) += fsldma.o
 obj-$(CONFIG_FSL_EDMA) += fsl-edma.o fsl-edma-common.o
+obj-$(CONFIG_MCF_EDMA) += mcf-edma.o fsl-edma-common.o
 obj-$(CONFIG_FSL_RAID) += fsl_raid.o
 obj-$(CONFIG_HSU_DMA) += hsu/
 obj-$(CONFIG_IMG_MDC_DMA) += img-mdc-dma.o
diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c
index 227034de256e..8ba80f4b6f55 100644
--- a/drivers/dma/fsl-edma-common.c
+++ b/drivers/dma/fsl-edma-common.c
@@ -46,8 +46,16 @@ static void fsl_edma_enable_request(struct fsl_edma_chan *fsl_chan)
 	struct edma_regs *regs = &fsl_chan->edma->regs;
 	u32 ch = fsl_chan->vchan.chan.chan_id;
 
-	edma_writeb(fsl_chan->edma, EDMA_SEEI_SEEI(ch), regs->seei);
-	edma_writeb(fsl_chan->edma, ch, regs->serq);
+	if (fsl_chan->edma->version == v1) {
+		edma_writeb(fsl_chan->edma, EDMA_SEEI_SEEI(ch), regs->seei);
+		edma_writeb(fsl_chan->edma, ch, regs->serq);
+	} else {
+		/* ColdFire is big endian, and accesses natively
+		 * big endian I/O peripherals
+		 */
+		iowrite8(EDMA_SEEI_SEEI(ch), regs->seei);
+		iowrite8(ch, regs->serq);
+	}
 }
 
 void fsl_edma_disable_request(struct fsl_edma_chan *fsl_chan)
@@ -55,8 +63,16 @@ void fsl_edma_disable_request(struct fsl_edma_chan *fsl_chan)
 	struct edma_regs *regs = &fsl_chan->edma->regs;
 	u32 ch = fsl_chan->vchan.chan.chan_id;
 
-	edma_writeb(fsl_chan->edma, ch, regs->cerq);
-	edma_writeb(fsl_chan->edma, EDMA_CEEI_CEEI(ch), regs->ceei);
+	if (fsl_chan->edma->version == v1) {
+		edma_writeb(fsl_chan->edma, ch, regs->cerq);
+		edma_writeb(fsl_chan->edma, EDMA_CEEI_CEEI(ch), regs->ceei);
+	} else {
+		/* ColdFire is big endian, and accesses natively
+		 * big endian I/O peripherals
+		 */
+		iowrite8(ch, regs->cerq);
+		iowrite8(EDMA_CEEI_CEEI(ch), regs->ceei);
+	}
 }
 EXPORT_SYMBOL_GPL(fsl_edma_disable_request);
 
diff --git a/drivers/dma/mcf-edma.c b/drivers/dma/mcf-edma.c
new file mode 100644
index 000000000000..4d30d5302649
--- /dev/null
+++ b/drivers/dma/mcf-edma.c
@@ -0,0 +1,317 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright (c) 2013-2014 Freescale Semiconductor, Inc
+// Copyright (c) 2017 Sysam, Angelo Dureghello  <angelo@sysam.it>
+
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/platform_device.h>
+#include <linux/platform_data/dma-mcf-edma.h>
+
+#include "fsl-edma-common.h"
+
+#define EDMA_CHANNELS		64
+#define EDMA_MASK_CH(x)		((x) & GENMASK(5, 0))
+
+static irqreturn_t mcf_edma_tx_handler(int irq, void *dev_id)
+{
+	struct fsl_edma_engine *mcf_edma = dev_id;
+	struct edma_regs *regs = &mcf_edma->regs;
+	unsigned int ch;
+	struct fsl_edma_chan *mcf_chan;
+	u64 intmap;
+
+	intmap = ioread32(regs->inth);
+	intmap <<= 32;
+	intmap |= ioread32(regs->intl);
+	if (!intmap)
+		return IRQ_NONE;
+
+	for (ch = 0; ch < mcf_edma->n_chans; ch++) {
+		if (intmap & BIT(ch)) {
+			iowrite8(EDMA_MASK_CH(ch), regs->cint);
+
+			mcf_chan = &mcf_edma->chans[ch];
+
+			spin_lock(&mcf_chan->vchan.lock);
+			if (!mcf_chan->edesc->iscyclic) {
+				list_del(&mcf_chan->edesc->vdesc.node);
+				vchan_cookie_complete(&mcf_chan->edesc->vdesc);
+				mcf_chan->edesc = NULL;
+				mcf_chan->status = DMA_COMPLETE;
+				mcf_chan->idle = true;
+			} else {
+				vchan_cyclic_callback(&mcf_chan->edesc->vdesc);
+			}
+
+			if (!mcf_chan->edesc)
+				fsl_edma_xfer_desc(mcf_chan);
+
+			spin_unlock(&mcf_chan->vchan.lock);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t mcf_edma_err_handler(int irq, void *dev_id)
+{
+	struct fsl_edma_engine *mcf_edma = dev_id;
+	struct edma_regs *regs = &mcf_edma->regs;
+	unsigned int err, ch;
+
+	err = ioread32(regs->errl);
+	if (!err)
+		return IRQ_NONE;
+
+	for (ch = 0; ch < (EDMA_CHANNELS / 2); ch++) {
+		if (err & BIT(ch)) {
+			fsl_edma_disable_request(&mcf_edma->chans[ch]);
+			iowrite8(EDMA_CERR_CERR(ch), regs->cerr);
+			mcf_edma->chans[ch].status = DMA_ERROR;
+			mcf_edma->chans[ch].idle = true;
+		}
+	}
+
+	err = ioread32(regs->errh);
+	if (!err)
+		return IRQ_NONE;
+
+	for (ch = (EDMA_CHANNELS / 2); ch < EDMA_CHANNELS; ch++) {
+		if (err & (BIT(ch - (EDMA_CHANNELS / 2)))) {
+			fsl_edma_disable_request(&mcf_edma->chans[ch]);
+			iowrite8(EDMA_CERR_CERR(ch), regs->cerr);
+			mcf_edma->chans[ch].status = DMA_ERROR;
+			mcf_edma->chans[ch].idle = true;
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int mcf_edma_irq_init(struct platform_device *pdev,
+				struct fsl_edma_engine *mcf_edma)
+{
+	int ret = 0, i;
+	struct resource *res;
+
+	res = platform_get_resource_byname(pdev,
+				IORESOURCE_IRQ, "edma-tx-00-15");
+	if (!res)
+		return -1;
+
+	for (ret = 0, i = res->start; i <= res->end; ++i)
+		ret |= request_irq(i, mcf_edma_tx_handler, 0, "eDMA", mcf_edma);
+	if (ret)
+		return ret;
+
+	res = platform_get_resource_byname(pdev,
+			IORESOURCE_IRQ, "edma-tx-16-55");
+	if (!res)
+		return -1;
+
+	for (ret = 0, i = res->start; i <= res->end; ++i)
+		ret |= request_irq(i, mcf_edma_tx_handler, 0, "eDMA", mcf_edma);
+	if (ret)
+		return ret;
+
+	ret = platform_get_irq_byname(pdev, "edma-tx-56-63");
+	if (ret != -ENXIO) {
+		ret = request_irq(ret, mcf_edma_tx_handler,
+				  0, "eDMA", mcf_edma);
+		if (ret)
+			return ret;
+	}
+
+	ret = platform_get_irq_byname(pdev, "edma-err");
+	if (ret != -ENXIO) {
+		ret = request_irq(ret, mcf_edma_err_handler,
+				  0, "eDMA", mcf_edma);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void mcf_edma_irq_free(struct platform_device *pdev,
+				struct fsl_edma_engine *mcf_edma)
+{
+	int irq;
+	struct resource *res;
+
+	res = platform_get_resource_byname(pdev,
+			IORESOURCE_IRQ, "edma-tx-00-15");
+	if (res) {
+		for (irq = res->start; irq <= res->end; irq++)
+			free_irq(irq, mcf_edma);
+	}
+
+	res = platform_get_resource_byname(pdev,
+			IORESOURCE_IRQ, "edma-tx-16-55");
+	if (res) {
+		for (irq = res->start; irq <= res->end; irq++)
+			free_irq(irq, mcf_edma);
+	}
+
+	irq = platform_get_irq_byname(pdev, "edma-tx-56-63");
+	if (irq != -ENXIO)
+		free_irq(irq, mcf_edma);
+
+	irq = platform_get_irq_byname(pdev, "edma-err");
+	if (irq != -ENXIO)
+		free_irq(irq, mcf_edma);
+}
+
+static int mcf_edma_probe(struct platform_device *pdev)
+{
+	struct mcf_edma_platform_data *pdata;
+	struct fsl_edma_engine *mcf_edma;
+	struct fsl_edma_chan *mcf_chan;
+	struct edma_regs *regs;
+	struct resource *res;
+	int ret, i, len, chans;
+
+	pdata = dev_get_platdata(&pdev->dev);
+	if (!pdata) {
+		dev_err(&pdev->dev, "no platform data supplied\n");
+		return -EINVAL;
+	}
+
+	chans = pdata->dma_channels;
+	len = sizeof(*mcf_edma) + sizeof(*mcf_chan) * chans;
+	mcf_edma = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+	if (!mcf_edma)
+		return -ENOMEM;
+
+	mcf_edma->n_chans = chans;
+
+	/* Set up version for ColdFire edma */
+	mcf_edma->version = v2;
+	mcf_edma->big_endian = 1;
+
+	if (!mcf_edma->n_chans) {
+		dev_info(&pdev->dev, "setting default channel number to 64");
+		mcf_edma->n_chans = 64;
+	}
+
+	mutex_init(&mcf_edma->fsl_edma_mutex);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+
+	mcf_edma->membase = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(mcf_edma->membase))
+		return PTR_ERR(mcf_edma->membase);
+
+	fsl_edma_setup_regs(mcf_edma);
+	regs = &mcf_edma->regs;
+
+	INIT_LIST_HEAD(&mcf_edma->dma_dev.channels);
+	for (i = 0; i < mcf_edma->n_chans; i++) {
+		struct fsl_edma_chan *mcf_chan = &mcf_edma->chans[i];
+
+		mcf_chan->edma = mcf_edma;
+		mcf_chan->slave_id = i;
+		mcf_chan->idle = true;
+		mcf_chan->vchan.desc_free = fsl_edma_free_desc;
+		vchan_init(&mcf_chan->vchan, &mcf_edma->dma_dev);
+		iowrite32(0x0, &regs->tcd[i].csr);
+	}
+
+	iowrite32(~0, regs->inth);
+	iowrite32(~0, regs->intl);
+
+	ret = mcf_edma_irq_init(pdev, mcf_edma);
+	if (ret)
+		return ret;
+
+	dma_cap_set(DMA_PRIVATE, mcf_edma->dma_dev.cap_mask);
+	dma_cap_set(DMA_SLAVE, mcf_edma->dma_dev.cap_mask);
+	dma_cap_set(DMA_CYCLIC, mcf_edma->dma_dev.cap_mask);
+
+	mcf_edma->dma_dev.dev = &pdev->dev;
+	mcf_edma->dma_dev.device_alloc_chan_resources =
+			fsl_edma_alloc_chan_resources;
+	mcf_edma->dma_dev.device_free_chan_resources =
+			fsl_edma_free_chan_resources;
+	mcf_edma->dma_dev.device_config = fsl_edma_slave_config;
+	mcf_edma->dma_dev.device_prep_dma_cyclic =
+			fsl_edma_prep_dma_cyclic;
+	mcf_edma->dma_dev.device_prep_slave_sg = fsl_edma_prep_slave_sg;
+	mcf_edma->dma_dev.device_tx_status = fsl_edma_tx_status;
+	mcf_edma->dma_dev.device_pause = fsl_edma_pause;
+	mcf_edma->dma_dev.device_resume = fsl_edma_resume;
+	mcf_edma->dma_dev.device_terminate_all = fsl_edma_terminate_all;
+	mcf_edma->dma_dev.device_issue_pending = fsl_edma_issue_pending;
+
+	mcf_edma->dma_dev.src_addr_widths = FSL_EDMA_BUSWIDTHS;
+	mcf_edma->dma_dev.dst_addr_widths = FSL_EDMA_BUSWIDTHS;
+	mcf_edma->dma_dev.directions =
+			BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+
+	mcf_edma->dma_dev.filter.fn = mcf_edma_filter_fn;
+	mcf_edma->dma_dev.filter.map = pdata->slave_map;
+	mcf_edma->dma_dev.filter.mapcnt = pdata->slavecnt;
+
+	platform_set_drvdata(pdev, mcf_edma);
+
+	ret = dma_async_device_register(&mcf_edma->dma_dev);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"Can't register Freescale eDMA engine. (%d)\n", ret);
+		return ret;
+	}
+
+	/* Enable round robin arbitration */
+	iowrite32(EDMA_CR_ERGA | EDMA_CR_ERCA, regs->cr);
+
+	return 0;
+}
+
+static int mcf_edma_remove(struct platform_device *pdev)
+{
+	struct fsl_edma_engine *mcf_edma = platform_get_drvdata(pdev);
+
+	mcf_edma_irq_free(pdev, mcf_edma);
+	fsl_edma_cleanup_vchan(&mcf_edma->dma_dev);
+	dma_async_device_unregister(&mcf_edma->dma_dev);
+
+	return 0;
+}
+
+static struct platform_driver mcf_edma_driver = {
+	.driver		= {
+		.name	= "mcf-edma",
+	},
+	.probe		= mcf_edma_probe,
+	.remove		= mcf_edma_remove,
+};
+
+bool mcf_edma_filter_fn(struct dma_chan *chan, void *param)
+{
+	if (chan->device->dev->driver == &mcf_edma_driver.driver) {
+		struct fsl_edma_chan *mcf_chan = to_fsl_edma_chan(chan);
+
+		return (mcf_chan->slave_id == (u32)param);
+	}
+
+	return false;
+}
+EXPORT_SYMBOL(mcf_edma_filter_fn);
+
+static int __init mcf_edma_init(void)
+{
+	return platform_driver_register(&mcf_edma_driver);
+}
+subsys_initcall(mcf_edma_init);
+
+static void __exit mcf_edma_exit(void)
+{
+	platform_driver_unregister(&mcf_edma_driver);
+}
+module_exit(mcf_edma_exit);
+
+MODULE_ALIAS("platform:mcf-edma");
+MODULE_DESCRIPTION("Freescale eDMA engine driver, ColdFire family");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/platform_data/dma-mcf-edma.h b/include/linux/platform_data/dma-mcf-edma.h
new file mode 100644
index 000000000000..d718ccfa3421
--- /dev/null
+++ b/include/linux/platform_data/dma-mcf-edma.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Freescale eDMA platform data, ColdFire SoC's family.
+ *
+ * Copyright (c) 2017 Angelo Dureghello <angelo@sysam.it>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __LINUX_PLATFORM_DATA_MCF_EDMA_H__
+#define __LINUX_PLATFORM_DATA_MCF_EDMA_H__
+
+struct dma_slave_map;
+
+bool mcf_edma_filter_fn(struct dma_chan *chan, void *param);
+
+#define MCF_EDMA_FILTER_PARAM(ch)	((void *)ch)
+
+/**
+ * struct mcf_edma_platform_data - platform specific data for eDMA engine
+ *
+ * @ver			The eDMA module version.
+ * @dma_channels	The number of eDMA channels.
+ */
+struct mcf_edma_platform_data {
+	int dma_channels;
+	const struct dma_slave_map *slave_map;
+	int slavecnt;
+};
+
+#endif /* __LINUX_PLATFORM_DATA_MCF_EDMA_H__ */
-- 
cgit v1.2.3


From 04cfcc7ab358e331b32cabde1e853a125f3f8735 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 22 Aug 2018 10:54:02 +0200
Subject: fbdev: Drop FBINFO_CAN_FORCE_OUTPUT flag

This was only added for the drm's fbdev emulation support, so that it
would try harder to show the Oops.

Unfortunately this never really worked reliably, and in practice ended
up pushing the real Oops off the screen due to plentyfull locking,
sleep-while-atomic and other issues. So we removed all that support
from the fbdev emulation a while back. Aside: We've also removed the
kgdb support, for similar reasons.

Since it's such a small patch I figured I don't split this up into the
usual 3-phase removal.

Acked-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Alexander Kapshuk <alexander.kapshuk@gmail.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Thierry Reding <treding@nvidia.com>
Cc: David Lechner <david@lechnology.com>
Cc: nouveau@lists.freedesktop.org
Cc: linux-fbdev@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180822085405.10787-1-daniel.vetter@ffwll.ch
---
 drivers/gpu/drm/nouveau/nouveau_fbcon.c | 1 -
 drivers/staging/vboxvideo/vbox_fb.c     | 3 +--
 drivers/video/fbdev/core/fbcon.c        | 1 -
 include/linux/fb.h                      | 4 ----
 4 files changed, 1 insertion(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 844498c4267c..20a260887be3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -379,7 +379,6 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
 		info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA |
 			      FBINFO_HWACCEL_FILLRECT |
 			      FBINFO_HWACCEL_IMAGEBLIT;
-	info->flags |= FBINFO_CAN_FORCE_OUTPUT;
 	info->fbops = &nouveau_fbcon_sw_ops;
 	info->fix.smem_start = fb->nvbo->bo.mem.bus.base +
 			       fb->nvbo->bo.mem.bus.offset;
diff --git a/drivers/staging/vboxvideo/vbox_fb.c b/drivers/staging/vboxvideo/vbox_fb.c
index 43c39eca4ae1..034f8ffa8f20 100644
--- a/drivers/staging/vboxvideo/vbox_fb.c
+++ b/drivers/staging/vboxvideo/vbox_fb.c
@@ -155,8 +155,7 @@ static int vboxfb_create(struct drm_fb_helper *helper,
 	 * The last flag forces a mode set on VT switches even if the kernel
 	 * does not think it is needed.
 	 */
-	info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT |
-		      FBINFO_MISC_ALWAYS_SETPAR;
+	info->flags = FBINFO_DEFAULT | FBINFO_MISC_ALWAYS_SETPAR;
 	info->fbops = &vboxfb_ops;
 
 	/*
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 75ebbbf0a1fb..9fd99681a7f2 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -1104,7 +1104,6 @@ static void fbcon_init(struct vc_data *vc, int init)
 	if (p->userfont)
 		charcnt = FNTCHARCNT(p->fontdata);
 
-	vc->vc_panic_force_write = !!(info->flags & FBINFO_CAN_FORCE_OUTPUT);
 	vc->vc_can_do_color = (fb_get_color_depth(&info->var, &info->fix)!=1);
 	vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800;
 	if (charcnt == 256) {
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 3cd375dafd0e..b1db5c5eb671 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -457,10 +457,6 @@ struct fb_tile_ops {
  */
 #define FBINFO_BE_MATH  0x100000
 
-/* report to the VT layer that this fb driver can accept forced console
-   output like oopses */
-#define FBINFO_CAN_FORCE_OUTPUT     0x200000
-
 struct fb_info {
 	atomic_t count;
 	int node;
-- 
cgit v1.2.3


From 8d7fc2994f4d1f431e280c9e21a139c18dc435ec Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 22 Aug 2018 10:54:03 +0200
Subject: vt: Remove vc_panic_force_write

It was only used by the panic support in fbcon, which is now gone.
Remove this now dead code too.

Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Joe Perches <joe@perches.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Meng Xu <mengxu.gatech@gmail.com>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: Thomas Meyer <thomas@m3y3r.de>
Cc: Mike Frysinger <vapier@chromium.org>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Thierry Reding <treding@nvidia.com>
Cc: David Lechner <david@lechnology.com>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Kate Stewart <kstewart@linuxfoundation.org>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180822085405.10787-2-daniel.vetter@ffwll.ch
---
 drivers/tty/vt/vt.c              | 12 ++++--------
 drivers/video/fbdev/core/fbcon.c |  3 +--
 include/linux/console_struct.h   |  1 -
 include/linux/vt_kern.h          |  7 -------
 4 files changed, 5 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 5f1183b0b89d..55370e651db3 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -1004,9 +1004,7 @@ void redraw_screen(struct vc_data *vc, int is_switch)
 			clear_buffer_attributes(vc);
 		}
 
-		/* Forcibly update if we're panicing */
-		if ((update && vc->vc_mode != KD_GRAPHICS) ||
-		    vt_force_oops_output(vc))
+		if (update && vc->vc_mode != KD_GRAPHICS)
 			do_update_region(vc, vc->vc_origin, vc->vc_screenbuf_size / 2);
 	}
 	set_cursor(vc);
@@ -1046,7 +1044,6 @@ static void visual_init(struct vc_data *vc, int num, int init)
 	vc->vc_hi_font_mask = 0;
 	vc->vc_complement_mask = 0;
 	vc->vc_can_do_color = 0;
-	vc->vc_panic_force_write = false;
 	vc->vc_cur_blink_ms = DEFAULT_CURSOR_BLINK_MS;
 	vc->vc_sw->con_init(vc, init);
 	if (!vc->vc_complement_mask)
@@ -2911,7 +2908,7 @@ static void vt_console_print(struct console *co, const char *b, unsigned count)
 		goto quit;
 	}
 
-	if (vc->vc_mode != KD_TEXT && !vt_force_oops_output(vc))
+	if (vc->vc_mode != KD_TEXT)
 		goto quit;
 
 	/* undraw cursor first */
@@ -4229,8 +4226,7 @@ void do_unblank_screen(int leaving_gfx)
 		return;
 	}
 	vc = vc_cons[fg_console].d;
-	/* Try to unblank in oops case too */
-	if (vc->vc_mode != KD_TEXT && !vt_force_oops_output(vc))
+	if (vc->vc_mode != KD_TEXT)
 		return; /* but leave console_blanked != 0 */
 
 	if (blankinterval) {
@@ -4239,7 +4235,7 @@ void do_unblank_screen(int leaving_gfx)
 	}
 
 	console_blanked = 0;
-	if (vc->vc_sw->con_blank(vc, 0, leaving_gfx) || vt_force_oops_output(vc))
+	if (vc->vc_sw->con_blank(vc, 0, leaving_gfx))
 		/* Low-level driver cannot restore -> do it ourselves */
 		update_screen(vc);
 	if (console_blank_hook)
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index 9fd99681a7f2..8958ccc8b1ac 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -284,8 +284,7 @@ static inline int fbcon_is_inactive(struct vc_data *vc, struct fb_info *info)
 	struct fbcon_ops *ops = info->fbcon_par;
 
 	return (info->state != FBINFO_STATE_RUNNING ||
-		vc->vc_mode != KD_TEXT || ops->graphics) &&
-		!vt_force_oops_output(vc);
+		vc->vc_mode != KD_TEXT || ops->graphics);
 }
 
 static int get_color(struct vc_data *vc, struct fb_info *info,
diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h
index fea64f2692a0..ab137f97ecbd 100644
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -141,7 +141,6 @@ struct vc_data {
 	struct uni_pagedir *vc_uni_pagedir;
 	struct uni_pagedir **vc_uni_pagedir_loc; /* [!] Location of uni_pagedir variable for this console */
 	struct uni_screen *vc_uni_screen;	/* unicode screen content */
-	bool vc_panic_force_write; /* when oops/panic this VC can accept forced output/blanking */
 	/* additional information is in vt_kern.h */
 };
 
diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index 3fd07912909c..8dc77e40bc03 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -135,13 +135,6 @@ extern int do_unbind_con_driver(const struct consw *csw, int first, int last,
 			     int deflt);
 int vty_init(const struct file_operations *console_fops);
 
-static inline bool vt_force_oops_output(struct vc_data *vc)
-{
-	if (oops_in_progress && vc->vc_panic_force_write  && panic_timeout >= 0)
-		return true;
-	return false;
-}
-
 extern char vt_dont_switch;
 extern int default_utf8;
 extern int global_cursor_default;
-- 
cgit v1.2.3


From da6c7707caf3736c1cf968606bd97c07e79625d4 Mon Sep 17 00:00:00 2001
From: Daniel Vetter <daniel.vetter@ffwll.ch>
Date: Wed, 22 Aug 2018 10:54:04 +0200
Subject: fbdev: Add FBINFO_HIDE_SMEM_START flag

DRM drivers really, really, really don't want random userspace to
share buffer behind it's back, bypassing the dma-buf buffer sharing
machanism. For that reason we've ruthlessly rejected any IOCTL
exposing the physical address of any graphics buffer.

Unfortunately fbdev comes with that built-in. We could just set
smem_start to 0, but that means we'd have to hand-roll our own fb_mmap
implementation. For good reasons many drivers do that, but
smem_start/length is still super convenient.

Hence instead just stop the leak in the ioctl, to keep fb mmap working
as-is. A second patch will set this flag for all drm drivers.

Acked-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: linux-fbdev@vger.kernel.org
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180822085405.10787-3-daniel.vetter@ffwll.ch
---
 drivers/video/fbdev/core/fbmem.c | 4 ++++
 include/linux/fb.h               | 7 +++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c
index 0da75c55660d..861bf8081619 100644
--- a/drivers/video/fbdev/core/fbmem.c
+++ b/drivers/video/fbdev/core/fbmem.c
@@ -1117,6 +1117,8 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd,
 		if (!lock_fb_info(info))
 			return -ENODEV;
 		fix = info->fix;
+		if (info->flags & FBINFO_HIDE_SMEM_START)
+			fix.smem_start = 0;
 		unlock_fb_info(info);
 
 		ret = copy_to_user(argp, &fix, sizeof(fix)) ? -EFAULT : 0;
@@ -1327,6 +1329,8 @@ static int fb_get_fscreeninfo(struct fb_info *info, unsigned int cmd,
 	if (!lock_fb_info(info))
 		return -ENODEV;
 	fix = info->fix;
+	if (info->flags & FBINFO_HIDE_SMEM_START)
+		fix.smem_start = 0;
 	unlock_fb_info(info);
 	return do_fscreeninfo_to_user(&fix, compat_ptr(arg));
 }
diff --git a/include/linux/fb.h b/include/linux/fb.h
index b1db5c5eb671..a3cab6dc9b44 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -456,6 +456,13 @@ struct fb_tile_ops {
  * and host endianness. Drivers should not use this flag.
  */
 #define FBINFO_BE_MATH  0x100000
+/*
+ * Hide smem_start in the FBIOGET_FSCREENINFO IOCTL. This is used by modern DRM
+ * drivers to stop userspace from trying to share buffers behind the kernel's
+ * back. Instead dma-buf based buffer sharing should be used.
+ */
+#define FBINFO_HIDE_SMEM_START  0x200000
+
 
 struct fb_info {
 	atomic_t count;
-- 
cgit v1.2.3


From 56df612afb606dcefba8ab38ab32e052218b6050 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 4 Sep 2018 14:41:28 +0200
Subject: spi: davinci: Remove chip select GPIO pdata

The DaVinci SPI can use either:
- Internal chip selects (inside the SPI host)
- External chip selects (using GPIO)
- External chip selects passed in pdata

The last way of passing external chip selects through
platform data is not used in the kernel. Delete it to make
the code simpler when refactoring GPIO.

Cc: Sekhar Nori <nsekhar@ti.com>
Cc: Kevin Hilman <khilman@kernel.org>
Cc: Michele Dionisio <michele.dionisio@gmail.com>
Cc: Frode Isaksen <fisaksen@baylibre.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-davinci.c                 | 7 -------
 include/linux/platform_data/spi-davinci.h | 4 ----
 2 files changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index 3dabc20b68a1..d502cf504deb 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -432,13 +432,6 @@ static int davinci_spi_setup(struct spi_device *spi)
 			retval = gpio_direction_output(
 				      spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
 			internal_cs = false;
-		} else if (pdata->chip_sel &&
-			   spi->chip_select < pdata->num_chipselect &&
-			   pdata->chip_sel[spi->chip_select] != SPI_INTERN_CS) {
-			spi->cs_gpio = pdata->chip_sel[spi->chip_select];
-			retval = gpio_direction_output(
-				      spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
-			internal_cs = false;
 		}
 
 		if (retval) {
diff --git a/include/linux/platform_data/spi-davinci.h b/include/linux/platform_data/spi-davinci.h
index f4edcb03c40c..0638fb6353bc 100644
--- a/include/linux/platform_data/spi-davinci.h
+++ b/include/linux/platform_data/spi-davinci.h
@@ -36,9 +36,6 @@ enum {
  * @num_chipselect: number of chipselects supported by this SPI master
  * @intr_line:	interrupt line used to connect the SPI IP to the ARM interrupt
  *		controller withn the SoC. Possible values are 0 and 1.
- * @chip_sel:	list of GPIOs which can act as chip-selects for the SPI.
- *		SPI_INTERN_CS denotes internal SPI chip-select. Not necessary
- *		to populate if all chip-selects are internal.
  * @cshold_bug:	set this to true if the SPI controller on your chip requires
  *		a write to CSHOLD bit in between transfers (like in DM355).
  * @dma_event_q: DMA event queue to use if SPI_IO_TYPE_DMA is used for any
@@ -48,7 +45,6 @@ struct davinci_spi_platform_data {
 	u8			version;
 	u8			num_chipselect;
 	u8			intr_line;
-	u8			*chip_sel;
 	u8			prescaler_limit;
 	bool			cshold_bug;
 	enum dma_event_q	dma_event_q;
-- 
cgit v1.2.3


From 01c5f85aebaaddfd7e6051fb2ec80c1d4b463554 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 11 Sep 2018 10:59:53 -0600
Subject: blk-cgroup: increase number of supported policies

After merging the iolatency policy, we potentially now have 4 policies
being registered, but only support 3. This causes one of them to fail
loading. Takashi reports that BFQ no longer works for him, because it
fails to load due to policy registration failure.

Bump to 5 policies, and also add a warning for when we have exceeded
the global amount. If we have to touch this again, we should switch
to a dynamic scheme instead.

Reported-by: Takashi Iwai <tiwai@suse.de>
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Tested-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c     | 4 +++-
 include/linux/blkdev.h | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c19f9078da1e..c630e02836a8 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1510,8 +1510,10 @@ int blkcg_policy_register(struct blkcg_policy *pol)
 	for (i = 0; i < BLKCG_MAX_POLS; i++)
 		if (!blkcg_policy[i])
 			break;
-	if (i >= BLKCG_MAX_POLS)
+	if (i >= BLKCG_MAX_POLS) {
+		pr_warn("blkcg_policy_register: BLKCG_MAX_POLS too small\n");
 		goto err_unlock;
+	}
 
 	/* Make sure cpd/pd_alloc_fn and cpd/pd_free_fn in pairs */
 	if ((!pol->cpd_alloc_fn ^ !pol->cpd_free_fn) ||
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d6869e0e2b64..6980014357d4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -54,7 +54,7 @@ struct blk_stat_callback;
  * Maximum number of blkcg policies allowed to be registered concurrently.
  * Defined here to simplify include dependency.
  */
-#define BLKCG_MAX_POLS		3
+#define BLKCG_MAX_POLS		5
 
 typedef void (rq_end_io_fn)(struct request *, blk_status_t);
 
-- 
cgit v1.2.3


From 961366a01904d460066d65a609c3c2e3051c7903 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 19 Jul 2018 21:31:13 -0500
Subject: signal: Remove the siginfo paramater from kernel_dqueue_signal

None of the callers use the it so remove it.

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 drivers/usb/gadget/function/f_mass_storage.c | 2 +-
 fs/jffs2/background.c                        | 2 +-
 include/linux/sched/signal.h                 | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
index ca8a4b53c59f..70038a475c9f 100644
--- a/drivers/usb/gadget/function/f_mass_storage.c
+++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -2311,7 +2311,7 @@ static void handle_exception(struct fsg_common *common)
 	 * into a high-priority EXIT exception.
 	 */
 	for (;;) {
-		int sig = kernel_dequeue_signal(NULL);
+		int sig = kernel_dequeue_signal();
 		if (!sig)
 			break;
 		if (sig != SIGUSR1) {
diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c
index 453a6a1fff34..2b4d5013dc5d 100644
--- a/fs/jffs2/background.c
+++ b/fs/jffs2/background.c
@@ -125,7 +125,7 @@ static int jffs2_garbage_collect_thread(void *_c)
 			if (try_to_freeze())
 				goto again;
 
-			signr = kernel_dequeue_signal(NULL);
+			signr = kernel_dequeue_signal();
 
 			switch(signr) {
 			case SIGSTOP:
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 1be35729c2c5..9b6968cbde14 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -272,14 +272,14 @@ extern void ignore_signals(struct task_struct *);
 extern void flush_signal_handlers(struct task_struct *, int force_default);
 extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
 
-static inline int kernel_dequeue_signal(siginfo_t *info)
+static inline int kernel_dequeue_signal(void)
 {
 	struct task_struct *tsk = current;
 	siginfo_t __info;
 	int ret;
 
 	spin_lock_irq(&tsk->sighand->siglock);
-	ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info);
+	ret = dequeue_signal(tsk, &tsk->blocked, &__info);
 	spin_unlock_irq(&tsk->sighand->siglock);
 
 	return ret;
-- 
cgit v1.2.3


From 4ff4c31a6e85f4c49fbeebeaa28018d002884b5a Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 3 Sep 2018 10:39:04 +0200
Subject: signal: Remove SEND_SIG_FORCED

There are no more users of SEND_SIG_FORCED so it may be safely removed.

Remove the definition of SEND_SIG_FORCED, it's use in is_si_special,
it's use in TP_STORE_SIGINFO, and it's use in __send_signal as without
any users the uses of SEND_SIG_FORCED are now unncessary.

This makes the code simpler, easier to understand and use.  Users of
signal sending functions now no longer need to ask themselves do I
need to use SEND_SIG_FORCED.

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/sched/signal.h  | 1 -
 include/trace/events/signal.h | 3 +--
 kernel/signal.c               | 7 +++----
 3 files changed, 4 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 9b6968cbde14..9e07f3521549 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -477,7 +477,6 @@ static inline int kill_cad_pid(int sig, int priv)
 /* These can be the second arg to send_sig_info/send_group_sig_info.  */
 #define SEND_SIG_NOINFO ((struct siginfo *) 0)
 #define SEND_SIG_PRIV	((struct siginfo *) 1)
-#define SEND_SIG_FORCED	((struct siginfo *) 2)
 
 /*
  * True if we are on the alternate signal stack.
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
index 86582923d51c..3deeed50ffd0 100644
--- a/include/trace/events/signal.h
+++ b/include/trace/events/signal.h
@@ -11,8 +11,7 @@
 
 #define TP_STORE_SIGINFO(__entry, info)				\
 	do {							\
-		if (info == SEND_SIG_NOINFO ||			\
-		    info == SEND_SIG_FORCED) {			\
+		if (info == SEND_SIG_NOINFO) {			\
 			__entry->errno	= 0;			\
 			__entry->code	= SI_USER;		\
 		} else if (info == SEND_SIG_PRIV) {		\
diff --git a/kernel/signal.c b/kernel/signal.c
index d7d1adf735f4..ec136fda457a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -736,7 +736,7 @@ static void flush_sigqueue_mask(sigset_t *mask, struct sigpending *s)
 
 static inline int is_si_special(const struct siginfo *info)
 {
-	return info <= SEND_SIG_FORCED;
+	return info <= SEND_SIG_PRIV;
 }
 
 static inline bool si_fromuser(const struct siginfo *info)
@@ -1039,7 +1039,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
 
 	result = TRACE_SIGNAL_IGNORED;
 	if (!prepare_signal(sig, t,
-			from_ancestor_ns || (info == SEND_SIG_PRIV) || (info == SEND_SIG_FORCED)))
+			from_ancestor_ns || (info == SEND_SIG_PRIV)))
 		goto ret;
 
 	pending = (type != PIDTYPE_PID) ? &t->signal->shared_pending : &t->pending;
@@ -1057,8 +1057,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
 	 * Skip useless siginfo allocation for SIGKILL SIGSTOP,
 	 * and kernel threads.
 	 */
-	if ((info == SEND_SIG_FORCED) ||
-	    sig_kernel_only(sig) || (t->flags & PF_KTHREAD))
+	if (sig_kernel_only(sig) || (t->flags & PF_KTHREAD))
 		goto out_set;
 
 	/*
-- 
cgit v1.2.3


From bfc456060d0cbcf6902a436d358b60cb1534668c Mon Sep 17 00:00:00 2001
From: Dennis Dalessandro <dennis.dalessandro@intel.com>
Date: Fri, 31 Aug 2018 10:34:14 -0700
Subject: IB/hfi1,PCI: Allow bus reset while probing

Calling into the new API to reset the secondary bus results in a deadlock.
This occurs because the device/bus is already locked at probe time.
Reverting back to the old behavior while the API is improved.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=200985
Fixes: c6a44ba950d1 ("PCI: Rename pci_try_reset_bus() to pci_reset_bus()")
Fixes: 409888e0966e ("IB/hfi1: Use pci_try_reset_bus() for initiating PCI Secondary Bus Reset")
Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Cc: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/infiniband/hw/hfi1/pcie.c | 11 ++++-------
 drivers/pci/pci.c                 |  1 +
 include/linux/pci.h               |  3 +++
 3 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index eec83757d55f..6c967dde58e7 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -893,14 +893,11 @@ static int trigger_sbr(struct hfi1_devdata *dd)
 		}
 
 	/*
-	 * A secondary bus reset (SBR) issues a hot reset to our device.
-	 * The following routine does a 1s wait after the reset is dropped
-	 * per PCI Trhfa (recovery time).  PCIe 3.0 section 6.6.1 -
-	 * Conventional Reset, paragraph 3, line 35 also says that a 1s
-	 * delay after a reset is required.  Per spec requirements,
-	 * the link is either working or not after that point.
+	 * This is an end around to do an SBR during probe time. A new API needs
+	 * to be implemented to have cleaner interface but this fixes the
+	 * current brokenness
 	 */
-	return pci_reset_bus(dev);
+	return pci_bridge_secondary_bus_reset(dev->bus->self);
 }
 
 /*
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 30b260332a10..1835f3a7aa8d 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4547,6 +4547,7 @@ int pci_bridge_secondary_bus_reset(struct pci_dev *dev)
 
 	return pci_dev_wait(dev, "bus reset", PCIE_RESET_READY_POLL_MS);
 }
+EXPORT_SYMBOL_GPL(pci_bridge_secondary_bus_reset);
 
 static int pci_parent_bus_reset(struct pci_dev *dev, int probe)
 {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index e72ca8dd6241..6925828f9f25 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1235,6 +1235,9 @@ void pci_bus_remove_resources(struct pci_bus *bus);
 int devm_request_pci_bus_resources(struct device *dev,
 				   struct list_head *resources);
 
+/* Temporary until new and working PCI SBR API in place */
+int pci_bridge_secondary_bus_reset(struct pci_dev *dev);
+
 #define pci_bus_for_each_resource(bus, res, i)				\
 	for (i = 0;							\
 	    (res = pci_bus_resource_n(bus, i)) || i < PCI_BRIDGE_RESOURCE_NUM; \
-- 
cgit v1.2.3


From d765edbb301c0e196015a59b17420558088ea33f Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Fri, 10 Aug 2018 23:06:08 +0000
Subject: vmbus: add driver_override support

Add support for overriding the default driver for a VMBus device
in the same way that it can be done for PCI devices. This patch
adds the /sys/bus/vmbus/devices/.../driver_override file
and the logic for matching.

This is used by driverctl tool to do driver override.
https://na01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fgitlab.com%2Fdriverctl%2Fdriverctl&amp;data=02%7C01%7Ckys%40microsoft.com%7C42e803feb2c544ef6ea908d5fd538878%7C72f988bf86f141af91ab2d7cd011db47%7C1%7C0%7C636693457619960040&amp;sdata=kEyYHRIjNZCk%2B37moCSqbrZL426YccNQrsWpENcrZdw%3D&amp;reserved=0

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-bus-vmbus |  21 ++++++
 drivers/hv/vmbus_drv.c                    | 115 +++++++++++++++++++++++++-----
 include/linux/hyperv.h                    |   1 +
 3 files changed, 118 insertions(+), 19 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-vmbus

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-bus-vmbus b/Documentation/ABI/testing/sysfs-bus-vmbus
new file mode 100644
index 000000000000..91e6c065973c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-vmbus
@@ -0,0 +1,21 @@
+What:		/sys/bus/vmbus/devices/.../driver_override
+Date:		August 2019
+Contact:	Stephen Hemminger <sthemmin@microsoft.com>
+Description:
+		This file allows the driver for a device to be specified which
+		will override standard static and dynamic ID matching.  When
+		specified, only a driver with a name matching the value written
+		to driver_override will have an opportunity to bind to the
+		device.  The override is specified by writing a string to the
+		driver_override file (echo uio_hv_generic > driver_override) and
+		may be cleared with an empty string (echo > driver_override).
+		This returns the device to standard matching rules binding.
+		Writing to driver_override does not automatically unbind the
+		device from its current driver or make any attempt to
+		automatically load the specified driver.  If no driver with a
+		matching name is currently loaded in the kernel, the device
+		will not bind to any driver.  This also allows devices to
+		opt-out of driver binding using a driver_override name such as
+		"none".  Only a single driver may be specified in the override,
+		there is no support for parsing delimiters.
+
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index b1b548a21f91..e6d8fdac6d8b 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -498,6 +498,54 @@ static ssize_t device_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(device);
 
+static ssize_t driver_override_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct hv_device *hv_dev = device_to_hv_device(dev);
+	char *driver_override, *old, *cp;
+
+	/* We need to keep extra room for a newline */
+	if (count >= (PAGE_SIZE - 1))
+		return -EINVAL;
+
+	driver_override = kstrndup(buf, count, GFP_KERNEL);
+	if (!driver_override)
+		return -ENOMEM;
+
+	cp = strchr(driver_override, '\n');
+	if (cp)
+		*cp = '\0';
+
+	device_lock(dev);
+	old = hv_dev->driver_override;
+	if (strlen(driver_override)) {
+		hv_dev->driver_override = driver_override;
+	} else {
+		kfree(driver_override);
+		hv_dev->driver_override = NULL;
+	}
+	device_unlock(dev);
+
+	kfree(old);
+
+	return count;
+}
+
+static ssize_t driver_override_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct hv_device *hv_dev = device_to_hv_device(dev);
+	ssize_t len;
+
+	device_lock(dev);
+	len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override);
+	device_unlock(dev);
+
+	return len;
+}
+static DEVICE_ATTR_RW(driver_override);
+
 /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
 static struct attribute *vmbus_dev_attrs[] = {
 	&dev_attr_id.attr,
@@ -528,6 +576,7 @@ static struct attribute *vmbus_dev_attrs[] = {
 	&dev_attr_channel_vp_mapping.attr,
 	&dev_attr_vendor.attr,
 	&dev_attr_device.attr,
+	&dev_attr_driver_override.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(vmbus_dev);
@@ -563,17 +612,26 @@ static inline bool is_null_guid(const uuid_le *guid)
 	return true;
 }
 
-/*
- * Return a matching hv_vmbus_device_id pointer.
- * If there is no match, return NULL.
- */
-static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
-					const uuid_le *guid)
+static const struct hv_vmbus_device_id *
+hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const uuid_le *guid)
+
+{
+	if (id == NULL)
+		return NULL; /* empty device table */
+
+	for (; !is_null_guid(&id->guid); id++)
+		if (!uuid_le_cmp(id->guid, *guid))
+			return id;
+
+	return NULL;
+}
+
+static const struct hv_vmbus_device_id *
+hv_vmbus_dynid_match(struct hv_driver *drv, const uuid_le *guid)
 {
 	const struct hv_vmbus_device_id *id = NULL;
 	struct vmbus_dynid *dynid;
 
-	/* Look at the dynamic ids first, before the static ones */
 	spin_lock(&drv->dynids.lock);
 	list_for_each_entry(dynid, &drv->dynids.list, node) {
 		if (!uuid_le_cmp(dynid->id.guid, *guid)) {
@@ -583,18 +641,37 @@ static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
 	}
 	spin_unlock(&drv->dynids.lock);
 
-	if (id)
-		return id;
+	return id;
+}
 
-	id = drv->id_table;
-	if (id == NULL)
-		return NULL; /* empty device table */
+static const struct hv_vmbus_device_id vmbus_device_null = {
+	.guid = NULL_UUID_LE,
+};
 
-	for (; !is_null_guid(&id->guid); id++)
-		if (!uuid_le_cmp(id->guid, *guid))
-			return id;
+/*
+ * Return a matching hv_vmbus_device_id pointer.
+ * If there is no match, return NULL.
+ */
+static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv,
+							struct hv_device *dev)
+{
+	const uuid_le *guid = &dev->dev_type;
+	const struct hv_vmbus_device_id *id;
 
-	return NULL;
+	/* When driver_override is set, only bind to the matching driver */
+	if (dev->driver_override && strcmp(dev->driver_override, drv->name))
+		return NULL;
+
+	/* Look at the dynamic ids first, before the static ones */
+	id = hv_vmbus_dynid_match(drv, guid);
+	if (!id)
+		id = hv_vmbus_dev_match(drv->id_table, guid);
+
+	/* driver_override will always match, send a dummy id */
+	if (!id && dev->driver_override)
+		id = &vmbus_device_null;
+
+	return id;
 }
 
 /* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */
@@ -643,7 +720,7 @@ static ssize_t new_id_store(struct device_driver *driver, const char *buf,
 	if (retval)
 		return retval;
 
-	if (hv_vmbus_get_id(drv, &guid))
+	if (hv_vmbus_dynid_match(drv, &guid))
 		return -EEXIST;
 
 	retval = vmbus_add_dynid(drv, &guid);
@@ -708,7 +785,7 @@ static int vmbus_match(struct device *device, struct device_driver *driver)
 	if (is_hvsock_channel(hv_dev->channel))
 		return drv->hvsock;
 
-	if (hv_vmbus_get_id(drv, &hv_dev->dev_type))
+	if (hv_vmbus_get_id(drv, hv_dev))
 		return 1;
 
 	return 0;
@@ -725,7 +802,7 @@ static int vmbus_probe(struct device *child_device)
 	struct hv_device *dev = device_to_hv_device(child_device);
 	const struct hv_vmbus_device_id *dev_id;
 
-	dev_id = hv_vmbus_get_id(drv, &dev->dev_type);
+	dev_id = hv_vmbus_get_id(drv, dev);
 	if (drv->probe) {
 		ret = drv->probe(dev, dev_id);
 		if (ret != 0)
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index efda23cf32c7..2c3798bcb01c 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1125,6 +1125,7 @@ struct hv_device {
 	u16 device_id;
 
 	struct device device;
+	char *driver_override; /* Driver name to force a match */
 
 	struct vmbus_channel *channel;
 	struct kset	     *channels_kset;
-- 
cgit v1.2.3


From cdc3d7f346476b4a4d1c4738b7dcdda4b3d5d870 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Sun, 5 Aug 2018 23:18:05 -0400
Subject: media: drm: shmobile: convert to SPDX identifiers

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 include/linux/platform_data/shmob_drm.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h
index ee495d707f17..fe815d7d9f58 100644
--- a/include/linux/platform_data/shmob_drm.h
+++ b/include/linux/platform_data/shmob_drm.h
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * shmob_drm.h  --  SH Mobile DRM driver
  *
  * Copyright (C) 2012 Renesas Corporation
  *
  * Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __SHMOB_DRM_H__
-- 
cgit v1.2.3


From 7c1a000d466235c875a989971cfda344e6bb1166 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Wed, 12 Sep 2018 09:16:07 +0800
Subject: f2fs: add SPDX license identifiers

Remove the verbose license text from f2fs files and replace them with
SPDX tags.  This does not change the license of any of the code.

Signed-off-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/acl.c           | 5 +----
 fs/f2fs/acl.h           | 5 +----
 fs/f2fs/checkpoint.c    | 5 +----
 fs/f2fs/data.c          | 5 +----
 fs/f2fs/debug.c         | 5 +----
 fs/f2fs/dir.c           | 5 +----
 fs/f2fs/extent_cache.c  | 5 +----
 fs/f2fs/f2fs.h          | 5 +----
 fs/f2fs/file.c          | 5 +----
 fs/f2fs/gc.c            | 5 +----
 fs/f2fs/gc.h            | 5 +----
 fs/f2fs/hash.c          | 5 +----
 fs/f2fs/inline.c        | 4 +---
 fs/f2fs/inode.c         | 5 +----
 fs/f2fs/namei.c         | 5 +----
 fs/f2fs/node.c          | 5 +----
 fs/f2fs/node.h          | 5 +----
 fs/f2fs/recovery.c      | 5 +----
 fs/f2fs/segment.c       | 5 +----
 fs/f2fs/segment.h       | 5 +----
 fs/f2fs/shrinker.c      | 5 +----
 fs/f2fs/super.c         | 5 +----
 fs/f2fs/sysfs.c         | 5 +----
 fs/f2fs/trace.c         | 5 +----
 fs/f2fs/trace.h         | 5 +----
 fs/f2fs/xattr.c         | 5 +----
 fs/f2fs/xattr.h         | 5 +----
 include/linux/f2fs_fs.h | 5 +----
 28 files changed, 28 insertions(+), 111 deletions(-)

(limited to 'include/linux')

diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index cd82e1ce5d67..fa707cdd4120 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/acl.c
  *
@@ -7,10 +8,6 @@
  * Portions of this code from linux/fs/ext2/acl.c
  *
  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/f2fs_fs.h>
 #include "f2fs.h"
diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h
index 2c685185c24d..b96823c59b15 100644
--- a/fs/f2fs/acl.h
+++ b/fs/f2fs/acl.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/acl.h
  *
@@ -7,10 +8,6 @@
  * Portions of this code from linux/fs/ext2/acl.h
  *
  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef __F2FS_ACL_H__
 #define __F2FS_ACL_H__
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 59d0472013f4..d312d2829d5a 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/checkpoint.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/bio.h>
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 26f38b224bb2..c8c4b54e2bbf 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/data.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index ebe649d9793c..d3c402183e3c 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs debugging statistics
  *
@@ -5,10 +6,6 @@
  *             http://www.samsung.com/
  * Copyright (c) 2012 Linux Foundation
  * Copyright (c) 2012 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/fs.h>
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index bd0348cc860f..c77a58038709 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/dir.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index a70cd2580eae..904ad7ba5a45 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs extent cache support
  *
@@ -5,10 +6,6 @@
  * Copyright (c) 2015 Samsung Electronics
  * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
  *          Chao Yu <chao2.yu@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/fs.h>
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 88b8d5073581..079f525d5764 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/f2fs.h
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef _LINUX_F2FS_H
 #define _LINUX_F2FS_H
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 61f95731c858..357422a4c319 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/file.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 5c8d00422237..a4c1a419611d 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/gc.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/module.h>
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index c8619e408009..bbac9d3787bd 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/gc.h
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #define GC_THREAD_MIN_WB_PAGES		1	/*
 						 * a threshold to determine
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index eb2e031ea887..cc82f142f811 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/hash.c
  *
@@ -7,10 +8,6 @@
  * Portions of this code from linux/fs/ext3/hash.c
  *
  * Copyright (C) 2002 by Theodore Ts'o
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/types.h>
 #include <linux/fs.h>
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 115dc219344b..425d740f87fd 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -1,11 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/inline.c
  * Copyright (c) 2013, Intel Corporation
  * Authors: Huajun Li <huajun.li@intel.com>
  *          Haicheng Li <haicheng.li@intel.com>
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/fs.h>
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 959df2249875..86e7333d60c1 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/inode.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 1f67e389169f..9ad451ac2cec 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/namei.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index dd2e45a661aa..fa2381c0bc47 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/node.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 0f4db7a61254..1c73d879a9bc 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/node.h
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 /* start node id of a node block dedicated to the given node id */
 #define	START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index e3aa6eee7a8b..56d34193a74b 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/recovery.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 187c848a65b8..9a8d7d415a74 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/segment.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index b3d9e317ff0c..086150028c6d 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/segment.h
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 36cfd816c160..9e13db994fdf 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs shrinker support
  *   the basic infra was copied from fs/ubifs/shrinker.c
  *
  * Copyright (c) 2015 Motorola Mobility
  * Copyright (c) 2015 Jaegeuk Kim <jaegeuk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3106da1d9be6..8c536105d5ef 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/super.c
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/module.h>
 #include <linux/init.h>
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 81c0e5337443..c8924c02accd 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs sysfs interface
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
  * Copyright (c) 2017 Chao Yu <chao@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/compiler.h>
 #include <linux/proc_fs.h>
diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c
index a1fcd00bbb2b..ce2a5eb210b6 100644
--- a/fs/f2fs/trace.c
+++ b/fs/f2fs/trace.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs IO tracer
  *
  * Copyright (c) 2014 Motorola Mobility
  * Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
diff --git a/fs/f2fs/trace.h b/fs/f2fs/trace.h
index 67db24ac1e85..e8075fc5b228 100644
--- a/fs/f2fs/trace.h
+++ b/fs/f2fs/trace.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * f2fs IO tracer
  *
  * Copyright (c) 2014 Motorola Mobility
  * Copyright (c) 2014 Jaegeuk Kim <jaegeuk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef __F2FS_TRACE_H__
 #define __F2FS_TRACE_H__
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 77a010e625f5..7261245c208d 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/xattr.c
  *
@@ -13,10 +14,6 @@
  *  suggestion of Luka Renko <luka.renko@hermes.si>.
  * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
  *  Red Hat Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #include <linux/rwsem.h>
 #include <linux/f2fs_fs.h>
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index dbcd1d16e669..67db134da0f5 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * fs/f2fs/xattr.h
  *
@@ -9,10 +10,6 @@
  * On-disk format of extended attributes for the ext2 filesystem.
  *
  * (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef __F2FS_XATTR_H__
 #define __F2FS_XATTR_H__
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index f70f8ac9c4f4..1d4b196291d6 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /**
  * include/linux/f2fs_fs.h
  *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef _LINUX_F2FS_FS_H
 #define _LINUX_F2FS_FS_H
-- 
cgit v1.2.3


From dfb06cba8c73c0704710b2e3fbe2c35ac66a01b4 Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Wed, 5 Sep 2018 13:31:40 -0700
Subject: uaccess: Fix is_source param for check_copy_size() in
 copy_to_iter_mcsafe()

copy_to_iter_mcsafe() is passing in the is_source parameter as "false"
to check_copy_size(). This is different than what copy_to_iter() does.
Also, the addr parameter passed to check_copy_size() is the source so
therefore we should be passing in "true" instead.

Fixes: 8780356ef630 ("x86/asm/memcpy_mcsafe: Define copy_to_iter_mcsafe()")
Cc: <stable@vger.kernel.org>
Reported-by: Fan Du <fan.du@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>
Reported-by: Wenwei Tao <wenwei.tww@alibaba-inc.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/uio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 409c845d4cd3..422b1c01ee0d 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -172,7 +172,7 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 static __always_inline __must_check
 size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
 {
-	if (unlikely(!check_copy_size(addr, bytes, false)))
+	if (unlikely(!check_copy_size(addr, bytes, true)))
 		return 0;
 	else
 		return _copy_to_iter_mcsafe(addr, bytes, i);
-- 
cgit v1.2.3


From 41124fa64d4b298b82266b7ddbefc43540b77b44 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 12 Sep 2018 01:53:14 +0200
Subject: net: ethernet: Add helper to remove a supported link mode

Some MAC hardware cannot support a subset of link modes. e.g. often
1Gbps Full duplex is supported, but Half duplex is not. Add a helper
to remove such a link mode.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/apm/xgene/xgene_enet_hw.c    |  6 +++---
 drivers/net/ethernet/cadence/macb_main.c          |  5 ++---
 drivers/net/ethernet/freescale/fec_main.c         |  3 ++-
 drivers/net/ethernet/microchip/lan743x_main.c     |  2 +-
 drivers/net/ethernet/renesas/ravb_main.c          |  3 ++-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 12 ++++++++----
 drivers/net/phy/phy_device.c                      | 18 ++++++++++++++++++
 drivers/net/usb/lan78xx.c                         |  2 +-
 include/linux/phy.h                               |  1 +
 9 files changed, 38 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index 078a04dc1182..4831f9de5945 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -895,9 +895,9 @@ int xgene_enet_phy_connect(struct net_device *ndev)
 	}
 
 	pdata->phy_speed = SPEED_UNKNOWN;
-	phy_dev->supported &= ~SUPPORTED_10baseT_Half &
-			      ~SUPPORTED_100baseT_Half &
-			      ~SUPPORTED_1000baseT_Half;
+	phy_remove_link_mode(phy_dev, ETHTOOL_LINK_MODE_10baseT_Half_BIT);
+	phy_remove_link_mode(phy_dev, ETHTOOL_LINK_MODE_100baseT_Half_BIT);
+	phy_remove_link_mode(phy_dev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
 	phy_dev->supported |= SUPPORTED_Pause |
 			      SUPPORTED_Asym_Pause;
 	phy_dev->advertising = phy_dev->supported;
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index bd4095c3a031..96ae8c992810 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -549,9 +549,8 @@ static int macb_mii_probe(struct net_device *dev)
 		phy_set_max_speed(phydev, SPEED_100);
 
 	if (bp->caps & MACB_CAPS_NO_GIGABIT_HALF)
-		phydev->supported &= ~SUPPORTED_1000baseT_Half;
-
-	phydev->advertising = phydev->supported;
+		phy_remove_link_mode(phydev,
+				     ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
 
 	bp->link = 0;
 	bp->speed = 0;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 5e849510c689..0c6fd77b6599 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1947,7 +1947,8 @@ static int fec_enet_mii_probe(struct net_device *ndev)
 	/* mask with MAC supported features */
 	if (fep->quirks & FEC_QUIRK_HAS_GBIT) {
 		phy_set_max_speed(phy_dev, 1000);
-		phy_dev->supported &= ~SUPPORTED_1000baseT_Half;
+		phy_remove_link_mode(phy_dev,
+				     ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
 #if !defined(CONFIG_M5272)
 		phy_dev->supported |= SUPPORTED_Pause;
 #endif
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index e7dce79ff2c9..048307959c01 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -1013,7 +1013,7 @@ static int lan743x_phy_open(struct lan743x_adapter *adapter)
 		goto return_error;
 
 	/* MAC doesn't support 1000T Half */
-	phydev->supported &= ~SUPPORTED_1000baseT_Half;
+	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
 
 	/* support both flow controls */
 	phy->fc_request_control = (FLOW_CTRL_RX | FLOW_CTRL_TX);
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index aff5516b781e..fb2a1125780d 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1074,7 +1074,8 @@ static int ravb_phy_init(struct net_device *ndev)
 	}
 
 	/* 10BASE is not supported */
-	phydev->supported &= ~PHY_10BT_FEATURES;
+	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT);
+	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_10baseT_Full_BIT);
 
 	phy_attached_info(phydev);
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 3d7aec7a050b..3715a0a4af3c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -993,10 +993,14 @@ static int stmmac_init_phy(struct net_device *dev)
 	 * Half-duplex mode not supported with multiqueue
 	 * half-duplex can only works with single queue
 	 */
-	if (tx_cnt > 1)
-		phydev->supported &= ~(SUPPORTED_1000baseT_Half |
-				       SUPPORTED_100baseT_Half |
-				       SUPPORTED_10baseT_Half);
+	if (tx_cnt > 1) {
+		phy_remove_link_mode(phydev,
+				     ETHTOOL_LINK_MODE_10baseT_Half_BIT);
+		phy_remove_link_mode(phydev,
+				     ETHTOOL_LINK_MODE_100baseT_Half_BIT);
+		phy_remove_link_mode(phydev,
+				     ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
+	}
 
 	/*
 	 * Broken HW is sometimes missing the pull-up resistor on the
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index db1172db1e7c..e9ca83a438b0 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1765,6 +1765,24 @@ int phy_set_max_speed(struct phy_device *phydev, u32 max_speed)
 }
 EXPORT_SYMBOL(phy_set_max_speed);
 
+/**
+ * phy_remove_link_mode - Remove a supported link mode
+ * @phydev: phy_device structure to remove link mode from
+ * @link_mode: Link mode to be removed
+ *
+ * Description: Some MACs don't support all link modes which the PHY
+ * does.  e.g. a 1G MAC often does not support 1000Half. Add a helper
+ * to remove a link mode.
+ */
+void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode)
+{
+	WARN_ON(link_mode > 31);
+
+	phydev->supported &= ~BIT(link_mode);
+	phydev->advertising = phydev->supported;
+}
+EXPORT_SYMBOL(phy_remove_link_mode);
+
 static void of_set_phy_supported(struct phy_device *phydev)
 {
 	struct device_node *node = phydev->mdio.dev.of_node;
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 3ce3c66559e4..95a98a20b2e3 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -2166,7 +2166,7 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
 	}
 
 	/* MAC doesn't support 1000T Half */
-	phydev->supported &= ~SUPPORTED_1000baseT_Half;
+	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
 
 	/* support both flow controls */
 	dev->fc_request_control = (FLOW_CTRL_RX | FLOW_CTRL_TX);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index cd6f637cbbfb..9c4c3eca8cf2 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1049,6 +1049,7 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd);
 int phy_start_interrupts(struct phy_device *phydev);
 void phy_print_status(struct phy_device *phydev);
 int phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
+void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
 
 int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
 		       int (*run)(struct phy_device *));
-- 
cgit v1.2.3


From af8d9bb2f2f405ad541794b46f9d7bc70f13e5cb Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 12 Sep 2018 01:53:15 +0200
Subject: net: ethernet: Add helper for MACs which support asym pause

Rather than have the MAC drivers manipulate phydev members to indicate
they support Asym Pause, add a helper function.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c             |  4 ++--
 drivers/net/ethernet/apm/xgene/xgene_enet_hw.c          |  4 +---
 drivers/net/ethernet/broadcom/sb1250-mac.c              |  5 +----
 drivers/net/ethernet/broadcom/tg3.c                     |  8 ++------
 drivers/net/ethernet/cortina/gemini.c                   |  3 +--
 drivers/net/ethernet/dnet.c                             |  4 +---
 drivers/net/ethernet/faraday/ftgmac100.c                |  3 +--
 drivers/net/ethernet/freescale/dpaa/dpaa_eth.c          |  3 +--
 drivers/net/ethernet/freescale/gianfar.c                |  4 ++--
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c |  4 +---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c             |  6 +-----
 drivers/net/ethernet/microchip/lan743x_main.c           |  5 +----
 drivers/net/ethernet/smsc/smsc911x.c                    |  3 +--
 drivers/net/ethernet/smsc/smsc9420.c                    |  3 +--
 drivers/net/ethernet/socionext/sni_ave.c                |  3 ++-
 drivers/net/phy/phy_device.c                            | 13 +++++++++++++
 include/linux/phy.h                                     |  1 +
 17 files changed, 33 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 3ceb4f95ca7c..289129011b9f 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -879,8 +879,8 @@ static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata)
 	phy_write(phy_data->phydev, 0x00, 0x9140);
 
 	phy_data->phydev->supported = PHY_GBIT_FEATURES;
-	phy_data->phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
 	phy_data->phydev->advertising = phy_data->phydev->supported;
+	phy_support_asym_pause(phy_data->phydev);
 
 	netif_dbg(pdata, drv, pdata->netdev,
 		  "Finisar PHY quirk in place\n");
@@ -951,8 +951,8 @@ static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata)
 	phy_write(phy_data->phydev, 0x00, reg & ~0x00800);
 
 	phy_data->phydev->supported = PHY_GBIT_FEATURES;
-	phy_data->phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
 	phy_data->phydev->advertising = phy_data->phydev->supported;
+	phy_support_asym_pause(phy_data->phydev);
 
 	netif_dbg(pdata, drv, pdata->netdev,
 		  "BelFuse PHY quirk in place\n");
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index 4831f9de5945..e3560311711a 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -898,9 +898,7 @@ int xgene_enet_phy_connect(struct net_device *ndev)
 	phy_remove_link_mode(phy_dev, ETHTOOL_LINK_MODE_10baseT_Half_BIT);
 	phy_remove_link_mode(phy_dev, ETHTOOL_LINK_MODE_100baseT_Half_BIT);
 	phy_remove_link_mode(phy_dev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
-	phy_dev->supported |= SUPPORTED_Pause |
-			      SUPPORTED_Asym_Pause;
-	phy_dev->advertising = phy_dev->supported;
+	phy_support_asym_pause(phy_dev);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index 4ce4b097ec05..53acbbb36637 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -2358,13 +2358,10 @@ static int sbmac_mii_probe(struct net_device *dev)
 
 	/* Remove any features not supported by the controller */
 	phy_set_max_speed(phy_dev, SPEED_1000);
-	phy_dev->supported |= SUPPORTED_Pause |
-			      SUPPORTED_Asym_Pause;
+	phy_support_asym_pause(phy_dev);
 
 	phy_attached_info(phy_dev);
 
-	phy_dev->advertising = phy_dev->supported;
-
 	sc->phy_dev = phy_dev;
 
 	return 0;
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index eab00239a47a..193e990fac7a 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -2123,15 +2123,13 @@ static int tg3_phy_init(struct tg3 *tp)
 	case PHY_INTERFACE_MODE_RGMII:
 		if (!(tp->phy_flags & TG3_PHYFLG_10_100_ONLY)) {
 			phy_set_max_speed(phydev, SPEED_1000);
-			phydev->supported |= (SUPPORTED_Pause |
-					      SUPPORTED_Asym_Pause);
+			phy_support_asym_pause(phydev);
 			break;
 		}
 		/* fallthru */
 	case PHY_INTERFACE_MODE_MII:
 		phy_set_max_speed(phydev, SPEED_100);
-		phydev->supported |= (SUPPORTED_Pause |
-				      SUPPORTED_Asym_Pause);
+		phy_support_asym_pause(phydev);
 		break;
 	default:
 		phy_disconnect(mdiobus_get_phy(tp->mdio_bus, tp->phy_addr));
@@ -2140,8 +2138,6 @@ static int tg3_phy_init(struct tg3 *tp)
 
 	tp->phy_flags |= TG3_PHYFLG_IS_CONNECTED;
 
-	phydev->advertising = phydev->supported;
-
 	phy_attached_info(phydev);
 
 	return 0;
diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index 2b46c0de90d0..ceec467f590d 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -373,8 +373,7 @@ static int gmac_setup_phy(struct net_device *netdev)
 	netdev->phydev = phy;
 
 	phy_set_max_speed(phy, SPEED_1000);
-	phy->supported |= SUPPORTED_Asym_Pause | SUPPORTED_Pause;
-	phy->advertising = phy->supported;
+	phy_support_asym_pause(phy);
 
 	/* set PHY interface type */
 	switch (phy->interface) {
diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c
index 08b7ad1594ce..79521e27f0d1 100644
--- a/drivers/net/ethernet/dnet.c
+++ b/drivers/net/ethernet/dnet.c
@@ -288,9 +288,7 @@ static int dnet_mii_probe(struct net_device *dev)
 	else
 		phy_set_max_speed(phydev, SPEED_100);
 
-	phydev->supported |= SUPPORTED_Asym_Pause | SUPPORTED_Pause;
-
-	phydev->advertising = phydev->supported;
+	phy_support_asym_pause(phydev);
 
 	bp->link = 0;
 	bp->speed = 0;
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index ed6c76d20b45..3f319ee66ab4 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -1079,8 +1079,7 @@ static int ftgmac100_mii_probe(struct ftgmac100 *priv, phy_interface_t intf)
 	/* Indicate that we support PAUSE frames (see comment in
 	 * Documentation/networking/phy.txt)
 	 */
-	phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
-	phydev->advertising = phydev->supported;
+	phy_support_asym_pause(phydev);
 
 	/* Display what we found */
 	phy_attached_info(phydev);
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 783134f1b779..a5131a510e8b 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -2491,8 +2491,7 @@ static int dpaa_phy_init(struct net_device *net_dev)
 
 	/* Remove any features not supported by the controller */
 	phy_dev->supported &= mac_dev->if_support;
-	phy_dev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause);
-	phy_dev->advertising = phy_dev->supported;
+	phy_support_asym_pause(phy_dev);
 
 	mac_dev->phy_dev = phy_dev;
 	net_dev->phydev = phy_dev;
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index f27f9bae1a4a..40a1a87cd338 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -1814,8 +1814,8 @@ static int init_phy(struct net_device *dev)
 	phydev->supported &= (GFAR_SUPPORTED | gigabit_support);
 	phydev->advertising = phydev->supported;
 
-	/* Add support for flow control, but don't advertise it by default */
-	phydev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause);
+	/* Add support for flow control */
+	phy_support_asym_pause(phydev);
 
 	/* disable EEE autoneg, EEE not supported by eTSEC */
 	memset(&edata, 0, sizeof(struct ethtool_eee));
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index 05b15d254e32..24b1f2a0c32a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -211,9 +211,7 @@ int hclge_mac_connect_phy(struct hclge_dev *hdev)
 	}
 
 	phydev->supported &= HCLGE_PHY_SUPPORTED_FEATURES;
-	phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
-
-	phydev->advertising = phydev->supported;
+	phy_support_asym_pause(phydev);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index db231bda7c2a..cc1e9a96a43b 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -355,12 +355,8 @@ static int mtk_phy_connect(struct net_device *dev)
 	dev->phydev->speed = 0;
 	dev->phydev->duplex = 0;
 
-	if (of_phy_is_fixed_link(mac->of_node))
-		dev->phydev->supported |=
-		SUPPORTED_Pause | SUPPORTED_Asym_Pause;
-
 	phy_set_max_speed(dev->phydev, SPEED_1000);
-	dev->phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+	phy_support_asym_pause(dev->phydev);
 	dev->phydev->advertising = dev->phydev->supported |
 				    ADVERTISED_Autoneg;
 	phy_start_aneg(dev->phydev);
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index 048307959c01..b1a0e657febf 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -999,7 +999,6 @@ static int lan743x_phy_open(struct lan743x_adapter *adapter)
 	struct phy_device *phydev;
 	struct net_device *netdev;
 	int ret = -EIO;
-	u32 mii_adv;
 
 	netdev = adapter->netdev;
 	phydev = phy_find_first(adapter->mdiobus);
@@ -1016,10 +1015,8 @@ static int lan743x_phy_open(struct lan743x_adapter *adapter)
 	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
 
 	/* support both flow controls */
+	phy_support_asym_pause(phydev);
 	phy->fc_request_control = (FLOW_CTRL_RX | FLOW_CTRL_TX);
-	phydev->advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-	mii_adv = (u32)mii_advertise_flowctrl(phy->fc_request_control);
-	phydev->advertising |= mii_adv_to_ethtool_adv_t(mii_adv);
 	phy->fc_autoneg = phydev->autoneg;
 
 	phy_start(phydev);
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index 3e34bf53f055..c009407618d9 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -1051,8 +1051,7 @@ static int smsc911x_mii_probe(struct net_device *dev)
 	phy_set_max_speed(phydev, SPEED_100);
 
 	/* mask with MAC supported features */
-	phydev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause);
-	phydev->advertising = phydev->supported;
+	phy_support_asym_pause(phydev);
 
 	pdata->last_duplex = -1;
 	pdata->last_carrier = -1;
diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c
index 326177384544..9b6366b20110 100644
--- a/drivers/net/ethernet/smsc/smsc9420.c
+++ b/drivers/net/ethernet/smsc/smsc9420.c
@@ -1138,8 +1138,7 @@ static int smsc9420_mii_probe(struct net_device *dev)
 	phy_set_max_speed(phydev, SPEED_100);
 
 	/* mask with MAC supported features */
-	phydev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause);
-	phydev->advertising = phydev->supported;
+	phy_support_asym_pause(phydev);
 
 	phy_attached_info(phydev);
 
diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index 76ff364c40e9..a50720ec109c 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -1225,7 +1225,8 @@ static int ave_init(struct net_device *ndev)
 
 	if (!phy_interface_is_rgmii(phydev))
 		phy_set_max_speed(phydev, SPEED_100);
-	phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+
+	phy_support_asym_pause(phydev);
 
 	phy_attached_info(phydev);
 
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index e9ca83a438b0..a0646a66f005 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1783,6 +1783,19 @@ void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode)
 }
 EXPORT_SYMBOL(phy_remove_link_mode);
 
+/**
+ * phy_support_asym_pause - Enable support of asym pause
+ * @phydev: target phy_device struct
+ *
+ * Description: Called by the MAC to indicate is supports Asym Pause.
+ */
+void phy_support_asym_pause(struct phy_device *phydev)
+{
+	phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+	phydev->advertising = phydev->supported;
+}
+EXPORT_SYMBOL(phy_support_asym_pause);
+
 static void of_set_phy_supported(struct phy_device *phydev)
 {
 	struct device_node *node = phydev->mdio.dev.of_node;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 9c4c3eca8cf2..e2db819807c1 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1050,6 +1050,7 @@ int phy_start_interrupts(struct phy_device *phydev);
 void phy_print_status(struct phy_device *phydev);
 int phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
 void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
+void phy_support_asym_pause(struct phy_device *phydev);
 
 int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
 		       int (*run)(struct phy_device *));
-- 
cgit v1.2.3


From c306ad36184fb7d0bd53f45441f45c1810e88a53 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 12 Sep 2018 01:53:16 +0200
Subject: net: ethernet: Add helper for MACs which support pause

Rather than have the MAC drivers manipulate phydev members, add a
helper function for MACs supporting Pause, but not Asym Pause.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bcm63xx_enet.c |  2 +-
 drivers/net/ethernet/freescale/fec_main.c    |  4 +---
 drivers/net/phy/phy_device.c                 | 14 ++++++++++++++
 include/linux/phy.h                          |  1 +
 4 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 2eee9459c2cf..9f25667c38e6 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -890,7 +890,7 @@ static int bcm_enet_open(struct net_device *dev)
 		}
 
 		/* mask with MAC supported features */
-		phydev->supported |= SUPPORTED_Pause;
+		phy_support_sym_pause(phydev);
 		phy_set_max_speed(phydev, SPEED_100);
 
 		if (priv->pause_auto && priv->pause_rx && priv->pause_tx)
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 0c6fd77b6599..05ce0903391a 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1950,14 +1950,12 @@ static int fec_enet_mii_probe(struct net_device *ndev)
 		phy_remove_link_mode(phy_dev,
 				     ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
 #if !defined(CONFIG_M5272)
-		phy_dev->supported |= SUPPORTED_Pause;
+		phy_support_sym_pause(phy_dev);
 #endif
 	}
 	else
 		phy_set_max_speed(phy_dev, 100);
 
-	phy_dev->advertising = phy_dev->supported;
-
 	fep->link = 0;
 	fep->full_duplex = 0;
 
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index a0646a66f005..e657d5ae2ab8 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1783,6 +1783,20 @@ void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode)
 }
 EXPORT_SYMBOL(phy_remove_link_mode);
 
+/**
+ * phy_support_sym_pause - Enable support of symmetrical pause
+ * @phydev: target phy_device struct
+ *
+ * Description: Called by the MAC to indicate is supports symmetrical
+ * Pause, but not asym pause.
+ */
+void phy_support_sym_pause(struct phy_device *phydev)
+{
+	phydev->supported |= SUPPORTED_Pause;
+	phydev->advertising = phydev->supported;
+}
+EXPORT_SYMBOL(phy_support_sym_pause);
+
 /**
  * phy_support_asym_pause - Enable support of asym pause
  * @phydev: target phy_device struct
diff --git a/include/linux/phy.h b/include/linux/phy.h
index e2db819807c1..bc5d6c3f1388 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1050,6 +1050,7 @@ int phy_start_interrupts(struct phy_device *phydev);
 void phy_print_status(struct phy_device *phydev);
 int phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
 void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
+void phy_support_sym_pause(struct phy_device *phydev);
 void phy_support_asym_pause(struct phy_device *phydev);
 
 int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
-- 
cgit v1.2.3


From 70814e819c1139e5e7faacb3700eab5eac559272 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 12 Sep 2018 01:53:17 +0200
Subject: net: ethernet: Add helper for set_pauseparam for Asym Pause

ethtool can be used to enable/disable pause. Add a helper to configure
the PHY when asym pause is supported.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/apm/xgene/xgene_enet_ethtool.c    | 26 ++----------
 drivers/net/ethernet/aurora/nb8800.c               |  9 +---
 drivers/net/ethernet/broadcom/tg3.c                | 43 ++++++-------------
 drivers/net/ethernet/faraday/ftgmac100.c           | 17 ++------
 drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c | 23 +---------
 drivers/net/ethernet/freescale/gianfar_ethtool.c   | 49 +++++++---------------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    |  8 +---
 drivers/net/ethernet/socionext/sni_ave.c           | 11 +----
 drivers/net/phy/phy_device.c                       | 30 +++++++++++++
 include/linux/phy.h                                |  1 +
 10 files changed, 69 insertions(+), 148 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
index 4f50f11718f4..dfe03afd00b0 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
@@ -306,7 +306,6 @@ static int xgene_set_pauseparam(struct net_device *ndev,
 {
 	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
 	struct phy_device *phydev = ndev->phydev;
-	u32 oldadv, newadv;
 
 	if (phy_interface_mode_is_rgmii(pdata->phy_mode) ||
 	    pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) {
@@ -322,29 +321,12 @@ static int xgene_set_pauseparam(struct net_device *ndev,
 		pdata->tx_pause = pp->tx_pause;
 		pdata->rx_pause = pp->rx_pause;
 
-		oldadv = phydev->advertising;
-		newadv = oldadv & ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
+		phy_set_asym_pause(phydev, pp->rx_pause,  pp->tx_pause);
 
-		if (pp->rx_pause)
-			newadv |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
-
-		if (pp->tx_pause)
-			newadv ^= ADVERTISED_Asym_Pause;
-
-		if (oldadv ^ newadv) {
-			phydev->advertising = newadv;
-
-			if (phydev->autoneg)
-				return phy_start_aneg(phydev);
-
-			if (!pp->autoneg) {
-				pdata->mac_ops->flowctl_tx(pdata,
-							   pdata->tx_pause);
-				pdata->mac_ops->flowctl_rx(pdata,
-							   pdata->rx_pause);
-			}
+		if (!pp->autoneg) {
+			pdata->mac_ops->flowctl_tx(pdata, pdata->tx_pause);
+			pdata->mac_ops->flowctl_rx(pdata, pdata->rx_pause);
 		}
-
 	} else {
 		if (pp->autoneg)
 			return -EINVAL;
diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
index c8d1f8fa4713..6f56276015a4 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -935,18 +935,11 @@ static void nb8800_pause_adv(struct net_device *dev)
 {
 	struct nb8800_priv *priv = netdev_priv(dev);
 	struct phy_device *phydev = dev->phydev;
-	u32 adv = 0;
 
 	if (!phydev)
 		return;
 
-	if (priv->pause_rx)
-		adv |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
-	if (priv->pause_tx)
-		adv ^= ADVERTISED_Asym_Pause;
-
-	phydev->supported |= adv;
-	phydev->advertising |= adv;
+	phy_set_asym_pause(phydev, priv->pause_rx, priv->pause_tx);
 }
 
 static int nb8800_open(struct net_device *dev)
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 193e990fac7a..b2a3d008e1df 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12492,7 +12492,6 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam
 		tg3_warn_mgmt_link_flap(tp);
 
 	if (tg3_flag(tp, USE_PHYLIB)) {
-		u32 newadv;
 		struct phy_device *phydev;
 
 		phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr);
@@ -12503,20 +12502,16 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam
 			return -EINVAL;
 
 		tp->link_config.flowctrl = 0;
+		phy_set_asym_pause(phydev, epause->rx_pause, epause->tx_pause);
 		if (epause->rx_pause) {
 			tp->link_config.flowctrl |= FLOW_CTRL_RX;
 
 			if (epause->tx_pause) {
 				tp->link_config.flowctrl |= FLOW_CTRL_TX;
-				newadv = ADVERTISED_Pause;
-			} else
-				newadv = ADVERTISED_Pause |
-					 ADVERTISED_Asym_Pause;
+			}
 		} else if (epause->tx_pause) {
 			tp->link_config.flowctrl |= FLOW_CTRL_TX;
-			newadv = ADVERTISED_Asym_Pause;
-		} else
-			newadv = 0;
+		}
 
 		if (epause->autoneg)
 			tg3_flag_set(tp, PAUSE_AUTONEG);
@@ -12524,33 +12519,19 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam
 			tg3_flag_clear(tp, PAUSE_AUTONEG);
 
 		if (tp->phy_flags & TG3_PHYFLG_IS_CONNECTED) {
-			u32 oldadv = phydev->advertising &
-				     (ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-			if (oldadv != newadv) {
-				phydev->advertising &=
-					~(ADVERTISED_Pause |
-					  ADVERTISED_Asym_Pause);
-				phydev->advertising |= newadv;
-				if (phydev->autoneg) {
-					/*
-					 * Always renegotiate the link to
-					 * inform our link partner of our
-					 * flow control settings, even if the
-					 * flow control is forced.  Let
-					 * tg3_adjust_link() do the final
-					 * flow control setup.
-					 */
-					return phy_start_aneg(phydev);
-				}
+			if (phydev->autoneg) {
+				/* phy_set_asym_pause() will
+				 * renegotiate the link to inform our
+				 * link partner of our flow control
+				 * settings, even if the flow control
+				 * is forced.  Let tg3_adjust_link()
+				 * do the final flow control setup.
+				 */
+				return 0;
 			}
 
 			if (!epause->autoneg)
 				tg3_setup_flow_control(tp, 0, 0);
-		} else {
-			tp->link_config.advertising &=
-					~(ADVERTISED_Pause |
-					  ADVERTISED_Asym_Pause);
-			tp->link_config.advertising |= newadv;
 		}
 	} else {
 		int irq_sync = 0;
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 3f319ee66ab4..d8ead7e4177e 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -1219,22 +1219,11 @@ static int ftgmac100_set_pauseparam(struct net_device *netdev,
 	priv->tx_pause = pause->tx_pause;
 	priv->rx_pause = pause->rx_pause;
 
-	if (phydev) {
-		phydev->advertising &= ~ADVERTISED_Pause;
-		phydev->advertising &= ~ADVERTISED_Asym_Pause;
+	if (phydev)
+		phy_set_asym_pause(phydev, pause->rx_pause, pause->tx_pause);
 
-		if (pause->rx_pause) {
-			phydev->advertising |= ADVERTISED_Pause;
-			phydev->advertising |= ADVERTISED_Asym_Pause;
-		}
-
-		if (pause->tx_pause)
-			phydev->advertising ^= ADVERTISED_Asym_Pause;
-	}
 	if (netif_running(netdev)) {
-		if (phydev && priv->aneg_pause)
-			phy_start_aneg(phydev);
-		else
+		if (!(phydev && priv->aneg_pause))
 			ftgmac100_config_pause(priv);
 	}
 
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
index 3184c8f7cdd0..1f8cdbc4378c 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
@@ -210,29 +210,8 @@ static int dpaa_set_pauseparam(struct net_device *net_dev,
 	/* Determine the sym/asym advertised PAUSE capabilities from the desired
 	 * rx/tx pause settings.
 	 */
-	newadv = 0;
-	if (epause->rx_pause)
-		newadv = ADVERTISED_Pause | ADVERTISED_Asym_Pause;
-	if (epause->tx_pause)
-		newadv ^= ADVERTISED_Asym_Pause;
 
-	oldadv = phydev->advertising &
-			(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-
-	/* If there are differences between the old and the new advertised
-	 * values, restart PHY autonegotiation and advertise the new values.
-	 */
-	if (oldadv != newadv) {
-		phydev->advertising &= ~(ADVERTISED_Pause
-				| ADVERTISED_Asym_Pause);
-		phydev->advertising |= newadv;
-		if (phydev->autoneg) {
-			err = phy_start_aneg(phydev);
-			if (err < 0)
-				netdev_err(net_dev, "phy_start_aneg() = %d\n",
-					   err);
-		}
-	}
+	phy_set_asym_pause(phydev, epause->rx_pause, epause->tx_pause);
 
 	fman_get_pause_cfg(mac_dev, &rx_pause, &tx_pause);
 	err = fman_set_mac_active_pause(mac_dev, rx_pause, tx_pause);
diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
index 395a5266ea30..3545e8f715f2 100644
--- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
+++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
@@ -503,7 +503,6 @@ static int gfar_spauseparam(struct net_device *dev,
 	struct gfar_private *priv = netdev_priv(dev);
 	struct phy_device *phydev = dev->phydev;
 	struct gfar __iomem *regs = priv->gfargrp[0].regs;
-	u32 oldadv, newadv;
 
 	if (!phydev)
 		return -ENODEV;
@@ -514,54 +513,36 @@ static int gfar_spauseparam(struct net_device *dev,
 		return -EINVAL;
 
 	priv->rx_pause_en = priv->tx_pause_en = 0;
+	phy_set_asym_pause(phydev, epause->rx_pause, epause->tx_pause);
 	if (epause->rx_pause) {
 		priv->rx_pause_en = 1;
 
 		if (epause->tx_pause) {
 			priv->tx_pause_en = 1;
-			/* FLOW_CTRL_RX & TX */
-			newadv = ADVERTISED_Pause;
-		} else  /* FLOW_CTLR_RX */
-			newadv = ADVERTISED_Pause | ADVERTISED_Asym_Pause;
+		}
 	} else if (epause->tx_pause) {
 		priv->tx_pause_en = 1;
-		/* FLOW_CTLR_TX */
-		newadv = ADVERTISED_Asym_Pause;
-	} else
-		newadv = 0;
+	}
 
 	if (epause->autoneg)
 		priv->pause_aneg_en = 1;
 	else
 		priv->pause_aneg_en = 0;
 
-	oldadv = phydev->advertising &
-		(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-	if (oldadv != newadv) {
-		phydev->advertising &=
-			~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-		phydev->advertising |= newadv;
-		if (phydev->autoneg)
-			/* inform link partner of our
-			 * new flow ctrl settings
-			 */
-			return phy_start_aneg(phydev);
-
-		if (!epause->autoneg) {
-			u32 tempval;
-			tempval = gfar_read(&regs->maccfg1);
-			tempval &= ~(MACCFG1_TX_FLOW | MACCFG1_RX_FLOW);
-
-			priv->tx_actual_en = 0;
-			if (priv->tx_pause_en) {
-				priv->tx_actual_en = 1;
-				tempval |= MACCFG1_TX_FLOW;
-			}
+	if (!epause->autoneg) {
+		u32 tempval = gfar_read(&regs->maccfg1);
 
-			if (priv->rx_pause_en)
-				tempval |= MACCFG1_RX_FLOW;
-			gfar_write(&regs->maccfg1, tempval);
+		tempval &= ~(MACCFG1_TX_FLOW | MACCFG1_RX_FLOW);
+
+		priv->tx_actual_en = 0;
+		if (priv->tx_pause_en) {
+			priv->tx_actual_en = 1;
+			tempval |= MACCFG1_TX_FLOW;
 		}
+
+		if (priv->rx_pause_en)
+			tempval |= MACCFG1_RX_FLOW;
+		gfar_write(&regs->maccfg1, tempval);
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c56db06b63e0..cf18608669f5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -5228,13 +5228,7 @@ static void hclge_set_flowctrl_adv(struct hclge_dev *hdev, u32 rx_en, u32 tx_en)
 	if (!phydev)
 		return;
 
-	phydev->advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-
-	if (rx_en)
-		phydev->advertising |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
-
-	if (tx_en)
-		phydev->advertising ^= ADVERTISED_Asym_Pause;
+	phy_set_asym_pause(phydev, rx_en, tx_en);
 }
 
 static int hclge_cfg_pauseparam(struct hclge_dev *hdev, u32 rx_en, u32 tx_en)
diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index a50720ec109c..61e6abb966ac 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -461,16 +461,7 @@ static int ave_ethtool_set_pauseparam(struct net_device *ndev,
 	priv->pause_rx   = pause->rx_pause;
 	priv->pause_tx   = pause->tx_pause;
 
-	phydev->advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-	if (pause->rx_pause)
-		phydev->advertising |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
-	if (pause->tx_pause)
-		phydev->advertising ^= ADVERTISED_Asym_Pause;
-
-	if (pause->autoneg) {
-		if (netif_running(ndev))
-			phy_start_aneg(phydev);
-	}
+	phy_set_asym_pause(phydev, pause->rx_pause, pause->tx_pause);
 
 	return 0;
 }
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index e657d5ae2ab8..5732d89c8e37 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1810,6 +1810,36 @@ void phy_support_asym_pause(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_support_asym_pause);
 
+/**
+ * phy_set_asym_pause - Configure Pause and Asym Pause
+ * @phydev: target phy_device struct
+ * @rx: Receiver Pause is supported
+ * @tx: Transmit Pause is supported
+ *
+ * Description: Configure advertised Pause support depending on if
+ * transmit and receiver pause is supported. If there has been a
+ * change in adverting, trigger a new autoneg. Generally called from
+ * the set_pauseparam .ndo.
+ */
+void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx)
+{
+	u16 oldadv = phydev->advertising;
+	u16 newadv = oldadv &= ~(SUPPORTED_Pause | SUPPORTED_Asym_Pause);
+
+	if (rx)
+		newadv |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+	if (tx)
+		newadv ^= SUPPORTED_Asym_Pause;
+
+	if (oldadv != newadv) {
+		phydev->advertising = newadv;
+
+		if (phydev->autoneg)
+			phy_start_aneg(phydev);
+	}
+}
+EXPORT_SYMBOL(phy_set_asym_pause);
+
 static void of_set_phy_supported(struct phy_device *phydev)
 {
 	struct device_node *node = phydev->mdio.dev.of_node;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index bc5d6c3f1388..e4062ba7472f 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1052,6 +1052,7 @@ int phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
 void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
 void phy_support_sym_pause(struct phy_device *phydev);
 void phy_support_asym_pause(struct phy_device *phydev);
+void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx);
 
 int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
 		       int (*run)(struct phy_device *));
-- 
cgit v1.2.3


From 0c122405d4c3ec638ba00865c872ec5a3ed1a6c0 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 12 Sep 2018 01:53:18 +0200
Subject: net: ethernet: Add helper for set_pauseparam for Pause

ethtool can be used to enable/disable pause. Add a helper to configure
the PHY when Pause is supported.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bcm63xx_enet.c |  7 ++-----
 drivers/net/ethernet/freescale/fec_main.c    |  9 ++-------
 drivers/net/phy/phy_device.c                 | 23 +++++++++++++++++++++++
 include/linux/phy.h                          |  2 ++
 4 files changed, 29 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 9f25667c38e6..02e7dfc1a2ef 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -892,11 +892,8 @@ static int bcm_enet_open(struct net_device *dev)
 		/* mask with MAC supported features */
 		phy_support_sym_pause(phydev);
 		phy_set_max_speed(phydev, SPEED_100);
-
-		if (priv->pause_auto && priv->pause_rx && priv->pause_tx)
-			phydev->advertising |= SUPPORTED_Pause;
-		else
-			phydev->advertising &= ~SUPPORTED_Pause;
+		phy_set_sym_pause(phydev, priv->pause_rx, priv->pause_rx,
+				  priv->pause_auto);
 
 		phy_attached_info(phydev);
 
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 05ce0903391a..2e0bb90131b6 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2229,13 +2229,8 @@ static int fec_enet_set_pauseparam(struct net_device *ndev,
 	fep->pause_flag |= pause->rx_pause ? FEC_PAUSE_FLAG_ENABLE : 0;
 	fep->pause_flag |= pause->autoneg ? FEC_PAUSE_FLAG_AUTONEG : 0;
 
-	if (pause->rx_pause || pause->autoneg) {
-		ndev->phydev->supported |= ADVERTISED_Pause;
-		ndev->phydev->advertising |= ADVERTISED_Pause;
-	} else {
-		ndev->phydev->supported &= ~ADVERTISED_Pause;
-		ndev->phydev->advertising &= ~ADVERTISED_Pause;
-	}
+	phy_set_sym_pause(ndev->phydev, pause->rx_pause, pause->tx_pause,
+			  pause->autoneg);
 
 	if (pause->autoneg) {
 		if (netif_running(ndev))
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 5732d89c8e37..de95f1e072e9 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1810,6 +1810,29 @@ void phy_support_asym_pause(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(phy_support_asym_pause);
 
+/**
+ * phy_set_sym_pause - Configure symmetric Pause
+ * @phydev: target phy_device struct
+ * @rx: Receiver Pause is supported
+ * @tx: Transmit Pause is supported
+ * @autoneg: Auto neg should be used
+ *
+ * Description: Configure advertised Pause support depending on if
+ * receiver pause and pause auto neg is supported. Generally called
+ * from the set_pauseparam .ndo.
+ */
+void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx,
+		       bool autoneg)
+{
+	phydev->supported &= ~SUPPORTED_Pause;
+
+	if (rx && tx && autoneg)
+		phydev->supported |= SUPPORTED_Pause;
+
+	phydev->advertising = phydev->supported;
+}
+EXPORT_SYMBOL(phy_set_sym_pause);
+
 /**
  * phy_set_asym_pause - Configure Pause and Asym Pause
  * @phydev: target phy_device struct
diff --git a/include/linux/phy.h b/include/linux/phy.h
index e4062ba7472f..8521391ebb20 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1052,6 +1052,8 @@ int phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
 void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
 void phy_support_sym_pause(struct phy_device *phydev);
 void phy_support_asym_pause(struct phy_device *phydev);
+void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx,
+		       bool autoneg);
 void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx);
 
 int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
-- 
cgit v1.2.3


From 22b7d29926b577ff4f480611380d03268545b787 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 12 Sep 2018 01:53:19 +0200
Subject: net: ethernet: Add helper to determine if pause configuration is
 supported

Rather than have MAC drivers open code the test, add a helper in
phylib. This will help when we change the type of phydev->supported.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c |  4 +---
 drivers/net/ethernet/broadcom/tg3.c                 |  4 +---
 drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c  |  4 +---
 drivers/net/ethernet/freescale/gianfar_ethtool.c    |  4 +---
 drivers/net/phy/phy_device.c                        | 20 ++++++++++++++++++++
 include/linux/phy.h                                 |  2 ++
 6 files changed, 26 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
index dfe03afd00b0..78dd09b5beeb 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
@@ -312,9 +312,7 @@ static int xgene_set_pauseparam(struct net_device *ndev,
 		if (!phydev)
 			return -EINVAL;
 
-		if (!(phydev->supported & SUPPORTED_Pause) ||
-		    (!(phydev->supported & SUPPORTED_Asym_Pause) &&
-		     pp->rx_pause != pp->tx_pause))
+		if (!phy_validate_pause(phydev, pp))
 			return -EINVAL;
 
 		pdata->pause_autoneg = pp->autoneg;
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index b2a3d008e1df..fb0e458e25b7 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -12496,9 +12496,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam
 
 		phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr);
 
-		if (!(phydev->supported & SUPPORTED_Pause) ||
-		    (!(phydev->supported & SUPPORTED_Asym_Pause) &&
-		     (epause->rx_pause != epause->tx_pause)))
+		if (!phy_validate_pause(phydev, epause))
 			return -EINVAL;
 
 		tp->link_config.flowctrl = 0;
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
index 1f8cdbc4378c..5d0fdf667b82 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
@@ -194,9 +194,7 @@ static int dpaa_set_pauseparam(struct net_device *net_dev,
 		return -ENODEV;
 	}
 
-	if (!(phydev->supported & SUPPORTED_Pause) ||
-	    (!(phydev->supported & SUPPORTED_Asym_Pause) &&
-	    (epause->rx_pause != epause->tx_pause)))
+	if (!phy_validate_pause(phydev, epause))
 		return -EINVAL;
 
 	/* The MAC should know how to handle PAUSE frame autonegotiation before
diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
index 3545e8f715f2..d3662965f59d 100644
--- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
+++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
@@ -507,9 +507,7 @@ static int gfar_spauseparam(struct net_device *dev,
 	if (!phydev)
 		return -ENODEV;
 
-	if (!(phydev->supported & SUPPORTED_Pause) ||
-	    (!(phydev->supported & SUPPORTED_Asym_Pause) &&
-	     (epause->rx_pause != epause->tx_pause)))
+	if (!phy_validate_pause(phydev, epause))
 		return -EINVAL;
 
 	priv->rx_pause_en = priv->tx_pause_en = 0;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index de95f1e072e9..af64a9320fb0 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1863,6 +1863,26 @@ void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx)
 }
 EXPORT_SYMBOL(phy_set_asym_pause);
 
+/**
+ * phy_validate_pause - Test if the PHY/MAC support the pause configuration
+ * @phydev: phy_device struct
+ * @pp: requested pause configuration
+ *
+ * Description: Test if the PHY/MAC combination supports the Pause
+ * configuration the user is requesting. Returns True if it is
+ * supported, false otherwise.
+ */
+bool phy_validate_pause(struct phy_device *phydev,
+			struct ethtool_pauseparam *pp)
+{
+	if (!(phydev->supported & SUPPORTED_Pause) ||
+	    (!(phydev->supported & SUPPORTED_Asym_Pause) &&
+	     pp->rx_pause != pp->tx_pause))
+		return false;
+	return true;
+}
+EXPORT_SYMBOL(phy_validate_pause);
+
 static void of_set_phy_supported(struct phy_device *phydev)
 {
 	struct device_node *node = phydev->mdio.dev.of_node;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 8521391ebb20..192a1fa0c73b 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1055,6 +1055,8 @@ void phy_support_asym_pause(struct phy_device *phydev);
 void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx,
 		       bool autoneg);
 void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx);
+bool phy_validate_pause(struct phy_device *phydev,
+			struct ethtool_pauseparam *pp);
 
 int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask,
 		       int (*run)(struct phy_device *));
-- 
cgit v1.2.3


From 0789724f86a59fa7078d67dfeb1ee4a15ae3c693 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Thu, 26 Jul 2018 15:59:16 +0200
Subject: firmware: meson_sm: Add serial number sysfs entry

The Amlogic Meson SoC Secure Monitor implements a call to retrieve an unique
SoC ID starting from the GX Family and all new families.

The serial number is simply exposed as a sysfs entry under the firmware
sysfs directory.

Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
---
 drivers/firmware/meson/meson_sm.c       | 56 +++++++++++++++++++++++++++++++++
 include/linux/firmware/meson/meson_sm.h |  1 +
 2 files changed, 57 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/meson/meson_sm.c b/drivers/firmware/meson/meson_sm.c
index 0ec2ca87318c..29fbc818a573 100644
--- a/drivers/firmware/meson/meson_sm.c
+++ b/drivers/firmware/meson/meson_sm.c
@@ -24,6 +24,7 @@
 #include <linux/printk.h>
 #include <linux/types.h>
 #include <linux/sizes.h>
+ #include <linux/slab.h>
 
 #include <linux/firmware/meson/meson_sm.h>
 
@@ -48,6 +49,7 @@ struct meson_sm_chip gxbb_chip = {
 		CMD(SM_EFUSE_READ,	0x82000030),
 		CMD(SM_EFUSE_WRITE,	0x82000031),
 		CMD(SM_EFUSE_USER_MAX,	0x82000033),
+		CMD(SM_GET_CHIP_ID,	0x82000044),
 		{ /* sentinel */ },
 	},
 };
@@ -214,6 +216,57 @@ int meson_sm_call_write(void *buffer, unsigned int size, unsigned int cmd_index,
 }
 EXPORT_SYMBOL(meson_sm_call_write);
 
+#define SM_CHIP_ID_LENGTH	119
+#define SM_CHIP_ID_OFFSET	4
+#define SM_CHIP_ID_SIZE		12
+
+static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	uint8_t *id_buf;
+	int ret;
+
+	id_buf = kmalloc(SM_CHIP_ID_LENGTH, GFP_KERNEL);
+	if (!id_buf)
+		return -ENOMEM;
+
+	ret = meson_sm_call_read(id_buf, SM_CHIP_ID_LENGTH, SM_GET_CHIP_ID,
+				 0, 0, 0, 0, 0);
+	if (ret < 0) {
+		kfree(id_buf);
+		return ret;
+	}
+
+	ret = sprintf(buf, "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n",
+			id_buf[SM_CHIP_ID_OFFSET + 0],
+			id_buf[SM_CHIP_ID_OFFSET + 1],
+			id_buf[SM_CHIP_ID_OFFSET + 2],
+			id_buf[SM_CHIP_ID_OFFSET + 3],
+			id_buf[SM_CHIP_ID_OFFSET + 4],
+			id_buf[SM_CHIP_ID_OFFSET + 5],
+			id_buf[SM_CHIP_ID_OFFSET + 6],
+			id_buf[SM_CHIP_ID_OFFSET + 7],
+			id_buf[SM_CHIP_ID_OFFSET + 8],
+			id_buf[SM_CHIP_ID_OFFSET + 9],
+			id_buf[SM_CHIP_ID_OFFSET + 10],
+			id_buf[SM_CHIP_ID_OFFSET + 11]);
+
+	kfree(id_buf);
+
+	return ret;
+}
+
+static DEVICE_ATTR_RO(serial);
+
+static struct attribute *meson_sm_sysfs_attributes[] = {
+	&dev_attr_serial.attr,
+	NULL,
+};
+
+static const struct attribute_group meson_sm_sysfs_attr_group = {
+	.attrs = meson_sm_sysfs_attributes,
+};
+
 static const struct of_device_id meson_sm_ids[] = {
 	{ .compatible = "amlogic,meson-gxbb-sm", .data = &gxbb_chip },
 	{ /* sentinel */ },
@@ -242,6 +295,9 @@ static int __init meson_sm_probe(struct platform_device *pdev)
 	fw.chip = chip;
 	pr_info("secure-monitor enabled\n");
 
+	if (sysfs_create_group(&pdev->dev.kobj, &meson_sm_sysfs_attr_group))
+		goto out_in_base;
+
 	return 0;
 
 out_in_base:
diff --git a/include/linux/firmware/meson/meson_sm.h b/include/linux/firmware/meson/meson_sm.h
index 37a5eaea69dd..f98c20dd266e 100644
--- a/include/linux/firmware/meson/meson_sm.h
+++ b/include/linux/firmware/meson/meson_sm.h
@@ -17,6 +17,7 @@ enum {
 	SM_EFUSE_READ,
 	SM_EFUSE_WRITE,
 	SM_EFUSE_USER_MAX,
+	SM_GET_CHIP_ID,
 };
 
 struct meson_sm_firmware;
-- 
cgit v1.2.3


From d4983983d98710e4927fdb8de8e987c303b3fba3 Mon Sep 17 00:00:00 2001
From: Maxime Jourdan <mjourdan@baylibre.com>
Date: Thu, 23 Aug 2018 13:49:53 +0200
Subject: soc: amlogic: add meson-canvas driver

Amlogic SoCs have a repository of 256 canvas which they use to
describe pixel buffers.

They contain metadata like width, height, block mode, endianness [..]

Many IPs within those SoCs like vdec/vpu rely on those canvas to read/write
pixels.

Reviewed-by: Jerome Brunet <jbrunet@baylibre.com>
Tested-by: Neil Armstrong <narmstrong@baylibre.com>
Signed-off-by: Maxime Jourdan <mjourdan@baylibre.com>
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
---
 drivers/soc/amlogic/Kconfig              |   7 ++
 drivers/soc/amlogic/Makefile             |   1 +
 drivers/soc/amlogic/meson-canvas.c       | 185 +++++++++++++++++++++++++++++++
 include/linux/soc/amlogic/meson-canvas.h |  65 +++++++++++
 4 files changed, 258 insertions(+)
 create mode 100644 drivers/soc/amlogic/meson-canvas.c
 create mode 100644 include/linux/soc/amlogic/meson-canvas.h

(limited to 'include/linux')

diff --git a/drivers/soc/amlogic/Kconfig b/drivers/soc/amlogic/Kconfig
index b04f6e4aedbc..2f282b472912 100644
--- a/drivers/soc/amlogic/Kconfig
+++ b/drivers/soc/amlogic/Kconfig
@@ -1,5 +1,12 @@
 menu "Amlogic SoC drivers"
 
+config MESON_CANVAS
+	tristate "Amlogic Meson Canvas driver"
+	depends on ARCH_MESON || COMPILE_TEST
+	default n
+	help
+	  Say yes to support the canvas IP for Amlogic SoCs.
+
 config MESON_GX_SOCINFO
 	bool "Amlogic Meson GX SoC Information driver"
 	depends on ARCH_MESON || COMPILE_TEST
diff --git a/drivers/soc/amlogic/Makefile b/drivers/soc/amlogic/Makefile
index 8fa321893928..0ab16d35ac36 100644
--- a/drivers/soc/amlogic/Makefile
+++ b/drivers/soc/amlogic/Makefile
@@ -1,3 +1,4 @@
+obj-$(CONFIG_MESON_CANVAS) += meson-canvas.o
 obj-$(CONFIG_MESON_GX_SOCINFO) += meson-gx-socinfo.o
 obj-$(CONFIG_MESON_GX_PM_DOMAINS) += meson-gx-pwrc-vpu.o
 obj-$(CONFIG_MESON_MX_SOCINFO) += meson-mx-socinfo.o
diff --git a/drivers/soc/amlogic/meson-canvas.c b/drivers/soc/amlogic/meson-canvas.c
new file mode 100644
index 000000000000..fce33ca76bb6
--- /dev/null
+++ b/drivers/soc/amlogic/meson-canvas.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2018 BayLibre, SAS
+ * Copyright (C) 2015 Amlogic, Inc. All rights reserved.
+ * Copyright (C) 2014 Endless Mobile
+ */
+
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/soc/amlogic/meson-canvas.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/io.h>
+
+#define NUM_CANVAS 256
+
+/* DMC Registers */
+#define DMC_CAV_LUT_DATAL	0x00
+	#define CANVAS_WIDTH_LBIT	29
+	#define CANVAS_WIDTH_LWID	3
+#define DMC_CAV_LUT_DATAH	0x04
+	#define CANVAS_WIDTH_HBIT	0
+	#define CANVAS_HEIGHT_BIT	9
+	#define CANVAS_WRAP_BIT		22
+	#define CANVAS_BLKMODE_BIT	24
+	#define CANVAS_ENDIAN_BIT	26
+#define DMC_CAV_LUT_ADDR	0x08
+	#define CANVAS_LUT_WR_EN	BIT(9)
+	#define CANVAS_LUT_RD_EN	BIT(8)
+
+struct meson_canvas {
+	struct device *dev;
+	void __iomem *reg_base;
+	spinlock_t lock; /* canvas device lock */
+	u8 used[NUM_CANVAS];
+};
+
+static void canvas_write(struct meson_canvas *canvas, u32 reg, u32 val)
+{
+	writel_relaxed(val, canvas->reg_base + reg);
+}
+
+static u32 canvas_read(struct meson_canvas *canvas, u32 reg)
+{
+	return readl_relaxed(canvas->reg_base + reg);
+}
+
+struct meson_canvas *meson_canvas_get(struct device *dev)
+{
+	struct device_node *canvas_node;
+	struct platform_device *canvas_pdev;
+
+	canvas_node = of_parse_phandle(dev->of_node, "amlogic,canvas", 0);
+	if (!canvas_node)
+		return ERR_PTR(-ENODEV);
+
+	canvas_pdev = of_find_device_by_node(canvas_node);
+	if (!canvas_pdev)
+		return ERR_PTR(-EPROBE_DEFER);
+
+	return dev_get_drvdata(&canvas_pdev->dev);
+}
+EXPORT_SYMBOL_GPL(meson_canvas_get);
+
+int meson_canvas_config(struct meson_canvas *canvas, u8 canvas_index,
+			u32 addr, u32 stride, u32 height,
+			unsigned int wrap,
+			unsigned int blkmode,
+			unsigned int endian)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&canvas->lock, flags);
+	if (!canvas->used[canvas_index]) {
+		dev_err(canvas->dev,
+			"Trying to setup non allocated canvas %u\n",
+			canvas_index);
+		spin_unlock_irqrestore(&canvas->lock, flags);
+		return -EINVAL;
+	}
+
+	canvas_write(canvas, DMC_CAV_LUT_DATAL,
+		     ((addr + 7) >> 3) |
+		     (((stride + 7) >> 3) << CANVAS_WIDTH_LBIT));
+
+	canvas_write(canvas, DMC_CAV_LUT_DATAH,
+		     ((((stride + 7) >> 3) >> CANVAS_WIDTH_LWID) <<
+						CANVAS_WIDTH_HBIT) |
+		     (height << CANVAS_HEIGHT_BIT) |
+		     (wrap << CANVAS_WRAP_BIT) |
+		     (blkmode << CANVAS_BLKMODE_BIT) |
+		     (endian << CANVAS_ENDIAN_BIT));
+
+	canvas_write(canvas, DMC_CAV_LUT_ADDR,
+		     CANVAS_LUT_WR_EN | canvas_index);
+
+	/* Force a read-back to make sure everything is flushed. */
+	canvas_read(canvas, DMC_CAV_LUT_DATAH);
+	spin_unlock_irqrestore(&canvas->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(meson_canvas_config);
+
+int meson_canvas_alloc(struct meson_canvas *canvas, u8 *canvas_index)
+{
+	int i;
+	unsigned long flags;
+
+	spin_lock_irqsave(&canvas->lock, flags);
+	for (i = 0; i < NUM_CANVAS; ++i) {
+		if (!canvas->used[i]) {
+			canvas->used[i] = 1;
+			spin_unlock_irqrestore(&canvas->lock, flags);
+			*canvas_index = i;
+			return 0;
+		}
+	}
+	spin_unlock_irqrestore(&canvas->lock, flags);
+
+	dev_err(canvas->dev, "No more canvas available\n");
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(meson_canvas_alloc);
+
+int meson_canvas_free(struct meson_canvas *canvas, u8 canvas_index)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&canvas->lock, flags);
+	if (!canvas->used[canvas_index]) {
+		dev_err(canvas->dev,
+			"Trying to free unused canvas %u\n", canvas_index);
+		spin_unlock_irqrestore(&canvas->lock, flags);
+		return -EINVAL;
+	}
+	canvas->used[canvas_index] = 0;
+	spin_unlock_irqrestore(&canvas->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(meson_canvas_free);
+
+static int meson_canvas_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	struct meson_canvas *canvas;
+	struct device *dev = &pdev->dev;
+
+	canvas = devm_kzalloc(dev, sizeof(*canvas), GFP_KERNEL);
+	if (!canvas)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	canvas->reg_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(canvas->reg_base))
+		return PTR_ERR(canvas->reg_base);
+
+	canvas->dev = dev;
+	spin_lock_init(&canvas->lock);
+	dev_set_drvdata(dev, canvas);
+
+	return 0;
+}
+
+static const struct of_device_id canvas_dt_match[] = {
+	{ .compatible = "amlogic,canvas" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, canvas_dt_match);
+
+static struct platform_driver meson_canvas_driver = {
+	.probe = meson_canvas_probe,
+	.driver = {
+		.name = "amlogic-canvas",
+		.of_match_table = canvas_dt_match,
+	},
+};
+module_platform_driver(meson_canvas_driver);
+
+MODULE_DESCRIPTION("Amlogic Canvas driver");
+MODULE_AUTHOR("Maxime Jourdan <mjourdan@baylibre.com>");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/soc/amlogic/meson-canvas.h b/include/linux/soc/amlogic/meson-canvas.h
new file mode 100644
index 000000000000..b4dde2fbeb3f
--- /dev/null
+++ b/include/linux/soc/amlogic/meson-canvas.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2018 BayLibre, SAS
+ */
+#ifndef __SOC_MESON_CANVAS_H
+#define __SOC_MESON_CANVAS_H
+
+#include <linux/kernel.h>
+
+#define MESON_CANVAS_WRAP_NONE	0x00
+#define MESON_CANVAS_WRAP_X	0x01
+#define MESON_CANVAS_WRAP_Y	0x02
+
+#define MESON_CANVAS_BLKMODE_LINEAR	0x00
+#define MESON_CANVAS_BLKMODE_32x32	0x01
+#define MESON_CANVAS_BLKMODE_64x64	0x02
+
+#define MESON_CANVAS_ENDIAN_SWAP16	0x1
+#define MESON_CANVAS_ENDIAN_SWAP32	0x3
+#define MESON_CANVAS_ENDIAN_SWAP64	0x7
+#define MESON_CANVAS_ENDIAN_SWAP128	0xf
+
+struct meson_canvas;
+
+/**
+ * meson_canvas_get() - get a canvas provider instance
+ *
+ * @dev: consumer device pointer
+ */
+struct meson_canvas *meson_canvas_get(struct device *dev);
+
+/**
+ * meson_canvas_alloc() - take ownership of a canvas
+ *
+ * @canvas: canvas provider instance retrieved from meson_canvas_get()
+ * @canvas_index: will be filled with the canvas ID
+ */
+int meson_canvas_alloc(struct meson_canvas *canvas, u8 *canvas_index);
+
+/**
+ * meson_canvas_free() - remove ownership from a canvas
+ *
+ * @canvas: canvas provider instance retrieved from meson_canvas_get()
+ * @canvas_index: canvas ID that was obtained via meson_canvas_alloc()
+ */
+int meson_canvas_free(struct meson_canvas *canvas, u8 canvas_index);
+
+/**
+ * meson_canvas_config() - configure a canvas
+ *
+ * @canvas: canvas provider instance retrieved from meson_canvas_get()
+ * @canvas_index: canvas ID that was obtained via meson_canvas_alloc()
+ * @addr: physical address to the pixel buffer
+ * @stride: width of the buffer
+ * @height: height of the buffer
+ * @wrap: undocumented
+ * @blkmode: block mode (linear, 32x32, 64x64)
+ * @endian: byte swapping (swap16, swap32, swap64, swap128)
+ */
+int meson_canvas_config(struct meson_canvas *canvas, u8 canvas_index,
+			u32 addr, u32 stride, u32 height,
+			unsigned int wrap, unsigned int blkmode,
+			unsigned int endian);
+
+#endif
-- 
cgit v1.2.3


From b9762bebc6332b40c33e03dea03e30fa12d9e3ed Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Date: Wed, 5 Sep 2018 23:50:05 +0200
Subject: gpiolib: Pass bitmaps, not integer arrays, to get/set array
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Most users of get/set array functions iterate consecutive bits of data,
usually a single integer, while processing array of results obtained
from, or building an array of values to be passed to those functions.
Save time wasted on those iterations by changing the functions' API to
accept bitmaps.

All current users are updated as well.

More benefits from the change are expected as soon as planned support
for accepting/passing those bitmaps directly from/to respective GPIO
chip callbacks if applicable is implemented.

Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
Cc: Sebastien Bourdelin <sebastien.bourdelin@savoirfairelinux.com>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Peter Korsgaard <peter.korsgaard@barco.com>
Cc: Peter Rosin <peda@axentia.se>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Rojhalat Ibrahim <imr@rtschenk.de>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Kishon Vijay Abraham I <kishon@ti.com>
Cc: Tony Lindgren <tony@atomide.com>
Cc: Lars-Peter Clausen <lars@metafoo.de>
Cc: Michael Hennerich <Michael.Hennerich@analog.com>
Cc: Jonathan Cameron <jic23@kernel.org>
Cc: Hartmut Knaack <knaack.h@gmx.de>
Cc: Peter Meerwald-Stadler <pmeerw@pmeerw.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Yegor Yefremov <yegorslists@googlemail.com>
Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/driver-api/gpio/consumer.rst  | 22 ++++----
 drivers/auxdisplay/hd44780.c                | 59 +++++++--------------
 drivers/bus/ts-nbus.c                       | 15 ++----
 drivers/gpio/gpio-max3191x.c                | 10 ++--
 drivers/gpio/gpiolib.c                      | 82 +++++++++++++++--------------
 drivers/gpio/gpiolib.h                      |  4 +-
 drivers/i2c/muxes/i2c-mux-gpio.c            | 13 ++---
 drivers/mmc/core/pwrseq_simple.c            | 13 ++---
 drivers/mux/gpio.c                          | 13 ++---
 drivers/net/phy/mdio-mux-gpio.c             | 11 ++--
 drivers/pcmcia/soc_common.c                 |  7 +--
 drivers/phy/motorola/phy-mapphone-mdm6600.c | 15 +++---
 drivers/staging/iio/adc/ad7606.c            |  9 ++--
 drivers/tty/serial/serial_mctrl_gpio.c      |  7 +--
 include/linux/gpio/consumer.h               | 34 ++++++------
 15 files changed, 137 insertions(+), 177 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/gpio/consumer.rst b/Documentation/driver-api/gpio/consumer.rst
index aa03f389d41d..ed68042ddccf 100644
--- a/Documentation/driver-api/gpio/consumer.rst
+++ b/Documentation/driver-api/gpio/consumer.rst
@@ -323,29 +323,29 @@ The following functions get or set the values of an array of GPIOs::
 
 	int gpiod_get_array_value(unsigned int array_size,
 				  struct gpio_desc **desc_array,
-				  int *value_array);
+				  unsigned long *value_bitmap);
 	int gpiod_get_raw_array_value(unsigned int array_size,
 				      struct gpio_desc **desc_array,
-				      int *value_array);
+				      unsigned long *value_bitmap);
 	int gpiod_get_array_value_cansleep(unsigned int array_size,
 					   struct gpio_desc **desc_array,
-					   int *value_array);
+					   unsigned long *value_bitmap);
 	int gpiod_get_raw_array_value_cansleep(unsigned int array_size,
 					   struct gpio_desc **desc_array,
-					   int *value_array);
+					   unsigned long *value_bitmap);
 
 	void gpiod_set_array_value(unsigned int array_size,
 				   struct gpio_desc **desc_array,
-				   int *value_array)
+				   unsigned long *value_bitmap)
 	void gpiod_set_raw_array_value(unsigned int array_size,
 				       struct gpio_desc **desc_array,
-				       int *value_array)
+				       unsigned long *value_bitmap)
 	void gpiod_set_array_value_cansleep(unsigned int array_size,
 					    struct gpio_desc **desc_array,
-					    int *value_array)
+					    unsigned long *value_bitmap)
 	void gpiod_set_raw_array_value_cansleep(unsigned int array_size,
 						struct gpio_desc **desc_array,
-						int *value_array)
+						unsigned long *value_bitmap)
 
 The array can be an arbitrary set of GPIOs. The functions will try to access
 GPIOs belonging to the same bank or chip simultaneously if supported by the
@@ -356,8 +356,8 @@ accessed sequentially.
 The functions take three arguments:
 	* array_size	- the number of array elements
 	* desc_array	- an array of GPIO descriptors
-	* value_array	- an array to store the GPIOs' values (get) or
-			  an array of values to assign to the GPIOs (set)
+	* value_bitmap	- a bitmap to store the GPIOs' values (get) or
+			  a bitmap of values to assign to the GPIOs (set)
 
 The descriptor array can be obtained using the gpiod_get_array() function
 or one of its variants. If the group of descriptors returned by that function
@@ -366,7 +366,7 @@ the struct gpio_descs returned by gpiod_get_array()::
 
 	struct gpio_descs *my_gpio_descs = gpiod_get_array(...);
 	gpiod_set_array_value(my_gpio_descs->ndescs, my_gpio_descs->desc,
-			      my_gpio_values);
+			      my_gpio_value_bitmap);
 
 It is also possible to access a completely arbitrary array of descriptors. The
 descriptors may be obtained using any combination of gpiod_get() and
diff --git a/drivers/auxdisplay/hd44780.c b/drivers/auxdisplay/hd44780.c
index f1a42f0f1ded..e9a893384362 100644
--- a/drivers/auxdisplay/hd44780.c
+++ b/drivers/auxdisplay/hd44780.c
@@ -62,17 +62,12 @@ static void hd44780_strobe_gpio(struct hd44780 *hd)
 /* write to an LCD panel register in 8 bit GPIO mode */
 static void hd44780_write_gpio8(struct hd44780 *hd, u8 val, unsigned int rs)
 {
-	int values[10];	/* for DATA[0-7], RS, RW */
-	unsigned int i, n;
-
-	for (i = 0; i < 8; i++)
-		values[PIN_DATA0 + i] = !!(val & BIT(i));
-	values[PIN_CTRL_RS] = rs;
-	n = 9;
-	if (hd->pins[PIN_CTRL_RW]) {
-		values[PIN_CTRL_RW] = 0;
-		n++;
-	}
+	DECLARE_BITMAP(values, 10); /* for DATA[0-7], RS, RW */
+	unsigned int n;
+
+	values[0] = val;
+	__assign_bit(8, values, rs);
+	n = hd->pins[PIN_CTRL_RW] ? 10 : 9;
 
 	/* Present the data to the port */
 	gpiod_set_array_value_cansleep(n, &hd->pins[PIN_DATA0], values);
@@ -83,32 +78,25 @@ static void hd44780_write_gpio8(struct hd44780 *hd, u8 val, unsigned int rs)
 /* write to an LCD panel register in 4 bit GPIO mode */
 static void hd44780_write_gpio4(struct hd44780 *hd, u8 val, unsigned int rs)
 {
-	int values[10];	/* for DATA[0-7], RS, RW, but DATA[0-3] is unused */
-	unsigned int i, n;
+	DECLARE_BITMAP(values, 6); /* for DATA[4-7], RS, RW */
+	unsigned int n;
 
 	/* High nibble + RS, RW */
-	for (i = 4; i < 8; i++)
-		values[PIN_DATA0 + i] = !!(val & BIT(i));
-	values[PIN_CTRL_RS] = rs;
-	n = 5;
-	if (hd->pins[PIN_CTRL_RW]) {
-		values[PIN_CTRL_RW] = 0;
-		n++;
-	}
+	values[0] = val >> 4;
+	__assign_bit(4, values, rs);
+	n = hd->pins[PIN_CTRL_RW] ? 6 : 5;
 
 	/* Present the data to the port */
-	gpiod_set_array_value_cansleep(n, &hd->pins[PIN_DATA4],
-				       &values[PIN_DATA4]);
+	gpiod_set_array_value_cansleep(n, &hd->pins[PIN_DATA4], values);
 
 	hd44780_strobe_gpio(hd);
 
 	/* Low nibble */
-	for (i = 0; i < 4; i++)
-		values[PIN_DATA4 + i] = !!(val & BIT(i));
+	values[0] &= ~0x0fUL;
+	values[0] |= val & 0x0f;
 
 	/* Present the data to the port */
-	gpiod_set_array_value_cansleep(n, &hd->pins[PIN_DATA4],
-				       &values[PIN_DATA4]);
+	gpiod_set_array_value_cansleep(n, &hd->pins[PIN_DATA4], values);
 
 	hd44780_strobe_gpio(hd);
 }
@@ -155,23 +143,16 @@ static void hd44780_write_cmd_gpio4(struct charlcd *lcd, int cmd)
 /* Send 4-bits of a command to the LCD panel in raw 4 bit GPIO mode */
 static void hd44780_write_cmd_raw_gpio4(struct charlcd *lcd, int cmd)
 {
-	int values[10];	/* for DATA[0-7], RS, RW, but DATA[0-3] is unused */
+	DECLARE_BITMAP(values, 6); /* for DATA[4-7], RS, RW */
 	struct hd44780 *hd = lcd->drvdata;
-	unsigned int i, n;
+	unsigned int n;
 
 	/* Command nibble + RS, RW */
-	for (i = 0; i < 4; i++)
-		values[PIN_DATA4 + i] = !!(cmd & BIT(i));
-	values[PIN_CTRL_RS] = 0;
-	n = 5;
-	if (hd->pins[PIN_CTRL_RW]) {
-		values[PIN_CTRL_RW] = 0;
-		n++;
-	}
+	values[0] = cmd & 0x0f;
+	n = hd->pins[PIN_CTRL_RW] ? 6 : 5;
 
 	/* Present the data to the port */
-	gpiod_set_array_value_cansleep(n, &hd->pins[PIN_DATA4],
-				       &values[PIN_DATA4]);
+	gpiod_set_array_value_cansleep(n, &hd->pins[PIN_DATA4], values);
 
 	hd44780_strobe_gpio(hd);
 }
diff --git a/drivers/bus/ts-nbus.c b/drivers/bus/ts-nbus.c
index 073fd9011154..8dde7c77f62c 100644
--- a/drivers/bus/ts-nbus.c
+++ b/drivers/bus/ts-nbus.c
@@ -110,11 +110,9 @@ static void ts_nbus_set_direction(struct ts_nbus *ts_nbus, int direction)
  */
 static void ts_nbus_reset_bus(struct ts_nbus *ts_nbus)
 {
-	int i;
-	int values[8];
+	DECLARE_BITMAP(values, 8);
 
-	for (i = 0; i < 8; i++)
-		values[i] = 0;
+	values[0] = 0;
 
 	gpiod_set_array_value_cansleep(8, ts_nbus->data->desc, values);
 	gpiod_set_value_cansleep(ts_nbus->csn, 0);
@@ -157,14 +155,9 @@ static int ts_nbus_read_byte(struct ts_nbus *ts_nbus, u8 *val)
 static void ts_nbus_write_byte(struct ts_nbus *ts_nbus, u8 byte)
 {
 	struct gpio_descs *gpios = ts_nbus->data;
-	int i;
-	int values[8];
+	DECLARE_BITMAP(values, 8);
 
-	for (i = 0; i < 8; i++)
-		if (byte & BIT(i))
-			values[i] = 1;
-		else
-			values[i] = 0;
+	values[0] = byte;
 
 	gpiod_set_array_value_cansleep(8, gpios->desc, values);
 }
diff --git a/drivers/gpio/gpio-max3191x.c b/drivers/gpio/gpio-max3191x.c
index b5b9cb1fda50..bd4a245fc5a0 100644
--- a/drivers/gpio/gpio-max3191x.c
+++ b/drivers/gpio/gpio-max3191x.c
@@ -315,14 +315,16 @@ static void gpiod_set_array_single_value_cansleep(unsigned int ndescs,
 						  struct gpio_desc **desc,
 						  int value)
 {
-	int i, *values;
+	unsigned long *values;
 
-	values = kmalloc_array(ndescs, sizeof(*values), GFP_KERNEL);
+	values = bitmap_alloc(ndescs, GFP_KERNEL);
 	if (!values)
 		return;
 
-	for (i = 0; i < ndescs; i++)
-		values[i] = value;
+	if (value)
+		bitmap_fill(values, ndescs);
+	else
+		bitmap_zero(values, ndescs);
 
 	gpiod_set_array_value_cansleep(ndescs, desc, values);
 	kfree(values);
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index e8f8a1999393..b66b2191c5c5 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -427,7 +427,7 @@ static long linehandle_ioctl(struct file *filep, unsigned int cmd,
 	struct linehandle_state *lh = filep->private_data;
 	void __user *ip = (void __user *)arg;
 	struct gpiohandle_data ghd;
-	int vals[GPIOHANDLES_MAX];
+	DECLARE_BITMAP(vals, GPIOHANDLES_MAX);
 	int i;
 
 	if (cmd == GPIOHANDLE_GET_LINE_VALUES_IOCTL) {
@@ -442,7 +442,7 @@ static long linehandle_ioctl(struct file *filep, unsigned int cmd,
 
 		memset(&ghd, 0, sizeof(ghd));
 		for (i = 0; i < lh->numdescs; i++)
-			ghd.values[i] = vals[i];
+			ghd.values[i] = test_bit(i, vals);
 
 		if (copy_to_user(ip, &ghd, sizeof(ghd)))
 			return -EFAULT;
@@ -461,7 +461,7 @@ static long linehandle_ioctl(struct file *filep, unsigned int cmd,
 
 		/* Clamp all values to [0,1] */
 		for (i = 0; i < lh->numdescs; i++)
-			vals[i] = !!ghd.values[i];
+			__assign_bit(i, vals, ghd.values[i]);
 
 		/* Reuse the array setting function */
 		return gpiod_set_array_value_complex(false,
@@ -2784,7 +2784,7 @@ static int gpio_chip_get_multiple(struct gpio_chip *chip,
 int gpiod_get_array_value_complex(bool raw, bool can_sleep,
 				  unsigned int array_size,
 				  struct gpio_desc **desc_array,
-				  int *value_array)
+				  unsigned long *value_bitmap)
 {
 	int i = 0;
 
@@ -2835,7 +2835,7 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep,
 
 			if (!raw && test_bit(FLAG_ACTIVE_LOW, &desc->flags))
 				value = !value;
-			value_array[j] = value;
+			__assign_bit(j, value_bitmap, value);
 			trace_gpio_value(desc_to_gpio(desc), 1, value);
 		}
 
@@ -2895,9 +2895,9 @@ EXPORT_SYMBOL_GPL(gpiod_get_value);
 
 /**
  * gpiod_get_raw_array_value() - read raw values from an array of GPIOs
- * @array_size: number of elements in the descriptor / value arrays
+ * @array_size: number of elements in the descriptor array / value bitmap
  * @desc_array: array of GPIO descriptors whose values will be read
- * @value_array: array to store the read values
+ * @value_bitmap: bitmap to store the read values
  *
  * Read the raw values of the GPIOs, i.e. the values of the physical lines
  * without regard for their ACTIVE_LOW status.  Return 0 in case of success,
@@ -2907,20 +2907,21 @@ EXPORT_SYMBOL_GPL(gpiod_get_value);
  * and it will complain if the GPIO chip functions potentially sleep.
  */
 int gpiod_get_raw_array_value(unsigned int array_size,
-			      struct gpio_desc **desc_array, int *value_array)
+			      struct gpio_desc **desc_array,
+			      unsigned long *value_bitmap)
 {
 	if (!desc_array)
 		return -EINVAL;
 	return gpiod_get_array_value_complex(true, false, array_size,
-					     desc_array, value_array);
+					     desc_array, value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_get_raw_array_value);
 
 /**
  * gpiod_get_array_value() - read values from an array of GPIOs
- * @array_size: number of elements in the descriptor / value arrays
+ * @array_size: number of elements in the descriptor array / value bitmap
  * @desc_array: array of GPIO descriptors whose values will be read
- * @value_array: array to store the read values
+ * @value_bitmap: bitmap to store the read values
  *
  * Read the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status
  * into account.  Return 0 in case of success, else an error code.
@@ -2929,12 +2930,13 @@ EXPORT_SYMBOL_GPL(gpiod_get_raw_array_value);
  * and it will complain if the GPIO chip functions potentially sleep.
  */
 int gpiod_get_array_value(unsigned int array_size,
-			  struct gpio_desc **desc_array, int *value_array)
+			  struct gpio_desc **desc_array,
+			  unsigned long *value_bitmap)
 {
 	if (!desc_array)
 		return -EINVAL;
 	return gpiod_get_array_value_complex(false, false, array_size,
-					     desc_array, value_array);
+					     desc_array, value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_get_array_value);
 
@@ -3027,7 +3029,7 @@ static void gpio_chip_set_multiple(struct gpio_chip *chip,
 int gpiod_set_array_value_complex(bool raw, bool can_sleep,
 				   unsigned int array_size,
 				   struct gpio_desc **desc_array,
-				   int *value_array)
+				   unsigned long *value_bitmap)
 {
 	int i = 0;
 
@@ -3056,7 +3058,7 @@ int gpiod_set_array_value_complex(bool raw, bool can_sleep,
 		do {
 			struct gpio_desc *desc = desc_array[i];
 			int hwgpio = gpio_chip_hwgpio(desc);
-			int value = value_array[i];
+			int value = test_bit(i, value_bitmap);
 
 			if (!raw && test_bit(FLAG_ACTIVE_LOW, &desc->flags))
 				value = !value;
@@ -3152,9 +3154,9 @@ EXPORT_SYMBOL_GPL(gpiod_set_value);
 
 /**
  * gpiod_set_raw_array_value() - assign values to an array of GPIOs
- * @array_size: number of elements in the descriptor / value arrays
+ * @array_size: number of elements in the descriptor array / value bitmap
  * @desc_array: array of GPIO descriptors whose values will be assigned
- * @value_array: array of values to assign
+ * @value_bitmap: bitmap of values to assign
  *
  * Set the raw values of the GPIOs, i.e. the values of the physical lines
  * without regard for their ACTIVE_LOW status.
@@ -3163,20 +3165,21 @@ EXPORT_SYMBOL_GPL(gpiod_set_value);
  * complain if the GPIO chip functions potentially sleep.
  */
 int gpiod_set_raw_array_value(unsigned int array_size,
-			 struct gpio_desc **desc_array, int *value_array)
+			 struct gpio_desc **desc_array,
+			 unsigned long *value_bitmap)
 {
 	if (!desc_array)
 		return -EINVAL;
 	return gpiod_set_array_value_complex(true, false, array_size,
-					desc_array, value_array);
+					desc_array, value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value);
 
 /**
  * gpiod_set_array_value() - assign values to an array of GPIOs
- * @array_size: number of elements in the descriptor / value arrays
+ * @array_size: number of elements in the descriptor array / value bitmap
  * @desc_array: array of GPIO descriptors whose values will be assigned
- * @value_array: array of values to assign
+ * @value_bitmap: bitmap of values to assign
  *
  * Set the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status
  * into account.
@@ -3185,12 +3188,13 @@ EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value);
  * complain if the GPIO chip functions potentially sleep.
  */
 void gpiod_set_array_value(unsigned int array_size,
-			   struct gpio_desc **desc_array, int *value_array)
+			   struct gpio_desc **desc_array,
+			   unsigned long *value_bitmap)
 {
 	if (!desc_array)
 		return;
 	gpiod_set_array_value_complex(false, false, array_size, desc_array,
-				      value_array);
+				      value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_array_value);
 
@@ -3410,9 +3414,9 @@ EXPORT_SYMBOL_GPL(gpiod_get_value_cansleep);
 
 /**
  * gpiod_get_raw_array_value_cansleep() - read raw values from an array of GPIOs
- * @array_size: number of elements in the descriptor / value arrays
+ * @array_size: number of elements in the descriptor array / value bitmap
  * @desc_array: array of GPIO descriptors whose values will be read
- * @value_array: array to store the read values
+ * @value_bitmap: bitmap to store the read values
  *
  * Read the raw values of the GPIOs, i.e. the values of the physical lines
  * without regard for their ACTIVE_LOW status.  Return 0 in case of success,
@@ -3422,21 +3426,21 @@ EXPORT_SYMBOL_GPL(gpiod_get_value_cansleep);
  */
 int gpiod_get_raw_array_value_cansleep(unsigned int array_size,
 				       struct gpio_desc **desc_array,
-				       int *value_array)
+				       unsigned long *value_bitmap)
 {
 	might_sleep_if(extra_checks);
 	if (!desc_array)
 		return -EINVAL;
 	return gpiod_get_array_value_complex(true, true, array_size,
-					     desc_array, value_array);
+					     desc_array, value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_get_raw_array_value_cansleep);
 
 /**
  * gpiod_get_array_value_cansleep() - read values from an array of GPIOs
- * @array_size: number of elements in the descriptor / value arrays
+ * @array_size: number of elements in the descriptor array / value bitmap
  * @desc_array: array of GPIO descriptors whose values will be read
- * @value_array: array to store the read values
+ * @value_bitmap: bitmap to store the read values
  *
  * Read the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status
  * into account.  Return 0 in case of success, else an error code.
@@ -3445,13 +3449,13 @@ EXPORT_SYMBOL_GPL(gpiod_get_raw_array_value_cansleep);
  */
 int gpiod_get_array_value_cansleep(unsigned int array_size,
 				   struct gpio_desc **desc_array,
-				   int *value_array)
+				   unsigned long *value_bitmap)
 {
 	might_sleep_if(extra_checks);
 	if (!desc_array)
 		return -EINVAL;
 	return gpiod_get_array_value_complex(false, true, array_size,
-					     desc_array, value_array);
+					     desc_array, value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_get_array_value_cansleep);
 
@@ -3493,9 +3497,9 @@ EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep);
 
 /**
  * gpiod_set_raw_array_value_cansleep() - assign values to an array of GPIOs
- * @array_size: number of elements in the descriptor / value arrays
+ * @array_size: number of elements in the descriptor array / value bitmap
  * @desc_array: array of GPIO descriptors whose values will be assigned
- * @value_array: array of values to assign
+ * @value_bitmap: bitmap of values to assign
  *
  * Set the raw values of the GPIOs, i.e. the values of the physical lines
  * without regard for their ACTIVE_LOW status.
@@ -3504,13 +3508,13 @@ EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep);
  */
 int gpiod_set_raw_array_value_cansleep(unsigned int array_size,
 					struct gpio_desc **desc_array,
-					int *value_array)
+					unsigned long *value_bitmap)
 {
 	might_sleep_if(extra_checks);
 	if (!desc_array)
 		return -EINVAL;
 	return gpiod_set_array_value_complex(true, true, array_size, desc_array,
-				      value_array);
+				      value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value_cansleep);
 
@@ -3533,9 +3537,9 @@ void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n)
 
 /**
  * gpiod_set_array_value_cansleep() - assign values to an array of GPIOs
- * @array_size: number of elements in the descriptor / value arrays
+ * @array_size: number of elements in the descriptor array / value bitmap
  * @desc_array: array of GPIO descriptors whose values will be assigned
- * @value_array: array of values to assign
+ * @value_bitmap: bitmap of values to assign
  *
  * Set the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status
  * into account.
@@ -3544,13 +3548,13 @@ void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n)
  */
 void gpiod_set_array_value_cansleep(unsigned int array_size,
 				    struct gpio_desc **desc_array,
-				    int *value_array)
+				    unsigned long *value_bitmap)
 {
 	might_sleep_if(extra_checks);
 	if (!desc_array)
 		return;
 	gpiod_set_array_value_complex(false, true, array_size, desc_array,
-				      value_array);
+				      value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_array_value_cansleep);
 
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index a7e49fef73d4..11e83d2eef89 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -187,11 +187,11 @@ struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip, u16 hwnum);
 int gpiod_get_array_value_complex(bool raw, bool can_sleep,
 				  unsigned int array_size,
 				  struct gpio_desc **desc_array,
-				  int *value_array);
+				  unsigned long *value_bitmap);
 int gpiod_set_array_value_complex(bool raw, bool can_sleep,
 				   unsigned int array_size,
 				   struct gpio_desc **desc_array,
-				   int *value_array);
+				   unsigned long *value_bitmap);
 
 /* This is just passed between gpiolib and devres */
 struct gpio_desc *gpiod_get_from_of_node(struct device_node *node,
diff --git a/drivers/i2c/muxes/i2c-mux-gpio.c b/drivers/i2c/muxes/i2c-mux-gpio.c
index 401308e3d036..d835857bb094 100644
--- a/drivers/i2c/muxes/i2c-mux-gpio.c
+++ b/drivers/i2c/muxes/i2c-mux-gpio.c
@@ -22,18 +22,15 @@ struct gpiomux {
 	struct i2c_mux_gpio_platform_data data;
 	unsigned gpio_base;
 	struct gpio_desc **gpios;
-	int *values;
 };
 
 static void i2c_mux_gpio_set(const struct gpiomux *mux, unsigned val)
 {
-	int i;
+	DECLARE_BITMAP(values, BITS_PER_TYPE(val));
 
-	for (i = 0; i < mux->data.n_gpios; i++)
-		mux->values[i] = (val >> i) & 1;
+	values[0] = val;
 
-	gpiod_set_array_value_cansleep(mux->data.n_gpios,
-				       mux->gpios, mux->values);
+	gpiod_set_array_value_cansleep(mux->data.n_gpios, mux->gpios, values);
 }
 
 static int i2c_mux_gpio_select(struct i2c_mux_core *muxc, u32 chan)
@@ -182,15 +179,13 @@ static int i2c_mux_gpio_probe(struct platform_device *pdev)
 		return -EPROBE_DEFER;
 
 	muxc = i2c_mux_alloc(parent, &pdev->dev, mux->data.n_values,
-			     mux->data.n_gpios * sizeof(*mux->gpios) +
-			     mux->data.n_gpios * sizeof(*mux->values), 0,
+			     mux->data.n_gpios * sizeof(*mux->gpios), 0,
 			     i2c_mux_gpio_select, NULL);
 	if (!muxc) {
 		ret = -ENOMEM;
 		goto alloc_failed;
 	}
 	mux->gpios = muxc->priv;
-	mux->values = (int *)(mux->gpios + mux->data.n_gpios);
 	muxc->priv = mux;
 
 	platform_set_drvdata(pdev, muxc);
diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c
index a8b9fee4d62a..902476ef9a0e 100644
--- a/drivers/mmc/core/pwrseq_simple.c
+++ b/drivers/mmc/core/pwrseq_simple.c
@@ -40,18 +40,13 @@ static void mmc_pwrseq_simple_set_gpios_value(struct mmc_pwrseq_simple *pwrseq,
 	struct gpio_descs *reset_gpios = pwrseq->reset_gpios;
 
 	if (!IS_ERR(reset_gpios)) {
-		int i, *values;
+		DECLARE_BITMAP(values, BITS_PER_TYPE(value));
 		int nvalues = reset_gpios->ndescs;
 
-		values = kmalloc_array(nvalues, sizeof(int), GFP_KERNEL);
-		if (!values)
-			return;
+		values[0] = value;
 
-		for (i = 0; i < nvalues; i++)
-			values[i] = value;
-
-		gpiod_set_array_value_cansleep(nvalues, reset_gpios->desc, values);
-		kfree(values);
+		gpiod_set_array_value_cansleep(nvalues, reset_gpios->desc,
+					       values);
 	}
 }
 
diff --git a/drivers/mux/gpio.c b/drivers/mux/gpio.c
index 6fdd9316db8b..46c44532fbd5 100644
--- a/drivers/mux/gpio.c
+++ b/drivers/mux/gpio.c
@@ -17,20 +17,17 @@
 
 struct mux_gpio {
 	struct gpio_descs *gpios;
-	int *val;
 };
 
 static int mux_gpio_set(struct mux_control *mux, int state)
 {
 	struct mux_gpio *mux_gpio = mux_chip_priv(mux->chip);
-	int i;
+	DECLARE_BITMAP(values, BITS_PER_TYPE(state));
 
-	for (i = 0; i < mux_gpio->gpios->ndescs; i++)
-		mux_gpio->val[i] = (state >> i) & 1;
+	values[0] = state;
 
 	gpiod_set_array_value_cansleep(mux_gpio->gpios->ndescs,
-				       mux_gpio->gpios->desc,
-				       mux_gpio->val);
+				       mux_gpio->gpios->desc, values);
 
 	return 0;
 }
@@ -58,13 +55,11 @@ static int mux_gpio_probe(struct platform_device *pdev)
 	if (pins < 0)
 		return pins;
 
-	mux_chip = devm_mux_chip_alloc(dev, 1, sizeof(*mux_gpio) +
-				       pins * sizeof(*mux_gpio->val));
+	mux_chip = devm_mux_chip_alloc(dev, 1, sizeof(*mux_gpio));
 	if (IS_ERR(mux_chip))
 		return PTR_ERR(mux_chip);
 
 	mux_gpio = mux_chip_priv(mux_chip);
-	mux_gpio->val = (int *)(mux_gpio + 1);
 	mux_chip->ops = &mux_gpio_ops;
 
 	mux_gpio->gpios = devm_gpiod_get_array(dev, "mux", GPIOD_OUT_LOW);
diff --git a/drivers/net/phy/mdio-mux-gpio.c b/drivers/net/phy/mdio-mux-gpio.c
index bc90764a8b8d..e25ccfc8c070 100644
--- a/drivers/net/phy/mdio-mux-gpio.c
+++ b/drivers/net/phy/mdio-mux-gpio.c
@@ -20,23 +20,21 @@
 struct mdio_mux_gpio_state {
 	struct gpio_descs *gpios;
 	void *mux_handle;
-	int values[];
 };
 
 static int mdio_mux_gpio_switch_fn(int current_child, int desired_child,
 				   void *data)
 {
 	struct mdio_mux_gpio_state *s = data;
-	unsigned int n;
+	DECLARE_BITMAP(values, BITS_PER_TYPE(desired_child));
 
 	if (current_child == desired_child)
 		return 0;
 
-	for (n = 0; n < s->gpios->ndescs; n++)
-		s->values[n] = (desired_child >> n) & 1;
+	values[0] = desired_child;
 
 	gpiod_set_array_value_cansleep(s->gpios->ndescs, s->gpios->desc,
-				       s->values);
+				       values);
 
 	return 0;
 }
@@ -51,8 +49,7 @@ static int mdio_mux_gpio_probe(struct platform_device *pdev)
 	if (IS_ERR(gpios))
 		return PTR_ERR(gpios);
 
-	s = devm_kzalloc(&pdev->dev, struct_size(s, values, gpios->ndescs),
-			 GFP_KERNEL);
+	s = devm_kzalloc(&pdev->dev, sizeof(*s), GFP_KERNEL);
 	if (!s) {
 		gpiod_put_array(gpios);
 		return -ENOMEM;
diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c
index c5f2344c189b..ac033d555700 100644
--- a/drivers/pcmcia/soc_common.c
+++ b/drivers/pcmcia/soc_common.c
@@ -351,15 +351,16 @@ static int soc_common_pcmcia_config_skt(
 
 	if (ret == 0) {
 		struct gpio_desc *descs[2];
-		int values[2], n = 0;
+		DECLARE_BITMAP(values, 2);
+		int n = 0;
 
 		if (skt->gpio_reset) {
 			descs[n] = skt->gpio_reset;
-			values[n++] = !!(state->flags & SS_RESET);
+			__assign_bit(n++, values, state->flags & SS_RESET);
 		}
 		if (skt->gpio_bus_enable) {
 			descs[n] = skt->gpio_bus_enable;
-			values[n++] = !!(state->flags & SS_OUTPUT_ENA);
+			__assign_bit(n++, values, state->flags & SS_OUTPUT_ENA);
 		}
 
 		if (n)
diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c
index 0075fb0bef8c..9162b61ddb95 100644
--- a/drivers/phy/motorola/phy-mapphone-mdm6600.c
+++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c
@@ -157,12 +157,9 @@ static const struct phy_ops gpio_usb_ops = {
  */
 static void phy_mdm6600_cmd(struct phy_mdm6600 *ddata, int val)
 {
-	int values[PHY_MDM6600_NR_CMD_LINES];
-	int i;
+	DECLARE_BITMAP(values, PHY_MDM6600_NR_CMD_LINES);
 
-	val &= (1 << PHY_MDM6600_NR_CMD_LINES) - 1;
-	for (i = 0; i < PHY_MDM6600_NR_CMD_LINES; i++)
-		values[i] = (val & BIT(i)) >> i;
+	values[0] = val;
 
 	gpiod_set_array_value_cansleep(PHY_MDM6600_NR_CMD_LINES,
 				       ddata->cmd_gpios->desc, values);
@@ -176,7 +173,7 @@ static void phy_mdm6600_status(struct work_struct *work)
 {
 	struct phy_mdm6600 *ddata;
 	struct device *dev;
-	int values[PHY_MDM6600_NR_STATUS_LINES];
+	DECLARE_BITMAP(values, PHY_MDM6600_NR_STATUS_LINES);
 	int error, i, val = 0;
 
 	ddata = container_of(work, struct phy_mdm6600, status_work.work);
@@ -189,11 +186,11 @@ static void phy_mdm6600_status(struct work_struct *work)
 		return;
 
 	for (i = 0; i < PHY_MDM6600_NR_STATUS_LINES; i++) {
-		val |= values[i] << i;
+		val |= test_bit(i, values) << i;
 		dev_dbg(ddata->dev, "XXX %s: i: %i values[i]: %i val: %i\n",
-			__func__, i, values[i], val);
+			__func__, i, test_bit(i, values), val);
 	}
-	ddata->status = val;
+	ddata->status = values[0];
 
 	dev_info(dev, "modem status: %i %s\n",
 		 ddata->status,
diff --git a/drivers/staging/iio/adc/ad7606.c b/drivers/staging/iio/adc/ad7606.c
index 25b9fcd5e3a4..9c1d77d48700 100644
--- a/drivers/staging/iio/adc/ad7606.c
+++ b/drivers/staging/iio/adc/ad7606.c
@@ -202,7 +202,7 @@ static int ad7606_write_raw(struct iio_dev *indio_dev,
 			    long mask)
 {
 	struct ad7606_state *st = iio_priv(indio_dev);
-	int values[3];
+	DECLARE_BITMAP(values, 3);
 	int ret, i;
 
 	switch (mask) {
@@ -227,13 +227,10 @@ static int ad7606_write_raw(struct iio_dev *indio_dev,
 		if (ret < 0)
 			return ret;
 
-		values[0] = (ret >> 0) & 1;
-		values[1] = (ret >> 1) & 1;
-		values[2] = (ret >> 2) & 1;
+		values[0] = ret;
 
 		mutex_lock(&st->lock);
-		gpiod_set_array_value(ARRAY_SIZE(values), st->gpio_os->desc,
-				      values);
+		gpiod_set_array_value(3, st->gpio_os->desc, values);
 		st->oversampling = val;
 		mutex_unlock(&st->lock);
 
diff --git a/drivers/tty/serial/serial_mctrl_gpio.c b/drivers/tty/serial/serial_mctrl_gpio.c
index 1c06325beaca..7d9d2c7b6c39 100644
--- a/drivers/tty/serial/serial_mctrl_gpio.c
+++ b/drivers/tty/serial/serial_mctrl_gpio.c
@@ -40,7 +40,7 @@ void mctrl_gpio_set(struct mctrl_gpios *gpios, unsigned int mctrl)
 {
 	enum mctrl_gpio_idx i;
 	struct gpio_desc *desc_array[UART_GPIO_MAX];
-	int value_array[UART_GPIO_MAX];
+	DECLARE_BITMAP(values, UART_GPIO_MAX);
 	unsigned int count = 0;
 
 	if (gpios == NULL)
@@ -49,10 +49,11 @@ void mctrl_gpio_set(struct mctrl_gpios *gpios, unsigned int mctrl)
 	for (i = 0; i < UART_GPIO_MAX; i++)
 		if (gpios->gpio[i] && mctrl_gpios_desc[i].dir_out) {
 			desc_array[count] = gpios->gpio[i];
-			value_array[count] = !!(mctrl & mctrl_gpios_desc[i].mctrl);
+			__assign_bit(count, values,
+				     mctrl & mctrl_gpios_desc[i].mctrl);
 			count++;
 		}
-	gpiod_set_array_value(count, desc_array, value_array);
+	gpiod_set_array_value(count, desc_array, values);
 }
 EXPORT_SYMBOL_GPL(mctrl_gpio_set);
 
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 21ddbe440030..2b0389f0bec4 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -104,36 +104,38 @@ int gpiod_direction_output_raw(struct gpio_desc *desc, int value);
 /* Value get/set from non-sleeping context */
 int gpiod_get_value(const struct gpio_desc *desc);
 int gpiod_get_array_value(unsigned int array_size,
-			  struct gpio_desc **desc_array, int *value_array);
+			  struct gpio_desc **desc_array,
+			  unsigned long *value_bitmap);
 void gpiod_set_value(struct gpio_desc *desc, int value);
 void gpiod_set_array_value(unsigned int array_size,
-			   struct gpio_desc **desc_array, int *value_array);
+			   struct gpio_desc **desc_array,
+			   unsigned long *value_bitmap);
 int gpiod_get_raw_value(const struct gpio_desc *desc);
 int gpiod_get_raw_array_value(unsigned int array_size,
 			      struct gpio_desc **desc_array,
-			      int *value_array);
+			      unsigned long *value_bitmap);
 void gpiod_set_raw_value(struct gpio_desc *desc, int value);
 int gpiod_set_raw_array_value(unsigned int array_size,
 			       struct gpio_desc **desc_array,
-			       int *value_array);
+			       unsigned long *value_bitmap);
 
 /* Value get/set from sleeping context */
 int gpiod_get_value_cansleep(const struct gpio_desc *desc);
 int gpiod_get_array_value_cansleep(unsigned int array_size,
 				   struct gpio_desc **desc_array,
-				   int *value_array);
+				   unsigned long *value_bitmap);
 void gpiod_set_value_cansleep(struct gpio_desc *desc, int value);
 void gpiod_set_array_value_cansleep(unsigned int array_size,
 				    struct gpio_desc **desc_array,
-				    int *value_array);
+				    unsigned long *value_bitmap);
 int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc);
 int gpiod_get_raw_array_value_cansleep(unsigned int array_size,
 				       struct gpio_desc **desc_array,
-				       int *value_array);
+				       unsigned long *value_bitmap);
 void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value);
 int gpiod_set_raw_array_value_cansleep(unsigned int array_size,
 					struct gpio_desc **desc_array,
-					int *value_array);
+					unsigned long *value_bitmap);
 
 int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce);
 int gpiod_set_transitory(struct gpio_desc *desc, bool transitory);
@@ -330,7 +332,7 @@ static inline int gpiod_get_value(const struct gpio_desc *desc)
 }
 static inline int gpiod_get_array_value(unsigned int array_size,
 					struct gpio_desc **desc_array,
-					int *value_array)
+					unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
 	WARN_ON(1);
@@ -343,7 +345,7 @@ static inline void gpiod_set_value(struct gpio_desc *desc, int value)
 }
 static inline void gpiod_set_array_value(unsigned int array_size,
 					 struct gpio_desc **desc_array,
-					 int *value_array)
+					 unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
 	WARN_ON(1);
@@ -356,7 +358,7 @@ static inline int gpiod_get_raw_value(const struct gpio_desc *desc)
 }
 static inline int gpiod_get_raw_array_value(unsigned int array_size,
 					    struct gpio_desc **desc_array,
-					    int *value_array)
+					    unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
 	WARN_ON(1);
@@ -369,7 +371,7 @@ static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value)
 }
 static inline int gpiod_set_raw_array_value(unsigned int array_size,
 					     struct gpio_desc **desc_array,
-					     int *value_array)
+					     unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
 	WARN_ON(1);
@@ -384,7 +386,7 @@ static inline int gpiod_get_value_cansleep(const struct gpio_desc *desc)
 }
 static inline int gpiod_get_array_value_cansleep(unsigned int array_size,
 				     struct gpio_desc **desc_array,
-				     int *value_array)
+				     unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
 	WARN_ON(1);
@@ -397,7 +399,7 @@ static inline void gpiod_set_value_cansleep(struct gpio_desc *desc, int value)
 }
 static inline void gpiod_set_array_value_cansleep(unsigned int array_size,
 					    struct gpio_desc **desc_array,
-					    int *value_array)
+					    unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
 	WARN_ON(1);
@@ -410,7 +412,7 @@ static inline int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc)
 }
 static inline int gpiod_get_raw_array_value_cansleep(unsigned int array_size,
 					       struct gpio_desc **desc_array,
-					       int *value_array)
+					       unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
 	WARN_ON(1);
@@ -424,7 +426,7 @@ static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc,
 }
 static inline int gpiod_set_raw_array_value_cansleep(unsigned int array_size,
 						struct gpio_desc **desc_array,
-						int *value_array)
+						unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
 	WARN_ON(1);
-- 
cgit v1.2.3


From bf9346f5d47b4506aafbc384dfb5796b0adc3f8d Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Date: Wed, 5 Sep 2018 23:50:06 +0200
Subject: gpiolib: Identify arrays matching GPIO hardware

Certain GPIO array lookup results may map directly to GPIO pins of a
single GPIO chip in hardware order.  If that condition is recognized
and handled efficiently, significant performance gain of get/set array
functions may be possible.

While processing a request for an array of GPIO descriptors, identify
those which represent corresponding pins of a single GPIO chip.  Skip
over pins which require open source or open drain special processing.
Moreover, identify pins which require inversion.  Pass a pointer to
that information with the array to the caller so it can benefit from
enhanced performance as soon as get/set array functions can accept and
make efficient use of it.

Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/driver-api/gpio/consumer.rst |  4 +-
 drivers/gpio/gpiolib.c                     | 72 +++++++++++++++++++++++++++++-
 drivers/gpio/gpiolib.h                     |  9 ++++
 include/linux/gpio/consumer.h              |  9 ++++
 4 files changed, 92 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/gpio/consumer.rst b/Documentation/driver-api/gpio/consumer.rst
index ed68042ddccf..7e0298b9a7b9 100644
--- a/Documentation/driver-api/gpio/consumer.rst
+++ b/Documentation/driver-api/gpio/consumer.rst
@@ -109,9 +109,11 @@ For a function using multiple GPIOs all of those can be obtained with one call::
 					   enum gpiod_flags flags)
 
 This function returns a struct gpio_descs which contains an array of
-descriptors::
+descriptors.  It also contains a pointer to a gpiolib private structure which,
+if passed back to get/set array functions, may speed up I/O proocessing::
 
 	struct gpio_descs {
+		struct gpio_array *info;
 		unsigned int ndescs;
 		struct gpio_desc *desc[];
 	}
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index b66b2191c5c5..141f39308a53 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -4174,7 +4174,9 @@ struct gpio_descs *__must_check gpiod_get_array(struct device *dev,
 {
 	struct gpio_desc *desc;
 	struct gpio_descs *descs;
-	int count;
+	struct gpio_array *array_info = NULL;
+	struct gpio_chip *chip;
+	int count, bitmap_size;
 
 	count = gpiod_count(dev, con_id);
 	if (count < 0)
@@ -4190,9 +4192,77 @@ struct gpio_descs *__must_check gpiod_get_array(struct device *dev,
 			gpiod_put_array(descs);
 			return ERR_CAST(desc);
 		}
+
 		descs->desc[descs->ndescs] = desc;
+
+		chip = gpiod_to_chip(desc);
+		/*
+		 * Select a chip of first array member
+		 * whose index matches its pin hardware number
+		 * as a candidate for fast bitmap processing.
+		 */
+		if (!array_info && gpio_chip_hwgpio(desc) == descs->ndescs) {
+			struct gpio_descs *array;
+
+			bitmap_size = BITS_TO_LONGS(chip->ngpio > count ?
+						    chip->ngpio : count);
+
+			array = kzalloc(struct_size(descs, desc, count) +
+					struct_size(array_info, invert_mask,
+					3 * bitmap_size), GFP_KERNEL);
+			if (!array) {
+				gpiod_put_array(descs);
+				return ERR_PTR(-ENOMEM);
+			}
+
+			memcpy(array, descs,
+			       struct_size(descs, desc, descs->ndescs + 1));
+			kfree(descs);
+
+			descs = array;
+			array_info = (void *)(descs->desc + count);
+			array_info->get_mask = array_info->invert_mask +
+						  bitmap_size;
+			array_info->set_mask = array_info->get_mask +
+						  bitmap_size;
+
+			array_info->desc = descs->desc;
+			array_info->size = count;
+			array_info->chip = chip;
+			bitmap_set(array_info->get_mask, descs->ndescs,
+				   count - descs->ndescs);
+			bitmap_set(array_info->set_mask, descs->ndescs,
+				   count - descs->ndescs);
+			descs->info = array_info;
+		}
+		/*
+		 * Unmark members which don't qualify for fast bitmap
+		 * processing (different chip, not in hardware order)
+		 */
+		if (array_info && (chip != array_info->chip ||
+		    gpio_chip_hwgpio(desc) != descs->ndescs)) {
+			__clear_bit(descs->ndescs, array_info->get_mask);
+			__clear_bit(descs->ndescs, array_info->set_mask);
+		} else if (array_info) {
+			/* Exclude open drain or open source from fast output */
+			if (gpiochip_line_is_open_drain(chip, descs->ndescs) ||
+			    gpiochip_line_is_open_source(chip, descs->ndescs))
+				__clear_bit(descs->ndescs,
+					    array_info->set_mask);
+			/* Identify 'fast' pins which require invertion */
+			if (gpiod_is_active_low(desc))
+				__set_bit(descs->ndescs,
+					  array_info->invert_mask);
+		}
+
 		descs->ndescs++;
 	}
+	if (array_info)
+		dev_dbg(dev,
+			"GPIO array info: chip=%s, size=%d, get_mask=%lx, set_mask=%lx, invert_mask=%lx\n",
+			array_info->chip->label, array_info->size,
+			*array_info->get_mask, *array_info->set_mask,
+			*array_info->invert_mask);
 	return descs;
 }
 EXPORT_SYMBOL_GPL(gpiod_get_array);
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index 11e83d2eef89..b60905d558b1 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -183,6 +183,15 @@ static inline bool acpi_can_fallback_to_crs(struct acpi_device *adev,
 }
 #endif
 
+struct gpio_array {
+	struct gpio_desc	**desc;
+	unsigned int		size;
+	struct gpio_chip	*chip;
+	unsigned long		*get_mask;
+	unsigned long		*set_mask;
+	unsigned long		invert_mask[];
+};
+
 struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip, u16 hwnum);
 int gpiod_get_array_value_complex(bool raw, bool can_sleep,
 				  unsigned int array_size,
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 2b0389f0bec4..0ffd71c0a77c 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -17,11 +17,20 @@ struct device;
  */
 struct gpio_desc;
 
+/**
+ * Opaque descriptor for a structure of GPIO array attributes.  This structure
+ * is attached to struct gpiod_descs obtained from gpiod_get_array() and can be
+ * passed back to get/set array functions in order to activate fast processing
+ * path if applicable.
+ */
+struct gpio_array;
+
 /**
  * Struct containing an array of descriptors that can be obtained using
  * gpiod_get_array().
  */
 struct gpio_descs {
+	struct gpio_array *info;
 	unsigned int ndescs;
 	struct gpio_desc *desc[];
 };
-- 
cgit v1.2.3


From 77588c14ac868caece82fddbfae7de03b2cec941 Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Date: Wed, 5 Sep 2018 23:50:07 +0200
Subject: gpiolib: Pass array info to get/set array functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to make use of array info obtained from gpiod_get_array() and
speed up processing of arrays matching single GPIO chip layout, that
information must be passed to get/set array functions.  Extend the
functions' API with that additional parameter and update all users.
Pass NULL if a user builds an array itself from single GPIOs.

Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Sebastien Bourdelin <sebastien.bourdelin@savoirfairelinux.com>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Peter Korsgaard <peter.korsgaard@barco.com>
Cc: Peter Rosin <peda@axentia.se>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Rojhalat Ibrahim <imr@rtschenk.de>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Russell King <rmk+kernel@armlinux.org.uk>
Cc: Kishon Vijay Abraham I <kishon@ti.com>
Cc: Tony Lindgren <tony@atomide.com>
Cc: Lars-Peter Clausen <lars@metafoo.de>
Cc: Michael Hennerich <Michael.Hennerich@analog.com>
Cc: Jonathan Cameron <jic23@kernel.org>
Cc: Hartmut Knaack <knaack.h@gmx.de>
Cc: Peter Meerwald-Stadler <pmeerw@pmeerw.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Yegor Yefremov <yegorslists@googlemail.com>
Cc: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/driver-api/gpio/consumer.rst  | 14 ++++++++--
 drivers/auxdisplay/hd44780.c                |  8 +++---
 drivers/bus/ts-nbus.c                       |  5 ++--
 drivers/gpio/gpio-max3191x.c                |  6 +++--
 drivers/gpio/gpiolib.c                      | 40 +++++++++++++++++++++++------
 drivers/gpio/gpiolib.h                      |  2 ++
 drivers/i2c/muxes/i2c-mux-gpio.c            |  3 ++-
 drivers/mmc/core/pwrseq_simple.c            |  2 +-
 drivers/mux/gpio.c                          |  3 ++-
 drivers/net/phy/mdio-mux-gpio.c             |  2 +-
 drivers/pcmcia/soc_common.c                 |  2 +-
 drivers/phy/motorola/phy-mapphone-mdm6600.c |  4 ++-
 drivers/staging/iio/adc/ad7606.c            |  3 ++-
 drivers/tty/serial/serial_mctrl_gpio.c      |  2 +-
 include/linux/gpio/consumer.h               | 16 ++++++++++++
 15 files changed, 86 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/gpio/consumer.rst b/Documentation/driver-api/gpio/consumer.rst
index 7e0298b9a7b9..0afd95a12b10 100644
--- a/Documentation/driver-api/gpio/consumer.rst
+++ b/Documentation/driver-api/gpio/consumer.rst
@@ -325,28 +325,36 @@ The following functions get or set the values of an array of GPIOs::
 
 	int gpiod_get_array_value(unsigned int array_size,
 				  struct gpio_desc **desc_array,
+				  struct gpio_array *array_info,
 				  unsigned long *value_bitmap);
 	int gpiod_get_raw_array_value(unsigned int array_size,
 				      struct gpio_desc **desc_array,
+				      struct gpio_array *array_info,
 				      unsigned long *value_bitmap);
 	int gpiod_get_array_value_cansleep(unsigned int array_size,
 					   struct gpio_desc **desc_array,
+					   struct gpio_array *array_info,
 					   unsigned long *value_bitmap);
 	int gpiod_get_raw_array_value_cansleep(unsigned int array_size,
 					   struct gpio_desc **desc_array,
+					   struct gpio_array *array_info,
 					   unsigned long *value_bitmap);
 
 	void gpiod_set_array_value(unsigned int array_size,
 				   struct gpio_desc **desc_array,
+				   struct gpio_array *array_info,
 				   unsigned long *value_bitmap)
 	void gpiod_set_raw_array_value(unsigned int array_size,
 				       struct gpio_desc **desc_array,
+				       struct gpio_array *array_info,
 				       unsigned long *value_bitmap)
 	void gpiod_set_array_value_cansleep(unsigned int array_size,
 					    struct gpio_desc **desc_array,
+					    struct gpio_array *array_info,
 					    unsigned long *value_bitmap)
 	void gpiod_set_raw_array_value_cansleep(unsigned int array_size,
 						struct gpio_desc **desc_array,
+						struct gpio_array *array_info,
 						unsigned long *value_bitmap)
 
 The array can be an arbitrary set of GPIOs. The functions will try to access
@@ -358,6 +366,7 @@ accessed sequentially.
 The functions take three arguments:
 	* array_size	- the number of array elements
 	* desc_array	- an array of GPIO descriptors
+	* array_info	- optional information obtained from gpiod_array_get()
 	* value_bitmap	- a bitmap to store the GPIOs' values (get) or
 			  a bitmap of values to assign to the GPIOs (set)
 
@@ -368,12 +377,13 @@ the struct gpio_descs returned by gpiod_get_array()::
 
 	struct gpio_descs *my_gpio_descs = gpiod_get_array(...);
 	gpiod_set_array_value(my_gpio_descs->ndescs, my_gpio_descs->desc,
-			      my_gpio_value_bitmap);
+			      my_gpio_descs->info, my_gpio_value_bitmap);
 
 It is also possible to access a completely arbitrary array of descriptors. The
 descriptors may be obtained using any combination of gpiod_get() and
 gpiod_get_array(). Afterwards the array of descriptors has to be setup
-manually before it can be passed to one of the above functions.
+manually before it can be passed to one of the above functions.  In that case,
+array_info should be set to NULL.
 
 Note that for optimal performance GPIOs belonging to the same chip should be
 contiguous within the array of descriptors.
diff --git a/drivers/auxdisplay/hd44780.c b/drivers/auxdisplay/hd44780.c
index e9a893384362..9ad93ea42fdc 100644
--- a/drivers/auxdisplay/hd44780.c
+++ b/drivers/auxdisplay/hd44780.c
@@ -70,7 +70,7 @@ static void hd44780_write_gpio8(struct hd44780 *hd, u8 val, unsigned int rs)
 	n = hd->pins[PIN_CTRL_RW] ? 10 : 9;
 
 	/* Present the data to the port */
-	gpiod_set_array_value_cansleep(n, &hd->pins[PIN_DATA0], values);
+	gpiod_set_array_value_cansleep(n, &hd->pins[PIN_DATA0], NULL, values);
 
 	hd44780_strobe_gpio(hd);
 }
@@ -87,7 +87,7 @@ static void hd44780_write_gpio4(struct hd44780 *hd, u8 val, unsigned int rs)
 	n = hd->pins[PIN_CTRL_RW] ? 6 : 5;
 
 	/* Present the data to the port */
-	gpiod_set_array_value_cansleep(n, &hd->pins[PIN_DATA4], values);
+	gpiod_set_array_value_cansleep(n, &hd->pins[PIN_DATA4], NULL, values);
 
 	hd44780_strobe_gpio(hd);
 
@@ -96,7 +96,7 @@ static void hd44780_write_gpio4(struct hd44780 *hd, u8 val, unsigned int rs)
 	values[0] |= val & 0x0f;
 
 	/* Present the data to the port */
-	gpiod_set_array_value_cansleep(n, &hd->pins[PIN_DATA4], values);
+	gpiod_set_array_value_cansleep(n, &hd->pins[PIN_DATA4], NULL, values);
 
 	hd44780_strobe_gpio(hd);
 }
@@ -152,7 +152,7 @@ static void hd44780_write_cmd_raw_gpio4(struct charlcd *lcd, int cmd)
 	n = hd->pins[PIN_CTRL_RW] ? 6 : 5;
 
 	/* Present the data to the port */
-	gpiod_set_array_value_cansleep(n, &hd->pins[PIN_DATA4], values);
+	gpiod_set_array_value_cansleep(n, &hd->pins[PIN_DATA4], NULL, values);
 
 	hd44780_strobe_gpio(hd);
 }
diff --git a/drivers/bus/ts-nbus.c b/drivers/bus/ts-nbus.c
index 8dde7c77f62c..9989ce904a37 100644
--- a/drivers/bus/ts-nbus.c
+++ b/drivers/bus/ts-nbus.c
@@ -114,7 +114,8 @@ static void ts_nbus_reset_bus(struct ts_nbus *ts_nbus)
 
 	values[0] = 0;
 
-	gpiod_set_array_value_cansleep(8, ts_nbus->data->desc, values);
+	gpiod_set_array_value_cansleep(8, ts_nbus->data->desc,
+				       ts_nbus->data->info, values);
 	gpiod_set_value_cansleep(ts_nbus->csn, 0);
 	gpiod_set_value_cansleep(ts_nbus->strobe, 0);
 	gpiod_set_value_cansleep(ts_nbus->ale, 0);
@@ -159,7 +160,7 @@ static void ts_nbus_write_byte(struct ts_nbus *ts_nbus, u8 byte)
 
 	values[0] = byte;
 
-	gpiod_set_array_value_cansleep(8, gpios->desc, values);
+	gpiod_set_array_value_cansleep(8, gpios->desc, gpios->info, values);
 }
 
 /*
diff --git a/drivers/gpio/gpio-max3191x.c b/drivers/gpio/gpio-max3191x.c
index bd4a245fc5a0..9a8876abeb57 100644
--- a/drivers/gpio/gpio-max3191x.c
+++ b/drivers/gpio/gpio-max3191x.c
@@ -313,6 +313,7 @@ static int max3191x_set_config(struct gpio_chip *gpio, unsigned int offset,
 
 static void gpiod_set_array_single_value_cansleep(unsigned int ndescs,
 						  struct gpio_desc **desc,
+						  struct gpio_array *info,
 						  int value)
 {
 	unsigned long *values;
@@ -326,7 +327,7 @@ static void gpiod_set_array_single_value_cansleep(unsigned int ndescs,
 	else
 		bitmap_zero(values, ndescs);
 
-	gpiod_set_array_value_cansleep(ndescs, desc, values);
+	gpiod_set_array_value_cansleep(ndescs, desc, info, values);
 	kfree(values);
 }
 
@@ -399,7 +400,8 @@ static int max3191x_probe(struct spi_device *spi)
 	if (max3191x->modesel_pins)
 		gpiod_set_array_single_value_cansleep(
 				 max3191x->modesel_pins->ndescs,
-				 max3191x->modesel_pins->desc, max3191x->mode);
+				 max3191x->modesel_pins->desc,
+				 max3191x->modesel_pins->info, max3191x->mode);
 
 	max3191x->ignore_uv = device_property_read_bool(dev,
 						  "maxim,ignore-undervoltage");
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 141f39308a53..cd7c1deed132 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -436,6 +436,7 @@ static long linehandle_ioctl(struct file *filep, unsigned int cmd,
 							true,
 							lh->numdescs,
 							lh->descs,
+							NULL,
 							vals);
 		if (ret)
 			return ret;
@@ -468,6 +469,7 @@ static long linehandle_ioctl(struct file *filep, unsigned int cmd,
 					      true,
 					      lh->numdescs,
 					      lh->descs,
+					      NULL,
 					      vals);
 	}
 	return -EINVAL;
@@ -2784,6 +2786,7 @@ static int gpio_chip_get_multiple(struct gpio_chip *chip,
 int gpiod_get_array_value_complex(bool raw, bool can_sleep,
 				  unsigned int array_size,
 				  struct gpio_desc **desc_array,
+				  struct gpio_array *array_info,
 				  unsigned long *value_bitmap)
 {
 	int i = 0;
@@ -2897,6 +2900,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_value);
  * gpiod_get_raw_array_value() - read raw values from an array of GPIOs
  * @array_size: number of elements in the descriptor array / value bitmap
  * @desc_array: array of GPIO descriptors whose values will be read
+ * @array_info: information on applicability of fast bitmap processing path
  * @value_bitmap: bitmap to store the read values
  *
  * Read the raw values of the GPIOs, i.e. the values of the physical lines
@@ -2908,12 +2912,14 @@ EXPORT_SYMBOL_GPL(gpiod_get_value);
  */
 int gpiod_get_raw_array_value(unsigned int array_size,
 			      struct gpio_desc **desc_array,
+			      struct gpio_array *array_info,
 			      unsigned long *value_bitmap)
 {
 	if (!desc_array)
 		return -EINVAL;
 	return gpiod_get_array_value_complex(true, false, array_size,
-					     desc_array, value_bitmap);
+					     desc_array, array_info,
+					     value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_get_raw_array_value);
 
@@ -2921,6 +2927,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_raw_array_value);
  * gpiod_get_array_value() - read values from an array of GPIOs
  * @array_size: number of elements in the descriptor array / value bitmap
  * @desc_array: array of GPIO descriptors whose values will be read
+ * @array_info: information on applicability of fast bitmap processing path
  * @value_bitmap: bitmap to store the read values
  *
  * Read the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status
@@ -2931,12 +2938,14 @@ EXPORT_SYMBOL_GPL(gpiod_get_raw_array_value);
  */
 int gpiod_get_array_value(unsigned int array_size,
 			  struct gpio_desc **desc_array,
+			  struct gpio_array *array_info,
 			  unsigned long *value_bitmap)
 {
 	if (!desc_array)
 		return -EINVAL;
 	return gpiod_get_array_value_complex(false, false, array_size,
-					     desc_array, value_bitmap);
+					     desc_array, array_info,
+					     value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_get_array_value);
 
@@ -3029,6 +3038,7 @@ static void gpio_chip_set_multiple(struct gpio_chip *chip,
 int gpiod_set_array_value_complex(bool raw, bool can_sleep,
 				   unsigned int array_size,
 				   struct gpio_desc **desc_array,
+				   struct gpio_array *array_info,
 				   unsigned long *value_bitmap)
 {
 	int i = 0;
@@ -3156,6 +3166,7 @@ EXPORT_SYMBOL_GPL(gpiod_set_value);
  * gpiod_set_raw_array_value() - assign values to an array of GPIOs
  * @array_size: number of elements in the descriptor array / value bitmap
  * @desc_array: array of GPIO descriptors whose values will be assigned
+ * @array_info: information on applicability of fast bitmap processing path
  * @value_bitmap: bitmap of values to assign
  *
  * Set the raw values of the GPIOs, i.e. the values of the physical lines
@@ -3166,12 +3177,13 @@ EXPORT_SYMBOL_GPL(gpiod_set_value);
  */
 int gpiod_set_raw_array_value(unsigned int array_size,
 			 struct gpio_desc **desc_array,
+			 struct gpio_array *array_info,
 			 unsigned long *value_bitmap)
 {
 	if (!desc_array)
 		return -EINVAL;
 	return gpiod_set_array_value_complex(true, false, array_size,
-					desc_array, value_bitmap);
+					desc_array, array_info, value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value);
 
@@ -3179,6 +3191,7 @@ EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value);
  * gpiod_set_array_value() - assign values to an array of GPIOs
  * @array_size: number of elements in the descriptor array / value bitmap
  * @desc_array: array of GPIO descriptors whose values will be assigned
+ * @array_info: information on applicability of fast bitmap processing path
  * @value_bitmap: bitmap of values to assign
  *
  * Set the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status
@@ -3189,12 +3202,13 @@ EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value);
  */
 void gpiod_set_array_value(unsigned int array_size,
 			   struct gpio_desc **desc_array,
+			   struct gpio_array *array_info,
 			   unsigned long *value_bitmap)
 {
 	if (!desc_array)
 		return;
 	gpiod_set_array_value_complex(false, false, array_size, desc_array,
-				      value_bitmap);
+				      array_info, value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_array_value);
 
@@ -3416,6 +3430,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_value_cansleep);
  * gpiod_get_raw_array_value_cansleep() - read raw values from an array of GPIOs
  * @array_size: number of elements in the descriptor array / value bitmap
  * @desc_array: array of GPIO descriptors whose values will be read
+ * @array_info: information on applicability of fast bitmap processing path
  * @value_bitmap: bitmap to store the read values
  *
  * Read the raw values of the GPIOs, i.e. the values of the physical lines
@@ -3426,13 +3441,15 @@ EXPORT_SYMBOL_GPL(gpiod_get_value_cansleep);
  */
 int gpiod_get_raw_array_value_cansleep(unsigned int array_size,
 				       struct gpio_desc **desc_array,
+				       struct gpio_array *array_info,
 				       unsigned long *value_bitmap)
 {
 	might_sleep_if(extra_checks);
 	if (!desc_array)
 		return -EINVAL;
 	return gpiod_get_array_value_complex(true, true, array_size,
-					     desc_array, value_bitmap);
+					     desc_array, array_info,
+					     value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_get_raw_array_value_cansleep);
 
@@ -3440,6 +3457,7 @@ EXPORT_SYMBOL_GPL(gpiod_get_raw_array_value_cansleep);
  * gpiod_get_array_value_cansleep() - read values from an array of GPIOs
  * @array_size: number of elements in the descriptor array / value bitmap
  * @desc_array: array of GPIO descriptors whose values will be read
+ * @array_info: information on applicability of fast bitmap processing path
  * @value_bitmap: bitmap to store the read values
  *
  * Read the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status
@@ -3449,13 +3467,15 @@ EXPORT_SYMBOL_GPL(gpiod_get_raw_array_value_cansleep);
  */
 int gpiod_get_array_value_cansleep(unsigned int array_size,
 				   struct gpio_desc **desc_array,
+				   struct gpio_array *array_info,
 				   unsigned long *value_bitmap)
 {
 	might_sleep_if(extra_checks);
 	if (!desc_array)
 		return -EINVAL;
 	return gpiod_get_array_value_complex(false, true, array_size,
-					     desc_array, value_bitmap);
+					     desc_array, array_info,
+					     value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_get_array_value_cansleep);
 
@@ -3499,6 +3519,7 @@ EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep);
  * gpiod_set_raw_array_value_cansleep() - assign values to an array of GPIOs
  * @array_size: number of elements in the descriptor array / value bitmap
  * @desc_array: array of GPIO descriptors whose values will be assigned
+ * @array_info: information on applicability of fast bitmap processing path
  * @value_bitmap: bitmap of values to assign
  *
  * Set the raw values of the GPIOs, i.e. the values of the physical lines
@@ -3508,13 +3529,14 @@ EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep);
  */
 int gpiod_set_raw_array_value_cansleep(unsigned int array_size,
 					struct gpio_desc **desc_array,
+					struct gpio_array *array_info,
 					unsigned long *value_bitmap)
 {
 	might_sleep_if(extra_checks);
 	if (!desc_array)
 		return -EINVAL;
 	return gpiod_set_array_value_complex(true, true, array_size, desc_array,
-				      value_bitmap);
+				      array_info, value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value_cansleep);
 
@@ -3539,6 +3561,7 @@ void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n)
  * gpiod_set_array_value_cansleep() - assign values to an array of GPIOs
  * @array_size: number of elements in the descriptor array / value bitmap
  * @desc_array: array of GPIO descriptors whose values will be assigned
+ * @array_info: information on applicability of fast bitmap processing path
  * @value_bitmap: bitmap of values to assign
  *
  * Set the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status
@@ -3548,13 +3571,14 @@ void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n)
  */
 void gpiod_set_array_value_cansleep(unsigned int array_size,
 				    struct gpio_desc **desc_array,
+				    struct gpio_array *array_info,
 				    unsigned long *value_bitmap)
 {
 	might_sleep_if(extra_checks);
 	if (!desc_array)
 		return;
 	gpiod_set_array_value_complex(false, true, array_size, desc_array,
-				      value_bitmap);
+				      array_info, value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_array_value_cansleep);
 
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index b60905d558b1..b65ca896b24d 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -196,10 +196,12 @@ struct gpio_desc *gpiochip_get_desc(struct gpio_chip *chip, u16 hwnum);
 int gpiod_get_array_value_complex(bool raw, bool can_sleep,
 				  unsigned int array_size,
 				  struct gpio_desc **desc_array,
+				  struct gpio_array *array_info,
 				  unsigned long *value_bitmap);
 int gpiod_set_array_value_complex(bool raw, bool can_sleep,
 				   unsigned int array_size,
 				   struct gpio_desc **desc_array,
+				   struct gpio_array *array_info,
 				   unsigned long *value_bitmap);
 
 /* This is just passed between gpiolib and devres */
diff --git a/drivers/i2c/muxes/i2c-mux-gpio.c b/drivers/i2c/muxes/i2c-mux-gpio.c
index d835857bb094..13882a2a4f60 100644
--- a/drivers/i2c/muxes/i2c-mux-gpio.c
+++ b/drivers/i2c/muxes/i2c-mux-gpio.c
@@ -30,7 +30,8 @@ static void i2c_mux_gpio_set(const struct gpiomux *mux, unsigned val)
 
 	values[0] = val;
 
-	gpiod_set_array_value_cansleep(mux->data.n_gpios, mux->gpios, values);
+	gpiod_set_array_value_cansleep(mux->data.n_gpios, mux->gpios, NULL,
+				       values);
 }
 
 static int i2c_mux_gpio_select(struct i2c_mux_core *muxc, u32 chan)
diff --git a/drivers/mmc/core/pwrseq_simple.c b/drivers/mmc/core/pwrseq_simple.c
index 902476ef9a0e..7f882a2bb872 100644
--- a/drivers/mmc/core/pwrseq_simple.c
+++ b/drivers/mmc/core/pwrseq_simple.c
@@ -46,7 +46,7 @@ static void mmc_pwrseq_simple_set_gpios_value(struct mmc_pwrseq_simple *pwrseq,
 		values[0] = value;
 
 		gpiod_set_array_value_cansleep(nvalues, reset_gpios->desc,
-					       values);
+					       reset_gpios->info, values);
 	}
 }
 
diff --git a/drivers/mux/gpio.c b/drivers/mux/gpio.c
index 46c44532fbd5..02c1f2c014e8 100644
--- a/drivers/mux/gpio.c
+++ b/drivers/mux/gpio.c
@@ -27,7 +27,8 @@ static int mux_gpio_set(struct mux_control *mux, int state)
 	values[0] = state;
 
 	gpiod_set_array_value_cansleep(mux_gpio->gpios->ndescs,
-				       mux_gpio->gpios->desc, values);
+				       mux_gpio->gpios->desc,
+				       mux_gpio->gpios->info, values);
 
 	return 0;
 }
diff --git a/drivers/net/phy/mdio-mux-gpio.c b/drivers/net/phy/mdio-mux-gpio.c
index e25ccfc8c070..fe34576262bd 100644
--- a/drivers/net/phy/mdio-mux-gpio.c
+++ b/drivers/net/phy/mdio-mux-gpio.c
@@ -34,7 +34,7 @@ static int mdio_mux_gpio_switch_fn(int current_child, int desired_child,
 	values[0] = desired_child;
 
 	gpiod_set_array_value_cansleep(s->gpios->ndescs, s->gpios->desc,
-				       values);
+				       s->gpios->info, values);
 
 	return 0;
 }
diff --git a/drivers/pcmcia/soc_common.c b/drivers/pcmcia/soc_common.c
index ac033d555700..3a8c84bb174d 100644
--- a/drivers/pcmcia/soc_common.c
+++ b/drivers/pcmcia/soc_common.c
@@ -364,7 +364,7 @@ static int soc_common_pcmcia_config_skt(
 		}
 
 		if (n)
-			gpiod_set_array_value_cansleep(n, descs, values);
+			gpiod_set_array_value_cansleep(n, descs, NULL, values);
 
 		/*
 		 * This really needs a better solution.  The IRQ
diff --git a/drivers/phy/motorola/phy-mapphone-mdm6600.c b/drivers/phy/motorola/phy-mapphone-mdm6600.c
index 9162b61ddb95..25d456a323c2 100644
--- a/drivers/phy/motorola/phy-mapphone-mdm6600.c
+++ b/drivers/phy/motorola/phy-mapphone-mdm6600.c
@@ -162,7 +162,8 @@ static void phy_mdm6600_cmd(struct phy_mdm6600 *ddata, int val)
 	values[0] = val;
 
 	gpiod_set_array_value_cansleep(PHY_MDM6600_NR_CMD_LINES,
-				       ddata->cmd_gpios->desc, values);
+				       ddata->cmd_gpios->desc,
+				       ddata->cmd_gpios->info, values);
 }
 
 /**
@@ -181,6 +182,7 @@ static void phy_mdm6600_status(struct work_struct *work)
 
 	error = gpiod_get_array_value_cansleep(PHY_MDM6600_NR_STATUS_LINES,
 					       ddata->status_gpios->desc,
+					       ddata->status_gpios->info,
 					       values);
 	if (error)
 		return;
diff --git a/drivers/staging/iio/adc/ad7606.c b/drivers/staging/iio/adc/ad7606.c
index 9c1d77d48700..b7810b1aad07 100644
--- a/drivers/staging/iio/adc/ad7606.c
+++ b/drivers/staging/iio/adc/ad7606.c
@@ -230,7 +230,8 @@ static int ad7606_write_raw(struct iio_dev *indio_dev,
 		values[0] = ret;
 
 		mutex_lock(&st->lock);
-		gpiod_set_array_value(3, st->gpio_os->desc, values);
+		gpiod_set_array_value(3, st->gpio_os->desc, st->gpio_os->info,
+				      values);
 		st->oversampling = val;
 		mutex_unlock(&st->lock);
 
diff --git a/drivers/tty/serial/serial_mctrl_gpio.c b/drivers/tty/serial/serial_mctrl_gpio.c
index 7d9d2c7b6c39..39ed56214cd3 100644
--- a/drivers/tty/serial/serial_mctrl_gpio.c
+++ b/drivers/tty/serial/serial_mctrl_gpio.c
@@ -53,7 +53,7 @@ void mctrl_gpio_set(struct mctrl_gpios *gpios, unsigned int mctrl)
 				     mctrl & mctrl_gpios_desc[i].mctrl);
 			count++;
 		}
-	gpiod_set_array_value(count, desc_array, values);
+	gpiod_set_array_value(count, desc_array, NULL, values);
 }
 EXPORT_SYMBOL_GPL(mctrl_gpio_set);
 
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 0ffd71c0a77c..d7fbe30ece84 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -114,36 +114,44 @@ int gpiod_direction_output_raw(struct gpio_desc *desc, int value);
 int gpiod_get_value(const struct gpio_desc *desc);
 int gpiod_get_array_value(unsigned int array_size,
 			  struct gpio_desc **desc_array,
+			  struct gpio_array *array_info,
 			  unsigned long *value_bitmap);
 void gpiod_set_value(struct gpio_desc *desc, int value);
 void gpiod_set_array_value(unsigned int array_size,
 			   struct gpio_desc **desc_array,
+			   struct gpio_array *array_info,
 			   unsigned long *value_bitmap);
 int gpiod_get_raw_value(const struct gpio_desc *desc);
 int gpiod_get_raw_array_value(unsigned int array_size,
 			      struct gpio_desc **desc_array,
+			      struct gpio_array *array_info,
 			      unsigned long *value_bitmap);
 void gpiod_set_raw_value(struct gpio_desc *desc, int value);
 int gpiod_set_raw_array_value(unsigned int array_size,
 			       struct gpio_desc **desc_array,
+			       struct gpio_array *array_info,
 			       unsigned long *value_bitmap);
 
 /* Value get/set from sleeping context */
 int gpiod_get_value_cansleep(const struct gpio_desc *desc);
 int gpiod_get_array_value_cansleep(unsigned int array_size,
 				   struct gpio_desc **desc_array,
+				   struct gpio_array *array_info,
 				   unsigned long *value_bitmap);
 void gpiod_set_value_cansleep(struct gpio_desc *desc, int value);
 void gpiod_set_array_value_cansleep(unsigned int array_size,
 				    struct gpio_desc **desc_array,
+				    struct gpio_array *array_info,
 				    unsigned long *value_bitmap);
 int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc);
 int gpiod_get_raw_array_value_cansleep(unsigned int array_size,
 				       struct gpio_desc **desc_array,
+				       struct gpio_array *array_info,
 				       unsigned long *value_bitmap);
 void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value);
 int gpiod_set_raw_array_value_cansleep(unsigned int array_size,
 					struct gpio_desc **desc_array,
+					struct gpio_array *array_info,
 					unsigned long *value_bitmap);
 
 int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce);
@@ -341,6 +349,7 @@ static inline int gpiod_get_value(const struct gpio_desc *desc)
 }
 static inline int gpiod_get_array_value(unsigned int array_size,
 					struct gpio_desc **desc_array,
+					struct gpio_array *array_info,
 					unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
@@ -354,6 +363,7 @@ static inline void gpiod_set_value(struct gpio_desc *desc, int value)
 }
 static inline void gpiod_set_array_value(unsigned int array_size,
 					 struct gpio_desc **desc_array,
+					 struct gpio_array *array_info,
 					 unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
@@ -367,6 +377,7 @@ static inline int gpiod_get_raw_value(const struct gpio_desc *desc)
 }
 static inline int gpiod_get_raw_array_value(unsigned int array_size,
 					    struct gpio_desc **desc_array,
+					    struct gpio_array *array_info,
 					    unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
@@ -380,6 +391,7 @@ static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value)
 }
 static inline int gpiod_set_raw_array_value(unsigned int array_size,
 					     struct gpio_desc **desc_array,
+					     struct gpio_array *array_info,
 					     unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
@@ -395,6 +407,7 @@ static inline int gpiod_get_value_cansleep(const struct gpio_desc *desc)
 }
 static inline int gpiod_get_array_value_cansleep(unsigned int array_size,
 				     struct gpio_desc **desc_array,
+				     struct gpio_array *array_info,
 				     unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
@@ -408,6 +421,7 @@ static inline void gpiod_set_value_cansleep(struct gpio_desc *desc, int value)
 }
 static inline void gpiod_set_array_value_cansleep(unsigned int array_size,
 					    struct gpio_desc **desc_array,
+					    struct gpio_array *array_info,
 					    unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
@@ -421,6 +435,7 @@ static inline int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc)
 }
 static inline int gpiod_get_raw_array_value_cansleep(unsigned int array_size,
 					       struct gpio_desc **desc_array,
+					       struct gpio_array *array_info,
 					       unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
@@ -435,6 +450,7 @@ static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc,
 }
 static inline int gpiod_set_raw_array_value_cansleep(unsigned int array_size,
 						struct gpio_desc **desc_array,
+						struct gpio_array *array_info,
 						unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
-- 
cgit v1.2.3


From 1e479c619b2ac983fdea1a8212c7b822b5098da0 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 12 Sep 2018 22:22:45 +0200
Subject: rtc: unexport non devm managed registration

Ensure the non managed version of the un/registration functions is not used
anymore. No driver is using it anymore and they should not be necessary.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/class.c | 12 +++++-------
 include/linux/rtc.h |  5 -----
 2 files changed, 5 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 0fca4d74c76b..3b43787f154b 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -286,9 +286,10 @@ static void rtc_device_get_offset(struct rtc_device *rtc)
  *
  * Returns the pointer to the new struct class device.
  */
-struct rtc_device *rtc_device_register(const char *name, struct device *dev,
-					const struct rtc_class_ops *ops,
-					struct module *owner)
+static struct rtc_device *rtc_device_register(const char *name,
+					      struct device *dev,
+					      const struct rtc_class_ops *ops,
+					      struct module *owner)
 {
 	struct rtc_device *rtc;
 	struct rtc_wkalrm alrm;
@@ -351,15 +352,13 @@ exit:
 			name, err);
 	return ERR_PTR(err);
 }
-EXPORT_SYMBOL_GPL(rtc_device_register);
-
 
 /**
  * rtc_device_unregister - removes the previously registered RTC class device
  *
  * @rtc: the RTC class device to destroy
  */
-void rtc_device_unregister(struct rtc_device *rtc)
+static void rtc_device_unregister(struct rtc_device *rtc)
 {
 	mutex_lock(&rtc->ops_lock);
 	/*
@@ -372,7 +371,6 @@ void rtc_device_unregister(struct rtc_device *rtc)
 	mutex_unlock(&rtc->ops_lock);
 	put_device(&rtc->dev);
 }
-EXPORT_SYMBOL_GPL(rtc_device_unregister);
 
 static void devm_rtc_device_release(struct device *dev, void *res)
 {
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 6aedc30003e7..faf00a1472d4 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -167,17 +167,12 @@ struct rtc_device {
 #define RTC_TIMESTAMP_BEGIN_2000	946684800LL /* 2000-01-01 00:00:00 */
 #define RTC_TIMESTAMP_END_2099		4102444799LL /* 2099-12-31 23:59:59 */
 
-extern struct rtc_device *rtc_device_register(const char *name,
-					struct device *dev,
-					const struct rtc_class_ops *ops,
-					struct module *owner);
 extern struct rtc_device *devm_rtc_device_register(struct device *dev,
 					const char *name,
 					const struct rtc_class_ops *ops,
 					struct module *owner);
 struct rtc_device *devm_rtc_allocate_device(struct device *dev);
 int __rtc_register_device(struct module *owner, struct rtc_device *rtc);
-extern void rtc_device_unregister(struct rtc_device *rtc);
 extern void devm_rtc_device_unregister(struct device *dev,
 					struct rtc_device *rtc);
 
-- 
cgit v1.2.3


From 15033f0457dca569b284bef0c8d3ad55fb37eacb Mon Sep 17 00:00:00 2001
From: Andre Naujoks <nautsch2@gmail.com>
Date: Mon, 10 Sep 2018 10:27:15 +0200
Subject: ipv6: Add sockopt IPV6_MULTICAST_ALL analogue to IP_MULTICAST_ALL

The socket option will be enabled by default to ensure current behaviour
is not changed. This is the same for the IPv4 version.

A socket bound to in6addr_any and a specific port will receive all traffic
on that port. Analogue to IP_MULTICAST_ALL, disable this behaviour, if
one or more multicast groups were joined (using said socket) and only
pass on multicast traffic from groups, which were explicitly joined via
this socket.

Without this option disabled a socket (system even) joined to multiple
multicast groups is very hard to get right. Filtering by destination
address has to take place in user space to avoid receiving multicast
traffic from other multicast groups, which might have traffic on the same
port.

The extension of the IP_MULTICAST_ALL socketoption to just apply to ipv6,
too, is not done to avoid changing the behaviour of current applications.

Signed-off-by: Andre Naujoks <nautsch2@gmail.com>
Acked-By: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h     |  3 ++-
 include/uapi/linux/in6.h |  1 +
 net/ipv6/af_inet6.c      |  1 +
 net/ipv6/ipv6_sockglue.c | 11 +++++++++++
 net/ipv6/mcast.c         |  2 +-
 5 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 8415bf1a9776..495e834c1367 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -274,7 +274,8 @@ struct ipv6_pinfo {
 						 */
 				dontfrag:1,
 				autoflowlabel:1,
-				autoflowlabel_set:1;
+				autoflowlabel_set:1,
+				mc_all:1;
 	__u8			min_hopcount;
 	__u8			tclass;
 	__be32			rcv_flowinfo;
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index ed291e55f024..71d82fe15b03 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -177,6 +177,7 @@ struct in6_flowlabel_req {
 #define IPV6_V6ONLY		26
 #define IPV6_JOIN_ANYCAST	27
 #define IPV6_LEAVE_ANYCAST	28
+#define IPV6_MULTICAST_ALL	29
 
 /* IPV6_MTU_DISCOVER values */
 #define IPV6_PMTUDISC_DONT		0
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 9a4261e50272..77ef8478234f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -209,6 +209,7 @@ lookup_protocol:
 	np->hop_limit	= -1;
 	np->mcast_hops	= IPV6_DEFAULT_MCASTHOPS;
 	np->mc_loop	= 1;
+	np->mc_all	= 1;
 	np->pmtudisc	= IPV6_PMTUDISC_WANT;
 	np->repflow	= net->ipv6.sysctl.flowlabel_reflect;
 	sk->sk_ipv6only	= net->ipv6.sysctl.bindv6only;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index c0cac9cc3a28..381ce38940ae 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -674,6 +674,13 @@ done:
 			retv = ipv6_sock_ac_drop(sk, mreq.ipv6mr_ifindex, &mreq.ipv6mr_acaddr);
 		break;
 	}
+	case IPV6_MULTICAST_ALL:
+		if (optlen < sizeof(int))
+			goto e_inval;
+		np->mc_all = valbool;
+		retv = 0;
+		break;
+
 	case MCAST_JOIN_GROUP:
 	case MCAST_LEAVE_GROUP:
 	{
@@ -1266,6 +1273,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		val = np->mcast_oif;
 		break;
 
+	case IPV6_MULTICAST_ALL:
+		val = np->mc_all;
+		break;
+
 	case IPV6_UNICAST_IF:
 		val = (__force int)htonl((__u32) np->ucast_oif);
 		break;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 4ae54aaca373..6895e1dc0b03 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -636,7 +636,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
 	}
 	if (!mc) {
 		rcu_read_unlock();
-		return true;
+		return np->mc_all;
 	}
 	read_lock(&mc->sflock);
 	psl = mc->sflist;
-- 
cgit v1.2.3


From 37a3a98ef601f89100e3bb657fb0e190b857028c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 10 Sep 2018 16:20:25 +0200
Subject: ALSA: hda - Enable runtime PM only for discrete GPU

The recent change of vga_switcheroo allowed the runtime PM for
HD-audio on AMD GPUs, but this also resulted in a regression.  When
the HD-audio controller driver gets runtime-suspended, HD-audio link
is turned off, and the hotplug notification is ignored.  This leads to
the inconsistent audio state (the connection isn't notified and ELD is
ignored).

The best fix would be to implement the proper ELD notification via the
audio component, but it's still not ready.  As a quick workaround,
this patch adds the check of runtime_idle and allows the runtime
suspend only when the vga_switcheroo is bound with discrete GPU.
That is, a system with a single GPU and APU would be again without
runtime PM to keep the HD-audio link for the hotplug notification and
ELD read out.

Also, the codec->auto_runtime_pm flag is set only for the discrete GPU
at the time GPU gets bound via vga_switcheroo (i.e. only dGPU is
forcibly runtime-PM enabled), so that APU can still get the ELD
notification.

For identifying which GPU is bound, a new vga_switcheroo client
callback, gpu_bound, is implemented.  The vga_switcheroo simply calls
this when GPU is bound, and tells whether it's dGPU or APU.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200945
Fixes: 07f4f97d7b4b ("vga_switcheroo: Use device link for HDA controller")
Reported-by: Jian-Hong Pan <jian-hong@endlessm.com>
Tested-by: Jian-Hong Pan <jian-hong@endlessm.com>
Acked-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 drivers/gpu/vga/vga_switcheroo.c |  2 +
 include/linux/vga_switcheroo.h   |  3 ++
 sound/pci/hda/hda_intel.c        | 86 +++++++++++++++++++++++++++++-----------
 sound/pci/hda/hda_intel.h        |  1 +
 4 files changed, 69 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index a96bf46bc483..cf2a18571d48 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -215,6 +215,8 @@ static void vga_switcheroo_enable(void)
 			return;
 
 		client->id = ret | ID_BIT_AUDIO;
+		if (client->ops->gpu_bound)
+			client->ops->gpu_bound(client->pdev, ret);
 	}
 
 	vga_switcheroo_debugfs_init(&vgasr_priv);
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index a34539b7f750..7e6ac0114d55 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -133,15 +133,18 @@ struct vga_switcheroo_handler {
  * @can_switch: check if the device is in a position to switch now.
  * 	Mandatory. The client should return false if a user space process
  * 	has one of its device files open
+ * @gpu_bound: notify the client id to audio client when the GPU is bound.
  *
  * Client callbacks. A client can be either a GPU or an audio device on a GPU.
  * The @set_gpu_state and @can_switch methods are mandatory, @reprobe may be
  * set to NULL. For audio clients, the @reprobe member is bogus.
+ * OTOH, @gpu_bound is only for audio clients, and not used for GPU clients.
  */
 struct vga_switcheroo_client_ops {
 	void (*set_gpu_state)(struct pci_dev *dev, enum vga_switcheroo_state);
 	void (*reprobe)(struct pci_dev *dev);
 	bool (*can_switch)(struct pci_dev *dev);
+	void (*gpu_bound)(struct pci_dev *dev, enum vga_switcheroo_client_id);
 };
 
 #if defined(CONFIG_VGA_SWITCHEROO)
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 1b2ce304152a..aa4c672dbaf7 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -365,8 +365,10 @@ enum {
  */
 #ifdef SUPPORT_VGA_SWITCHEROO
 #define use_vga_switcheroo(chip)	((chip)->use_vga_switcheroo)
+#define needs_eld_notify_link(chip)	((chip)->need_eld_notify_link)
 #else
 #define use_vga_switcheroo(chip)	0
+#define needs_eld_notify_link(chip)	false
 #endif
 
 #define CONTROLLER_IN_GPU(pci) (((pci)->device == 0x0a0c) || \
@@ -453,6 +455,7 @@ static inline void mark_runtime_wc(struct azx *chip, struct azx_dev *azx_dev,
 #endif
 
 static int azx_acquire_irq(struct azx *chip, int do_disconnect);
+static void set_default_power_save(struct azx *chip);
 
 /*
  * initialize the PCI registers
@@ -1201,6 +1204,10 @@ static int azx_runtime_idle(struct device *dev)
 	    azx_bus(chip)->codec_powered || !chip->running)
 		return -EBUSY;
 
+	/* ELD notification gets broken when HD-audio bus is off */
+	if (needs_eld_notify_link(hda))
+		return -EBUSY;
+
 	return 0;
 }
 
@@ -1298,6 +1305,36 @@ static bool azx_vs_can_switch(struct pci_dev *pci)
 	return true;
 }
 
+/*
+ * The discrete GPU cannot power down unless the HDA controller runtime
+ * suspends, so activate runtime PM on codecs even if power_save == 0.
+ */
+static void setup_vga_switcheroo_runtime_pm(struct azx *chip)
+{
+	struct hda_intel *hda = container_of(chip, struct hda_intel, chip);
+	struct hda_codec *codec;
+
+	if (hda->use_vga_switcheroo && !hda->need_eld_notify_link) {
+		list_for_each_codec(codec, &chip->bus)
+			codec->auto_runtime_pm = 1;
+		/* reset the power save setup */
+		if (chip->running)
+			set_default_power_save(chip);
+	}
+}
+
+static void azx_vs_gpu_bound(struct pci_dev *pci,
+			     enum vga_switcheroo_client_id client_id)
+{
+	struct snd_card *card = pci_get_drvdata(pci);
+	struct azx *chip = card->private_data;
+	struct hda_intel *hda = container_of(chip, struct hda_intel, chip);
+
+	if (client_id == VGA_SWITCHEROO_DIS)
+		hda->need_eld_notify_link = 0;
+	setup_vga_switcheroo_runtime_pm(chip);
+}
+
 static void init_vga_switcheroo(struct azx *chip)
 {
 	struct hda_intel *hda = container_of(chip, struct hda_intel, chip);
@@ -1306,6 +1343,7 @@ static void init_vga_switcheroo(struct azx *chip)
 		dev_info(chip->card->dev,
 			 "Handle vga_switcheroo audio client\n");
 		hda->use_vga_switcheroo = 1;
+		hda->need_eld_notify_link = 1; /* cleared in gpu_bound op */
 		chip->driver_caps |= AZX_DCAPS_PM_RUNTIME;
 		pci_dev_put(p);
 	}
@@ -1314,6 +1352,7 @@ static void init_vga_switcheroo(struct azx *chip)
 static const struct vga_switcheroo_client_ops azx_vs_ops = {
 	.set_gpu_state = azx_vs_set_state,
 	.can_switch = azx_vs_can_switch,
+	.gpu_bound = azx_vs_gpu_bound,
 };
 
 static int register_vga_switcheroo(struct azx *chip)
@@ -1339,6 +1378,7 @@ static int register_vga_switcheroo(struct azx *chip)
 #define init_vga_switcheroo(chip)		/* NOP */
 #define register_vga_switcheroo(chip)		0
 #define check_hdmi_disabled(pci)	false
+#define setup_vga_switcheroo_runtime_pm(chip)	/* NOP */
 #endif /* SUPPORT_VGA_SWITCHER */
 
 /*
@@ -1352,6 +1392,7 @@ static int azx_free(struct azx *chip)
 
 	if (azx_has_pm_runtime(chip) && chip->running)
 		pm_runtime_get_noresume(&pci->dev);
+	chip->running = 0;
 
 	azx_del_card_list(chip);
 
@@ -2230,6 +2271,25 @@ static struct snd_pci_quirk power_save_blacklist[] = {
 };
 #endif /* CONFIG_PM */
 
+static void set_default_power_save(struct azx *chip)
+{
+	int val = power_save;
+
+#ifdef CONFIG_PM
+	if (pm_blacklist) {
+		const struct snd_pci_quirk *q;
+
+		q = snd_pci_quirk_lookup(chip->pci, power_save_blacklist);
+		if (q && val) {
+			dev_info(chip->card->dev, "device %04x:%04x is on the power_save blacklist, forcing power_save to 0\n",
+				 q->subvendor, q->subdevice);
+			val = 0;
+		}
+	}
+#endif /* CONFIG_PM */
+	snd_hda_set_power_save(&chip->bus, val * 1000);
+}
+
 /* number of codec slots for each chipset: 0 = default slots (i.e. 4) */
 static unsigned int azx_max_codecs[AZX_NUM_DRIVERS] = {
 	[AZX_DRIVER_NVIDIA] = 8,
@@ -2241,9 +2301,7 @@ static int azx_probe_continue(struct azx *chip)
 	struct hda_intel *hda = container_of(chip, struct hda_intel, chip);
 	struct hdac_bus *bus = azx_bus(chip);
 	struct pci_dev *pci = chip->pci;
-	struct hda_codec *codec;
 	int dev = chip->dev_index;
-	int val;
 	int err;
 
 	hda->probe_continued = 1;
@@ -2322,31 +2380,13 @@ static int azx_probe_continue(struct azx *chip)
 	if (err < 0)
 		goto out_free;
 
+	setup_vga_switcheroo_runtime_pm(chip);
+
 	chip->running = 1;
 	azx_add_card_list(chip);
 
-	val = power_save;
-#ifdef CONFIG_PM
-	if (pm_blacklist) {
-		const struct snd_pci_quirk *q;
-
-		q = snd_pci_quirk_lookup(chip->pci, power_save_blacklist);
-		if (q && val) {
-			dev_info(chip->card->dev, "device %04x:%04x is on the power_save blacklist, forcing power_save to 0\n",
-				 q->subvendor, q->subdevice);
-			val = 0;
-		}
-	}
-#endif /* CONFIG_PM */
-	/*
-	 * The discrete GPU cannot power down unless the HDA controller runtime
-	 * suspends, so activate runtime PM on codecs even if power_save == 0.
-	 */
-	if (use_vga_switcheroo(hda))
-		list_for_each_codec(codec, &chip->bus)
-			codec->auto_runtime_pm = 1;
+	set_default_power_save(chip);
 
-	snd_hda_set_power_save(&chip->bus, val * 1000);
 	if (azx_has_pm_runtime(chip))
 		pm_runtime_put_autosuspend(&pci->dev);
 
diff --git a/sound/pci/hda/hda_intel.h b/sound/pci/hda/hda_intel.h
index e3a3d318d2e5..f59719e06b91 100644
--- a/sound/pci/hda/hda_intel.h
+++ b/sound/pci/hda/hda_intel.h
@@ -37,6 +37,7 @@ struct hda_intel {
 
 	/* vga_switcheroo setup */
 	unsigned int use_vga_switcheroo:1;
+	unsigned int need_eld_notify_link:1;
 	unsigned int vga_switcheroo_registered:1;
 	unsigned int init_failed:1; /* delayed init failed */
 
-- 
cgit v1.2.3


From fe8dd45bb7556246c6b76277b1ba4296c91c2505 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Wed, 12 Sep 2018 11:17:06 +0800
Subject: tun: switch to new type of msg_control

This patch introduces to a new tun/tap specific msg_control:

#define TUN_MSG_UBUF 1
#define TUN_MSG_PTR  2
struct tun_msg_ctl {
       int type;
       void *ptr;
};

This allows us to pass different kinds of msg_control through
sendmsg(). The first supported type is ubuf (TUN_MSG_UBUF) which will
be used by the existed vhost_net zerocopy code. The second is XDP
buff, which allows vhost_net to pass XDP buff to TUN. This could be
used to implement accepting an array of XDP buffs from vhost_net in
the following patches.

Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tap.c      | 18 ++++++++++++------
 drivers/net/tun.c      |  6 +++++-
 drivers/vhost/net.c    |  7 +++++--
 include/linux/if_tun.h | 14 ++++++++++++++
 4 files changed, 36 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index f0f7cd977667..7996ed7cbf18 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -619,7 +619,7 @@ static inline struct sk_buff *tap_alloc_skb(struct sock *sk, size_t prepad,
 #define TAP_RESERVE HH_DATA_OFF(ETH_HLEN)
 
 /* Get packet from user space buffer */
-static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m,
+static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
 			    struct iov_iter *from, int noblock)
 {
 	int good_linear = SKB_MAX_HEAD(TAP_RESERVE);
@@ -663,7 +663,7 @@ static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m,
 	if (unlikely(len < ETH_HLEN))
 		goto err;
 
-	if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) {
+	if (msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) {
 		struct iov_iter i;
 
 		copylen = vnet_hdr.hdr_len ?
@@ -724,11 +724,11 @@ static ssize_t tap_get_user(struct tap_queue *q, struct msghdr *m,
 	tap = rcu_dereference(q->tap);
 	/* copy skb_ubuf_info for callback when skb has no error */
 	if (zerocopy) {
-		skb_shinfo(skb)->destructor_arg = m->msg_control;
+		skb_shinfo(skb)->destructor_arg = msg_control;
 		skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
 		skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
-	} else if (m && m->msg_control) {
-		struct ubuf_info *uarg = m->msg_control;
+	} else if (msg_control) {
+		struct ubuf_info *uarg = msg_control;
 		uarg->callback(uarg, false);
 	}
 
@@ -1150,7 +1150,13 @@ static int tap_sendmsg(struct socket *sock, struct msghdr *m,
 		       size_t total_len)
 {
 	struct tap_queue *q = container_of(sock, struct tap_queue, sock);
-	return tap_get_user(q, m, &m->msg_iter, m->msg_flags & MSG_DONTWAIT);
+	struct tun_msg_ctl *ctl = m->msg_control;
+
+	if (ctl && ctl->type != TUN_MSG_UBUF)
+		return -EINVAL;
+
+	return tap_get_user(q, ctl ? ctl->ptr : NULL, &m->msg_iter,
+			    m->msg_flags & MSG_DONTWAIT);
 }
 
 static int tap_recvmsg(struct socket *sock, struct msghdr *m,
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 3ae539374f6b..89779b58c7ca 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2431,11 +2431,15 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 	int ret;
 	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
 	struct tun_struct *tun = tun_get(tfile);
+	struct tun_msg_ctl *ctl = m->msg_control;
 
 	if (!tun)
 		return -EBADFD;
 
-	ret = tun_get_user(tun, tfile, m->msg_control, &m->msg_iter,
+	if (ctl && ctl->type != TUN_MSG_UBUF)
+		return -EINVAL;
+
+	ret = tun_get_user(tun, tfile, ctl ? ctl->ptr : NULL, &m->msg_iter,
 			   m->msg_flags & MSG_DONTWAIT,
 			   m->msg_flags & MSG_MORE);
 	tun_put(tun);
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 4e656f89cb22..fb01ce6d981c 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -620,6 +620,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
 		.msg_controllen = 0,
 		.msg_flags = MSG_DONTWAIT,
 	};
+	struct tun_msg_ctl ctl;
 	size_t len, total_len = 0;
 	int err;
 	struct vhost_net_ubuf_ref *uninitialized_var(ubufs);
@@ -664,8 +665,10 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
 			ubuf->ctx = nvq->ubufs;
 			ubuf->desc = nvq->upend_idx;
 			refcount_set(&ubuf->refcnt, 1);
-			msg.msg_control = ubuf;
-			msg.msg_controllen = sizeof(ubuf);
+			msg.msg_control = &ctl;
+			ctl.type = TUN_MSG_UBUF;
+			ctl.ptr = ubuf;
+			msg.msg_controllen = sizeof(ctl);
 			ubufs = nvq->ubufs;
 			atomic_inc(&ubufs->refcount);
 			nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 3d2996dc7d85..12e3eebf0ce6 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -16,9 +16,23 @@
 #define __IF_TUN_H
 
 #include <uapi/linux/if_tun.h>
+#include <uapi/linux/virtio_net.h>
 
 #define TUN_XDP_FLAG 0x1UL
 
+#define TUN_MSG_UBUF 1
+#define TUN_MSG_PTR  2
+struct tun_msg_ctl {
+	unsigned short type;
+	unsigned short num;
+	void *ptr;
+};
+
+struct tun_xdp_hdr {
+	int buflen;
+	struct virtio_net_hdr gso;
+};
+
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);
 struct ptr_ring *tun_get_tx_ring(struct file *file);
-- 
cgit v1.2.3


From 7f9c136216c745099f36a4e0c3b2e63eedeb442f Mon Sep 17 00:00:00 2001
From: Venkata Narendra Kumar Gutta <vnkgutta@codeaurora.org>
Date: Wed, 12 Sep 2018 11:06:32 -0700
Subject: soc: qcom: Add broadcast base for Last Level Cache Controller (LLCC)

Currently, broadcast base is set to end of the LLCC banks, which may
not be correct always. As the number of banks may vary for each chipset
and the broadcast base could be at a different address as well. This info
depends on the chipset, so get the broadcast base info from the device
tree (DT). Add broadcast base in LLCC driver and use this for broadcast
writes.

Signed-off-by: Venkata Narendra Kumar Gutta <vnkgutta@codeaurora.org>
Reviewed-by: Evan Green <evgreen@chromium.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 drivers/soc/qcom/llcc-slice.c      | 55 +++++++++++++++++++++++---------------
 include/linux/soc/qcom/llcc-qcom.h |  4 +--
 2 files changed, 35 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/soc/qcom/llcc-slice.c b/drivers/soc/qcom/llcc-slice.c
index 54063a31132f..08e3d388e153 100644
--- a/drivers/soc/qcom/llcc-slice.c
+++ b/drivers/soc/qcom/llcc-slice.c
@@ -106,22 +106,24 @@ static int llcc_update_act_ctrl(u32 sid,
 	u32 slice_status;
 	int ret;
 
-	act_ctrl_reg = drv_data->bcast_off + LLCC_TRP_ACT_CTRLn(sid);
-	status_reg = drv_data->bcast_off + LLCC_TRP_STATUSn(sid);
+	act_ctrl_reg = LLCC_TRP_ACT_CTRLn(sid);
+	status_reg = LLCC_TRP_STATUSn(sid);
 
 	/* Set the ACTIVE trigger */
 	act_ctrl_reg_val |= ACT_CTRL_ACT_TRIG;
-	ret = regmap_write(drv_data->regmap, act_ctrl_reg, act_ctrl_reg_val);
+	ret = regmap_write(drv_data->bcast_regmap, act_ctrl_reg,
+				act_ctrl_reg_val);
 	if (ret)
 		return ret;
 
 	/* Clear the ACTIVE trigger */
 	act_ctrl_reg_val &= ~ACT_CTRL_ACT_TRIG;
-	ret = regmap_write(drv_data->regmap, act_ctrl_reg, act_ctrl_reg_val);
+	ret = regmap_write(drv_data->bcast_regmap, act_ctrl_reg,
+				act_ctrl_reg_val);
 	if (ret)
 		return ret;
 
-	ret = regmap_read_poll_timeout(drv_data->regmap, status_reg,
+	ret = regmap_read_poll_timeout(drv_data->bcast_regmap, status_reg,
 				      slice_status, !(slice_status & status),
 				      0, LLCC_STATUS_READ_DELAY);
 	return ret;
@@ -226,16 +228,13 @@ static int qcom_llcc_cfg_program(struct platform_device *pdev)
 	int ret;
 	const struct llcc_slice_config *llcc_table;
 	struct llcc_slice_desc desc;
-	u32 bcast_off = drv_data->bcast_off;
 
 	sz = drv_data->cfg_size;
 	llcc_table = drv_data->cfg;
 
 	for (i = 0; i < sz; i++) {
-		attr1_cfg = bcast_off +
-				LLCC_TRP_ATTR1_CFGn(llcc_table[i].slice_id);
-		attr0_cfg = bcast_off +
-				LLCC_TRP_ATTR0_CFGn(llcc_table[i].slice_id);
+		attr1_cfg = LLCC_TRP_ATTR1_CFGn(llcc_table[i].slice_id);
+		attr0_cfg = LLCC_TRP_ATTR0_CFGn(llcc_table[i].slice_id);
 
 		attr1_val = llcc_table[i].cache_mode;
 		attr1_val |= llcc_table[i].probe_target_ways <<
@@ -260,10 +259,12 @@ static int qcom_llcc_cfg_program(struct platform_device *pdev)
 		attr0_val = llcc_table[i].res_ways & ATTR0_RES_WAYS_MASK;
 		attr0_val |= llcc_table[i].bonus_ways << ATTR0_BONUS_WAYS_SHIFT;
 
-		ret = regmap_write(drv_data->regmap, attr1_cfg, attr1_val);
+		ret = regmap_write(drv_data->bcast_regmap, attr1_cfg,
+					attr1_val);
 		if (ret)
 			return ret;
-		ret = regmap_write(drv_data->regmap, attr0_cfg, attr0_val);
+		ret = regmap_write(drv_data->bcast_regmap, attr0_cfg,
+					attr0_val);
 		if (ret)
 			return ret;
 		if (llcc_table[i].activate_on_init) {
@@ -279,24 +280,36 @@ int qcom_llcc_probe(struct platform_device *pdev,
 {
 	u32 num_banks;
 	struct device *dev = &pdev->dev;
-	struct resource *res;
-	void __iomem *base;
+	struct resource *llcc_banks_res, *llcc_bcast_res;
+	void __iomem *llcc_banks_base, *llcc_bcast_base;
 	int ret, i;
 
 	drv_data = devm_kzalloc(dev, sizeof(*drv_data), GFP_KERNEL);
 	if (!drv_data)
 		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(base))
-		return PTR_ERR(base);
+	llcc_banks_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+							"llcc_base");
+	llcc_banks_base = devm_ioremap_resource(&pdev->dev, llcc_banks_res);
+	if (IS_ERR(llcc_banks_base))
+		return PTR_ERR(llcc_banks_base);
 
-	drv_data->regmap = devm_regmap_init_mmio(dev, base,
-					&llcc_regmap_config);
+	drv_data->regmap = devm_regmap_init_mmio(dev, llcc_banks_base,
+						&llcc_regmap_config);
 	if (IS_ERR(drv_data->regmap))
 		return PTR_ERR(drv_data->regmap);
 
+	llcc_bcast_res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+							"llcc_broadcast_base");
+	llcc_bcast_base = devm_ioremap_resource(&pdev->dev, llcc_bcast_res);
+	if (IS_ERR(llcc_bcast_base))
+		return PTR_ERR(llcc_bcast_base);
+
+	drv_data->bcast_regmap = devm_regmap_init_mmio(dev, llcc_bcast_base,
+							&llcc_regmap_config);
+	if (IS_ERR(drv_data->bcast_regmap))
+		return PTR_ERR(drv_data->bcast_regmap);
+
 	ret = regmap_read(drv_data->regmap, LLCC_COMMON_STATUS0,
 						&num_banks);
 	if (ret)
@@ -318,8 +331,6 @@ int qcom_llcc_probe(struct platform_device *pdev,
 	for (i = 0; i < num_banks; i++)
 		drv_data->offsets[i] = i * BANK_OFFSET_STRIDE;
 
-	drv_data->bcast_off = num_banks * BANK_OFFSET_STRIDE;
-
 	drv_data->bitmap = devm_kcalloc(dev,
 	BITS_TO_LONGS(drv_data->max_slices), sizeof(unsigned long),
 						GFP_KERNEL);
diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
index 7e3b9c605ab2..c681e795b587 100644
--- a/include/linux/soc/qcom/llcc-qcom.h
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -70,22 +70,22 @@ struct llcc_slice_config {
 /**
  * llcc_drv_data - Data associated with the llcc driver
  * @regmap: regmap associated with the llcc device
+ * @bcast_regmap: regmap associated with llcc broadcast offset
  * @cfg: pointer to the data structure for slice configuration
  * @lock: mutex associated with each slice
  * @cfg_size: size of the config data table
  * @max_slices: max slices as read from device tree
- * @bcast_off: Offset of the broadcast bank
  * @num_banks: Number of llcc banks
  * @bitmap: Bit map to track the active slice ids
  * @offsets: Pointer to the bank offsets array
  */
 struct llcc_drv_data {
 	struct regmap *regmap;
+	struct regmap *bcast_regmap;
 	const struct llcc_slice_config *cfg;
 	struct mutex lock;
 	u32 cfg_size;
 	u32 max_slices;
-	u32 bcast_off;
 	u32 num_banks;
 	unsigned long *bitmap;
 	u32 *offsets;
-- 
cgit v1.2.3


From c081f3060fab316fcf103967a24e502d58488849 Mon Sep 17 00:00:00 2001
From: Venkata Narendra Kumar Gutta <vnkgutta@codeaurora.org>
Date: Wed, 12 Sep 2018 11:06:33 -0700
Subject: soc: qcom: Add support to register LLCC EDAC driver

Cache error reporting controller detects and reports single and
double bit errors on Last Level Cache Controller (LLCC) cache.
Add required support to register LLCC EDAC driver as platform driver,
from LLCC driver.

Signed-off-by: Venkata Narendra Kumar Gutta <vnkgutta@codeaurora.org>
Reviewed-by: Evan Green <evgreen@chromium.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 drivers/soc/qcom/llcc-slice.c      | 18 ++++++++++++++++--
 include/linux/soc/qcom/llcc-qcom.h |  2 ++
 2 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/soc/qcom/llcc-slice.c b/drivers/soc/qcom/llcc-slice.c
index 08e3d388e153..d78926742510 100644
--- a/drivers/soc/qcom/llcc-slice.c
+++ b/drivers/soc/qcom/llcc-slice.c
@@ -225,7 +225,7 @@ static int qcom_llcc_cfg_program(struct platform_device *pdev)
 	u32 attr0_val;
 	u32 max_cap_cacheline;
 	u32 sz;
-	int ret;
+	int ret = 0;
 	const struct llcc_slice_config *llcc_table;
 	struct llcc_slice_desc desc;
 
@@ -283,6 +283,7 @@ int qcom_llcc_probe(struct platform_device *pdev,
 	struct resource *llcc_banks_res, *llcc_bcast_res;
 	void __iomem *llcc_banks_base, *llcc_bcast_base;
 	int ret, i;
+	struct platform_device *llcc_edac;
 
 	drv_data = devm_kzalloc(dev, sizeof(*drv_data), GFP_KERNEL);
 	if (!drv_data)
@@ -342,7 +343,20 @@ int qcom_llcc_probe(struct platform_device *pdev,
 	mutex_init(&drv_data->lock);
 	platform_set_drvdata(pdev, drv_data);
 
-	return qcom_llcc_cfg_program(pdev);
+	ret = qcom_llcc_cfg_program(pdev);
+	if (ret)
+		return ret;
+
+	drv_data->ecc_irq = platform_get_irq(pdev, 0);
+	if (drv_data->ecc_irq >= 0) {
+		llcc_edac = platform_device_register_data(&pdev->dev,
+						"qcom_llcc_edac", -1, drv_data,
+						sizeof(*drv_data));
+		if (IS_ERR(llcc_edac))
+			dev_err(dev, "Failed to register llcc edac driver\n");
+	}
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(qcom_llcc_probe);
 
diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
index c681e795b587..2e4b34d2617e 100644
--- a/include/linux/soc/qcom/llcc-qcom.h
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -78,6 +78,7 @@ struct llcc_slice_config {
  * @num_banks: Number of llcc banks
  * @bitmap: Bit map to track the active slice ids
  * @offsets: Pointer to the bank offsets array
+ * @ecc_irq: interrupt for llcc cache error detection and reporting
  */
 struct llcc_drv_data {
 	struct regmap *regmap;
@@ -89,6 +90,7 @@ struct llcc_drv_data {
 	u32 num_banks;
 	unsigned long *bitmap;
 	u32 *offsets;
+	int ecc_irq;
 };
 
 #if IS_ENABLED(CONFIG_QCOM_LLCC)
-- 
cgit v1.2.3


From 27450653f1db0b9d5b5048a246c850c52ee4aa61 Mon Sep 17 00:00:00 2001
From: Channagoud Kadabi <ckadabi@codeaurora.org>
Date: Wed, 12 Sep 2018 11:06:34 -0700
Subject: drivers: edac: Add EDAC driver support for QCOM SoCs

Add error reporting driver for Single Bit Errors (SBEs) and Double Bit
Errors (DBEs). As of now, this driver supports error reporting for
Last Level Cache Controller (LLCC) of Tag RAM and Data RAM. Interrupts
are triggered when the errors happen in the cache, the driver handles
those interrupts and dumps the syndrome registers.

Signed-off-by: Channagoud Kadabi <ckadabi@codeaurora.org>
Signed-off-by: Venkata Narendra Kumar Gutta <vnkgutta@codeaurora.org>
Co-developed-by: Venkata Narendra Kumar Gutta <vnkgutta@codeaurora.org>
Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 MAINTAINERS                        |   8 +
 drivers/edac/Kconfig               |  14 ++
 drivers/edac/Makefile              |   1 +
 drivers/edac/qcom_edac.c           | 414 +++++++++++++++++++++++++++++++++++++
 include/linux/soc/qcom/llcc-qcom.h |  24 +++
 5 files changed, 461 insertions(+)
 create mode 100644 drivers/edac/qcom_edac.c

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index a5b256b25905..f7d7213ca293 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5346,6 +5346,14 @@ L:	linux-edac@vger.kernel.org
 S:	Maintained
 F:	drivers/edac/ti_edac.c
 
+EDAC-QCOM
+M:	Channagoud Kadabi <ckadabi@codeaurora.org>
+M:	Venkata Narendra Kumar Gutta <vnkgutta@codeaurora.org>
+L:	linux-arm-msm@vger.kernel.org
+L:	linux-edac@vger.kernel.org
+S:	Maintained
+F:	drivers/edac/qcom_edac.c
+
 EDIROL UA-101/UA-1000 DRIVER
 M:	Clemens Ladisch <clemens@ladisch.de>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 57304b2e989f..df9467eef32a 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -460,4 +460,18 @@ config EDAC_TI
 	  Support for error detection and correction on the
           TI SoCs.
 
+config EDAC_QCOM
+	tristate "QCOM EDAC Controller"
+	depends on ARCH_QCOM && QCOM_LLCC
+	help
+	  Support for error detection and correction on the
+	  Qualcomm Technologies, Inc. SoCs.
+
+	  This driver reports Single Bit Errors (SBEs) and Double Bit Errors (DBEs).
+	  As of now, it supports error reporting for Last Level Cache Controller (LLCC)
+	  of Tag RAM and Data RAM.
+
+	  For debugging issues having to do with stability and overall system
+	  health, you should probably say 'Y' here.
+
 endif # EDAC
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 02b43a7d8c3e..716096d08ea0 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -77,3 +77,4 @@ obj-$(CONFIG_EDAC_ALTERA)		+= altera_edac.o
 obj-$(CONFIG_EDAC_SYNOPSYS)		+= synopsys_edac.o
 obj-$(CONFIG_EDAC_XGENE)		+= xgene_edac.o
 obj-$(CONFIG_EDAC_TI)			+= ti_edac.o
+obj-$(CONFIG_EDAC_QCOM)			+= qcom_edac.o
diff --git a/drivers/edac/qcom_edac.c b/drivers/edac/qcom_edac.c
new file mode 100644
index 000000000000..82bd775124f2
--- /dev/null
+++ b/drivers/edac/qcom_edac.c
@@ -0,0 +1,414 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/edac.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/soc/qcom/llcc-qcom.h>
+
+#include "edac_mc.h"
+#include "edac_device.h"
+
+#define EDAC_LLCC                       "qcom_llcc"
+
+#define LLCC_ERP_PANIC_ON_UE            1
+
+#define TRP_SYN_REG_CNT                 6
+#define DRP_SYN_REG_CNT                 8
+
+#define LLCC_COMMON_STATUS0             0x0003000c
+#define LLCC_LB_CNT_MASK                GENMASK(31, 28)
+#define LLCC_LB_CNT_SHIFT               28
+
+/* Single & double bit syndrome register offsets */
+#define TRP_ECC_SB_ERR_SYN0             0x0002304c
+#define TRP_ECC_DB_ERR_SYN0             0x00020370
+#define DRP_ECC_SB_ERR_SYN0             0x0004204c
+#define DRP_ECC_DB_ERR_SYN0             0x00042070
+
+/* Error register offsets */
+#define TRP_ECC_ERROR_STATUS1           0x00020348
+#define TRP_ECC_ERROR_STATUS0           0x00020344
+#define DRP_ECC_ERROR_STATUS1           0x00042048
+#define DRP_ECC_ERROR_STATUS0           0x00042044
+
+/* TRP, DRP interrupt register offsets */
+#define DRP_INTERRUPT_STATUS            0x00041000
+#define TRP_INTERRUPT_0_STATUS          0x00020480
+#define DRP_INTERRUPT_CLEAR             0x00041008
+#define DRP_ECC_ERROR_CNTR_CLEAR        0x00040004
+#define TRP_INTERRUPT_0_CLEAR           0x00020484
+#define TRP_ECC_ERROR_CNTR_CLEAR        0x00020440
+
+/* Mask and shift macros */
+#define ECC_DB_ERR_COUNT_MASK           GENMASK(4, 0)
+#define ECC_DB_ERR_WAYS_MASK            GENMASK(31, 16)
+#define ECC_DB_ERR_WAYS_SHIFT           BIT(4)
+
+#define ECC_SB_ERR_COUNT_MASK           GENMASK(23, 16)
+#define ECC_SB_ERR_COUNT_SHIFT          BIT(4)
+#define ECC_SB_ERR_WAYS_MASK            GENMASK(15, 0)
+
+#define SB_ECC_ERROR                    BIT(0)
+#define DB_ECC_ERROR                    BIT(1)
+
+#define DRP_TRP_INT_CLEAR               GENMASK(1, 0)
+#define DRP_TRP_CNT_CLEAR               GENMASK(1, 0)
+
+/* Config registers offsets*/
+#define DRP_ECC_ERROR_CFG               0x00040000
+
+/* Tag RAM, Data RAM interrupt register offsets */
+#define CMN_INTERRUPT_0_ENABLE          0x0003001c
+#define CMN_INTERRUPT_2_ENABLE          0x0003003c
+#define TRP_INTERRUPT_0_ENABLE          0x00020488
+#define DRP_INTERRUPT_ENABLE            0x0004100c
+
+#define SB_ERROR_THRESHOLD              0x1
+#define SB_ERROR_THRESHOLD_SHIFT        24
+#define SB_DB_TRP_INTERRUPT_ENABLE      0x3
+#define TRP0_INTERRUPT_ENABLE           0x1
+#define DRP0_INTERRUPT_ENABLE           BIT(6)
+#define SB_DB_DRP_INTERRUPT_ENABLE      0x3
+
+enum {
+	LLCC_DRAM_CE = 0,
+	LLCC_DRAM_UE,
+	LLCC_TRAM_CE,
+	LLCC_TRAM_UE,
+};
+
+static const struct llcc_edac_reg_data edac_reg_data[] = {
+	[LLCC_DRAM_CE] = {
+		.name = "DRAM Single-bit",
+		.synd_reg = DRP_ECC_SB_ERR_SYN0,
+		.count_status_reg = DRP_ECC_ERROR_STATUS1,
+		.ways_status_reg = DRP_ECC_ERROR_STATUS0,
+		.reg_cnt = DRP_SYN_REG_CNT,
+		.count_mask = ECC_SB_ERR_COUNT_MASK,
+		.ways_mask = ECC_SB_ERR_WAYS_MASK,
+		.count_shift = ECC_SB_ERR_COUNT_SHIFT,
+	},
+	[LLCC_DRAM_UE] = {
+		.name = "DRAM Double-bit",
+		.synd_reg = DRP_ECC_DB_ERR_SYN0,
+		.count_status_reg = DRP_ECC_ERROR_STATUS1,
+		.ways_status_reg = DRP_ECC_ERROR_STATUS0,
+		.reg_cnt = DRP_SYN_REG_CNT,
+		.count_mask = ECC_DB_ERR_COUNT_MASK,
+		.ways_mask = ECC_DB_ERR_WAYS_MASK,
+		.ways_shift = ECC_DB_ERR_WAYS_SHIFT,
+	},
+	[LLCC_TRAM_CE] = {
+		.name = "TRAM Single-bit",
+		.synd_reg = TRP_ECC_SB_ERR_SYN0,
+		.count_status_reg = TRP_ECC_ERROR_STATUS1,
+		.ways_status_reg = TRP_ECC_ERROR_STATUS0,
+		.reg_cnt = TRP_SYN_REG_CNT,
+		.count_mask = ECC_SB_ERR_COUNT_MASK,
+		.ways_mask = ECC_SB_ERR_WAYS_MASK,
+		.count_shift = ECC_SB_ERR_COUNT_SHIFT,
+	},
+	[LLCC_TRAM_UE] = {
+		.name = "TRAM Double-bit",
+		.synd_reg = TRP_ECC_DB_ERR_SYN0,
+		.count_status_reg = TRP_ECC_ERROR_STATUS1,
+		.ways_status_reg = TRP_ECC_ERROR_STATUS0,
+		.reg_cnt = TRP_SYN_REG_CNT,
+		.count_mask = ECC_DB_ERR_COUNT_MASK,
+		.ways_mask = ECC_DB_ERR_WAYS_MASK,
+		.ways_shift = ECC_DB_ERR_WAYS_SHIFT,
+	},
+};
+
+static int qcom_llcc_core_setup(struct regmap *llcc_bcast_regmap)
+{
+	u32 sb_err_threshold;
+	int ret;
+
+	/*
+	 * Configure interrupt enable registers such that Tag, Data RAM related
+	 * interrupts are propagated to interrupt controller for servicing
+	 */
+	ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE,
+				 TRP0_INTERRUPT_ENABLE,
+				 TRP0_INTERRUPT_ENABLE);
+	if (ret)
+		return ret;
+
+	ret = regmap_update_bits(llcc_bcast_regmap, TRP_INTERRUPT_0_ENABLE,
+				 SB_DB_TRP_INTERRUPT_ENABLE,
+				 SB_DB_TRP_INTERRUPT_ENABLE);
+	if (ret)
+		return ret;
+
+	sb_err_threshold = (SB_ERROR_THRESHOLD << SB_ERROR_THRESHOLD_SHIFT);
+	ret = regmap_write(llcc_bcast_regmap, DRP_ECC_ERROR_CFG,
+			   sb_err_threshold);
+	if (ret)
+		return ret;
+
+	ret = regmap_update_bits(llcc_bcast_regmap, CMN_INTERRUPT_2_ENABLE,
+				 DRP0_INTERRUPT_ENABLE,
+				 DRP0_INTERRUPT_ENABLE);
+	if (ret)
+		return ret;
+
+	ret = regmap_write(llcc_bcast_regmap, DRP_INTERRUPT_ENABLE,
+			   SB_DB_DRP_INTERRUPT_ENABLE);
+	return ret;
+}
+
+/* Clear the error interrupt and counter registers */
+static int
+qcom_llcc_clear_error_status(int err_type, struct llcc_drv_data *drv)
+{
+	int ret = 0;
+
+	switch (err_type) {
+	case LLCC_DRAM_CE:
+	case LLCC_DRAM_UE:
+		ret = regmap_write(drv->bcast_regmap, DRP_INTERRUPT_CLEAR,
+				   DRP_TRP_INT_CLEAR);
+		if (ret)
+			return ret;
+
+		ret = regmap_write(drv->bcast_regmap, DRP_ECC_ERROR_CNTR_CLEAR,
+				   DRP_TRP_CNT_CLEAR);
+		if (ret)
+			return ret;
+		break;
+	case LLCC_TRAM_CE:
+	case LLCC_TRAM_UE:
+		ret = regmap_write(drv->bcast_regmap, TRP_INTERRUPT_0_CLEAR,
+				   DRP_TRP_INT_CLEAR);
+		if (ret)
+			return ret;
+
+		ret = regmap_write(drv->bcast_regmap, TRP_ECC_ERROR_CNTR_CLEAR,
+				   DRP_TRP_CNT_CLEAR);
+		if (ret)
+			return ret;
+		break;
+	default:
+		ret = -EINVAL;
+		edac_printk(KERN_CRIT, EDAC_LLCC, "Unexpected error type: %d\n",
+			    err_type);
+	}
+	return ret;
+}
+
+/* Dump Syndrome registers data for Tag RAM, Data RAM bit errors*/
+static int
+dump_syn_reg_values(struct llcc_drv_data *drv, u32 bank, int err_type)
+{
+	struct llcc_edac_reg_data reg_data = edac_reg_data[err_type];
+	int err_cnt, err_ways, ret, i;
+	u32 synd_reg, synd_val;
+
+	for (i = 0; i < reg_data.reg_cnt; i++) {
+		synd_reg = reg_data.synd_reg + (i * 4);
+		ret = regmap_read(drv->regmap, drv->offsets[bank] + synd_reg,
+				  &synd_val);
+		if (ret)
+			goto clear;
+
+		edac_printk(KERN_CRIT, EDAC_LLCC, "%s: ECC_SYN%d: 0x%8x\n",
+			    reg_data.name, i, synd_val);
+	}
+
+	ret = regmap_read(drv->regmap,
+			  drv->offsets[bank] + reg_data.count_status_reg,
+			  &err_cnt);
+	if (ret)
+		goto clear;
+
+	err_cnt &= reg_data.count_mask;
+	err_cnt >>= reg_data.count_shift;
+	edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error count: 0x%4x\n",
+		    reg_data.name, err_cnt);
+
+	ret = regmap_read(drv->regmap,
+			  drv->offsets[bank] + reg_data.ways_status_reg,
+			  &err_ways);
+	if (ret)
+		goto clear;
+
+	err_ways &= reg_data.ways_mask;
+	err_ways >>= reg_data.ways_shift;
+
+	edac_printk(KERN_CRIT, EDAC_LLCC, "%s: Error ways: 0x%4x\n",
+		    reg_data.name, err_ways);
+
+clear:
+	return qcom_llcc_clear_error_status(err_type, drv);
+}
+
+static int
+dump_syn_reg(struct edac_device_ctl_info *edev_ctl, int err_type, u32 bank)
+{
+	struct llcc_drv_data *drv = edev_ctl->pvt_info;
+	int ret;
+
+	ret = dump_syn_reg_values(drv, bank, err_type);
+	if (ret)
+		return ret;
+
+	switch (err_type) {
+	case LLCC_DRAM_CE:
+		edac_device_handle_ce(edev_ctl, 0, bank,
+				      "LLCC Data RAM correctable Error");
+		break;
+	case LLCC_DRAM_UE:
+		edac_device_handle_ue(edev_ctl, 0, bank,
+				      "LLCC Data RAM uncorrectable Error");
+		break;
+	case LLCC_TRAM_CE:
+		edac_device_handle_ce(edev_ctl, 0, bank,
+				      "LLCC Tag RAM correctable Error");
+		break;
+	case LLCC_TRAM_UE:
+		edac_device_handle_ue(edev_ctl, 0, bank,
+				      "LLCC Tag RAM uncorrectable Error");
+		break;
+	default:
+		ret = -EINVAL;
+		edac_printk(KERN_CRIT, EDAC_LLCC, "Unexpected error type: %d\n",
+			    err_type);
+	}
+
+	return ret;
+}
+
+static irqreturn_t
+llcc_ecc_irq_handler(int irq, void *edev_ctl)
+{
+	struct edac_device_ctl_info *edac_dev_ctl = edev_ctl;
+	struct llcc_drv_data *drv = edac_dev_ctl->pvt_info;
+	irqreturn_t irq_rc = IRQ_NONE;
+	u32 drp_error, trp_error, i;
+	bool irq_handled;
+	int ret;
+
+	/* Iterate over the banks and look for Tag RAM or Data RAM errors */
+	for (i = 0; i < drv->num_banks; i++) {
+		ret = regmap_read(drv->regmap,
+				  drv->offsets[i] + DRP_INTERRUPT_STATUS,
+				  &drp_error);
+
+		if (!ret && (drp_error & SB_ECC_ERROR)) {
+			edac_printk(KERN_CRIT, EDAC_LLCC,
+				    "Single Bit Error detected in Data RAM\n");
+			ret = dump_syn_reg(edev_ctl, LLCC_DRAM_CE, i);
+		} else if (!ret && (drp_error & DB_ECC_ERROR)) {
+			edac_printk(KERN_CRIT, EDAC_LLCC,
+				    "Double Bit Error detected in Data RAM\n");
+			ret = dump_syn_reg(edev_ctl, LLCC_DRAM_UE, i);
+		}
+		if (!ret)
+			irq_handled = true;
+
+		ret = regmap_read(drv->regmap,
+				  drv->offsets[i] + TRP_INTERRUPT_0_STATUS,
+				  &trp_error);
+
+		if (!ret && (trp_error & SB_ECC_ERROR)) {
+			edac_printk(KERN_CRIT, EDAC_LLCC,
+				    "Single Bit Error detected in Tag RAM\n");
+			ret = dump_syn_reg(edev_ctl, LLCC_TRAM_CE, i);
+		} else if (!ret && (trp_error & DB_ECC_ERROR)) {
+			edac_printk(KERN_CRIT, EDAC_LLCC,
+				    "Double Bit Error detected in Tag RAM\n");
+			ret = dump_syn_reg(edev_ctl, LLCC_TRAM_UE, i);
+		}
+		if (!ret)
+			irq_handled = true;
+	}
+
+	if (irq_handled)
+		irq_rc = IRQ_HANDLED;
+
+	return irq_rc;
+}
+
+static int qcom_llcc_edac_probe(struct platform_device *pdev)
+{
+	struct llcc_drv_data *llcc_driv_data = pdev->dev.platform_data;
+	struct edac_device_ctl_info *edev_ctl;
+	struct device *dev = &pdev->dev;
+	int ecc_irq;
+	int rc;
+
+	rc = qcom_llcc_core_setup(llcc_driv_data->bcast_regmap);
+	if (rc)
+		return rc;
+
+	/* Allocate edac control info */
+	edev_ctl = edac_device_alloc_ctl_info(0, "qcom-llcc", 1, "bank",
+					      llcc_driv_data->num_banks, 1,
+					      NULL, 0,
+					      edac_device_alloc_index());
+
+	if (!edev_ctl)
+		return -ENOMEM;
+
+	edev_ctl->dev = dev;
+	edev_ctl->mod_name = dev_name(dev);
+	edev_ctl->dev_name = dev_name(dev);
+	edev_ctl->ctl_name = "llcc";
+	edev_ctl->panic_on_ue = LLCC_ERP_PANIC_ON_UE;
+	edev_ctl->pvt_info = llcc_driv_data;
+
+	rc = edac_device_add_device(edev_ctl);
+	if (rc)
+		goto out_mem;
+
+	platform_set_drvdata(pdev, edev_ctl);
+
+	/* Request for ecc irq */
+	ecc_irq = llcc_driv_data->ecc_irq;
+	if (ecc_irq < 0) {
+		rc = -ENODEV;
+		goto out_dev;
+	}
+	rc = devm_request_irq(dev, ecc_irq, llcc_ecc_irq_handler,
+			      IRQF_TRIGGER_HIGH, "llcc_ecc", edev_ctl);
+	if (rc)
+		goto out_dev;
+
+	return rc;
+
+out_dev:
+	edac_device_del_device(edev_ctl->dev);
+out_mem:
+	edac_device_free_ctl_info(edev_ctl);
+
+	return rc;
+}
+
+static int qcom_llcc_edac_remove(struct platform_device *pdev)
+{
+	struct edac_device_ctl_info *edev_ctl = dev_get_drvdata(&pdev->dev);
+
+	edac_device_del_device(edev_ctl->dev);
+	edac_device_free_ctl_info(edev_ctl);
+
+	return 0;
+}
+
+static struct platform_driver qcom_llcc_edac_driver = {
+	.probe = qcom_llcc_edac_probe,
+	.remove = qcom_llcc_edac_remove,
+	.driver = {
+		.name = "qcom_llcc_edac",
+	},
+};
+module_platform_driver(qcom_llcc_edac_driver);
+
+MODULE_DESCRIPTION("QCOM EDAC driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h
index 2e4b34d2617e..69c285b1c990 100644
--- a/include/linux/soc/qcom/llcc-qcom.h
+++ b/include/linux/soc/qcom/llcc-qcom.h
@@ -93,6 +93,30 @@ struct llcc_drv_data {
 	int ecc_irq;
 };
 
+/**
+ * llcc_edac_reg_data - llcc edac registers data for each error type
+ * @name: Name of the error
+ * @synd_reg: Syndrome register address
+ * @count_status_reg: Status register address to read the error count
+ * @ways_status_reg: Status register address to read the error ways
+ * @reg_cnt: Number of registers
+ * @count_mask: Mask value to get the error count
+ * @ways_mask: Mask value to get the error ways
+ * @count_shift: Shift value to get the error count
+ * @ways_shift: Shift value to get the error ways
+ */
+struct llcc_edac_reg_data {
+	char *name;
+	u64 synd_reg;
+	u64 count_status_reg;
+	u64 ways_status_reg;
+	u32 reg_cnt;
+	u32 count_mask;
+	u32 ways_mask;
+	u8  count_shift;
+	u8  ways_shift;
+};
+
 #if IS_ENABLED(CONFIG_QCOM_LLCC)
 /**
  * llcc_slice_getd - get llcc slice descriptor
-- 
cgit v1.2.3


From f4926ef76e23e291fcd38bd107c0a9bb8e2db505 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Fri, 18 May 2018 15:47:50 -0700
Subject: soc: qcom: geni: Make version macros simpler

This macro doesn't work, because it hides a local variable inside of the
macro to hold the version and that variable name is called 'ver' and
'version' sometimes.

Let's change this to be more explicit. Introduce three macros for the
major, minor, and step of the version, and require callers to pass the
version in to get the part of the version out. This way we don't hide
local variables inside macros and things are less evil overall.

Cc: Karthikeyan Ramasubramanian <kramasub@codeaurora.org>
Cc: Sagar Dharia <sdharia@codeaurora.org>
Cc: Girish Mahadevan <girishm@codeaurora.org>
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 include/linux/qcom-geni-se.h | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h
index 5d6144977828..3bcd67fd5548 100644
--- a/include/linux/qcom-geni-se.h
+++ b/include/linux/qcom-geni-se.h
@@ -225,19 +225,14 @@ struct geni_se {
 #define HW_VER_MINOR_SHFT		16
 #define HW_VER_STEP_MASK		GENMASK(15, 0)
 
+#define GENI_SE_VERSION_MAJOR(ver) ((ver & HW_VER_MAJOR_MASK) >> HW_VER_MAJOR_SHFT)
+#define GENI_SE_VERSION_MINOR(ver) ((ver & HW_VER_MINOR_MASK) >> HW_VER_MINOR_SHFT)
+#define GENI_SE_VERSION_STEP(ver) (ver & HW_VER_STEP_MASK)
+
 #if IS_ENABLED(CONFIG_QCOM_GENI_SE)
 
 u32 geni_se_get_qup_hw_version(struct geni_se *se);
 
-#define geni_se_get_wrapper_version(se, major, minor, step) do { \
-	u32 ver; \
-\
-	ver = geni_se_get_qup_hw_version(se); \
-	major = (ver & HW_VER_MAJOR_MASK) >> HW_VER_MAJOR_SHFT; \
-	minor = (ver & HW_VER_MINOR_MASK) >> HW_VER_MINOR_SHFT; \
-	step = version & HW_VER_STEP_MASK; \
-} while (0)
-
 /**
  * geni_se_read_proto() - Read the protocol configured for a serial engine
  * @se:		Pointer to the concerned serial engine.
-- 
cgit v1.2.3


From 7a9cdebdcc17e426fb5287e4a82db1dfe86339b2 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 12 Sep 2018 23:57:48 -1000
Subject: mm: get rid of vmacache_flush_all() entirely

Jann Horn points out that the vmacache_flush_all() function is not only
potentially expensive, it's buggy too.  It also happens to be entirely
unnecessary, because the sequence number overflow case can be avoided by
simply making the sequence number be 64-bit.  That doesn't even grow the
data structures in question, because the other adjacent fields are
already 64-bit.

So simplify the whole thing by just making the sequence number overflow
case go away entirely, which gets rid of all the complications and makes
the code faster too.  Win-win.

[ Oleg Nesterov points out that the VMACACHE_FULL_FLUSHES statistics
  also just goes away entirely with this ]

Reported-by: Jann Horn <jannh@google.com>
Suggested-by: Will Deacon <will.deacon@arm.com>
Acked-by: Davidlohr Bueso <dave@stgolabs.net>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h      |  2 +-
 include/linux/mm_types_task.h |  2 +-
 include/linux/vm_event_item.h |  1 -
 include/linux/vmacache.h      |  5 -----
 mm/debug.c                    |  4 ++--
 mm/vmacache.c                 | 38 --------------------------------------
 6 files changed, 4 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index cd2bc939efd0..5ed8f6292a53 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -341,7 +341,7 @@ struct mm_struct {
 	struct {
 		struct vm_area_struct *mmap;		/* list of VMAs */
 		struct rb_root mm_rb;
-		u32 vmacache_seqnum;                   /* per-thread vmacache */
+		u64 vmacache_seqnum;                   /* per-thread vmacache */
 #ifdef CONFIG_MMU
 		unsigned long (*get_unmapped_area) (struct file *filp,
 				unsigned long addr, unsigned long len,
diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index 5fe87687664c..d7016dcb245e 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -32,7 +32,7 @@
 #define VMACACHE_MASK (VMACACHE_SIZE - 1)
 
 struct vmacache {
-	u32 seqnum;
+	u64 seqnum;
 	struct vm_area_struct *vmas[VMACACHE_SIZE];
 };
 
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 5c7f010676a7..47a3441cf4c4 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -105,7 +105,6 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 #ifdef CONFIG_DEBUG_VM_VMACACHE
 		VMACACHE_FIND_CALLS,
 		VMACACHE_FIND_HITS,
-		VMACACHE_FULL_FLUSHES,
 #endif
 #ifdef CONFIG_SWAP
 		SWAP_RA,
diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h
index 3e9a963edd6a..6fce268a4588 100644
--- a/include/linux/vmacache.h
+++ b/include/linux/vmacache.h
@@ -10,7 +10,6 @@ static inline void vmacache_flush(struct task_struct *tsk)
 	memset(tsk->vmacache.vmas, 0, sizeof(tsk->vmacache.vmas));
 }
 
-extern void vmacache_flush_all(struct mm_struct *mm);
 extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma);
 extern struct vm_area_struct *vmacache_find(struct mm_struct *mm,
 						    unsigned long addr);
@@ -24,10 +23,6 @@ extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
 static inline void vmacache_invalidate(struct mm_struct *mm)
 {
 	mm->vmacache_seqnum++;
-
-	/* deal with overflows */
-	if (unlikely(mm->vmacache_seqnum == 0))
-		vmacache_flush_all(mm);
 }
 
 #endif /* __LINUX_VMACACHE_H */
diff --git a/mm/debug.c b/mm/debug.c
index 38c926520c97..bd10aad8539a 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -114,7 +114,7 @@ EXPORT_SYMBOL(dump_vma);
 
 void dump_mm(const struct mm_struct *mm)
 {
-	pr_emerg("mm %px mmap %px seqnum %d task_size %lu\n"
+	pr_emerg("mm %px mmap %px seqnum %llu task_size %lu\n"
 #ifdef CONFIG_MMU
 		"get_unmapped_area %px\n"
 #endif
@@ -142,7 +142,7 @@ void dump_mm(const struct mm_struct *mm)
 		"tlb_flush_pending %d\n"
 		"def_flags: %#lx(%pGv)\n",
 
-		mm, mm->mmap, mm->vmacache_seqnum, mm->task_size,
+		mm, mm->mmap, (long long) mm->vmacache_seqnum, mm->task_size,
 #ifdef CONFIG_MMU
 		mm->get_unmapped_area,
 #endif
diff --git a/mm/vmacache.c b/mm/vmacache.c
index ea517bef7dc5..cdc32a3b02fa 100644
--- a/mm/vmacache.c
+++ b/mm/vmacache.c
@@ -19,44 +19,6 @@
 #endif
 #define VMACACHE_HASH(addr) ((addr >> VMACACHE_SHIFT) & VMACACHE_MASK)
 
-/*
- * Flush vma caches for threads that share a given mm.
- *
- * The operation is safe because the caller holds the mmap_sem
- * exclusively and other threads accessing the vma cache will
- * have mmap_sem held at least for read, so no extra locking
- * is required to maintain the vma cache.
- */
-void vmacache_flush_all(struct mm_struct *mm)
-{
-	struct task_struct *g, *p;
-
-	count_vm_vmacache_event(VMACACHE_FULL_FLUSHES);
-
-	/*
-	 * Single threaded tasks need not iterate the entire
-	 * list of process. We can avoid the flushing as well
-	 * since the mm's seqnum was increased and don't have
-	 * to worry about other threads' seqnum. Current's
-	 * flush will occur upon the next lookup.
-	 */
-	if (atomic_read(&mm->mm_users) == 1)
-		return;
-
-	rcu_read_lock();
-	for_each_process_thread(g, p) {
-		/*
-		 * Only flush the vmacache pointers as the
-		 * mm seqnum is already set and curr's will
-		 * be set upon invalidation when the next
-		 * lookup is done.
-		 */
-		if (mm == p->mm)
-			vmacache_flush(p);
-	}
-	rcu_read_unlock();
-}
-
 /*
  * This task may be accessing a foreign mm via (for example)
  * get_user_pages()->find_vma().  The vmacache is task-local and this
-- 
cgit v1.2.3


From 59104f239b9ed6cf3986e4228173ff2f4c95039e Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Thu, 26 Jul 2018 02:37:49 +0000
Subject: drm: shmobile: convert to SPDX identifiers

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
---
 drivers/gpu/drm/shmobile/Kconfig               | 1 +
 drivers/gpu/drm/shmobile/shmob_drm_backlight.c | 6 +-----
 drivers/gpu/drm/shmobile/shmob_drm_backlight.h | 6 +-----
 drivers/gpu/drm/shmobile/shmob_drm_crtc.c      | 6 +-----
 drivers/gpu/drm/shmobile/shmob_drm_crtc.h      | 6 +-----
 drivers/gpu/drm/shmobile/shmob_drm_drv.c       | 6 +-----
 drivers/gpu/drm/shmobile/shmob_drm_drv.h       | 6 +-----
 drivers/gpu/drm/shmobile/shmob_drm_kms.c       | 6 +-----
 drivers/gpu/drm/shmobile/shmob_drm_kms.h       | 6 +-----
 drivers/gpu/drm/shmobile/shmob_drm_plane.c     | 6 +-----
 drivers/gpu/drm/shmobile/shmob_drm_plane.h     | 6 +-----
 drivers/gpu/drm/shmobile/shmob_drm_regs.h      | 6 +-----
 include/linux/platform_data/shmob_drm.h        | 6 +-----
 13 files changed, 13 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/shmobile/Kconfig b/drivers/gpu/drm/shmobile/Kconfig
index 0426d66660d1..61bbe8e8bcc5 100644
--- a/drivers/gpu/drm/shmobile/Kconfig
+++ b/drivers/gpu/drm/shmobile/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 config DRM_SHMOBILE
 	tristate "DRM Support for SH Mobile"
 	depends on DRM && ARM
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_backlight.c b/drivers/gpu/drm/shmobile/shmob_drm_backlight.c
index 33dd41afea0e..f6628a5ee95f 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_backlight.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_backlight.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * shmob_drm_backlight.c  --  SH Mobile DRM Backlight
  *
  * Copyright (C) 2012 Renesas Electronics Corporation
  *
  * Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/backlight.h>
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_backlight.h b/drivers/gpu/drm/shmobile/shmob_drm_backlight.h
index bac719ecc301..d9abb7a60be5 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_backlight.h
+++ b/drivers/gpu/drm/shmobile/shmob_drm_backlight.h
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * shmob_drm_backlight.h  --  SH Mobile DRM Backlight
  *
  * Copyright (C) 2012 Renesas Electronics Corporation
  *
  * Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __SHMOB_DRM_BACKLIGHT_H__
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
index fc66167b0641..499b5fdb869f 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_crtc.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * shmob_drm_crtc.c  --  SH Mobile DRM CRTCs
  *
  * Copyright (C) 2012 Renesas Electronics Corporation
  *
  * Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/backlight.h>
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_crtc.h b/drivers/gpu/drm/shmobile/shmob_drm_crtc.h
index c11f421737dc..9ca6920641d8 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_crtc.h
+++ b/drivers/gpu/drm/shmobile/shmob_drm_crtc.h
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * shmob_drm_crtc.h  --  SH Mobile DRM CRTCs
  *
  * Copyright (C) 2012 Renesas Electronics Corporation
  *
  * Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __SHMOB_DRM_CRTC_H__
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_drv.c b/drivers/gpu/drm/shmobile/shmob_drm_drv.c
index 592572554eb0..6ececad6f845 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_drv.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_drv.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * shmob_drm_drv.c  --  SH Mobile DRM driver
  *
  * Copyright (C) 2012 Renesas Electronics Corporation
  *
  * Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <linux/clk.h>
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_drv.h b/drivers/gpu/drm/shmobile/shmob_drm_drv.h
index 088a6e55fa29..80dc4b1020aa 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_drv.h
+++ b/drivers/gpu/drm/shmobile/shmob_drm_drv.h
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * shmob_drm.h  --  SH Mobile DRM driver
  *
  * Copyright (C) 2012 Renesas Electronics Corporation
  *
  * Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __SHMOB_DRM_DRV_H__
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_kms.c b/drivers/gpu/drm/shmobile/shmob_drm_kms.c
index 447638581c08..a17268444c6d 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_kms.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_kms.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * shmob_drm_kms.c  --  SH Mobile DRM Mode Setting
  *
  * Copyright (C) 2012 Renesas Electronics Corporation
  *
  * Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <drm/drmP.h>
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_kms.h b/drivers/gpu/drm/shmobile/shmob_drm_kms.h
index 753e2817dc2c..6ec2b732bb94 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_kms.h
+++ b/drivers/gpu/drm/shmobile/shmob_drm_kms.h
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * shmob_drm_kms.h  --  SH Mobile DRM Mode Setting
  *
  * Copyright (C) 2012 Renesas Electronics Corporation
  *
  * Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __SHMOB_DRM_KMS_H__
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_plane.c b/drivers/gpu/drm/shmobile/shmob_drm_plane.c
index 1d0359f713ca..1d1ee5e51351 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_plane.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_plane.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * shmob_drm_plane.c  --  SH Mobile DRM Planes
  *
  * Copyright (C) 2012 Renesas Electronics Corporation
  *
  * Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #include <drm/drmP.h>
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_plane.h b/drivers/gpu/drm/shmobile/shmob_drm_plane.h
index a58cc1fc3240..bae67cc8c628 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_plane.h
+++ b/drivers/gpu/drm/shmobile/shmob_drm_plane.h
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * shmob_drm_plane.h  --  SH Mobile DRM Planes
  *
  * Copyright (C) 2012 Renesas Electronics Corporation
  *
  * Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __SHMOB_DRM_PLANE_H__
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_regs.h b/drivers/gpu/drm/shmobile/shmob_drm_regs.h
index ea17d4415b9e..9eb0b3d01df8 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_regs.h
+++ b/drivers/gpu/drm/shmobile/shmob_drm_regs.h
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * shmob_drm_regs.h  --  SH Mobile DRM registers
  *
  * Copyright (C) 2012 Renesas Electronics Corporation
  *
  * Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __SHMOB_DRM_REGS_H__
diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h
index ee495d707f17..fe815d7d9f58 100644
--- a/include/linux/platform_data/shmob_drm.h
+++ b/include/linux/platform_data/shmob_drm.h
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * shmob_drm.h  --  SH Mobile DRM driver
  *
  * Copyright (C) 2012 Renesas Corporation
  *
  * Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __SHMOB_DRM_H__
-- 
cgit v1.2.3


From e21120383f2dce32312f63ffca145ff8a87d41f5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 11 Sep 2018 19:47:09 -0400
Subject: move compat handling of tty ioctls to tty_compat_ioctl()

ioctls that are
	* callable only via tty_ioctl()
	* not driver-specific
	* not demand data structure conversions
	* either always need passing arg as is or always demand compat_ptr()
get intercepted in tty_compat_ioctl() from the very beginning and
redirecter to tty_ioctl().  As the result, their entries in fs/compat_ioctl.c
(some of those had been missing, BTW) got removed, as well as
n_tty_compat_ioctl_helper() (now it's never called with any cmd it would accept).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/tty/tty_io.c    | 78 +++++++++++++++++++++++++++++++++++++++++++++++--
 drivers/tty/tty_ioctl.c | 16 ----------
 fs/compat_ioctl.c       | 51 --------------------------------
 include/linux/tty.h     |  2 --
 4 files changed, 76 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 32bc3e3fe4d3..7bfc8afc130a 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -97,6 +97,7 @@
 #include <linux/seq_file.h>
 #include <linux/serial.h>
 #include <linux/ratelimit.h>
+#include <linux/compat.h>
 
 #include <linux/uaccess.h>
 
@@ -2668,6 +2669,81 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd,
 	struct tty_ldisc *ld;
 	int retval = -ENOIOCTLCMD;
 
+	switch (cmd) {
+	case TIOCSTI:
+	case TIOCGWINSZ:
+	case TIOCSWINSZ:
+	case TIOCGEXCL:
+	case TIOCGETD:
+	case TIOCSETD:
+	case TIOCGDEV:
+	case TIOCMGET:
+	case TIOCMSET:
+	case TIOCMBIC:
+	case TIOCMBIS:
+	case TIOCGICOUNT:
+	case TIOCGPGRP:
+	case TIOCSPGRP:
+	case TIOCGSID:
+	case TIOCSERGETLSR:
+	case TIOCGRS485:
+	case TIOCSRS485:
+#ifdef TIOCGETP
+	case TIOCGETP:
+	case TIOCSETP:
+	case TIOCSETN:
+#endif
+#ifdef TIOCGETC
+	case TIOCGETC:
+	case TIOCSETC:
+#endif
+#ifdef TIOCGLTC
+	case TIOCGLTC:
+	case TIOCSLTC:
+#endif
+	case TCSETSF:
+	case TCSETSW:
+	case TCSETS:
+	case TCGETS:
+#ifdef TCGETS2
+	case TCGETS2:
+	case TCSETSF2:
+	case TCSETSW2:
+	case TCSETS2:
+#endif
+	case TCGETA:
+	case TCSETAF:
+	case TCSETAW:
+	case TCSETA:
+	case TIOCGLCKTRMIOS:
+	case TIOCSLCKTRMIOS:
+#ifdef TCGETX
+	case TCGETX:
+	case TCSETX:
+	case TCSETXW:
+	case TCSETXF:
+#endif
+	case TIOCGSOFTCAR:
+	case TIOCSSOFTCAR:
+		return tty_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
+	case TIOCCONS:
+	case TIOCEXCL:
+	case TIOCNXCL:
+	case TIOCVHANGUP:
+	case TIOCSBRK:
+	case TIOCCBRK:
+	case TCSBRK:
+	case TCSBRKP:
+	case TCFLSH:
+	case TIOCGPTPEER:
+	case TIOCNOTTY:
+	case TIOCSCTTY:
+	case TCXONC:
+	case TIOCMIWAIT:
+	case TIOCSERCONFIG:
+		return tty_ioctl(file, cmd, arg);
+	}
+
 	if (tty_paranoia_check(tty, file_inode(file), "tty_ioctl"))
 		return -EINVAL;
 
@@ -2682,8 +2758,6 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd,
 		return hung_up_tty_compat_ioctl(file, cmd, arg);
 	if (ld->ops->compat_ioctl)
 		retval = ld->ops->compat_ioctl(tty, file, cmd, arg);
-	else
-		retval = n_tty_compat_ioctl_helper(tty, file, cmd, arg);
 	tty_ldisc_deref(ld);
 
 	return retval;
diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c
index d99fec44036c..9245fffdbceb 100644
--- a/drivers/tty/tty_ioctl.c
+++ b/drivers/tty/tty_ioctl.c
@@ -941,19 +941,3 @@ int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file,
 	}
 }
 EXPORT_SYMBOL(n_tty_ioctl_helper);
-
-#ifdef CONFIG_COMPAT
-long n_tty_compat_ioctl_helper(struct tty_struct *tty, struct file *file,
-					unsigned int cmd, unsigned long arg)
-{
-	switch (cmd) {
-	case TIOCGLCKTRMIOS:
-	case TIOCSLCKTRMIOS:
-		return tty_mode_ioctl(tty, file, cmd, (unsigned long) compat_ptr(arg));
-	default:
-		return -ENOIOCTLCMD;
-	}
-}
-EXPORT_SYMBOL(n_tty_compat_ioctl_helper);
-#endif
-
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 53bc3659dcef..670b8cbd0896 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -711,52 +711,9 @@ COMPATIBLE_IOCTL(0x4B50)   /* KDGHWCLK - not in the kernel, but don't complain *
 COMPATIBLE_IOCTL(0x4B51)   /* KDSHWCLK - not in the kernel, but don't complain */
 
 /* Big T */
-COMPATIBLE_IOCTL(TCGETA)
-COMPATIBLE_IOCTL(TCSETA)
-COMPATIBLE_IOCTL(TCSETAW)
-COMPATIBLE_IOCTL(TCSETAF)
-COMPATIBLE_IOCTL(TCSBRK)
-COMPATIBLE_IOCTL(TCXONC)
-COMPATIBLE_IOCTL(TCFLSH)
-COMPATIBLE_IOCTL(TCGETS)
-COMPATIBLE_IOCTL(TCSETS)
-COMPATIBLE_IOCTL(TCSETSW)
-COMPATIBLE_IOCTL(TCSETSF)
 COMPATIBLE_IOCTL(TIOCLINUX)
-COMPATIBLE_IOCTL(TIOCSBRK)
-COMPATIBLE_IOCTL(TIOCGDEV)
-COMPATIBLE_IOCTL(TIOCCBRK)
-COMPATIBLE_IOCTL(TIOCGSID)
-COMPATIBLE_IOCTL(TIOCGICOUNT)
-COMPATIBLE_IOCTL(TIOCGEXCL)
 /* Little t */
-COMPATIBLE_IOCTL(TIOCGETD)
-COMPATIBLE_IOCTL(TIOCSETD)
-COMPATIBLE_IOCTL(TIOCEXCL)
-COMPATIBLE_IOCTL(TIOCNXCL)
-COMPATIBLE_IOCTL(TIOCCONS)
-COMPATIBLE_IOCTL(TIOCGSOFTCAR)
-COMPATIBLE_IOCTL(TIOCSSOFTCAR)
-COMPATIBLE_IOCTL(TIOCSWINSZ)
-COMPATIBLE_IOCTL(TIOCGWINSZ)
-COMPATIBLE_IOCTL(TIOCMGET)
-COMPATIBLE_IOCTL(TIOCMBIC)
-COMPATIBLE_IOCTL(TIOCMBIS)
-COMPATIBLE_IOCTL(TIOCMSET)
-COMPATIBLE_IOCTL(TIOCNOTTY)
-COMPATIBLE_IOCTL(TIOCSTI)
 COMPATIBLE_IOCTL(TIOCOUTQ)
-COMPATIBLE_IOCTL(TIOCSPGRP)
-COMPATIBLE_IOCTL(TIOCGPGRP)
-COMPATIBLE_IOCTL(TIOCSERGETLSR)
-COMPATIBLE_IOCTL(TIOCSRS485)
-COMPATIBLE_IOCTL(TIOCGRS485)
-#ifdef TCGETS2
-COMPATIBLE_IOCTL(TCGETS2)
-COMPATIBLE_IOCTL(TCSETS2)
-COMPATIBLE_IOCTL(TCSETSW2)
-COMPATIBLE_IOCTL(TCSETSF2)
-#endif
 /* Little f */
 COMPATIBLE_IOCTL(FIOCLEX)
 COMPATIBLE_IOCTL(FIONCLEX)
@@ -1219,10 +1176,6 @@ COMPATIBLE_IOCTL(JSIOCGAXES)
 COMPATIBLE_IOCTL(JSIOCGBUTTONS)
 COMPATIBLE_IOCTL(JSIOCGNAME(0))
 
-#ifdef TIOCGLTC
-COMPATIBLE_IOCTL(TIOCGLTC)
-COMPATIBLE_IOCTL(TIOCSLTC)
-#endif
 #ifdef TIOCSTART
 /*
  * For these two we have definitions in ioctls.h and/or termios.h on
@@ -1312,10 +1265,6 @@ static long do_ioctl_trans(unsigned int cmd,
 	 * so we must not do a compat_ptr() translation.
 	 */
 	switch (cmd) {
-	/* Big T */
-	case TCSBRKP:
-	case TIOCMIWAIT:
-	case TIOCSCTTY:
 	/* RAID */
 	case HOT_REMOVE_DISK:
 	case HOT_ADD_DISK:
diff --git a/include/linux/tty.h b/include/linux/tty.h
index c56e3978b00f..414db2bce715 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -746,8 +746,6 @@ static inline int tty_audit_push(void)
 /* tty_ioctl.c */
 extern int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file,
 		       unsigned int cmd, unsigned long arg);
-extern long n_tty_compat_ioctl_helper(struct tty_struct *tty, struct file *file,
-		       unsigned int cmd, unsigned long arg);
 
 /* vt.c */
 
-- 
cgit v1.2.3


From 2f46a2c1d4eb982b82c199e1bd5cddab12681275 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 11 Sep 2018 21:53:32 -0400
Subject: tty_ioctl(): start taking TIOC[SG]SERIAL into separate methods

->set_serial() and ->get_serial() resp., both taking tty and
a kernel pointer to serial_struct.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/tty/tty_io.c       | 35 ++++++++++++++++++++++++++++++-----
 include/linux/tty_driver.h |  3 +++
 2 files changed, 33 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index ef2a8766d34f..b96bfd051d59 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -2456,22 +2456,40 @@ static int tty_tiocgicount(struct tty_struct *tty, void __user *arg)
 	return 0;
 }
 
-static void tty_warn_deprecated_flags(struct serial_struct __user *ss)
+static int tty_tiocsserial(struct tty_struct *tty, struct serial_struct __user *ss)
 {
 	static DEFINE_RATELIMIT_STATE(depr_flags,
 			DEFAULT_RATELIMIT_INTERVAL,
 			DEFAULT_RATELIMIT_BURST);
 	char comm[TASK_COMM_LEN];
+	struct serial_struct v;
 	int flags;
 
-	if (get_user(flags, &ss->flags))
-		return;
+	if (copy_from_user(&v, ss, sizeof(struct serial_struct)))
+		return -EFAULT;
 
-	flags &= ASYNC_DEPRECATED;
+	flags = v.flags & ASYNC_DEPRECATED;
 
 	if (flags && __ratelimit(&depr_flags))
 		pr_warn("%s: '%s' is using deprecated serial flags (with no effect): %.8x\n",
 			__func__, get_task_comm(comm, current), flags);
+	if (!tty->ops->set_serial)
+		return -ENOIOCTLCMD;
+	return tty->ops->set_serial(tty, &v);
+}
+
+static int tty_tiocgserial(struct tty_struct *tty, struct serial_struct __user *ss)
+{
+	struct serial_struct v;
+	int err;
+
+	memset(&v, 0, sizeof(struct serial_struct));
+	if (!tty->ops->get_serial)
+		return -ENOIOCTLCMD;
+	err = tty->ops->get_serial(tty, &v);
+	if (!err && copy_to_user(ss, &v, sizeof(struct serial_struct)))
+		err = -EFAULT;
+	return err;
 }
 
 /*
@@ -2603,7 +2621,14 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		}
 		break;
 	case TIOCSSERIAL:
-		tty_warn_deprecated_flags(p);
+		retval = tty_tiocsserial(tty, p);
+		if (retval != -ENOIOCTLCMD)
+			return retval;
+		break;
+	case TIOCGSERIAL:
+		retval = tty_tiocgserial(tty, p);
+		if (retval != -ENOIOCTLCMD)
+			return retval;
 		break;
 	case TIOCGPTPEER:
 		/* Special because the struct file is needed */
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 71dbc891851a..358446247ccd 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -249,6 +249,7 @@
 struct tty_struct;
 struct tty_driver;
 struct serial_icounter_struct;
+struct serial_struct;
 
 struct tty_operations {
 	struct tty_struct * (*lookup)(struct tty_driver *driver,
@@ -287,6 +288,8 @@ struct tty_operations {
 	int (*set_termiox)(struct tty_struct *tty, struct termiox *tnew);
 	int (*get_icount)(struct tty_struct *tty,
 				struct serial_icounter_struct *icount);
+	int  (*get_serial)(struct tty_struct *tty, struct serial_struct *p);
+	int  (*set_serial)(struct tty_struct *tty, struct serial_struct *p);
 	void (*show_fdinfo)(struct tty_struct *tty, struct seq_file *m);
 #ifdef CONFIG_CONSOLE_POLL
 	int (*poll_init)(struct tty_driver *driver, int line, char *options);
-- 
cgit v1.2.3


From 52bb6677d530d37055092d86b4eab69dce6c166a Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Fri, 14 Sep 2018 16:00:51 +0800
Subject: net: move definition of pcpu_lstats to header file

pcpu_lstats is defined in several files, so unify them as one
and move to header file

Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/loopback.c    |  6 ------
 drivers/net/nlmon.c       |  6 ------
 drivers/net/vsockmon.c    | 14 ++++----------
 include/linux/netdevice.h |  6 ++++++
 4 files changed, 10 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 30612497643c..a7207fa7e451 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -59,12 +59,6 @@
 #include <net/net_namespace.h>
 #include <linux/u64_stats_sync.h>
 
-struct pcpu_lstats {
-	u64			packets;
-	u64			bytes;
-	struct u64_stats_sync	syncp;
-};
-
 /* The higher levels take care of making this non-reentrant (it's
  * called with bh's disabled).
  */
diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c
index 4b22955de191..dd0db7534cb3 100644
--- a/drivers/net/nlmon.c
+++ b/drivers/net/nlmon.c
@@ -6,12 +6,6 @@
 #include <linux/if_arp.h>
 #include <net/rtnetlink.h>
 
-struct pcpu_lstats {
-	u64 packets;
-	u64 bytes;
-	struct u64_stats_sync syncp;
-};
-
 static netdev_tx_t nlmon_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	int len = skb->len;
diff --git a/drivers/net/vsockmon.c b/drivers/net/vsockmon.c
index c28bdce14fd5..7bad5c95551f 100644
--- a/drivers/net/vsockmon.c
+++ b/drivers/net/vsockmon.c
@@ -11,12 +11,6 @@
 #define DEFAULT_MTU (VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + \
 		     sizeof(struct af_vsockmon_hdr))
 
-struct pcpu_lstats {
-	u64 rx_packets;
-	u64 rx_bytes;
-	struct u64_stats_sync syncp;
-};
-
 static int vsockmon_dev_init(struct net_device *dev)
 {
 	dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
@@ -56,8 +50,8 @@ static netdev_tx_t vsockmon_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct pcpu_lstats *stats = this_cpu_ptr(dev->lstats);
 
 	u64_stats_update_begin(&stats->syncp);
-	stats->rx_bytes += len;
-	stats->rx_packets++;
+	stats->bytes += len;
+	stats->packets++;
 	u64_stats_update_end(&stats->syncp);
 
 	dev_kfree_skb(skb);
@@ -80,8 +74,8 @@ vsockmon_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 
 		do {
 			start = u64_stats_fetch_begin_irq(&vstats->syncp);
-			tbytes = vstats->rx_bytes;
-			tpackets = vstats->rx_packets;
+			tbytes = vstats->bytes;
+			tpackets = vstats->packets;
 		} while (u64_stats_fetch_retry_irq(&vstats->syncp, start));
 
 		packets += tpackets;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e2b3bd750c98..baed5d5088c5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2382,6 +2382,12 @@ struct pcpu_sw_netstats {
 	struct u64_stats_sync   syncp;
 };
 
+struct pcpu_lstats {
+	u64 packets;
+	u64 bytes;
+	struct u64_stats_sync syncp;
+};
+
 #define __netdev_alloc_pcpu_stats(type, gfp)				\
 ({									\
 	typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\
-- 
cgit v1.2.3


From d58e468b1112dcd1d5193c0a89ff9f98b5a3e8b9 Mon Sep 17 00:00:00 2001
From: Petar Penkov <ppenkov@google.com>
Date: Fri, 14 Sep 2018 07:46:18 -0700
Subject: flow_dissector: implements flow dissector BPF hook

Adds a hook for programs of type BPF_PROG_TYPE_FLOW_DISSECTOR and
attach type BPF_FLOW_DISSECTOR that is executed in the flow dissector
path. The BPF program is per-network namespace.

Signed-off-by: Petar Penkov <ppenkov@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h         |   1 +
 include/linux/bpf_types.h   |   1 +
 include/linux/skbuff.h      |   7 +++
 include/net/net_namespace.h |   3 +
 include/net/sch_generic.h   |  12 +++-
 include/uapi/linux/bpf.h    |  26 +++++++++
 kernel/bpf/syscall.c        |   8 +++
 kernel/bpf/verifier.c       |  32 +++++++++++
 net/core/filter.c           |  70 +++++++++++++++++++++++
 net/core/flow_dissector.c   | 134 ++++++++++++++++++++++++++++++++++++++++++++
 10 files changed, 291 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 523481a3471b..988a00797bcd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -212,6 +212,7 @@ enum bpf_reg_type {
 	PTR_TO_PACKET_META,	 /* skb->data - meta_len */
 	PTR_TO_PACKET,		 /* reg points to skb->data */
 	PTR_TO_PACKET_END,	 /* skb->data + headlen */
+	PTR_TO_FLOW_KEYS,	 /* reg points to bpf_flow_keys */
 };
 
 /* The information passed from prog-specific *_is_valid_access
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index cd26c090e7c0..22083712dd18 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -32,6 +32,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
 #ifdef CONFIG_INET
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
 #endif
+BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector)
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 17a13e4785fc..ce0e863f02a2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -243,6 +243,8 @@ struct scatterlist;
 struct pipe_inode_info;
 struct iov_iter;
 struct napi_struct;
+struct bpf_prog;
+union bpf_attr;
 
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 struct nf_conntrack {
@@ -1192,6 +1194,11 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 			     const struct flow_dissector_key *key,
 			     unsigned int key_count);
 
+int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
+				       struct bpf_prog *prog);
+
+int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr);
+
 bool __skb_flow_dissect(const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
 			void *target_container,
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 9b5fdc50519a..99d4148e0f90 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -43,6 +43,7 @@ struct ctl_table_header;
 struct net_generic;
 struct uevent_sock;
 struct netns_ipvs;
+struct bpf_prog;
 
 
 #define NETDEV_HASHBITS    8
@@ -145,6 +146,8 @@ struct net {
 #endif
 	struct net_generic __rcu	*gen;
 
+	struct bpf_prog __rcu	*flow_dissector_prog;
+
 	/* Note : following structs are cache line aligned */
 #ifdef CONFIG_XFRM
 	struct netns_xfrm	xfrm;
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index a6d00093f35e..1b81ba85fd2d 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -19,6 +19,7 @@ struct Qdisc_ops;
 struct qdisc_walker;
 struct tcf_walker;
 struct module;
+struct bpf_flow_keys;
 
 typedef int tc_setup_cb_t(enum tc_setup_type type,
 			  void *type_data, void *cb_priv);
@@ -307,9 +308,14 @@ struct tcf_proto {
 };
 
 struct qdisc_skb_cb {
-	unsigned int		pkt_len;
-	u16			slave_dev_queue_mapping;
-	u16			tc_classid;
+	union {
+		struct {
+			unsigned int		pkt_len;
+			u16			slave_dev_queue_mapping;
+			u16			tc_classid;
+		};
+		struct bpf_flow_keys *flow_keys;
+	};
 #define QDISC_CB_PRIV_LEN 20
 	unsigned char		data[QDISC_CB_PRIV_LEN];
 };
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 66917a4eba27..aa5ccd2385ed 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -152,6 +152,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_LWT_SEG6LOCAL,
 	BPF_PROG_TYPE_LIRC_MODE2,
 	BPF_PROG_TYPE_SK_REUSEPORT,
+	BPF_PROG_TYPE_FLOW_DISSECTOR,
 };
 
 enum bpf_attach_type {
@@ -172,6 +173,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_UDP4_SENDMSG,
 	BPF_CGROUP_UDP6_SENDMSG,
 	BPF_LIRC_MODE2,
+	BPF_FLOW_DISSECTOR,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -2333,6 +2335,7 @@ struct __sk_buff {
 	/* ... here. */
 
 	__u32 data_meta;
+	struct bpf_flow_keys *flow_keys;
 };
 
 struct bpf_tunnel_key {
@@ -2778,4 +2781,27 @@ enum bpf_task_fd_type {
 	BPF_FD_TYPE_URETPROBE,		/* filename + offset */
 };
 
+struct bpf_flow_keys {
+	__u16	nhoff;
+	__u16	thoff;
+	__u16	addr_proto;			/* ETH_P_* of valid addrs */
+	__u8	is_frag;
+	__u8	is_first_frag;
+	__u8	is_encap;
+	__u8	ip_proto;
+	__be16	n_proto;
+	__be16	sport;
+	__be16	dport;
+	union {
+		struct {
+			__be32	ipv4_src;
+			__be32	ipv4_dst;
+		};
+		struct {
+			__u32	ipv6_src[4];	/* in6_addr; network order */
+			__u32	ipv6_dst[4];	/* in6_addr; network order */
+		};
+	};
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 3c9636f03bb2..b3c2d09bcf7a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1615,6 +1615,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_LIRC_MODE2:
 		ptype = BPF_PROG_TYPE_LIRC_MODE2;
 		break;
+	case BPF_FLOW_DISSECTOR:
+		ptype = BPF_PROG_TYPE_FLOW_DISSECTOR;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1636,6 +1639,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_PROG_TYPE_LIRC_MODE2:
 		ret = lirc_prog_attach(attr, prog);
 		break;
+	case BPF_PROG_TYPE_FLOW_DISSECTOR:
+		ret = skb_flow_dissector_bpf_prog_attach(attr, prog);
+		break;
 	default:
 		ret = cgroup_bpf_prog_attach(attr, ptype, prog);
 	}
@@ -1688,6 +1694,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
 	case BPF_LIRC_MODE2:
 		return lirc_prog_detach(attr);
+	case BPF_FLOW_DISSECTOR:
+		return skb_flow_dissector_bpf_prog_detach(attr);
 	default:
 		return -EINVAL;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6ff1bac1795d..8ccbff4fff93 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -261,6 +261,7 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_PACKET]		= "pkt",
 	[PTR_TO_PACKET_META]	= "pkt_meta",
 	[PTR_TO_PACKET_END]	= "pkt_end",
+	[PTR_TO_FLOW_KEYS]	= "flow_keys",
 };
 
 static char slot_type_char[] = {
@@ -965,6 +966,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_PACKET:
 	case PTR_TO_PACKET_META:
 	case PTR_TO_PACKET_END:
+	case PTR_TO_FLOW_KEYS:
 	case CONST_PTR_TO_MAP:
 		return true;
 	default:
@@ -1238,6 +1240,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 	case BPF_PROG_TYPE_LWT_XMIT:
 	case BPF_PROG_TYPE_SK_SKB:
 	case BPF_PROG_TYPE_SK_MSG:
+	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 		if (meta)
 			return meta->pkt_access;
 
@@ -1321,6 +1324,18 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
 	return -EACCES;
 }
 
+static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
+				  int size)
+{
+	if (size < 0 || off < 0 ||
+	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
+		verbose(env, "invalid access to flow keys off=%d size=%d\n",
+			off, size);
+		return -EACCES;
+	}
+	return 0;
+}
+
 static bool __is_pointer_value(bool allow_ptr_leaks,
 			       const struct bpf_reg_state *reg)
 {
@@ -1422,6 +1437,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 		 * right in front, treat it the very same way.
 		 */
 		return check_pkt_ptr_alignment(env, reg, off, size, strict);
+	case PTR_TO_FLOW_KEYS:
+		pointer_desc = "flow keys ";
+		break;
 	case PTR_TO_MAP_VALUE:
 		pointer_desc = "value ";
 		break;
@@ -1692,6 +1710,17 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		err = check_packet_access(env, regno, off, size, false);
 		if (!err && t == BPF_READ && value_regno >= 0)
 			mark_reg_unknown(env, regs, value_regno);
+	} else if (reg->type == PTR_TO_FLOW_KEYS) {
+		if (t == BPF_WRITE && value_regno >= 0 &&
+		    is_pointer_value(env, value_regno)) {
+			verbose(env, "R%d leaks addr into flow keys\n",
+				value_regno);
+			return -EACCES;
+		}
+
+		err = check_flow_keys_access(env, off, size);
+		if (!err && t == BPF_READ && value_regno >= 0)
+			mark_reg_unknown(env, regs, value_regno);
 	} else {
 		verbose(env, "R%d invalid mem access '%s'\n", regno,
 			reg_type_str[reg->type]);
@@ -1839,6 +1868,8 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 	case PTR_TO_PACKET_META:
 		return check_packet_access(env, regno, reg->off, access_size,
 					   zero_size_allowed);
+	case PTR_TO_FLOW_KEYS:
+		return check_flow_keys_access(env, reg->off, access_size);
 	case PTR_TO_MAP_VALUE:
 		return check_map_access(env, regno, reg->off, access_size,
 					zero_size_allowed);
@@ -4366,6 +4397,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 	case PTR_TO_CTX:
 	case CONST_PTR_TO_MAP:
 	case PTR_TO_PACKET_END:
+	case PTR_TO_FLOW_KEYS:
 		/* Only valid matches are exact, which memcmp() above
 		 * would have accepted
 		 */
diff --git a/net/core/filter.c b/net/core/filter.c
index bf5b6efd369a..9cc76f134ddb 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5123,6 +5123,17 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+static const struct bpf_func_proto *
+flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_skb_load_bytes:
+		return &bpf_skb_load_bytes_proto;
+	default:
+		return bpf_base_func_proto(func_id);
+	}
+}
+
 static const struct bpf_func_proto *
 lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -5241,6 +5252,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
 		if (size != size_default)
 			return false;
 		break;
+	case bpf_ctx_range(struct __sk_buff, flow_keys):
+		if (size != sizeof(struct bpf_flow_keys *))
+			return false;
+		break;
 	default:
 		/* Only narrow read access allowed for now. */
 		if (type == BPF_WRITE) {
@@ -5266,6 +5281,7 @@ static bool sk_filter_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, data):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, data_end):
+	case bpf_ctx_range(struct __sk_buff, flow_keys):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 		return false;
 	}
@@ -5291,6 +5307,7 @@ static bool lwt_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
+	case bpf_ctx_range(struct __sk_buff, flow_keys):
 		return false;
 	}
 
@@ -5501,6 +5518,7 @@ static bool tc_cls_act_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, data_end):
 		info->reg_type = PTR_TO_PACKET_END;
 		break;
+	case bpf_ctx_range(struct __sk_buff, flow_keys):
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 		return false;
 	}
@@ -5702,6 +5720,7 @@ static bool sk_skb_is_valid_access(int off, int size,
 	switch (off) {
 	case bpf_ctx_range(struct __sk_buff, tc_classid):
 	case bpf_ctx_range(struct __sk_buff, data_meta):
+	case bpf_ctx_range(struct __sk_buff, flow_keys):
 		return false;
 	}
 
@@ -5761,6 +5780,39 @@ static bool sk_msg_is_valid_access(int off, int size,
 	return true;
 }
 
+static bool flow_dissector_is_valid_access(int off, int size,
+					   enum bpf_access_type type,
+					   const struct bpf_prog *prog,
+					   struct bpf_insn_access_aux *info)
+{
+	if (type == BPF_WRITE) {
+		switch (off) {
+		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+			break;
+		default:
+			return false;
+		}
+	}
+
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, data):
+		info->reg_type = PTR_TO_PACKET;
+		break;
+	case bpf_ctx_range(struct __sk_buff, data_end):
+		info->reg_type = PTR_TO_PACKET_END;
+		break;
+	case bpf_ctx_range(struct __sk_buff, flow_keys):
+		info->reg_type = PTR_TO_FLOW_KEYS;
+		break;
+	case bpf_ctx_range(struct __sk_buff, tc_classid):
+	case bpf_ctx_range(struct __sk_buff, data_meta):
+	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+		return false;
+	}
+
+	return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
 static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 				  const struct bpf_insn *si,
 				  struct bpf_insn *insn_buf,
@@ -6055,6 +6107,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 				      bpf_target_off(struct sock_common,
 						     skc_num, 2, target_size));
 		break;
+
+	case offsetof(struct __sk_buff, flow_keys):
+		off  = si->off;
+		off -= offsetof(struct __sk_buff, flow_keys);
+		off += offsetof(struct sk_buff, cb);
+		off += offsetof(struct qdisc_skb_cb, flow_keys);
+		*insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
+				      si->src_reg, off);
+		break;
 	}
 
 	return insn - insn_buf;
@@ -7018,6 +7079,15 @@ const struct bpf_verifier_ops sk_msg_verifier_ops = {
 const struct bpf_prog_ops sk_msg_prog_ops = {
 };
 
+const struct bpf_verifier_ops flow_dissector_verifier_ops = {
+	.get_func_proto		= flow_dissector_func_proto,
+	.is_valid_access	= flow_dissector_is_valid_access,
+	.convert_ctx_access	= bpf_convert_ctx_access,
+};
+
+const struct bpf_prog_ops flow_dissector_prog_ops = {
+};
+
 int sk_detach_filter(struct sock *sk)
 {
 	int ret = -ENOENT;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index ce9eeeb7c024..5c5dd74b5b3b 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -25,6 +25,9 @@
 #include <net/flow_dissector.h>
 #include <scsi/fc/fc_fcoe.h>
 #include <uapi/linux/batadv_packet.h>
+#include <linux/bpf.h>
+
+static DEFINE_MUTEX(flow_dissector_mutex);
 
 static void dissector_set_key(struct flow_dissector *flow_dissector,
 			      enum flow_dissector_key_id key_id)
@@ -62,6 +65,44 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 }
 EXPORT_SYMBOL(skb_flow_dissector_init);
 
+int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
+				       struct bpf_prog *prog)
+{
+	struct bpf_prog *attached;
+	struct net *net;
+
+	net = current->nsproxy->net_ns;
+	mutex_lock(&flow_dissector_mutex);
+	attached = rcu_dereference_protected(net->flow_dissector_prog,
+					     lockdep_is_held(&flow_dissector_mutex));
+	if (attached) {
+		/* Only one BPF program can be attached at a time */
+		mutex_unlock(&flow_dissector_mutex);
+		return -EEXIST;
+	}
+	rcu_assign_pointer(net->flow_dissector_prog, prog);
+	mutex_unlock(&flow_dissector_mutex);
+	return 0;
+}
+
+int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
+{
+	struct bpf_prog *attached;
+	struct net *net;
+
+	net = current->nsproxy->net_ns;
+	mutex_lock(&flow_dissector_mutex);
+	attached = rcu_dereference_protected(net->flow_dissector_prog,
+					     lockdep_is_held(&flow_dissector_mutex));
+	if (!attached) {
+		mutex_unlock(&flow_dissector_mutex);
+		return -ENOENT;
+	}
+	bpf_prog_put(attached);
+	RCU_INIT_POINTER(net->flow_dissector_prog, NULL);
+	mutex_unlock(&flow_dissector_mutex);
+	return 0;
+}
 /**
  * skb_flow_get_be16 - extract be16 entity
  * @skb: sk_buff to extract from
@@ -588,6 +629,60 @@ static bool skb_flow_dissect_allowed(int *num_hdrs)
 	return (*num_hdrs <= MAX_FLOW_DISSECT_HDRS);
 }
 
+static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
+				     struct flow_dissector *flow_dissector,
+				     void *target_container)
+{
+	struct flow_dissector_key_control *key_control;
+	struct flow_dissector_key_basic *key_basic;
+	struct flow_dissector_key_addrs *key_addrs;
+	struct flow_dissector_key_ports *key_ports;
+
+	key_control = skb_flow_dissector_target(flow_dissector,
+						FLOW_DISSECTOR_KEY_CONTROL,
+						target_container);
+	key_control->thoff = flow_keys->thoff;
+	if (flow_keys->is_frag)
+		key_control->flags |= FLOW_DIS_IS_FRAGMENT;
+	if (flow_keys->is_first_frag)
+		key_control->flags |= FLOW_DIS_FIRST_FRAG;
+	if (flow_keys->is_encap)
+		key_control->flags |= FLOW_DIS_ENCAPSULATION;
+
+	key_basic = skb_flow_dissector_target(flow_dissector,
+					      FLOW_DISSECTOR_KEY_BASIC,
+					      target_container);
+	key_basic->n_proto = flow_keys->n_proto;
+	key_basic->ip_proto = flow_keys->ip_proto;
+
+	if (flow_keys->addr_proto == ETH_P_IP &&
+	    dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+		key_addrs = skb_flow_dissector_target(flow_dissector,
+						      FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+						      target_container);
+		key_addrs->v4addrs.src = flow_keys->ipv4_src;
+		key_addrs->v4addrs.dst = flow_keys->ipv4_dst;
+		key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+	} else if (flow_keys->addr_proto == ETH_P_IPV6 &&
+		   dissector_uses_key(flow_dissector,
+				      FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+		key_addrs = skb_flow_dissector_target(flow_dissector,
+						      FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+						      target_container);
+		memcpy(&key_addrs->v6addrs, &flow_keys->ipv6_src,
+		       sizeof(key_addrs->v6addrs));
+		key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+	}
+
+	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+		key_ports = skb_flow_dissector_target(flow_dissector,
+						      FLOW_DISSECTOR_KEY_PORTS,
+						      target_container);
+		key_ports->src = flow_keys->sport;
+		key_ports->dst = flow_keys->dport;
+	}
+}
+
 /**
  * __skb_flow_dissect - extract the flow_keys struct and return it
  * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified
@@ -619,6 +714,7 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 	struct flow_dissector_key_vlan *key_vlan;
 	enum flow_dissect_ret fdret;
 	enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
+	struct bpf_prog *attached;
 	int num_hdrs = 0;
 	u8 ip_proto = 0;
 	bool ret;
@@ -658,6 +754,44 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 					      FLOW_DISSECTOR_KEY_BASIC,
 					      target_container);
 
+	rcu_read_lock();
+	attached = skb ? rcu_dereference(dev_net(skb->dev)->flow_dissector_prog)
+		       : NULL;
+	if (attached) {
+		/* Note that even though the const qualifier is discarded
+		 * throughout the execution of the BPF program, all changes(the
+		 * control block) are reverted after the BPF program returns.
+		 * Therefore, __skb_flow_dissect does not alter the skb.
+		 */
+		struct bpf_flow_keys flow_keys = {};
+		struct bpf_skb_data_end cb_saved;
+		struct bpf_skb_data_end *cb;
+		u32 result;
+
+		cb = (struct bpf_skb_data_end *)skb->cb;
+
+		/* Save Control Block */
+		memcpy(&cb_saved, cb, sizeof(cb_saved));
+		memset(cb, 0, sizeof(cb_saved));
+
+		/* Pass parameters to the BPF program */
+		cb->qdisc_cb.flow_keys = &flow_keys;
+		flow_keys.nhoff = nhoff;
+
+		bpf_compute_data_pointers((struct sk_buff *)skb);
+		result = BPF_PROG_RUN(attached, skb);
+
+		/* Restore state */
+		memcpy(cb, &cb_saved, sizeof(cb_saved));
+
+		__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
+					 target_container);
+		key_control->thoff = min_t(u16, key_control->thoff, skb->len);
+		rcu_read_unlock();
+		return result == BPF_OK;
+	}
+	rcu_read_unlock();
+
 	if (dissector_uses_key(flow_dissector,
 			       FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
 		struct ethhdr *eth = eth_hdr(skb);
-- 
cgit v1.2.3


From 36f47383c7f9fa0fdc20a92635435ff77ff71a6f Mon Sep 17 00:00:00 2001
From: Fabien Parent <fparent@baylibre.com>
Date: Fri, 10 Aug 2018 15:13:47 +0200
Subject: mfd: cros: add charger port count command definition

A new more command has been added to the ChromeOS embedded controller
that allows to get the number of charger port count. Unlike
EC_CMD_USB_PD_PORTS, this new command also includes the dedicated
port if present.

This command will be used to expose the dedicated charger port
in the ChromeOS charger driver.

Signed-off-by: Fabien Parent <fparent@baylibre.com>
Acked-for-MFD-by: Lee Jones <lee.jones@linaro.org>
Reviewed-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 include/linux/mfd/cros_ec_commands.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index 6e1ab9bead28..20ee71f10865 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -3102,6 +3102,16 @@ struct ec_params_usb_pd_info_request {
 	uint8_t port;
 } __packed;
 
+/*
+ * This command will return the number of USB PD charge port + the number
+ * of dedicated port present.
+ * EC_CMD_USB_PD_PORTS does NOT include the dedicated ports
+ */
+#define EC_CMD_CHARGE_PORT_COUNT 0x0105
+struct ec_response_charge_port_count {
+	uint8_t port_count;
+} __packed;
+
 /* Read USB-PD Device discovery info */
 #define EC_CMD_USB_PD_DISCOVERY 0x0113
 struct ec_params_usb_pd_discovery_entry {
-- 
cgit v1.2.3


From 8f5be0ec23bb9ef3f96659c8dff1340b876600bf Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Mon, 13 Aug 2018 09:52:09 +0300
Subject: kernfs: update comment about kernfs_path() return value

Now it returns the length of the full path or error code.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Fixes: 3abb1d90f5d9 ("kernfs: make kernfs_path*() behave in the style of strlcpy()")
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kernfs.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 814643f7ee52..5b36b1287a5a 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -477,10 +477,11 @@ static inline void kernfs_init(void) { }
  * @buf: buffer to copy @kn's name into
  * @buflen: size of @buf
  *
- * Builds and returns the full path of @kn in @buf of @buflen bytes.  The
- * path is built from the end of @buf so the returned pointer usually
- * doesn't match @buf.  If @buf isn't long enough, @buf is nul terminated
- * and %NULL is returned.
+ * If @kn is NULL result will be "(null)".
+ *
+ * Returns the length of the full path.  If the full length is equal to or
+ * greater than @buflen, @buf contains the truncated path with the trailing
+ * '\0'.  On error, -errno is returned.
  */
 static inline int kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
 {
-- 
cgit v1.2.3


From 2953d80ff04862b26a2e628fb3948868f54d753d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 31 Aug 2018 20:29:37 +0200
Subject: netfilter: remove obsolete need_conntrack stub

as of a0ae2562c6c4b27 ("netfilter: conntrack: remove l3proto
abstraction") there are no users anymore.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_common.h | 3 ---
 net/netfilter/nf_conntrack_standalone.c       | 7 -------
 2 files changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 03097fa70975..e142b2b5f1ea 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -19,7 +19,4 @@ struct ip_conntrack_stat {
 	unsigned int search_restart;
 };
 
-/* call to create an explicit dependency on nf_conntrack. */
-void need_conntrack(void);
-
 #endif /* _NF_CONNTRACK_COMMON_H */
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 13279f683da9..e3b329ebafd3 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -720,10 +720,3 @@ static void __exit nf_conntrack_standalone_fini(void)
 
 module_init(nf_conntrack_standalone_init);
 module_exit(nf_conntrack_standalone_fini);
-
-/* Some modules need us, but don't depend directly on any symbol.
-   They should call this. */
-void need_conntrack(void)
-{
-}
-EXPORT_SYMBOL_GPL(need_conntrack);
-- 
cgit v1.2.3


From 0baf9eb25572eea0cf48249c1d152d8373956a7c Mon Sep 17 00:00:00 2001
From: David Lechner <david@lechnology.com>
Date: Wed, 12 Sep 2018 19:39:17 -0500
Subject: spi: add new SPI_CS_WORD flag

This adds a new SPI mode flag, SPI_CS_WORD, that is used to indicate
that a SPI device requires the chip select to be toggled after each
word that is transferred.

Signed-off-by: David Lechner <david@lechnology.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index a64235e05321..7cc1466111f5 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -163,6 +163,7 @@ struct spi_device {
 #define	SPI_TX_QUAD	0x200			/* transmit with 4 wires */
 #define	SPI_RX_DUAL	0x400			/* receive with 2 wires */
 #define	SPI_RX_QUAD	0x800			/* receive with 4 wires */
+#define SPI_CS_WORD	0x1000			/* toggle cs after each word */
 	int			irq;
 	void			*controller_state;
 	void			*controller_data;
@@ -177,7 +178,6 @@ struct spi_device {
 	 * the controller talks to each chip, like:
 	 *  - memory packing (12 bit samples into low bits, others zeroed)
 	 *  - priority
-	 *  - drop chipselect after each word
 	 *  - chipselect delays
 	 *  - ...
 	 */
-- 
cgit v1.2.3


From efdfeb079cc3b6c7d9c19959c5ed65ce2510dd1d Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 6 Sep 2018 14:24:36 +0200
Subject: regulator: fixed: Convert to use GPIO descriptor only

As we augmented the regulator core to accept a GPIO descriptor instead
of a GPIO number, we can augment the fixed GPIO regulator to look up
and pass that descriptor directly from device tree or board GPIO
descriptor look up tables.

Some boards just auto-enumerate their fixed regulator platform devices
and I have assumed they get names like "fixed-regulator.0" but it's
pretty hard to guess this. I need some testing from board maintainers to
be sure. Other boards are straight forward, using just plain
"fixed-regulator" (ID -1) or "fixed-regulator.1" hammering down the
device ID.

It seems the da9055 and da9211 has never got around to actually passing
any enable gpio into its platform data (not the in-tree code anyway) so we
can just decide to simply pass a descriptor instead.

The fixed GPIO-controlled regulator in mach-pxa/ezx.c was confusingly named
"*_dummy_supply_device" while it is a very real device backed by a GPIO
line. There is nothing dummy about it at all, so I renamed it with the
infix *_regulator_* as part of this patch set.

Intel MID portions tested by Andy.

Tested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> # Check the x86 BCM stuff
Acked-by: Tony Lindgren <tony@atomide.com> # OMAP1,2,3 maintainer
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Reviewed-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 arch/arm/mach-imx/mach-mx21ads.c                   | 12 +++++++-
 arch/arm/mach-imx/mach-mx27ads.c                   | 12 +++++++-
 arch/arm/mach-mmp/brownstone.c                     | 12 +++++++-
 arch/arm/mach-omap1/board-ams-delta.c              | 12 ++++++--
 arch/arm/mach-omap2/pdata-quirks.c                 | 16 ++++++++++-
 arch/arm/mach-pxa/em-x270.c                        |  1 -
 arch/arm/mach-pxa/ezx.c                            | 33 ++++++++++++++--------
 arch/arm/mach-pxa/magician.c                       |  2 +-
 arch/arm/mach-pxa/raumfeld.c                       | 12 ++++++--
 arch/arm/mach-pxa/zeus.c                           | 23 +++++++++++++--
 arch/arm/mach-s3c64xx/mach-crag6410.c              |  1 -
 arch/arm/mach-s3c64xx/mach-smdk6410.c              |  1 -
 arch/arm/mach-sa1100/assabet.c                     | 21 +++++++++-----
 arch/arm/mach-sa1100/generic.c                     |  5 ++--
 arch/arm/mach-sa1100/generic.h                     |  3 +-
 arch/arm/mach-sa1100/shannon.c                     |  4 +--
 arch/sh/boards/mach-ecovec24/setup.c               | 27 ++++++++++++++++--
 .../intel-mid/device_libs/platform_bcm43xx.c       | 17 +++++++++--
 drivers/regulator/fixed-helper.c                   |  1 -
 drivers/regulator/fixed.c                          | 33 +++++++++++-----------
 include/linux/regulator/fixed.h                    |  3 --
 21 files changed, 188 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-imx/mach-mx21ads.c b/arch/arm/mach-imx/mach-mx21ads.c
index 5e366824814f..2e1e540f2e5a 100644
--- a/arch/arm/mach-imx/mach-mx21ads.c
+++ b/arch/arm/mach-imx/mach-mx21ads.c
@@ -18,6 +18,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/physmap.h>
 #include <linux/gpio/driver.h>
+#include <linux/gpio/machine.h>
 #include <linux/gpio.h>
 #include <linux/regulator/fixed.h>
 #include <linux/regulator/machine.h>
@@ -175,6 +176,7 @@ static struct resource mx21ads_mmgpio_resource =
 	DEFINE_RES_MEM_NAMED(MX21ADS_IO_REG, SZ_2, "dat");
 
 static struct bgpio_pdata mx21ads_mmgpio_pdata = {
+	.label	= "mx21ads-mmgpio",
 	.base	= MX21ADS_MMGPIO_BASE,
 	.ngpio	= 16,
 };
@@ -203,7 +205,6 @@ static struct regulator_init_data mx21ads_lcd_regulator_init_data = {
 static struct fixed_voltage_config mx21ads_lcd_regulator_pdata = {
 	.supply_name	= "LCD",
 	.microvolts	= 3300000,
-	.gpio		= MX21ADS_IO_LCDON,
 	.enable_high	= 1,
 	.init_data	= &mx21ads_lcd_regulator_init_data,
 };
@@ -216,6 +217,14 @@ static struct platform_device mx21ads_lcd_regulator = {
 	},
 };
 
+static struct gpiod_lookup_table mx21ads_lcd_regulator_gpiod_table = {
+	.dev_id = "reg-fixed-voltage.0", /* Let's hope ID 0 is what we get */
+	.table = {
+		GPIO_LOOKUP("mx21ads-mmgpio", 9, NULL, GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
 /*
  * Connected is a portrait Sharp-QVGA display
  * of type: LQ035Q7DB02
@@ -311,6 +320,7 @@ static void __init mx21ads_late_init(void)
 {
 	imx21_add_mxc_mmc(0, &mx21ads_sdhc_pdata);
 
+	gpiod_add_lookup_table(&mx21ads_lcd_regulator_gpiod_table);
 	platform_add_devices(platform_devices, ARRAY_SIZE(platform_devices));
 
 	mx21ads_cs8900_resources[1].start =
diff --git a/arch/arm/mach-imx/mach-mx27ads.c b/arch/arm/mach-imx/mach-mx27ads.c
index a04bb094ded1..f5e04047ed13 100644
--- a/arch/arm/mach-imx/mach-mx27ads.c
+++ b/arch/arm/mach-imx/mach-mx27ads.c
@@ -16,6 +16,7 @@
 #include <linux/gpio/driver.h>
 /* Needed for gpio_to_irq() */
 #include <linux/gpio.h>
+#include <linux/gpio/machine.h>
 #include <linux/platform_device.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/map.h>
@@ -230,10 +231,17 @@ static struct regulator_init_data mx27ads_lcd_regulator_init_data = {
 static struct fixed_voltage_config mx27ads_lcd_regulator_pdata = {
 	.supply_name	= "LCD",
 	.microvolts	= 3300000,
-	.gpio		= MX27ADS_LCD_GPIO,
 	.init_data	= &mx27ads_lcd_regulator_init_data,
 };
 
+static struct gpiod_lookup_table mx27ads_lcd_regulator_gpiod_table = {
+	.dev_id = "reg-fixed-voltage.0", /* Let's hope ID 0 is what we get */
+	.table = {
+		GPIO_LOOKUP("LCD", 0, NULL, GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
 static void __init mx27ads_regulator_init(void)
 {
 	struct gpio_chip *vchip;
@@ -247,6 +255,8 @@ static void __init mx27ads_regulator_init(void)
 	vchip->set		= vgpio_set;
 	gpiochip_add_data(vchip, NULL);
 
+	gpiod_add_lookup_table(&mx27ads_lcd_regulator_gpiod_table);
+
 	platform_device_register_data(NULL, "reg-fixed-voltage",
 				      PLATFORM_DEVID_AUTO,
 				      &mx27ads_lcd_regulator_pdata,
diff --git a/arch/arm/mach-mmp/brownstone.c b/arch/arm/mach-mmp/brownstone.c
index d1613b954926..a04e249c654b 100644
--- a/arch/arm/mach-mmp/brownstone.c
+++ b/arch/arm/mach-mmp/brownstone.c
@@ -15,6 +15,7 @@
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/gpio-pxa.h>
+#include <linux/gpio/machine.h>
 #include <linux/regulator/machine.h>
 #include <linux/regulator/max8649.h>
 #include <linux/regulator/fixed.h>
@@ -148,7 +149,6 @@ static struct regulator_init_data brownstone_v_5vp_data = {
 static struct fixed_voltage_config brownstone_v_5vp = {
 	.supply_name		= "v_5vp",
 	.microvolts		= 5000000,
-	.gpio			= GPIO_5V_ENABLE,
 	.enable_high		= 1,
 	.enabled_at_boot	= 1,
 	.init_data		= &brownstone_v_5vp_data,
@@ -162,6 +162,15 @@ static struct platform_device brownstone_v_5vp_device = {
 	},
 };
 
+static struct gpiod_lookup_table brownstone_v_5vp_gpiod_table = {
+	.dev_id = "reg-fixed-voltage.1", /* .id set to 1 above */
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", GPIO_5V_ENABLE,
+			    NULL, GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
 static struct max8925_platform_data brownstone_max8925_info = {
 	.irq_base		= MMP_NR_IRQS,
 };
@@ -217,6 +226,7 @@ static void __init brownstone_init(void)
 	mmp2_add_isram(&mmp2_isram_platdata);
 
 	/* enable 5v regulator */
+	gpiod_add_lookup_table(&brownstone_v_5vp_gpiod_table);
 	platform_device_register(&brownstone_v_5vp_device);
 }
 
diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c
index dd28d2614d7f..f226973f3d8c 100644
--- a/arch/arm/mach-omap1/board-ams-delta.c
+++ b/arch/arm/mach-omap1/board-ams-delta.c
@@ -300,7 +300,6 @@ static struct regulator_init_data modem_nreset_data = {
 static struct fixed_voltage_config modem_nreset_config = {
 	.supply_name		= "modem_nreset",
 	.microvolts		= 3300000,
-	.gpio			= AMS_DELTA_GPIO_PIN_MODEM_NRESET,
 	.startup_delay		= 25000,
 	.enable_high		= 1,
 	.enabled_at_boot	= 1,
@@ -315,6 +314,15 @@ static struct platform_device modem_nreset_device = {
 	},
 };
 
+static struct gpiod_lookup_table ams_delta_nreset_gpiod_table = {
+	.dev_id = "reg-fixed-voltage",
+	.table = {
+		GPIO_LOOKUP(LATCH2_LABEL, LATCH2_PIN_MODEM_NRESET,
+			    NULL, GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
 struct modem_private_data {
 	struct regulator *regulator;
 };
@@ -568,7 +576,6 @@ static struct regulator_init_data keybrd_pwr_initdata = {
 static struct fixed_voltage_config keybrd_pwr_config = {
 	.supply_name		= "keybrd_pwr",
 	.microvolts		= 5000000,
-	.gpio			= AMS_DELTA_GPIO_PIN_KEYBRD_PWR,
 	.enable_high		= 1,
 	.init_data		= &keybrd_pwr_initdata,
 };
@@ -602,6 +609,7 @@ static struct platform_device *ams_delta_devices[] __initdata = {
 };
 
 static struct gpiod_lookup_table *ams_delta_gpio_tables[] __initdata = {
+	&ams_delta_nreset_gpiod_table,
 	&ams_delta_audio_gpio_table,
 	&keybrd_pwr_gpio_table,
 	&ams_delta_lcd_gpio_table,
diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c
index 7f02743edbe4..d0f7a7cc70cb 100644
--- a/arch/arm/mach-omap2/pdata-quirks.c
+++ b/arch/arm/mach-omap2/pdata-quirks.c
@@ -10,6 +10,7 @@
 #include <linux/clk.h>
 #include <linux/davinci_emac.h>
 #include <linux/gpio.h>
+#include <linux/gpio/machine.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/of_platform.h>
@@ -328,7 +329,6 @@ static struct regulator_init_data pandora_vmmc3 = {
 static struct fixed_voltage_config pandora_vwlan = {
 	.supply_name		= "vwlan",
 	.microvolts		= 1800000, /* 1.8V */
-	.gpio			= PANDORA_WIFI_NRESET_GPIO,
 	.startup_delay		= 50000, /* 50ms */
 	.enable_high		= 1,
 	.init_data		= &pandora_vmmc3,
@@ -342,6 +342,19 @@ static struct platform_device pandora_vwlan_device = {
 	},
 };
 
+static struct gpiod_lookup_table pandora_vwlan_gpiod_table = {
+	.dev_id = "reg-fixed-voltage.1",
+	.table = {
+		/*
+		 * As this is a low GPIO number it should be at the first
+		 * GPIO bank.
+		 */
+		GPIO_LOOKUP("gpio-0-31", PANDORA_WIFI_NRESET_GPIO,
+			    NULL, GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
 static void pandora_wl1251_init_card(struct mmc_card *card)
 {
 	/*
@@ -403,6 +416,7 @@ fail:
 static void __init omap3_pandora_legacy_init(void)
 {
 	platform_device_register(&pandora_backlight);
+	gpiod_add_lookup_table(&pandora_vwlan_gpiod_table);
 	platform_device_register(&pandora_vwlan_device);
 	omap_hsmmc_init(pandora_mmc3);
 	omap_hsmmc_late_init(pandora_mmc3);
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 29be04c6cc48..b14c47a6ee6b 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -986,7 +986,6 @@ static struct fixed_voltage_config camera_dummy_config = {
 	.supply_name		= "camera_vdd",
 	.input_supply		= "vcc cam",
 	.microvolts		= 2800000,
-	.gpio			= -1,
 	.enable_high		= 0,
 	.init_data		= &camera_dummy_initdata,
 };
diff --git a/arch/arm/mach-pxa/ezx.c b/arch/arm/mach-pxa/ezx.c
index 2c90b58f347d..565965e9acc7 100644
--- a/arch/arm/mach-pxa/ezx.c
+++ b/arch/arm/mach-pxa/ezx.c
@@ -21,6 +21,7 @@
 #include <linux/regulator/fixed.h>
 #include <linux/input.h>
 #include <linux/gpio.h>
+#include <linux/gpio/machine.h>
 #include <linux/gpio_keys.h>
 #include <linux/leds-lp3944.h>
 #include <linux/platform_data/i2c-pxa.h>
@@ -698,31 +699,39 @@ static struct pxa27x_keypad_platform_data e2_keypad_platform_data = {
 
 #if defined(CONFIG_MACH_EZX_A780) || defined(CONFIG_MACH_EZX_A910)
 /* camera */
-static struct regulator_consumer_supply camera_dummy_supplies[] = {
+static struct regulator_consumer_supply camera_regulator_supplies[] = {
 	REGULATOR_SUPPLY("vdd", "0-005d"),
 };
 
-static struct regulator_init_data camera_dummy_initdata = {
-	.consumer_supplies = camera_dummy_supplies,
-	.num_consumer_supplies = ARRAY_SIZE(camera_dummy_supplies),
+static struct regulator_init_data camera_regulator_initdata = {
+	.consumer_supplies = camera_regulator_supplies,
+	.num_consumer_supplies = ARRAY_SIZE(camera_regulator_supplies),
 	.constraints = {
 		.valid_ops_mask = REGULATOR_CHANGE_STATUS,
 	},
 };
 
-static struct fixed_voltage_config camera_dummy_config = {
+static struct fixed_voltage_config camera_regulator_config = {
 	.supply_name		= "camera_vdd",
 	.microvolts		= 2800000,
-	.gpio			= GPIO50_nCAM_EN,
 	.enable_high		= 0,
-	.init_data		= &camera_dummy_initdata,
+	.init_data		= &camera_regulator_initdata,
 };
 
-static struct platform_device camera_supply_dummy_device = {
+static struct platform_device camera_supply_regulator_device = {
 	.name	= "reg-fixed-voltage",
 	.id	= 1,
 	.dev	= {
-		.platform_data = &camera_dummy_config,
+		.platform_data = &camera_regulator_config,
+	},
+};
+
+static struct gpiod_lookup_table camera_supply_gpiod_table = {
+	.dev_id = "reg-fixed-voltage.1",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", GPIO50_nCAM_EN,
+			    NULL, GPIO_ACTIVE_HIGH),
+		{ },
 	},
 };
 #endif
@@ -800,7 +809,7 @@ static struct i2c_board_info a780_i2c_board_info[] = {
 
 static struct platform_device *a780_devices[] __initdata = {
 	&a780_gpio_keys,
-	&camera_supply_dummy_device,
+	&camera_supply_regulator_device,
 };
 
 static void __init a780_init(void)
@@ -823,6 +832,7 @@ static void __init a780_init(void)
 	if (a780_camera_init() == 0)
 		pxa_set_camera_info(&a780_pxacamera_platform_data);
 
+	gpiod_add_lookup_table(&camera_supply_gpiod_table);
 	pwm_add_table(ezx_pwm_lookup, ARRAY_SIZE(ezx_pwm_lookup));
 	platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
 	platform_add_devices(ARRAY_AND_SIZE(a780_devices));
@@ -1098,7 +1108,7 @@ static struct i2c_board_info __initdata a910_i2c_board_info[] = {
 
 static struct platform_device *a910_devices[] __initdata = {
 	&a910_gpio_keys,
-	&camera_supply_dummy_device,
+	&camera_supply_regulator_device,
 };
 
 static void __init a910_init(void)
@@ -1121,6 +1131,7 @@ static void __init a910_init(void)
 	if (a910_camera_init() == 0)
 		pxa_set_camera_info(&a910_pxacamera_platform_data);
 
+	gpiod_add_lookup_table(&camera_supply_gpiod_table);
 	pwm_add_table(ezx_pwm_lookup, ARRAY_SIZE(ezx_pwm_lookup));
 	platform_add_devices(ARRAY_AND_SIZE(ezx_devices));
 	platform_add_devices(ARRAY_AND_SIZE(a910_devices));
diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c
index c5325d1ae77b..14c0f80bc9e7 100644
--- a/arch/arm/mach-pxa/magician.c
+++ b/arch/arm/mach-pxa/magician.c
@@ -18,6 +18,7 @@
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/gpio.h>
+#include <linux/gpio/machine.h>
 #include <linux/gpio_keys.h>
 #include <linux/input.h>
 #include <linux/mfd/htc-pasic3.h>
@@ -696,7 +697,6 @@ static struct regulator_init_data vads7846_regulator = {
 static struct fixed_voltage_config vads7846 = {
 	.supply_name	= "vads7846",
 	.microvolts	= 3300000, /* probably */
-	.gpio		= -EINVAL,
 	.startup_delay	= 0,
 	.init_data	= &vads7846_regulator,
 };
diff --git a/arch/arm/mach-pxa/raumfeld.c b/arch/arm/mach-pxa/raumfeld.c
index 034345546f84..bd3c23ad6ce6 100644
--- a/arch/arm/mach-pxa/raumfeld.c
+++ b/arch/arm/mach-pxa/raumfeld.c
@@ -886,7 +886,6 @@ static struct regulator_init_data audio_va_initdata = {
 static struct fixed_voltage_config audio_va_config = {
 	.supply_name		= "audio_va",
 	.microvolts		= 5000000,
-	.gpio			= GPIO_AUDIO_VA_ENABLE,
 	.enable_high		= 1,
 	.enabled_at_boot	= 0,
 	.init_data		= &audio_va_initdata,
@@ -900,6 +899,15 @@ static struct platform_device audio_va_device = {
 	},
 };
 
+static struct gpiod_lookup_table audio_va_gpiod_table = {
+	.dev_id = "reg-fixed-voltage.0",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", GPIO_AUDIO_VA_ENABLE,
+			    NULL, GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
 /* Dummy supplies for Codec's VD/VLC */
 
 static struct regulator_consumer_supply audio_dummy_supplies[] = {
@@ -918,7 +926,6 @@ static struct regulator_init_data audio_dummy_initdata = {
 static struct fixed_voltage_config audio_dummy_config = {
 	.supply_name		= "audio_vd",
 	.microvolts		= 3300000,
-	.gpio			= -1,
 	.init_data		= &audio_dummy_initdata,
 };
 
@@ -1033,6 +1040,7 @@ static void __init raumfeld_audio_init(void)
 	else
 		gpio_direction_output(GPIO_MCLK_RESET, 1);
 
+	gpiod_add_lookup_table(&audio_va_gpiod_table);
 	platform_add_devices(ARRAY_AND_SIZE(audio_regulator_devices));
 }
 
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index e3851795d6d7..d53ea12fc766 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -17,6 +17,7 @@
 #include <linux/irq.h>
 #include <linux/pm.h>
 #include <linux/gpio.h>
+#include <linux/gpio/machine.h>
 #include <linux/serial_8250.h>
 #include <linux/dm9000.h>
 #include <linux/mmc/host.h>
@@ -410,7 +411,6 @@ static struct regulator_init_data can_regulator_init_data = {
 static struct fixed_voltage_config can_regulator_pdata = {
 	.supply_name	= "CAN_SHDN",
 	.microvolts	= 3300000,
-	.gpio		= ZEUS_CAN_SHDN_GPIO,
 	.init_data	= &can_regulator_init_data,
 };
 
@@ -422,6 +422,15 @@ static struct platform_device can_regulator_device = {
 	},
 };
 
+static struct gpiod_lookup_table can_regulator_gpiod_table = {
+	.dev_id = "reg-fixed-voltage.0",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", ZEUS_CAN_SHDN_GPIO,
+			    NULL, GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
 static struct mcp251x_platform_data zeus_mcp2515_pdata = {
 	.oscillator_frequency	= 16*1000*1000,
 };
@@ -538,7 +547,6 @@ static struct regulator_init_data zeus_ohci_regulator_data = {
 static struct fixed_voltage_config zeus_ohci_regulator_config = {
 	.supply_name		= "vbus2",
 	.microvolts		= 5000000, /* 5.0V */
-	.gpio			= ZEUS_USB2_PWREN_GPIO,
 	.enable_high		= 1,
 	.startup_delay		= 0,
 	.init_data		= &zeus_ohci_regulator_data,
@@ -552,6 +560,15 @@ static struct platform_device zeus_ohci_regulator_device = {
 	},
 };
 
+static struct gpiod_lookup_table zeus_ohci_regulator_gpiod_table = {
+	.dev_id = "reg-fixed-voltage.0",
+	.table = {
+		GPIO_LOOKUP("gpio-pxa", ZEUS_USB2_PWREN_GPIO,
+			    NULL, GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
 static struct pxaohci_platform_data zeus_ohci_platform_data = {
 	.port_mode	= PMM_NPS_MODE,
 	/* Clear Power Control Polarity Low and set Power Sense
@@ -855,6 +872,8 @@ static void __init zeus_init(void)
 
 	pxa2xx_mfp_config(ARRAY_AND_SIZE(zeus_pin_config));
 
+	gpiod_add_lookup_table(&can_regulator_gpiod_table);
+	gpiod_add_lookup_table(&zeus_ohci_regulator_gpiod_table);
 	platform_add_devices(zeus_devices, ARRAY_SIZE(zeus_devices));
 
 	zeus_register_ohci();
diff --git a/arch/arm/mach-s3c64xx/mach-crag6410.c b/arch/arm/mach-s3c64xx/mach-crag6410.c
index f04650297487..379424d72ae7 100644
--- a/arch/arm/mach-s3c64xx/mach-crag6410.c
+++ b/arch/arm/mach-s3c64xx/mach-crag6410.c
@@ -352,7 +352,6 @@ static struct fixed_voltage_config wallvdd_pdata = {
 	.supply_name = "WALLVDD",
 	.microvolts = 5000000,
 	.init_data = &wallvdd_data,
-	.gpio = -EINVAL,
 };
 
 static struct platform_device wallvdd_device = {
diff --git a/arch/arm/mach-s3c64xx/mach-smdk6410.c b/arch/arm/mach-s3c64xx/mach-smdk6410.c
index c46fa5dfd2e0..908e5aa831c8 100644
--- a/arch/arm/mach-s3c64xx/mach-smdk6410.c
+++ b/arch/arm/mach-s3c64xx/mach-smdk6410.c
@@ -222,7 +222,6 @@ static struct fixed_voltage_config smdk6410_b_pwr_5v_pdata = {
 	.supply_name = "B_PWR_5V",
 	.microvolts = 5000000,
 	.init_data = &smdk6410_b_pwr_5v_data,
-	.gpio = -EINVAL,
 };
 
 static struct platform_device smdk6410_b_pwr_5v = {
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index 575ec085cffa..3e8c0948abcc 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -101,7 +101,7 @@ static int __init assabet_init_gpio(void __iomem *reg, u32 def_val)
 
 	assabet_bcr_gc = gc;
 
-	return gc->base;
+	return 0;
 }
 
 /*
@@ -471,6 +471,14 @@ static struct fixed_voltage_config assabet_cf_vcc_pdata __initdata = {
 	.enable_high = 1,
 };
 
+static struct gpiod_lookup_table assabet_cf_vcc_gpio_table = {
+	.dev_id = "reg-fixed-voltage.0",
+	.table = {
+		GPIO_LOOKUP("assabet", 0, NULL, GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
 static void __init assabet_init(void)
 {
 	/*
@@ -517,9 +525,11 @@ static void __init assabet_init(void)
 			neponset_resources, ARRAY_SIZE(neponset_resources));
 #endif
 	} else {
+		gpiod_add_lookup_table(&assabet_cf_vcc_gpio_table);
 		sa11x0_register_fixed_regulator(0, &assabet_cf_vcc_pdata,
-					 assabet_cf_vcc_consumers,
-					 ARRAY_SIZE(assabet_cf_vcc_consumers));
+					assabet_cf_vcc_consumers,
+					ARRAY_SIZE(assabet_cf_vcc_consumers),
+					true);
 
 	}
 
@@ -802,7 +812,6 @@ fs_initcall(assabet_leds_init);
 
 void __init assabet_init_irq(void)
 {
-	unsigned int assabet_gpio_base;
 	u32 def_val;
 
 	sa1100_init_irq();
@@ -817,9 +826,7 @@ void __init assabet_init_irq(void)
 	 *
 	 * This must precede any driver calls to BCR_set() or BCR_clear().
 	 */
-	assabet_gpio_base = assabet_init_gpio((void *)&ASSABET_BCR, def_val);
-
-	assabet_cf_vcc_pdata.gpio = assabet_gpio_base + 0;
+	assabet_init_gpio((void *)&ASSABET_BCR, def_val);
 }
 
 MACHINE_START(ASSABET, "Intel-Assabet")
diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c
index 7167ddf84a0e..800321c6cbd8 100644
--- a/arch/arm/mach-sa1100/generic.c
+++ b/arch/arm/mach-sa1100/generic.c
@@ -348,7 +348,8 @@ void __init sa11x0_init_late(void)
 
 int __init sa11x0_register_fixed_regulator(int n,
 	struct fixed_voltage_config *cfg,
-	struct regulator_consumer_supply *supplies, unsigned num_supplies)
+	struct regulator_consumer_supply *supplies, unsigned num_supplies,
+	bool uses_gpio)
 {
 	struct regulator_init_data *id;
 
@@ -356,7 +357,7 @@ int __init sa11x0_register_fixed_regulator(int n,
 	if (!cfg->init_data)
 		return -ENOMEM;
 
-	if (cfg->gpio < 0)
+	if (!uses_gpio)
 		id->constraints.always_on = 1;
 	id->constraints.name = cfg->supply_name;
 	id->constraints.min_uV = cfg->microvolts;
diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h
index 5f3cb52fa6ab..158a4fd5ca24 100644
--- a/arch/arm/mach-sa1100/generic.h
+++ b/arch/arm/mach-sa1100/generic.h
@@ -54,4 +54,5 @@ void sa11x0_register_pcmcia(int socket, struct gpiod_lookup_table *);
 struct fixed_voltage_config;
 struct regulator_consumer_supply;
 int sa11x0_register_fixed_regulator(int n, struct fixed_voltage_config *cfg,
-	struct regulator_consumer_supply *supplies, unsigned num_supplies);
+	struct regulator_consumer_supply *supplies, unsigned num_supplies,
+	bool uses_gpio);
diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c
index 22f7fe0b809f..5bc82e2671c6 100644
--- a/arch/arm/mach-sa1100/shannon.c
+++ b/arch/arm/mach-sa1100/shannon.c
@@ -102,14 +102,14 @@ static struct fixed_voltage_config shannon_cf_vcc_pdata __initdata = {
 	.supply_name = "cf-power",
 	.microvolts = 3300000,
 	.enabled_at_boot = 1,
-	.gpio = -EINVAL,
 };
 
 static void __init shannon_init(void)
 {
 	sa11x0_register_fixed_regulator(0, &shannon_cf_vcc_pdata,
 					shannon_cf_vcc_consumers,
-					ARRAY_SIZE(shannon_cf_vcc_consumers));
+					ARRAY_SIZE(shannon_cf_vcc_consumers),
+					false);
 	sa11x0_register_pcmcia(0, &shannon_pcmcia0_gpio_table);
 	sa11x0_register_pcmcia(1, &shannon_pcmcia1_gpio_table);
 	sa11x0_ppc_configure_mcp();
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index adc61d14172c..06a894526a0b 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -633,7 +633,6 @@ static struct regulator_init_data cn12_power_init_data = {
 static struct fixed_voltage_config cn12_power_info = {
 	.supply_name = "CN12 SD/MMC Vdd",
 	.microvolts = 3300000,
-	.gpio = GPIO_PTB7,
 	.enable_high = 1,
 	.init_data = &cn12_power_init_data,
 };
@@ -646,6 +645,16 @@ static struct platform_device cn12_power = {
 	},
 };
 
+static struct gpiod_lookup_table cn12_power_gpiod_table = {
+	.dev_id = "reg-fixed-voltage.0",
+	.table = {
+		/* Offset 7 on port B */
+		GPIO_LOOKUP("sh7724_pfc", GPIO_PTB7,
+			    NULL, GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
 #if defined(CONFIG_MMC_SDHI) || defined(CONFIG_MMC_SDHI_MODULE)
 /* SDHI0 */
 static struct regulator_consumer_supply sdhi0_power_consumers[] =
@@ -665,7 +674,6 @@ static struct regulator_init_data sdhi0_power_init_data = {
 static struct fixed_voltage_config sdhi0_power_info = {
 	.supply_name = "CN11 SD/MMC Vdd",
 	.microvolts = 3300000,
-	.gpio = GPIO_PTB6,
 	.enable_high = 1,
 	.init_data = &sdhi0_power_init_data,
 };
@@ -678,6 +686,16 @@ static struct platform_device sdhi0_power = {
 	},
 };
 
+static struct gpiod_lookup_table sdhi0_power_gpiod_table = {
+	.dev_id = "reg-fixed-voltage.1",
+	.table = {
+		/* Offset 6 on port B */
+		GPIO_LOOKUP("sh7724_pfc", GPIO_PTB6,
+			    NULL, GPIO_ACTIVE_HIGH),
+		{ },
+	},
+};
+
 static struct tmio_mmc_data sdhi0_info = {
 	.chan_priv_tx	= (void *)SHDMA_SLAVE_SDHI0_TX,
 	.chan_priv_rx	= (void *)SHDMA_SLAVE_SDHI0_RX,
@@ -1413,6 +1431,11 @@ static int __init arch_setup(void)
 				    DMA_MEMORY_EXCLUSIVE);
 	platform_device_add(ecovec_ceu_devices[1]);
 
+	gpiod_add_lookup_table(&cn12_power_gpiod_table);
+#if defined(CONFIG_MMC_SDHI) || defined(CONFIG_MMC_SDHI_MODULE)
+	gpiod_add_lookup_table(&sdhi0_power_gpiod_table);
+#endif
+
 	return platform_add_devices(ecovec_devices,
 				    ARRAY_SIZE(ecovec_devices));
 }
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
index 4392c15ed9e0..dbfc5cf2aa93 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
@@ -10,7 +10,7 @@
  * of the License.
  */
 
-#include <linux/gpio.h>
+#include <linux/gpio/machine.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/machine.h>
 #include <linux/regulator/fixed.h>
@@ -43,7 +43,6 @@ static struct fixed_voltage_config bcm43xx_vmmc = {
 	 * real voltage and signaling are still 1.8V.
 	 */
 	.microvolts		= 2000000,		/* 1.8V */
-	.gpio			= -EINVAL,
 	.startup_delay		= 250 * 1000,		/* 250ms */
 	.enable_high		= 1,			/* active high */
 	.enabled_at_boot	= 0,			/* disabled at boot */
@@ -58,11 +57,23 @@ static struct platform_device bcm43xx_vmmc_regulator = {
 	},
 };
 
+static struct gpiod_lookup_table bcm43xx_vmmc_gpio_table = {
+	.dev_id	= "reg-fixed-voltage.0",
+	.table	= {
+		GPIO_LOOKUP("0000:00:0c.0", -1, NULL, GPIO_ACTIVE_LOW),
+		{}
+	},
+};
+
 static int __init bcm43xx_regulator_register(void)
 {
+	struct gpiod_lookup_table *table = &bcm43xx_vmmc_gpio_table;
+	struct gpiod_lookup *lookup = table->table;
 	int ret;
 
-	bcm43xx_vmmc.gpio = get_gpio_by_name(WLAN_SFI_GPIO_ENABLE_NAME);
+	lookup[0].chip_hwnum = get_gpio_by_name(WLAN_SFI_GPIO_ENABLE_NAME);
+	gpiod_add_lookup_table(table);
+
 	ret = platform_device_register(&bcm43xx_vmmc_regulator);
 	if (ret) {
 		pr_err("%s: vmmc regulator register failed\n", __func__);
diff --git a/drivers/regulator/fixed-helper.c b/drivers/regulator/fixed-helper.c
index 777fac6fb4cb..2c6098e6f4bc 100644
--- a/drivers/regulator/fixed-helper.c
+++ b/drivers/regulator/fixed-helper.c
@@ -43,7 +43,6 @@ struct platform_device *regulator_register_always_on(int id, const char *name,
 	}
 
 	data->cfg.microvolts = uv;
-	data->cfg.gpio = -EINVAL;
 	data->cfg.enabled_at_boot = 1;
 	data->cfg.init_data = &data->init_data;
 
diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index 988a7472c2ab..1142f195529b 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -24,10 +24,9 @@
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/fixed.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/slab.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
 #include <linux/regulator/of_regulator.h>
 #include <linux/regulator/machine.h>
 
@@ -78,10 +77,6 @@ of_get_fixed_voltage_config(struct device *dev,
 	if (init_data->constraints.boot_on)
 		config->enabled_at_boot = true;
 
-	config->gpio = of_get_named_gpio(np, "gpio", 0);
-	if ((config->gpio < 0) && (config->gpio != -ENOENT))
-		return ERR_PTR(config->gpio);
-
 	of_property_read_u32(np, "startup-delay-us", &config->startup_delay);
 
 	config->enable_high = of_property_read_bool(np, "enable-active-high");
@@ -102,6 +97,7 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev)
 	struct fixed_voltage_config *config;
 	struct fixed_voltage_data *drvdata;
 	struct regulator_config cfg = { };
+	enum gpiod_flags gflags;
 	int ret;
 
 	drvdata = devm_kzalloc(&pdev->dev, sizeof(struct fixed_voltage_data),
@@ -150,25 +146,28 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev)
 
 	drvdata->desc.fixed_uV = config->microvolts;
 
-	if (gpio_is_valid(config->gpio)) {
-		cfg.ena_gpio = config->gpio;
-		if (pdev->dev.of_node)
-			cfg.ena_gpio_initialized = true;
-	}
 	cfg.ena_gpio_invert = !config->enable_high;
 	if (config->enabled_at_boot) {
 		if (config->enable_high)
-			cfg.ena_gpio_flags |= GPIOF_OUT_INIT_HIGH;
+			gflags = GPIOD_OUT_HIGH;
 		else
-			cfg.ena_gpio_flags |= GPIOF_OUT_INIT_LOW;
+			gflags = GPIOD_OUT_LOW;
 	} else {
 		if (config->enable_high)
-			cfg.ena_gpio_flags |= GPIOF_OUT_INIT_LOW;
+			gflags = GPIOD_OUT_LOW;
 		else
-			cfg.ena_gpio_flags |= GPIOF_OUT_INIT_HIGH;
+			gflags = GPIOD_OUT_HIGH;
 	}
-	if (config->gpio_is_open_drain)
-		cfg.ena_gpio_flags |= GPIOF_OPEN_DRAIN;
+	if (config->gpio_is_open_drain) {
+		if (gflags == GPIOD_OUT_HIGH)
+			gflags = GPIOD_OUT_HIGH_OPEN_DRAIN;
+		else
+			gflags = GPIOD_OUT_LOW_OPEN_DRAIN;
+	}
+
+	cfg.ena_gpiod = devm_gpiod_get_optional(&pdev->dev, NULL, gflags);
+	if (IS_ERR(cfg.ena_gpiod))
+		return PTR_ERR(cfg.ena_gpiod);
 
 	cfg.dev = &pdev->dev;
 	cfg.init_data = config->init_data;
diff --git a/include/linux/regulator/fixed.h b/include/linux/regulator/fixed.h
index 48918be649d4..1a4340ed8e2b 100644
--- a/include/linux/regulator/fixed.h
+++ b/include/linux/regulator/fixed.h
@@ -24,8 +24,6 @@ struct regulator_init_data;
  * @supply_name:	Name of the regulator supply
  * @input_supply:	Name of the input regulator supply
  * @microvolts:		Output voltage of regulator
- * @gpio:		GPIO to use for enable control
- * 			set to -EINVAL if not used
  * @startup_delay:	Start-up time in microseconds
  * @gpio_is_open_drain: Gpio pin is open drain or normal type.
  *			If it is open drain type then HIGH will be set
@@ -49,7 +47,6 @@ struct fixed_voltage_config {
 	const char *supply_name;
 	const char *input_supply;
 	int microvolts;
-	int gpio;
 	unsigned startup_delay;
 	unsigned gpio_is_open_drain:1;
 	unsigned enable_high:1;
-- 
cgit v1.2.3


From 1ccce46c5e8b8a0d2606fb8bb72bff069ffdc3ab Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Tue, 14 Aug 2018 17:14:30 -0700
Subject: PCI: Remove unused NFP32xx IDs

Defines for NFP32xx are no longer used anywhere, remove them.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 include/linux/pci_ids.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index d157983b84cf..f4e278493f5b 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2539,8 +2539,6 @@
 #define PCI_VENDOR_ID_HUAWEI         	0x19e5
 
 #define PCI_VENDOR_ID_NETRONOME		0x19ee
-#define PCI_DEVICE_ID_NETRONOME_NFP3200	0x3200
-#define PCI_DEVICE_ID_NETRONOME_NFP3240	0x3240
 #define PCI_DEVICE_ID_NETRONOME_NFP4000	0x4000
 #define PCI_DEVICE_ID_NETRONOME_NFP5000	0x5000
 #define PCI_DEVICE_ID_NETRONOME_NFP6000	0x6000
-- 
cgit v1.2.3


From 6a5f95b5a4f4ff29e4071bc5b95f8f3a2aef046b Mon Sep 17 00:00:00 2001
From: Fieah Lim <kw@fieahl.im>
Date: Tue, 11 Sep 2018 03:59:01 +0800
Subject: cpuidle: Remove unnecessary wrapper cpuidle_get_last_residency()

cpuidle_get_last_residency() is just a wrapper for retrieving
the last_residency member of struct cpuidle_device.  It's also
weirdly the only wrapper function for accessing cpuidle_* struct
member (by my best guess is it could be a leftover from v2.x).

Anyhow, since the only two users (the ladder and menu governors)
can access dev->last_residency directly, and it's more intuitive to
do it that way, let's just get rid of the wrapper.

This patch tidies up CPU idle code a bit without functional changes.

Signed-off-by: Fieah Lim <kw@fieahl.im>
[ rjw: Changelog cleanup ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/ladder.c |  2 +-
 drivers/cpuidle/governors/menu.c   |  2 +-
 include/linux/cpuidle.h            | 10 ----------
 3 files changed, 2 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index 704880a6612a..f0dddc66af26 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -80,7 +80,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 
 	last_state = &ldev->states[last_idx];
 
-	last_residency = cpuidle_get_last_residency(dev) - drv->states[last_idx].exit_latency;
+	last_residency = dev->last_residency - drv->states[last_idx].exit_latency;
 
 	/* consider promotion */
 	if (last_idx < drv->state_count - 1 &&
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index e26a40971b26..021b08dd139c 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -514,7 +514,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		measured_us = 9 * MAX_INTERESTING / 10;
 	} else {
 		/* measured value */
-		measured_us = cpuidle_get_last_residency(dev);
+		measured_us = dev->last_residency;
 
 		/* Deduct exit latency */
 		if (measured_us > 2 * target->exit_latency)
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 4325d6fdde9b..d262f620890e 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -99,16 +99,6 @@ struct cpuidle_device {
 DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
 DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev);
 
-/**
- * cpuidle_get_last_residency - retrieves the last state's residency time
- * @dev: the target CPU
- */
-static inline int cpuidle_get_last_residency(struct cpuidle_device *dev)
-{
-	return dev->last_residency;
-}
-
-
 /****************************
  * CPUIDLE DRIVER INTERFACE *
  ****************************/
-- 
cgit v1.2.3


From 10c63443b74d1ef5c1b3bb104a9e6e40dc2437ff Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 30 Aug 2018 14:54:03 +0200
Subject: Revert "serial: sh-sci: Remove SCIx_RZ_SCIFA_REGTYPE"

This reverts commit 7acece71a517cad83a0842a94d94c13f271b680c.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Chris Brandt <chris.brandt@renesas.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sh-sci.c | 31 +++++++++++++++++++++++++++++++
 include/linux/serial_sci.h  |  1 +
 2 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index ac4424bf6b13..5d42c9a63001 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -291,6 +291,33 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = {
 		.error_clear = SCIF_ERROR_CLEAR,
 	},
 
+	/*
+	 * The "SCIFA" that is in RZ/T and RZ/A2.
+	 * It looks like a normal SCIF with FIFO data, but with a
+	 * compressed address space. Also, the break out of interrupts
+	 * are different: ERI/BRI, RXI, TXI, TEI, DRI.
+	 */
+	[SCIx_RZ_SCIFA_REGTYPE] = {
+		.regs = {
+			[SCSMR]		= { 0x00, 16 },
+			[SCBRR]		= { 0x02,  8 },
+			[SCSCR]		= { 0x04, 16 },
+			[SCxTDR]	= { 0x06,  8 },
+			[SCxSR]		= { 0x08, 16 },
+			[SCxRDR]	= { 0x0A,  8 },
+			[SCFCR]		= { 0x0C, 16 },
+			[SCFDR]		= { 0x0E, 16 },
+			[SCSPTR]	= { 0x10, 16 },
+			[SCLSR]		= { 0x12, 16 },
+		},
+		.fifosize = 16,
+		.overrun_reg = SCLSR,
+		.overrun_mask = SCLSR_ORER,
+		.sampling_rate_mask = SCI_SR(32),
+		.error_mask = SCIF_DEFAULT_ERROR_MASK,
+		.error_clear = SCIF_ERROR_CLEAR,
+	},
+
 	/*
 	 * Common SH-3 SCIF definitions.
 	 */
@@ -3110,6 +3137,10 @@ static const struct of_device_id of_sci_match[] = {
 		.compatible = "renesas,scif-r7s72100",
 		.data = SCI_OF_DATA(PORT_SCIF, SCIx_SH2_SCIF_FIFODATA_REGTYPE),
 	},
+	{
+		.compatible = "renesas,scif-r7s9210",
+		.data = SCI_OF_DATA(PORT_SCIF, SCIx_RZ_SCIFA_REGTYPE),
+	},
 	/* Family-specific types */
 	{
 		.compatible = "renesas,rcar-gen1-scif",
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index c0e795d95477..1c89611e0e06 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -36,6 +36,7 @@ enum {
 	SCIx_SH4_SCIF_FIFODATA_REGTYPE,
 	SCIx_SH7705_SCIF_REGTYPE,
 	SCIx_HSCIF_REGTYPE,
+	SCIx_RZ_SCIFA_REGTYPE,
 
 	SCIx_NR_REGTYPES,
 };
-- 
cgit v1.2.3


From 76f99ae5b54d48430d1f0c5512a84da0ff9761e0 Mon Sep 17 00:00:00 2001
From: Dou Liyang <douly.fnst@cn.fujitsu.com>
Date: Sun, 9 Sep 2018 01:58:38 +0800
Subject: irq/matrix: Spread managed interrupts on allocation

Linux spreads out the non managed interrupt across the possible target CPUs
to avoid vector space exhaustion.

Managed interrupts are treated differently, as for them the vectors are
reserved (with guarantee) when the interrupt descriptors are initialized.

When the interrupt is requested a real vector is assigned. The assignment
logic uses the first CPU in the affinity mask for assignment. If the
interrupt has more than one CPU in the affinity mask, which happens when a
multi queue device has less queues than CPUs, then doing the same search as
for non managed interrupts makes sense as it puts the interrupt on the
least interrupt plagued CPU. For single CPU affine vectors that's obviously
a NOOP.

Restructre the matrix allocation code so it does the 'best CPU' search, add
the sanity check for an empty affinity mask and adapt the call site in the
x86 vector management code.

[ tglx: Added the empty mask check to the core and improved change log ]

Signed-off-by: Dou Liyang <douly.fnst@cn.fujitsu.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: hpa@zytor.com
Link: https://lkml.kernel.org/r/20180908175838.14450-2-dou_liyang@163.com
---
 arch/x86/kernel/apic/vector.c |  9 ++++-----
 include/linux/irq.h           |  3 ++-
 kernel/irq/matrix.c           | 17 ++++++++++++++---
 3 files changed, 20 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 7654febd5102..652e7ffa9b9d 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -313,14 +313,13 @@ assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest)
 	struct apic_chip_data *apicd = apic_chip_data(irqd);
 	int vector, cpu;
 
-	cpumask_and(vector_searchmask, vector_searchmask, affmsk);
-	cpu = cpumask_first(vector_searchmask);
-	if (cpu >= nr_cpu_ids)
-		return -EINVAL;
+	cpumask_and(vector_searchmask, dest, affmsk);
+
 	/* set_affinity might call here for nothing */
 	if (apicd->vector && cpumask_test_cpu(apicd->cpu, vector_searchmask))
 		return 0;
-	vector = irq_matrix_alloc_managed(vector_matrix, cpu);
+	vector = irq_matrix_alloc_managed(vector_matrix, vector_searchmask,
+					  &cpu);
 	trace_vector_alloc_managed(irqd->irq, vector, vector);
 	if (vector < 0)
 		return vector;
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 201de12a9957..c9bffda04a45 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -1151,7 +1151,8 @@ void irq_matrix_offline(struct irq_matrix *m);
 void irq_matrix_assign_system(struct irq_matrix *m, unsigned int bit, bool replace);
 int irq_matrix_reserve_managed(struct irq_matrix *m, const struct cpumask *msk);
 void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk);
-int irq_matrix_alloc_managed(struct irq_matrix *m, unsigned int cpu);
+int irq_matrix_alloc_managed(struct irq_matrix *m, const struct cpumask *msk,
+				unsigned int *mapped_cpu);
 void irq_matrix_reserve(struct irq_matrix *m);
 void irq_matrix_remove_reserved(struct irq_matrix *m);
 int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk,
diff --git a/kernel/irq/matrix.c b/kernel/irq/matrix.c
index 67768bbe736e..6e6d467f3dec 100644
--- a/kernel/irq/matrix.c
+++ b/kernel/irq/matrix.c
@@ -260,11 +260,21 @@ void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk)
  * @m:		Matrix pointer
  * @cpu:	On which CPU the interrupt should be allocated
  */
-int irq_matrix_alloc_managed(struct irq_matrix *m, unsigned int cpu)
+int irq_matrix_alloc_managed(struct irq_matrix *m, const struct cpumask *msk,
+			     unsigned int *mapped_cpu)
 {
-	struct cpumap *cm = per_cpu_ptr(m->maps, cpu);
-	unsigned int bit, end = m->alloc_end;
+	unsigned int bit, cpu, end = m->alloc_end;
+	struct cpumap *cm;
+
+	if (cpumask_empty(msk))
+		return -EINVAL;
 
+	cpu = matrix_find_best_cpu(m, msk);
+	if (cpu == UINT_MAX)
+		return -ENOSPC;
+
+	cm = per_cpu_ptr(m->maps, cpu);
+	end = m->alloc_end;
 	/* Get managed bit which are not allocated */
 	bitmap_andnot(m->scratch_map, cm->managed_map, cm->alloc_map, end);
 	bit = find_first_bit(m->scratch_map, end);
@@ -273,6 +283,7 @@ int irq_matrix_alloc_managed(struct irq_matrix *m, unsigned int cpu)
 	set_bit(bit, cm->alloc_map);
 	cm->allocated++;
 	m->total_allocated++;
+	*mapped_cpu = cpu;
 	trace_irq_matrix_alloc_managed(bit, cpu, m, cm);
 	return bit;
 }
-- 
cgit v1.2.3


From 4372ea94d40c5676814fc6d815a64caed963cb9f Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Wed, 18 Apr 2018 10:00:47 -0500
Subject: ipmi: Finally get rid of ipmi_user_t and ipmi_smi_t

All the users have been removed, we can remove the typedefs.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
---
 include/linux/ipmi.h     | 2 +-
 include/linux/ipmi_smi.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h
index 41f5c086f670..ef61676cfe05 100644
--- a/include/linux/ipmi.h
+++ b/include/linux/ipmi.h
@@ -27,7 +27,7 @@ struct device;
  * Opaque type for a IPMI message user.  One of these is needed to
  * send and receive messages.
  */
-typedef struct ipmi_user *ipmi_user_t;
+struct ipmi_user;
 
 /*
  * Stuff coming from the receive interface comes as one of these.
diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h
index 7d5fd38d5282..8c4e2ab696c3 100644
--- a/include/linux/ipmi_smi.h
+++ b/include/linux/ipmi_smi.h
@@ -28,7 +28,7 @@ struct device;
  */
 
 /* Structure for the low-level drivers. */
-typedef struct ipmi_smi *ipmi_smi_t;
+struct ipmi_smi;
 
 /*
  * Messages to/from the lower layer.  The smi interface will take one
-- 
cgit v1.2.3


From 05c3d056086a6217a77937b7fa0df35ec75715e6 Mon Sep 17 00:00:00 2001
From: Corey Minyard <cminyard@mvista.com>
Date: Mon, 26 Feb 2018 12:49:16 -0600
Subject: pci:ipmi: Move IPMI PCI class id defines to pci_ids.h

Signed-off-by: Corey Minyard <cminyard@mvista.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/char/ipmi/ipmi_si_pci.c | 5 -----
 include/linux/pci_ids.h         | 4 ++++
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/ipmi/ipmi_si_pci.c b/drivers/char/ipmi/ipmi_si_pci.c
index f54ca6869ed2..bdba12852a90 100644
--- a/drivers/char/ipmi/ipmi_si_pci.c
+++ b/drivers/char/ipmi/ipmi_si_pci.c
@@ -18,11 +18,6 @@ module_param_named(trypci, si_trypci, bool, 0);
 MODULE_PARM_DESC(trypci, "Setting this to zero will disable the"
 		 " default scan of the interfaces identified via pci");
 
-#define PCI_CLASS_SERIAL_IPMI		0x0c07
-#define PCI_CLASS_SERIAL_IPMI_SMIC	0x0c0700
-#define PCI_CLASS_SERIAL_IPMI_KCS	0x0c0701
-#define PCI_CLASS_SERIAL_IPMI_BT	0x0c0702
-
 #define PCI_DEVICE_ID_HP_MMC 0x121A
 
 static void ipmi_pci_cleanup(struct si_sm_io *io)
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index d157983b84cf..d4afd8086ed9 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -117,6 +117,10 @@
 #define PCI_CLASS_SERIAL_USB_DEVICE	0x0c03fe
 #define PCI_CLASS_SERIAL_FIBER		0x0c04
 #define PCI_CLASS_SERIAL_SMBUS		0x0c05
+#define PCI_CLASS_SERIAL_IPMI		0x0c07
+#define PCI_CLASS_SERIAL_IPMI_SMIC	0x0c0700
+#define PCI_CLASS_SERIAL_IPMI_KCS	0x0c0701
+#define PCI_CLASS_SERIAL_IPMI_BT	0x0c0702
 
 #define PCI_BASE_CLASS_WIRELESS			0x0d
 #define PCI_CLASS_WIRELESS_RF_CONTROLLER	0x0d10
-- 
cgit v1.2.3


From 81c4b5bf30de01a0f6b43ccaa1d220f4a0a5d99c Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sat, 8 Sep 2018 09:59:01 +0200
Subject: PCI: hotplug: Constify hotplug_slot_ops

Hotplug drivers cannot declare their hotplug_slot_ops const, making them
attractive targets for attackers, because upon registration of a hotplug
slot, __pci_hp_initialize() writes to the "owner" and "mod_name" members
in that struct.

Fix by moving these members to struct hotplug_slot and constify every
driver's hotplug_slot_ops except for pciehp.

pciehp constructs its hotplug_slot_ops at runtime based on the PCIe
port's capabilities, hence cannot declare them const.  It can be
converted to __write_rarely once that's mainlined:
http://www.openwall.com/lists/kernel-hardening/2016/11/16/3

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>  # drivers/pci/hotplug/rpa*
Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com> # drivers/platform/x86
Cc: Len Brown <lenb@kernel.org>
Cc: Scott Murray <scott@spiteful.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Oliver OHalloran <oliveroh@au1.ibm.com>
Cc: Gavin Shan <gwshan@linux.vnet.ibm.com>
Cc: Sebastian Ott <sebott@linux.vnet.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Corentin Chary <corentin.chary@gmail.com>
Cc: Darren Hart <dvhart@infradead.org>
---
 drivers/pci/hotplug/acpiphp_core.c      |  2 +-
 drivers/pci/hotplug/cpci_hotplug_core.c |  2 +-
 drivers/pci/hotplug/cpqphp_core.c       |  2 +-
 drivers/pci/hotplug/ibmphp.h            |  2 +-
 drivers/pci/hotplug/ibmphp_core.c       |  2 +-
 drivers/pci/hotplug/pci_hotplug_core.c  | 27 ++++++++++++++-------------
 drivers/pci/hotplug/pnv_php.c           |  2 +-
 drivers/pci/hotplug/rpaphp.h            |  2 +-
 drivers/pci/hotplug/rpaphp_core.c       |  2 +-
 drivers/pci/hotplug/s390_pci_hpc.c      |  2 +-
 drivers/pci/hotplug/sgi_hotplug.c       |  2 +-
 drivers/pci/hotplug/shpchp_core.c       |  2 +-
 drivers/pci/pci.c                       |  4 ++--
 drivers/pci/slot.c                      |  2 +-
 drivers/platform/x86/asus-wmi.c         |  3 +--
 drivers/platform/x86/eeepc-laptop.c     |  3 +--
 include/linux/pci_hotplug.h             | 10 +++++-----
 17 files changed, 35 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
index ad32ffbc4b91..e883cef0f3bc 100644
--- a/drivers/pci/hotplug/acpiphp_core.c
+++ b/drivers/pci/hotplug/acpiphp_core.c
@@ -57,7 +57,7 @@ static int get_attention_status(struct hotplug_slot *slot, u8 *value);
 static int get_latch_status(struct hotplug_slot *slot, u8 *value);
 static int get_adapter_status(struct hotplug_slot *slot, u8 *value);
 
-static struct hotplug_slot_ops acpi_hotplug_slot_ops = {
+static const struct hotplug_slot_ops acpi_hotplug_slot_ops = {
 	.enable_slot		= enable_slot,
 	.disable_slot		= disable_slot,
 	.set_attention_status	= set_attention_status,
diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c
index 52a339baf06c..97c32e4c74c8 100644
--- a/drivers/pci/hotplug/cpci_hotplug_core.c
+++ b/drivers/pci/hotplug/cpci_hotplug_core.c
@@ -57,7 +57,7 @@ static int get_attention_status(struct hotplug_slot *slot, u8 *value);
 static int get_adapter_status(struct hotplug_slot *slot, u8 *value);
 static int get_latch_status(struct hotplug_slot *slot, u8 *value);
 
-static struct hotplug_slot_ops cpci_hotplug_slot_ops = {
+static const struct hotplug_slot_ops cpci_hotplug_slot_ops = {
 	.enable_slot = enable_slot,
 	.disable_slot = disable_slot,
 	.set_attention_status = set_attention_status,
diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c
index 5a06636e910a..3409b62fceac 100644
--- a/drivers/pci/hotplug/cpqphp_core.c
+++ b/drivers/pci/hotplug/cpqphp_core.c
@@ -560,7 +560,7 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	return 0;
 }
 
-static struct hotplug_slot_ops cpqphp_hotplug_slot_ops = {
+static const struct hotplug_slot_ops cpqphp_hotplug_slot_ops = {
 	.set_attention_status =	set_attention_status,
 	.enable_slot =		process_SI,
 	.disable_slot =		process_SS,
diff --git a/drivers/pci/hotplug/ibmphp.h b/drivers/pci/hotplug/ibmphp.h
index fddb78606c74..db387e10581e 100644
--- a/drivers/pci/hotplug/ibmphp.h
+++ b/drivers/pci/hotplug/ibmphp.h
@@ -740,7 +740,7 @@ int ibmphp_do_disable_slot(struct slot *slot_cur);
 int ibmphp_update_slot_info(struct slot *);	/* This function is called from HPC, so we need it to not be be static */
 int ibmphp_configure_card(struct pci_func *, u8);
 int ibmphp_unconfigure_card(struct slot **, int);
-extern struct hotplug_slot_ops ibmphp_hotplug_slot_ops;
+extern const struct hotplug_slot_ops ibmphp_hotplug_slot_ops;
 
 #endif				//__IBMPHP_H
 
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index 4ea57e9019f1..b82fdc17040d 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -1259,7 +1259,7 @@ error:
 	goto exit;
 }
 
-struct hotplug_slot_ops ibmphp_hotplug_slot_ops = {
+const struct hotplug_slot_ops ibmphp_hotplug_slot_ops = {
 	.set_attention_status =		set_attention_status,
 	.enable_slot =			enable_slot,
 	.disable_slot =			ibmphp_disable_slot,
diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index 90fde5f106d8..ede2ed6f4ce0 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -49,15 +49,15 @@ static DEFINE_MUTEX(pci_hp_mutex);
 #define GET_STATUS(name, type)	\
 static int get_##name(struct hotplug_slot *slot, type *value)		\
 {									\
-	struct hotplug_slot_ops *ops = slot->ops;			\
+	const struct hotplug_slot_ops *ops = slot->ops;			\
 	int retval = 0;							\
-	if (!try_module_get(ops->owner))				\
+	if (!try_module_get(slot->owner))				\
 		return -ENODEV;						\
 	if (ops->get_##name)						\
 		retval = ops->get_##name(slot, value);			\
 	else								\
 		*value = slot->info->name;				\
-	module_put(ops->owner);						\
+	module_put(slot->owner);					\
 	return retval;							\
 }
 
@@ -90,7 +90,7 @@ static ssize_t power_write_file(struct pci_slot *pci_slot, const char *buf,
 	power = (u8)(lpower & 0xff);
 	dbg("power = %d\n", power);
 
-	if (!try_module_get(slot->ops->owner)) {
+	if (!try_module_get(slot->owner)) {
 		retval = -ENODEV;
 		goto exit;
 	}
@@ -109,7 +109,7 @@ static ssize_t power_write_file(struct pci_slot *pci_slot, const char *buf,
 		err("Illegal value specified for power\n");
 		retval = -EINVAL;
 	}
-	module_put(slot->ops->owner);
+	module_put(slot->owner);
 
 exit:
 	if (retval)
@@ -138,7 +138,8 @@ static ssize_t attention_read_file(struct pci_slot *pci_slot, char *buf)
 static ssize_t attention_write_file(struct pci_slot *pci_slot, const char *buf,
 				    size_t count)
 {
-	struct hotplug_slot_ops *ops = pci_slot->hotplug->ops;
+	struct hotplug_slot *slot = pci_slot->hotplug;
+	const struct hotplug_slot_ops *ops = slot->ops;
 	unsigned long lattention;
 	u8 attention;
 	int retval = 0;
@@ -147,13 +148,13 @@ static ssize_t attention_write_file(struct pci_slot *pci_slot, const char *buf,
 	attention = (u8)(lattention & 0xff);
 	dbg(" - attention = %d\n", attention);
 
-	if (!try_module_get(ops->owner)) {
+	if (!try_module_get(slot->owner)) {
 		retval = -ENODEV;
 		goto exit;
 	}
 	if (ops->set_attention_status)
-		retval = ops->set_attention_status(pci_slot->hotplug, attention);
-	module_put(ops->owner);
+		retval = ops->set_attention_status(slot, attention);
+	module_put(slot->owner);
 
 exit:
 	if (retval)
@@ -213,13 +214,13 @@ static ssize_t test_write_file(struct pci_slot *pci_slot, const char *buf,
 	test = (u32)(ltest & 0xffffffff);
 	dbg("test = %d\n", test);
 
-	if (!try_module_get(slot->ops->owner)) {
+	if (!try_module_get(slot->owner)) {
 		retval = -ENODEV;
 		goto exit;
 	}
 	if (slot->ops->hardware_test)
 		retval = slot->ops->hardware_test(slot, test);
-	module_put(slot->ops->owner);
+	module_put(slot->owner);
 
 exit:
 	if (retval)
@@ -447,8 +448,8 @@ int __pci_hp_initialize(struct hotplug_slot *slot, struct pci_bus *bus,
 	if ((slot->info == NULL) || (slot->ops == NULL))
 		return -EINVAL;
 
-	slot->ops->owner = owner;
-	slot->ops->mod_name = mod_name;
+	slot->owner = owner;
+	slot->mod_name = mod_name;
 
 	/*
 	 * No problems if we call this interface from both ACPI_PCI_SLOT
diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
index 3276a5e4c430..12b92a0ff688 100644
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -530,7 +530,7 @@ static int pnv_php_disable_slot(struct hotplug_slot *slot)
 	return ret;
 }
 
-static struct hotplug_slot_ops php_slot_ops = {
+static const struct hotplug_slot_ops php_slot_ops = {
 	.get_power_status	= pnv_php_get_power_state,
 	.get_adapter_status	= pnv_php_get_adapter_state,
 	.set_attention_status	= pnv_php_set_attention_state,
diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h
index c8311724bd76..f83347819f7b 100644
--- a/drivers/pci/hotplug/rpaphp.h
+++ b/drivers/pci/hotplug/rpaphp.h
@@ -70,7 +70,7 @@ struct slot {
 	struct hotplug_slot *hotplug_slot;
 };
 
-extern struct hotplug_slot_ops rpaphp_hotplug_slot_ops;
+extern const struct hotplug_slot_ops rpaphp_hotplug_slot_ops;
 extern struct list_head rpaphp_slot_head;
 
 /* function prototypes */
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 857c358b727b..8620a3f8c987 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -477,7 +477,7 @@ static int disable_slot(struct hotplug_slot *hotplug_slot)
 	return 0;
 }
 
-struct hotplug_slot_ops rpaphp_hotplug_slot_ops = {
+const struct hotplug_slot_ops rpaphp_hotplug_slot_ops = {
 	.enable_slot = enable_slot,
 	.disable_slot = disable_slot,
 	.set_attention_status = set_attention_status,
diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c
index 93b5341d282c..5bd45fd4a92a 100644
--- a/drivers/pci/hotplug/s390_pci_hpc.c
+++ b/drivers/pci/hotplug/s390_pci_hpc.c
@@ -130,7 +130,7 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 	return 0;
 }
 
-static struct hotplug_slot_ops s390_hotplug_slot_ops = {
+static const struct hotplug_slot_ops s390_hotplug_slot_ops = {
 	.enable_slot =		enable_slot,
 	.disable_slot =		disable_slot,
 	.get_power_status =	get_power_status,
diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c
index babd23409f61..af4c28c574dd 100644
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -80,7 +80,7 @@ static int enable_slot(struct hotplug_slot *slot);
 static int disable_slot(struct hotplug_slot *slot);
 static inline int get_power_status(struct hotplug_slot *slot, u8 *value);
 
-static struct hotplug_slot_ops sn_hotplug_slot_ops = {
+static const struct hotplug_slot_ops sn_hotplug_slot_ops = {
 	.enable_slot            = enable_slot,
 	.disable_slot           = disable_slot,
 	.get_power_status       = get_power_status,
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c
index 97cee23f3d51..26cbea04237c 100644
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -51,7 +51,7 @@ static int get_attention_status(struct hotplug_slot *slot, u8 *value);
 static int get_latch_status(struct hotplug_slot *slot, u8 *value);
 static int get_adapter_status(struct hotplug_slot *slot, u8 *value);
 
-static struct hotplug_slot_ops shpchp_hotplug_slot_ops = {
+static const struct hotplug_slot_ops shpchp_hotplug_slot_ops = {
 	.set_attention_status =	set_attention_status,
 	.enable_slot =		enable_slot,
 	.disable_slot =		disable_slot,
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 1835f3a7aa8d..0e54588825cb 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4571,13 +4571,13 @@ static int pci_reset_hotplug_slot(struct hotplug_slot *hotplug, int probe)
 {
 	int rc = -ENOTTY;
 
-	if (!hotplug || !try_module_get(hotplug->ops->owner))
+	if (!hotplug || !try_module_get(hotplug->owner))
 		return rc;
 
 	if (hotplug->ops->reset_slot)
 		rc = hotplug->ops->reset_slot(hotplug, probe);
 
-	module_put(hotplug->ops->owner);
+	module_put(hotplug->owner);
 
 	return rc;
 }
diff --git a/drivers/pci/slot.c b/drivers/pci/slot.c
index e634229ece89..145cd953b518 100644
--- a/drivers/pci/slot.c
+++ b/drivers/pci/slot.c
@@ -371,7 +371,7 @@ void pci_hp_create_module_link(struct pci_slot *pci_slot)
 
 	if (!slot || !slot->ops)
 		return;
-	kobj = kset_find_obj(module_kset, slot->ops->mod_name);
+	kobj = kset_find_obj(module_kset, slot->mod_name);
 	if (!kobj)
 		return;
 	ret = sysfs_create_link(&pci_slot->kobj, kobj, "module");
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 2d6e272315a8..a8aa2eadfd82 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -868,8 +868,7 @@ static int asus_get_adapter_status(struct hotplug_slot *hotplug_slot,
 	return 0;
 }
 
-static struct hotplug_slot_ops asus_hotplug_slot_ops = {
-	.owner = THIS_MODULE,
+static const struct hotplug_slot_ops asus_hotplug_slot_ops = {
 	.get_adapter_status = asus_get_adapter_status,
 	.get_power_status = asus_get_adapter_status,
 };
diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index a4bbf6ecd1f0..41a364376e91 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -726,8 +726,7 @@ static int eeepc_get_adapter_status(struct hotplug_slot *hotplug_slot,
 	return 0;
 }
 
-static struct hotplug_slot_ops eeepc_hotplug_slot_ops = {
-	.owner = THIS_MODULE,
+static const struct hotplug_slot_ops eeepc_hotplug_slot_ops = {
 	.get_adapter_status = eeepc_get_adapter_status,
 	.get_power_status = eeepc_get_adapter_status,
 };
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index a6d6650a0490..372dbe95c207 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -16,8 +16,6 @@
 
 /**
  * struct hotplug_slot_ops -the callbacks that the hotplug pci core can use
- * @owner: The module owner of this structure
- * @mod_name: The module name (KBUILD_MODNAME) of this structure
  * @enable_slot: Called when the user wants to enable a specific pci slot
  * @disable_slot: Called when the user wants to disable a specific pci slot
  * @set_attention_status: Called to set the specific slot's attention LED to
@@ -46,8 +44,6 @@
  * set an LED, enable / disable power, etc.)
  */
 struct hotplug_slot_ops {
-	struct module *owner;
-	const char *mod_name;
 	int (*enable_slot)		(struct hotplug_slot *slot);
 	int (*disable_slot)		(struct hotplug_slot *slot);
 	int (*set_attention_status)	(struct hotplug_slot *slot, u8 value);
@@ -82,15 +78,19 @@ struct hotplug_slot_info {
  * this slot.
  * @private: used by the hotplug pci controller driver to store whatever it
  * needs.
+ * @owner: The module owner of this structure
+ * @mod_name: The module name (KBUILD_MODNAME) of this structure
  */
 struct hotplug_slot {
-	struct hotplug_slot_ops		*ops;
+	const struct hotplug_slot_ops	*ops;
 	struct hotplug_slot_info	*info;
 	void				*private;
 
 	/* Variables below this are for use only by the hotplug pci core. */
 	struct list_head		slot_list;
 	struct pci_slot			*pci_slot;
+	struct module			*owner;
+	const char			*mod_name;
 };
 
 static inline const char *hotplug_slot_name(const struct hotplug_slot *slot)
-- 
cgit v1.2.3


From a7da21613c4efcd4cc0235e6a30bec96ae47c619 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sat, 8 Sep 2018 09:59:01 +0200
Subject: PCI: hotplug: Drop hotplug_slot_info

Ever since the PCI hotplug core was introduced in 2002, drivers had to
allocate and register a struct hotplug_slot_info for every slot:
https://git.kernel.org/tglx/history/c/a8a2069f432c

Apparently the idea was that drivers furnish the hotplug core with an
up-to-date card presence status, power status, latch status and
attention indicator status as well as notify the hotplug core of changes
thereof.  However only 4 out of 12 hotplug drivers bother to notify the
hotplug core with pci_hp_change_slot_info() and the hotplug core never
made any use of the information:  There is just a single macro in
pci_hotplug_core.c, GET_STATUS(), which uses the hotplug_slot_info if
the driver lacks the corresponding callback in hotplug_slot_ops.  The
macro is called when the user reads the attribute via sysfs.

Now, if the callback isn't defined, the attribute isn't exposed in sysfs
in the first place (see e.g. has_power_file()).  There are only two
situations when the hotplug_slot_info would actually be accessed:

* If the driver defines ->enable_slot or ->disable_slot but not
  ->get_power_status.

* If the driver defines ->set_attention_status but not
  ->get_attention_status.

There is no driver doing the former and just a single driver doing the
latter, namely pnv_php.c.  Amend it with a ->get_attention_status
callback.  With that, the hotplug_slot_info becomes completely unused by
the PCI hotplug core.  But a few drivers use it internally as a cache:

cpcihp uses it to cache the latch_status and adapter_status.
cpqhp uses it to cache the adapter_status.
pnv_php and rpaphp use it to cache the attention_status.
shpchp uses it to cache all four values.

Amend these drivers to cache the information in their private slot
struct.  shpchp's slot struct already contains members to cache the
power_status and adapter_status, so additional members are only needed
for the other two values.  In the case of cpqphp, the cached value is
only accessed in a single place, so instead of caching it, read the
current value from the hardware.

Caution:  acpiphp, cpci, cpqhp, shpchp, asus-wmi and eeepc-laptop
populate the hotplug_slot_info with initial values on probe.  That code
is herewith removed.  There is a theoretical chance that the code has
side effects without which the driver fails to function, e.g. if the
ACPI method to read the adapter status needs to be executed at least
once on probe.  That seems unlikely to me, still maintainers should
review the changes carefully for this possibility.

Rafael adds: "I'm not aware of any case in which it will break anything,
[...] but if that happens, it may be necessary to add the execution of
the control methods in question directly to the initialization part."

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>  # drivers/pci/hotplug/rpa*
Acked-by: Sebastian Ott <sebott@linux.ibm.com>        # drivers/pci/hotplug/s390*
Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com> # drivers/platform/x86
Cc: Len Brown <lenb@kernel.org>
Cc: Scott Murray <scott@spiteful.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Oliver OHalloran <oliveroh@au1.ibm.com>
Cc: Gavin Shan <gwshan@linux.vnet.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Corentin Chary <corentin.chary@gmail.com>
Cc: Darren Hart <dvhart@infradead.org>
---
 arch/powerpc/include/asm/pnv-pci.h      |  2 +-
 drivers/pci/hotplug/acpiphp.h           |  1 -
 drivers/pci/hotplug/acpiphp_core.c      |  6 ---
 drivers/pci/hotplug/cpci_hotplug.h      |  2 +
 drivers/pci/hotplug/cpci_hotplug_core.c | 72 +++++++--------------------------
 drivers/pci/hotplug/cpqphp_core.c       | 22 +---------
 drivers/pci/hotplug/cpqphp_ctrl.c       | 31 +-------------
 drivers/pci/hotplug/ibmphp_core.c       | 27 +------------
 drivers/pci/hotplug/ibmphp_ebda.c       | 33 ---------------
 drivers/pci/hotplug/pci_hotplug_core.c  | 26 +-----------
 drivers/pci/hotplug/pciehp_core.c       |  8 ----
 drivers/pci/hotplug/pnv_php.c           | 24 ++++++++---
 drivers/pci/hotplug/rpaphp.h            |  1 +
 drivers/pci/hotplug/rpaphp_core.c       |  4 +-
 drivers/pci/hotplug/rpaphp_pci.c        | 11 +----
 drivers/pci/hotplug/rpaphp_slot.c       |  9 +----
 drivers/pci/hotplug/s390_pci_hpc.c      | 12 ------
 drivers/pci/hotplug/sgi_hotplug.c       |  9 -----
 drivers/pci/hotplug/shpchp.h            |  2 +
 drivers/pci/hotplug/shpchp_core.c       | 31 +++++---------
 drivers/pci/hotplug/shpchp_ctrl.c       | 21 +++-------
 drivers/platform/x86/asus-wmi.c         | 10 -----
 drivers/platform/x86/eeepc-laptop.c     | 10 -----
 include/linux/pci_hotplug.h             | 30 --------------
 24 files changed, 64 insertions(+), 340 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h
index 7f627e3f4da4..630eb8b1b7ed 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -54,7 +54,6 @@ void pnv_cxl_release_hwirq_ranges(struct cxl_irq_ranges *irqs,
 
 struct pnv_php_slot {
 	struct hotplug_slot		slot;
-	struct hotplug_slot_info	slot_info;
 	uint64_t			id;
 	char				*name;
 	int				slot_no;
@@ -72,6 +71,7 @@ struct pnv_php_slot {
 	struct pci_dev			*pdev;
 	struct pci_bus			*bus;
 	bool				power_state_check;
+	u8				attention_state;
 	void				*fdt;
 	void				*dt;
 	struct of_changeset		ocs;
diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h
index e438a2d734f2..8377e736ea69 100644
--- a/drivers/pci/hotplug/acpiphp.h
+++ b/drivers/pci/hotplug/acpiphp.h
@@ -35,7 +35,6 @@ struct acpiphp_slot;
 struct slot {
 	struct hotplug_slot	*hotplug_slot;
 	struct acpiphp_slot	*acpi_slot;
-	struct hotplug_slot_info info;
 	unsigned int sun;	/* ACPI _SUN (Slot User Number) value */
 };
 
diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
index e883cef0f3bc..abd4f8d7e16a 100644
--- a/drivers/pci/hotplug/acpiphp_core.c
+++ b/drivers/pci/hotplug/acpiphp_core.c
@@ -270,16 +270,10 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot,
 	if (!slot->hotplug_slot)
 		goto error_slot;
 
-	slot->hotplug_slot->info = &slot->info;
-
 	slot->hotplug_slot->private = slot;
 	slot->hotplug_slot->ops = &acpi_hotplug_slot_ops;
 
 	slot->acpi_slot = acpiphp_slot;
-	slot->hotplug_slot->info->power_status = acpiphp_get_power_status(slot->acpi_slot);
-	slot->hotplug_slot->info->attention_status = 0;
-	slot->hotplug_slot->info->latch_status = acpiphp_get_latch_status(slot->acpi_slot);
-	slot->hotplug_slot->info->adapter_status = acpiphp_get_adapter_status(slot->acpi_slot);
 
 	acpiphp_slot->slot = slot;
 	slot->sun = sun;
diff --git a/drivers/pci/hotplug/cpci_hotplug.h b/drivers/pci/hotplug/cpci_hotplug.h
index 4658557be01a..a35f40a2290c 100644
--- a/drivers/pci/hotplug/cpci_hotplug.h
+++ b/drivers/pci/hotplug/cpci_hotplug.h
@@ -32,6 +32,8 @@ struct slot {
 	unsigned int devfn;
 	struct pci_bus *bus;
 	struct pci_dev *dev;
+	unsigned int latch_status:1;
+	unsigned int adapter_status:1;
 	unsigned int extracting;
 	struct hotplug_slot *hotplug_slot;
 	struct list_head slot_list;
diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c
index 97c32e4c74c8..a17fb24c28cd 100644
--- a/drivers/pci/hotplug/cpci_hotplug_core.c
+++ b/drivers/pci/hotplug/cpci_hotplug_core.c
@@ -67,26 +67,6 @@ static const struct hotplug_slot_ops cpci_hotplug_slot_ops = {
 	.get_latch_status = get_latch_status,
 };
 
-static int
-update_latch_status(struct hotplug_slot *hotplug_slot, u8 value)
-{
-	struct hotplug_slot_info info;
-
-	memcpy(&info, hotplug_slot->info, sizeof(struct hotplug_slot_info));
-	info.latch_status = value;
-	return pci_hp_change_slot_info(hotplug_slot, &info);
-}
-
-static int
-update_adapter_status(struct hotplug_slot *hotplug_slot, u8 value)
-{
-	struct hotplug_slot_info info;
-
-	memcpy(&info, hotplug_slot->info, sizeof(struct hotplug_slot_info));
-	info.adapter_status = value;
-	return pci_hp_change_slot_info(hotplug_slot, &info);
-}
-
 static int
 enable_slot(struct hotplug_slot *hotplug_slot)
 {
@@ -135,8 +115,7 @@ disable_slot(struct hotplug_slot *hotplug_slot)
 			goto disable_error;
 	}
 
-	if (update_adapter_status(slot->hotplug_slot, 0))
-		warn("failure to update adapter file");
+	slot->adapter_status = 0;
 
 	if (slot->extracting) {
 		slot->extracting = 0;
@@ -184,20 +163,23 @@ set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
 static int
 get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	*value = hotplug_slot->info->adapter_status;
+	struct slot *slot = hotplug_slot->private;
+
+	*value = slot->adapter_status;
 	return 0;
 }
 
 static int
 get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	*value = hotplug_slot->info->latch_status;
+	struct slot *slot = hotplug_slot->private;
+
+	*value = slot->latch_status;
 	return 0;
 }
 
 static void release_slot(struct slot *slot)
 {
-	kfree(slot->hotplug_slot->info);
 	kfree(slot->hotplug_slot);
 	pci_dev_put(slot->dev);
 	kfree(slot);
@@ -210,7 +192,6 @@ cpci_hp_register_bus(struct pci_bus *bus, u8 first, u8 last)
 {
 	struct slot *slot;
 	struct hotplug_slot *hotplug_slot;
-	struct hotplug_slot_info *info;
 	char name[SLOT_NAME_SIZE];
 	int status;
 	int i;
@@ -237,13 +218,6 @@ cpci_hp_register_bus(struct pci_bus *bus, u8 first, u8 last)
 		}
 		slot->hotplug_slot = hotplug_slot;
 
-		info = kzalloc(sizeof(struct hotplug_slot_info), GFP_KERNEL);
-		if (!info) {
-			status = -ENOMEM;
-			goto error_hpslot;
-		}
-		hotplug_slot->info = info;
-
 		slot->bus = bus;
 		slot->number = i;
 		slot->devfn = PCI_DEVFN(i, 0);
@@ -253,19 +227,11 @@ cpci_hp_register_bus(struct pci_bus *bus, u8 first, u8 last)
 		hotplug_slot->private = slot;
 		hotplug_slot->ops = &cpci_hotplug_slot_ops;
 
-		/*
-		 * Initialize the slot info structure with some known
-		 * good values.
-		 */
-		dbg("initializing slot %s", name);
-		info->power_status = cpci_get_power_status(slot);
-		info->attention_status = cpci_get_attention_status(slot);
-
 		dbg("registering slot %s", name);
 		status = pci_hp_register(slot->hotplug_slot, bus, i, name);
 		if (status) {
 			err("pci_hp_register failed with error %d", status);
-			goto error_info;
+			goto error_hpslot;
 		}
 		dbg("slot registered with name: %s", slot_name(slot));
 
@@ -276,8 +242,6 @@ cpci_hp_register_bus(struct pci_bus *bus, u8 first, u8 last)
 		up_write(&list_rwsem);
 	}
 	return 0;
-error_info:
-	kfree(info);
 error_hpslot:
 	kfree(hotplug_slot);
 error_slot:
@@ -359,10 +323,8 @@ init_slots(int clear_ins)
 			    __func__, slot_name(slot));
 		dev = pci_get_slot(slot->bus, PCI_DEVFN(slot->number, 0));
 		if (dev) {
-			if (update_adapter_status(slot->hotplug_slot, 1))
-				warn("failure to update adapter file");
-			if (update_latch_status(slot->hotplug_slot, 1))
-				warn("failure to update latch file");
+			slot->adapter_status = 1;
+			slot->latch_status = 1;
 			slot->dev = dev;
 		}
 	}
@@ -424,11 +386,8 @@ check_slots(void)
 			dbg("%s - slot %s HS_CSR (2) = %04x",
 			    __func__, slot_name(slot), hs_csr);
 
-			if (update_latch_status(slot->hotplug_slot, 1))
-				warn("failure to update latch file");
-
-			if (update_adapter_status(slot->hotplug_slot, 1))
-				warn("failure to update adapter file");
+			slot->latch_status = 1;
+			slot->adapter_status = 1;
 
 			cpci_led_off(slot);
 
@@ -449,9 +408,7 @@ check_slots(void)
 			    __func__, slot_name(slot), hs_csr);
 
 			if (!slot->extracting) {
-				if (update_latch_status(slot->hotplug_slot, 0))
-					warn("failure to update latch file");
-
+				slot->latch_status = 0;
 				slot->extracting = 1;
 				atomic_inc(&extracting);
 			}
@@ -465,8 +422,7 @@ check_slots(void)
 				 */
 				err("card in slot %s was improperly removed",
 				    slot_name(slot));
-				if (update_adapter_status(slot->hotplug_slot, 0))
-					warn("failure to update adapter file");
+				slot->adapter_status = 0;
 				slot->extracting = 0;
 				atomic_dec(&extracting);
 			}
diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c
index 3409b62fceac..bb354a7fc112 100644
--- a/drivers/pci/hotplug/cpqphp_core.c
+++ b/drivers/pci/hotplug/cpqphp_core.c
@@ -276,7 +276,6 @@ static int ctrl_slot_cleanup(struct controller *ctrl)
 	while (old_slot) {
 		next_slot = old_slot->next;
 		pci_hp_deregister(old_slot->hotplug_slot);
-		kfree(old_slot->hotplug_slot->info);
 		kfree(old_slot->hotplug_slot);
 		kfree(old_slot);
 		old_slot = next_slot;
@@ -579,7 +578,6 @@ static int ctrl_slot_setup(struct controller *ctrl,
 {
 	struct slot *slot;
 	struct hotplug_slot *hotplug_slot;
-	struct hotplug_slot_info *hotplug_slot_info;
 	struct pci_bus *bus = ctrl->pci_bus;
 	u8 number_of_slots;
 	u8 slot_device;
@@ -613,14 +611,6 @@ static int ctrl_slot_setup(struct controller *ctrl,
 		}
 		hotplug_slot = slot->hotplug_slot;
 
-		hotplug_slot->info = kzalloc(sizeof(*(hotplug_slot->info)),
-							GFP_KERNEL);
-		if (!hotplug_slot->info) {
-			result = -ENOMEM;
-			goto error_hpslot;
-		}
-		hotplug_slot_info = hotplug_slot->info;
-
 		slot->ctrl = ctrl;
 		slot->bus = ctrl->bus;
 		slot->device = slot_device;
@@ -673,14 +663,6 @@ static int ctrl_slot_setup(struct controller *ctrl,
 		snprintf(name, SLOT_NAME_SIZE, "%u", slot->number);
 		hotplug_slot->ops = &cpqphp_hotplug_slot_ops;
 
-		hotplug_slot_info->power_status = get_slot_enabled(ctrl, slot);
-		hotplug_slot_info->attention_status =
-			cpq_get_attention_status(ctrl, slot);
-		hotplug_slot_info->latch_status =
-			cpq_get_latch_status(ctrl, slot);
-		hotplug_slot_info->adapter_status =
-			get_presence_status(ctrl, slot);
-
 		dbg("registering bus %d, dev %d, number %d, ctrl->slot_device_offset %d, slot %d\n",
 				slot->bus, slot->device,
 				slot->number, ctrl->slot_device_offset,
@@ -691,7 +673,7 @@ static int ctrl_slot_setup(struct controller *ctrl,
 					 name);
 		if (result) {
 			err("pci_hp_register failed with error %d\n", result);
-			goto error_info;
+			goto error_hpslot;
 		}
 
 		slot->next = ctrl->slot;
@@ -703,8 +685,6 @@ static int ctrl_slot_setup(struct controller *ctrl,
 	}
 
 	return 0;
-error_info:
-	kfree(hotplug_slot_info);
 error_hpslot:
 	kfree(hotplug_slot);
 error_slot:
diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c
index 616df442520b..9c4826ac6a4f 100644
--- a/drivers/pci/hotplug/cpqphp_ctrl.c
+++ b/drivers/pci/hotplug/cpqphp_ctrl.c
@@ -1130,9 +1130,9 @@ static u8 set_controller_speed(struct controller *ctrl, u8 adapter_speed, u8 hp_
 	for (slot = ctrl->slot; slot; slot = slot->next) {
 		if (slot->device == (hp_slot + ctrl->slot_device_offset))
 			continue;
-		if (!slot->hotplug_slot || !slot->hotplug_slot->info)
+		if (!slot->hotplug_slot)
 			continue;
-		if (slot->hotplug_slot->info->adapter_status == 0)
+		if (get_presence_status(ctrl, slot) == 0)
 			continue;
 		/* If another adapter is running on the same segment but at a
 		 * lower speed/mode, we allow the new adapter to function at
@@ -1767,24 +1767,6 @@ void cpqhp_event_stop_thread(void)
 }
 
 
-static int update_slot_info(struct controller *ctrl, struct slot *slot)
-{
-	struct hotplug_slot_info *info;
-	int result;
-
-	info = kmalloc(sizeof(*info), GFP_KERNEL);
-	if (!info)
-		return -ENOMEM;
-
-	info->power_status = get_slot_enabled(ctrl, slot);
-	info->attention_status = cpq_get_attention_status(ctrl, slot);
-	info->latch_status = cpq_get_latch_status(ctrl, slot);
-	info->adapter_status = get_presence_status(ctrl, slot);
-	result = pci_hp_change_slot_info(slot->hotplug_slot, info);
-	kfree(info);
-	return result;
-}
-
 static void interrupt_event_handler(struct controller *ctrl)
 {
 	int loop = 0;
@@ -1884,9 +1866,6 @@ static void interrupt_event_handler(struct controller *ctrl)
 				/***********POWER FAULT */
 				else if (ctrl->event_queue[loop].event_type == INT_POWER_FAULT) {
 					dbg("power fault\n");
-				} else {
-					/* refresh notification */
-					update_slot_info(ctrl, p_slot);
 				}
 
 				ctrl->event_queue[loop].event_type = 0;
@@ -2057,9 +2036,6 @@ int cpqhp_process_SI(struct controller *ctrl, struct pci_func *func)
 	if (rc)
 		dbg("%s: rc = %d\n", __func__, rc);
 
-	if (p_slot)
-		update_slot_info(ctrl, p_slot);
-
 	return rc;
 }
 
@@ -2125,9 +2101,6 @@ int cpqhp_process_SS(struct controller *ctrl, struct pci_func *func)
 		rc = 1;
 	}
 
-	if (p_slot)
-		update_slot_info(ctrl, p_slot);
-
 	return rc;
 }
 
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index b82fdc17040d..96e5b1f544ac 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -582,29 +582,10 @@ static int validate(struct slot *slot_cur, int opn)
  ****************************************************************************/
 int ibmphp_update_slot_info(struct slot *slot_cur)
 {
-	struct hotplug_slot_info *info;
 	struct pci_bus *bus = slot_cur->hotplug_slot->pci_slot->bus;
-	int rc;
 	u8 bus_speed;
 	u8 mode;
 
-	info = kmalloc(sizeof(struct hotplug_slot_info), GFP_KERNEL);
-	if (!info)
-		return -ENOMEM;
-
-	info->power_status = SLOT_PWRGD(slot_cur->status);
-	info->attention_status = SLOT_ATTN(slot_cur->status,
-						slot_cur->ext_status);
-	info->latch_status = SLOT_LATCH(slot_cur->status);
-	if (!SLOT_PRESENT(slot_cur->status)) {
-		info->adapter_status = 0;
-/*		info->max_adapter_speed_status = MAX_ADAPTER_NONE; */
-	} else {
-		info->adapter_status = 1;
-/*		get_max_adapter_speed_1(slot_cur->hotplug_slot,
-					&info->max_adapter_speed_status, 0); */
-	}
-
 	bus_speed = slot_cur->bus_on->current_speed;
 	mode = slot_cur->bus_on->current_bus_mode;
 
@@ -630,9 +611,7 @@ int ibmphp_update_slot_info(struct slot *slot_cur)
 	bus->cur_bus_speed = bus_speed;
 	// To do: bus_names
 
-	rc = pci_hp_change_slot_info(slot_cur->hotplug_slot, info);
-	kfree(info);
-	return rc;
+	return 0;
 }
 
 
@@ -684,7 +663,6 @@ static void free_slots(void)
 		ibmphp_unconfigure_card(&slot_cur, -1);
 
 		pci_hp_destroy(slot_cur->hotplug_slot);
-		kfree(slot_cur->hotplug_slot->info);
 		kfree(slot_cur->hotplug_slot);
 		kfree(slot_cur);
 	}
@@ -1095,8 +1073,7 @@ static int enable_slot(struct hotplug_slot *hs)
 
 	slot_cur->func = kzalloc(sizeof(struct pci_func), GFP_KERNEL);
 	if (!slot_cur->func) {
-		/* We cannot do update_slot_info here, since no memory for
-		 * kmalloc n.e.ways, and update_slot_info allocates some */
+		/* do update_slot_info here? */
 		rc = -ENOMEM;
 		goto error_power;
 	}
diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
index 6f8e90e3ec08..c05d066ab0d5 100644
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -671,31 +671,6 @@ static int fillslotinfo(struct hotplug_slot *hotplug_slot)
 
 	slot = hotplug_slot->private;
 	rc = ibmphp_hpc_readslot(slot, READ_ALLSTAT, NULL);
-	if (rc)
-		return rc;
-
-	// power - enabled:1  not:0
-	hotplug_slot->info->power_status = SLOT_POWER(slot->status);
-
-	// attention - off:0, on:1, blinking:2
-	hotplug_slot->info->attention_status = SLOT_ATTN(slot->status, slot->ext_status);
-
-	// latch - open:1 closed:0
-	hotplug_slot->info->latch_status = SLOT_LATCH(slot->status);
-
-	// pci board - present:1 not:0
-	if (SLOT_PRESENT(slot->status))
-		hotplug_slot->info->adapter_status = 1;
-	else
-		hotplug_slot->info->adapter_status = 0;
-/*
-	if (slot->bus_on->supported_bus_mode
-		&& (slot->bus_on->supported_speed == BUS_SPEED_66))
-		hotplug_slot->info->max_bus_speed_status = BUS_SPEED_66PCIX;
-	else
-		hotplug_slot->info->max_bus_speed_status = slot->bus_on->supported_speed;
-*/
-
 	return rc;
 }
 
@@ -877,12 +852,6 @@ static int __init ebda_rsrc_controller(void)
 				goto error_no_hp_slot;
 			}
 
-			hp_slot_ptr->info = kzalloc(sizeof(struct hotplug_slot_info), GFP_KERNEL);
-			if (!hp_slot_ptr->info) {
-				rc = -ENOMEM;
-				goto error_no_hp_info;
-			}
-
 			tmp_slot = kzalloc(sizeof(*tmp_slot), GFP_KERNEL);
 			if (!tmp_slot) {
 				rc = -ENOMEM;
@@ -955,8 +924,6 @@ static int __init ebda_rsrc_controller(void)
 error:
 	kfree(hp_slot_ptr->private);
 error_no_slot:
-	kfree(hp_slot_ptr->info);
-error_no_hp_info:
 	kfree(hp_slot_ptr);
 error_no_hp_slot:
 	free_ebda_hpc(hpc_ptr);
diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c
index ede2ed6f4ce0..5ac31f683b85 100644
--- a/drivers/pci/hotplug/pci_hotplug_core.c
+++ b/drivers/pci/hotplug/pci_hotplug_core.c
@@ -55,8 +55,6 @@ static int get_##name(struct hotplug_slot *slot, type *value)		\
 		return -ENODEV;						\
 	if (ops->get_##name)						\
 		retval = ops->get_##name(slot, value);			\
-	else								\
-		*value = slot->info->name;				\
 	module_put(slot->owner);					\
 	return retval;							\
 }
@@ -445,7 +443,7 @@ int __pci_hp_initialize(struct hotplug_slot *slot, struct pci_bus *bus,
 
 	if (slot == NULL)
 		return -ENODEV;
-	if ((slot->info == NULL) || (slot->ops == NULL))
+	if (slot->ops == NULL)
 		return -EINVAL;
 
 	slot->owner = owner;
@@ -560,28 +558,6 @@ void pci_hp_destroy(struct hotplug_slot *slot)
 }
 EXPORT_SYMBOL_GPL(pci_hp_destroy);
 
-/**
- * pci_hp_change_slot_info - changes the slot's information structure in the core
- * @slot: pointer to the slot whose info has changed
- * @info: pointer to the info copy into the slot's info structure
- *
- * @slot must have been registered with the pci
- * hotplug subsystem previously with a call to pci_hp_register().
- *
- * Returns 0 if successful, anything else for an error.
- */
-int pci_hp_change_slot_info(struct hotplug_slot *slot,
-			    struct hotplug_slot_info *info)
-{
-	if (!slot || !info)
-		return -ENODEV;
-
-	memcpy(slot->info, info, sizeof(struct hotplug_slot_info));
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(pci_hp_change_slot_info);
-
 static int __init pci_hotplug_init(void)
 {
 	int result;
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index 80cc7ba534bf..ac5baf887c5d 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -52,7 +52,6 @@ static int get_adapter_status(struct hotplug_slot *slot, u8 *value);
 static int init_slot(struct controller *ctrl)
 {
 	struct hotplug_slot *hotplug = NULL;
-	struct hotplug_slot_info *info = NULL;
 	struct hotplug_slot_ops *ops = NULL;
 	char name[SLOT_NAME_SIZE];
 	int retval = -ENOMEM;
@@ -61,10 +60,6 @@ static int init_slot(struct controller *ctrl)
 	if (!hotplug)
 		goto out;
 
-	info = kzalloc(sizeof(*info), GFP_KERNEL);
-	if (!info)
-		goto out;
-
 	/* Setup hotplug slot ops */
 	ops = kzalloc(sizeof(*ops), GFP_KERNEL);
 	if (!ops)
@@ -86,7 +81,6 @@ static int init_slot(struct controller *ctrl)
 	}
 
 	/* register this slot with the hotplug pci core */
-	hotplug->info = info;
 	hotplug->private = ctrl;
 	hotplug->ops = ops;
 	ctrl->hotplug_slot = hotplug;
@@ -99,7 +93,6 @@ static int init_slot(struct controller *ctrl)
 out:
 	if (retval) {
 		kfree(ops);
-		kfree(info);
 		kfree(hotplug);
 	}
 	return retval;
@@ -111,7 +104,6 @@ static void cleanup_slot(struct controller *ctrl)
 
 	pci_hp_destroy(hotplug_slot);
 	kfree(hotplug_slot->ops);
-	kfree(hotplug_slot->info);
 	kfree(hotplug_slot);
 }
 
diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
index 12b92a0ff688..5bb63430262e 100644
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -328,6 +328,11 @@ out:
 	return ret;
 }
 
+static inline struct pnv_php_slot *to_pnv_php_slot(struct hotplug_slot *slot)
+{
+	return container_of(slot, struct pnv_php_slot, slot);
+}
+
 int pnv_php_set_slot_power_state(struct hotplug_slot *slot,
 				 uint8_t state)
 {
@@ -378,7 +383,6 @@ static int pnv_php_get_power_state(struct hotplug_slot *slot, u8 *state)
 			 ret);
 	} else {
 		*state = power_state;
-		slot->info->power_status = power_state;
 	}
 
 	return 0;
@@ -397,7 +401,6 @@ static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state)
 	ret = pnv_pci_get_presence_state(php_slot->id, &presence);
 	if (ret >= 0) {
 		*state = presence;
-		slot->info->adapter_status = presence;
 		ret = 0;
 	} else {
 		pci_warn(php_slot->pdev, "Error %d getting presence\n", ret);
@@ -406,10 +409,20 @@ static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state)
 	return ret;
 }
 
+static int pnv_php_get_attention_state(struct hotplug_slot *slot, u8 *state)
+{
+	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
+
+	*state = php_slot->attention_state;
+	return 0;
+}
+
 static int pnv_php_set_attention_state(struct hotplug_slot *slot, u8 state)
 {
+	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
+
 	/* FIXME: Make it real once firmware supports it */
-	slot->info->attention_status = state;
+	php_slot->attention_state = state;
 
 	return 0;
 }
@@ -501,8 +514,7 @@ scan:
 
 static int pnv_php_enable_slot(struct hotplug_slot *slot)
 {
-	struct pnv_php_slot *php_slot = container_of(slot,
-						     struct pnv_php_slot, slot);
+	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
 
 	return pnv_php_enable(php_slot, true);
 }
@@ -533,6 +545,7 @@ static int pnv_php_disable_slot(struct hotplug_slot *slot)
 static const struct hotplug_slot_ops php_slot_ops = {
 	.get_power_status	= pnv_php_get_power_state,
 	.get_adapter_status	= pnv_php_get_adapter_state,
+	.get_attention_status	= pnv_php_get_attention_state,
 	.set_attention_status	= pnv_php_set_attention_state,
 	.enable_slot		= pnv_php_enable_slot,
 	.disable_slot		= pnv_php_disable_slot,
@@ -594,7 +607,6 @@ static struct pnv_php_slot *pnv_php_alloc_slot(struct device_node *dn)
 	php_slot->id	                = id;
 	php_slot->power_state_check     = false;
 	php_slot->slot.ops              = &php_slot_ops;
-	php_slot->slot.info             = &php_slot->slot_info;
 	php_slot->slot.private          = php_slot;
 
 	INIT_LIST_HEAD(&php_slot->children);
diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h
index f83347819f7b..26a3dd731b5e 100644
--- a/drivers/pci/hotplug/rpaphp.h
+++ b/drivers/pci/hotplug/rpaphp.h
@@ -63,6 +63,7 @@ struct slot {
 	u32 index;
 	u32 type;
 	u32 power_domain;
+	u8 attention_status;
 	char *name;
 	struct device_node *dn;
 	struct pci_bus *bus;
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 8620a3f8c987..898e78dcd311 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -66,7 +66,7 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 value)
 
 	rc = rtas_set_indicator(DR_INDICATOR, slot->index, value);
 	if (!rc)
-		hotplug_slot->info->attention_status = value;
+		slot->attention_status = value;
 
 	return rc;
 }
@@ -95,7 +95,7 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
 	struct slot *slot = (struct slot *)hotplug_slot->private;
-	*value = slot->hotplug_slot->info->attention_status;
+	*value = slot->attention_status;
 	return 0;
 }
 
diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c
index 0aac33e15dab..beca61badeea 100644
--- a/drivers/pci/hotplug/rpaphp_pci.c
+++ b/drivers/pci/hotplug/rpaphp_pci.c
@@ -54,25 +54,21 @@ int rpaphp_get_sensor_state(struct slot *slot, int *state)
  * rpaphp_enable_slot - record slot state, config pci device
  * @slot: target &slot
  *
- * Initialize values in the slot, and the hotplug_slot info
- * structures to indicate if there is a pci card plugged into
- * the slot. If the slot is not empty, run the pcibios routine
+ * Initialize values in the slot structure to indicate if there is a pci card
+ * plugged into the slot. If the slot is not empty, run the pcibios routine
  * to get pcibios stuff correctly set up.
  */
 int rpaphp_enable_slot(struct slot *slot)
 {
 	int rc, level, state;
 	struct pci_bus *bus;
-	struct hotplug_slot_info *info = slot->hotplug_slot->info;
 
-	info->adapter_status = NOT_VALID;
 	slot->state = EMPTY;
 
 	/* Find out if the power is turned on for the slot */
 	rc = rtas_get_power_level(slot->power_domain, &level);
 	if (rc)
 		return rc;
-	info->power_status = level;
 
 	/* Figure out if there is an adapter in the slot */
 	rc = rpaphp_get_sensor_state(slot, &state);
@@ -85,13 +81,11 @@ int rpaphp_enable_slot(struct slot *slot)
 		return -EINVAL;
 	}
 
-	info->adapter_status = EMPTY;
 	slot->bus = bus;
 	slot->pci_devs = &bus->devices;
 
 	/* if there's an adapter in the slot, go add the pci devices */
 	if (state == PRESENT) {
-		info->adapter_status = NOT_CONFIGURED;
 		slot->state = NOT_CONFIGURED;
 
 		/* non-empty slot has to have child */
@@ -105,7 +99,6 @@ int rpaphp_enable_slot(struct slot *slot)
 			pci_hp_add_devices(bus);
 
 		if (!list_empty(&bus->devices)) {
-			info->adapter_status = CONFIGURED;
 			slot->state = CONFIGURED;
 		}
 
diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c
index b916c8e4372d..6e2658ce300b 100644
--- a/drivers/pci/hotplug/rpaphp_slot.c
+++ b/drivers/pci/hotplug/rpaphp_slot.c
@@ -21,7 +21,6 @@
 /* free up the memory used by a slot */
 void dealloc_slot_struct(struct slot *slot)
 {
-	kfree(slot->hotplug_slot->info);
 	kfree(slot->name);
 	kfree(slot->hotplug_slot);
 	kfree(slot);
@@ -38,13 +37,9 @@ struct slot *alloc_slot_struct(struct device_node *dn,
 	slot->hotplug_slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL);
 	if (!slot->hotplug_slot)
 		goto error_slot;
-	slot->hotplug_slot->info = kzalloc(sizeof(struct hotplug_slot_info),
-					   GFP_KERNEL);
-	if (!slot->hotplug_slot->info)
-		goto error_hpslot;
 	slot->name = kstrdup(drc_name, GFP_KERNEL);
 	if (!slot->name)
-		goto error_info;
+		goto error_hpslot;
 	slot->dn = dn;
 	slot->index = drc_index;
 	slot->power_domain = power_domain;
@@ -53,8 +48,6 @@ struct slot *alloc_slot_struct(struct device_node *dn,
 
 	return (slot);
 
-error_info:
-	kfree(slot->hotplug_slot->info);
 error_hpslot:
 	kfree(slot->hotplug_slot);
 error_slot:
diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c
index 5bd45fd4a92a..d04634b0defe 100644
--- a/drivers/pci/hotplug/s390_pci_hpc.c
+++ b/drivers/pci/hotplug/s390_pci_hpc.c
@@ -140,7 +140,6 @@ static const struct hotplug_slot_ops s390_hotplug_slot_ops = {
 int zpci_init_slot(struct zpci_dev *zdev)
 {
 	struct hotplug_slot *hotplug_slot;
-	struct hotplug_slot_info *info;
 	char name[SLOT_NAME_SIZE];
 	struct slot *slot;
 	int rc;
@@ -160,16 +159,8 @@ int zpci_init_slot(struct zpci_dev *zdev)
 	slot->hotplug_slot = hotplug_slot;
 	slot->zdev = zdev;
 
-	info = kzalloc(sizeof(*info), GFP_KERNEL);
-	if (!info)
-		goto error_info;
-	hotplug_slot->info = info;
-
 	hotplug_slot->ops = &s390_hotplug_slot_ops;
 
-	get_power_status(hotplug_slot, &info->power_status);
-	get_adapter_status(hotplug_slot, &info->adapter_status);
-
 	snprintf(name, SLOT_NAME_SIZE, "%08x", zdev->fid);
 	rc = pci_hp_register(slot->hotplug_slot, zdev->bus,
 			     ZPCI_DEVFN, name);
@@ -180,8 +171,6 @@ int zpci_init_slot(struct zpci_dev *zdev)
 	return 0;
 
 error_reg:
-	kfree(info);
-error_info:
 	kfree(hotplug_slot);
 error_hp:
 	kfree(slot);
@@ -199,7 +188,6 @@ void zpci_exit_slot(struct zpci_dev *zdev)
 			continue;
 		list_del(&slot->slot_list);
 		pci_hp_deregister(slot->hotplug_slot);
-		kfree(slot->hotplug_slot->info);
 		kfree(slot->hotplug_slot);
 		kfree(slot);
 	}
diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c
index af4c28c574dd..e103826c83e3 100644
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -585,7 +585,6 @@ static inline int get_power_status(struct hotplug_slot *bss_hotplug_slot,
 
 static void sn_release_slot(struct hotplug_slot *bss_hotplug_slot)
 {
-	kfree(bss_hotplug_slot->info);
 	kfree(bss_hotplug_slot->private);
 	kfree(bss_hotplug_slot);
 }
@@ -614,14 +613,6 @@ static int sn_hotplug_slot_register(struct pci_bus *pci_bus)
 			goto alloc_err;
 		}
 
-		bss_hotplug_slot->info =
-			kzalloc(sizeof(struct hotplug_slot_info),
-				GFP_KERNEL);
-		if (!bss_hotplug_slot->info) {
-			rc = -ENOMEM;
-			goto alloc_err;
-		}
-
 		if (sn_hp_slot_private_alloc(bss_hotplug_slot,
 					     pci_bus, device, name)) {
 			rc = -ENOMEM;
diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h
index 516e4835019c..a7bb816e6f25 100644
--- a/drivers/pci/hotplug/shpchp.h
+++ b/drivers/pci/hotplug/shpchp.h
@@ -67,7 +67,9 @@ struct slot {
 	u32 number;
 	u8 is_a_board;
 	u8 state;
+	u8 attention_save;
 	u8 presence_save;
+	u8 latch_save;
 	u8 pwr_save;
 	struct controller *ctrl;
 	const struct hpc_ops *hpc_ops;
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c
index 26cbea04237c..b7181b7e7b98 100644
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -65,7 +65,6 @@ static int init_slots(struct controller *ctrl)
 {
 	struct slot *slot;
 	struct hotplug_slot *hotplug_slot;
-	struct hotplug_slot_info *info;
 	char name[SLOT_NAME_SIZE];
 	int retval;
 	int i;
@@ -84,13 +83,6 @@ static int init_slots(struct controller *ctrl)
 		}
 		slot->hotplug_slot = hotplug_slot;
 
-		info = kzalloc(sizeof(*info), GFP_KERNEL);
-		if (!info) {
-			retval = -ENOMEM;
-			goto error_hpslot;
-		}
-		hotplug_slot->info = info;
-
 		slot->hp_slot = i;
 		slot->ctrl = ctrl;
 		slot->bus = ctrl->pci_dev->subordinate->number;
@@ -101,7 +93,7 @@ static int init_slots(struct controller *ctrl)
 		slot->wq = alloc_workqueue("shpchp-%d", 0, 0, slot->number);
 		if (!slot->wq) {
 			retval = -ENOMEM;
-			goto error_info;
+			goto error_hpslot;
 		}
 
 		mutex_init(&slot->lock);
@@ -124,10 +116,10 @@ static int init_slots(struct controller *ctrl)
 			goto error_slotwq;
 		}
 
-		get_power_status(hotplug_slot, &info->power_status);
-		get_attention_status(hotplug_slot, &info->attention_status);
-		get_latch_status(hotplug_slot, &info->latch_status);
-		get_adapter_status(hotplug_slot, &info->adapter_status);
+		get_power_status(hotplug_slot, &slot->pwr_save);
+		get_attention_status(hotplug_slot, &slot->attention_save);
+		get_latch_status(hotplug_slot, &slot->latch_save);
+		get_adapter_status(hotplug_slot, &slot->presence_save);
 
 		list_add(&slot->slot_list, &ctrl->slot_list);
 	}
@@ -135,8 +127,6 @@ static int init_slots(struct controller *ctrl)
 	return 0;
 error_slotwq:
 	destroy_workqueue(slot->wq);
-error_info:
-	kfree(info);
 error_hpslot:
 	kfree(hotplug_slot);
 error_slot:
@@ -154,7 +144,6 @@ void cleanup_slots(struct controller *ctrl)
 		cancel_delayed_work(&slot->work);
 		destroy_workqueue(slot->wq);
 		pci_hp_deregister(slot->hotplug_slot);
-		kfree(slot->hotplug_slot->info);
 		kfree(slot->hotplug_slot);
 		kfree(slot);
 	}
@@ -170,7 +159,7 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
 	ctrl_dbg(slot->ctrl, "%s: physical_slot = %s\n",
 		 __func__, slot_name(slot));
 
-	hotplug_slot->info->attention_status = status;
+	slot->attention_save = status;
 	slot->hpc_ops->set_attention_status(slot, status);
 
 	return 0;
@@ -206,7 +195,7 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 
 	retval = slot->hpc_ops->get_power_status(slot, value);
 	if (retval < 0)
-		*value = hotplug_slot->info->power_status;
+		*value = slot->pwr_save;
 
 	return 0;
 }
@@ -221,7 +210,7 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
 
 	retval = slot->hpc_ops->get_attention_status(slot, value);
 	if (retval < 0)
-		*value = hotplug_slot->info->attention_status;
+		*value = slot->attention_save;
 
 	return 0;
 }
@@ -236,7 +225,7 @@ static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
 
 	retval = slot->hpc_ops->get_latch_status(slot, value);
 	if (retval < 0)
-		*value = hotplug_slot->info->latch_status;
+		*value = slot->latch_save;
 
 	return 0;
 }
@@ -251,7 +240,7 @@ static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 
 	retval = slot->hpc_ops->get_adapter_status(slot, value);
 	if (retval < 0)
-		*value = hotplug_slot->info->adapter_status;
+		*value = slot->presence_save;
 
 	return 0;
 }
diff --git a/drivers/pci/hotplug/shpchp_ctrl.c b/drivers/pci/hotplug/shpchp_ctrl.c
index 1267dcc5a531..078003dcde5b 100644
--- a/drivers/pci/hotplug/shpchp_ctrl.c
+++ b/drivers/pci/hotplug/shpchp_ctrl.c
@@ -446,23 +446,12 @@ void shpchp_queue_pushbutton_work(struct work_struct *work)
 	mutex_unlock(&p_slot->lock);
 }
 
-static int update_slot_info (struct slot *slot)
+static void update_slot_info(struct slot *slot)
 {
-	struct hotplug_slot_info *info;
-	int result;
-
-	info = kmalloc(sizeof(*info), GFP_KERNEL);
-	if (!info)
-		return -ENOMEM;
-
-	slot->hpc_ops->get_power_status(slot, &(info->power_status));
-	slot->hpc_ops->get_attention_status(slot, &(info->attention_status));
-	slot->hpc_ops->get_latch_status(slot, &(info->latch_status));
-	slot->hpc_ops->get_adapter_status(slot, &(info->adapter_status));
-
-	result = pci_hp_change_slot_info(slot->hotplug_slot, info);
-	kfree (info);
-	return result;
+	slot->hpc_ops->get_power_status(slot, &slot->pwr_save);
+	slot->hpc_ops->get_attention_status(slot, &slot->attention_save);
+	slot->hpc_ops->get_latch_status(slot, &slot->latch_save);
+	slot->hpc_ops->get_adapter_status(slot, &slot->presence_save);
 }
 
 /*
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index a8aa2eadfd82..019b037319e3 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -902,15 +902,8 @@ static int asus_setup_pci_hotplug(struct asus_wmi *asus)
 	if (!asus->hotplug_slot)
 		goto error_slot;
 
-	asus->hotplug_slot->info = kzalloc(sizeof(struct hotplug_slot_info),
-					   GFP_KERNEL);
-	if (!asus->hotplug_slot->info)
-		goto error_info;
-
 	asus->hotplug_slot->private = asus;
 	asus->hotplug_slot->ops = &asus_hotplug_slot_ops;
-	asus_get_adapter_status(asus->hotplug_slot,
-				&asus->hotplug_slot->info->adapter_status);
 
 	ret = pci_hp_register(asus->hotplug_slot, bus, 0, "asus-wifi");
 	if (ret) {
@@ -921,8 +914,6 @@ static int asus_setup_pci_hotplug(struct asus_wmi *asus)
 	return 0;
 
 error_register:
-	kfree(asus->hotplug_slot->info);
-error_info:
 	kfree(asus->hotplug_slot);
 	asus->hotplug_slot = NULL;
 error_slot:
@@ -1055,7 +1046,6 @@ static void asus_wmi_rfkill_exit(struct asus_wmi *asus)
 	asus_rfkill_hotplug(asus);
 	if (asus->hotplug_slot) {
 		pci_hp_deregister(asus->hotplug_slot);
-		kfree(asus->hotplug_slot->info);
 		kfree(asus->hotplug_slot);
 	}
 	if (asus->hotplug_workqueue)
diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 41a364376e91..028b20f82962 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -745,15 +745,8 @@ static int eeepc_setup_pci_hotplug(struct eeepc_laptop *eeepc)
 	if (!eeepc->hotplug_slot)
 		goto error_slot;
 
-	eeepc->hotplug_slot->info = kzalloc(sizeof(struct hotplug_slot_info),
-					    GFP_KERNEL);
-	if (!eeepc->hotplug_slot->info)
-		goto error_info;
-
 	eeepc->hotplug_slot->private = eeepc;
 	eeepc->hotplug_slot->ops = &eeepc_hotplug_slot_ops;
-	eeepc_get_adapter_status(eeepc->hotplug_slot,
-				 &eeepc->hotplug_slot->info->adapter_status);
 
 	ret = pci_hp_register(eeepc->hotplug_slot, bus, 0, "eeepc-wifi");
 	if (ret) {
@@ -764,8 +757,6 @@ static int eeepc_setup_pci_hotplug(struct eeepc_laptop *eeepc)
 	return 0;
 
 error_register:
-	kfree(eeepc->hotplug_slot->info);
-error_info:
 	kfree(eeepc->hotplug_slot);
 	eeepc->hotplug_slot = NULL;
 error_slot:
@@ -831,7 +822,6 @@ static void eeepc_rfkill_exit(struct eeepc_laptop *eeepc)
 
 	if (eeepc->hotplug_slot) {
 		pci_hp_deregister(eeepc->hotplug_slot);
-		kfree(eeepc->hotplug_slot->info);
 		kfree(eeepc->hotplug_slot);
 	}
 
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index 372dbe95c207..6f07a4e1de8d 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -23,17 +23,9 @@
  * @hardware_test: Called to run a specified hardware test on the specified
  * slot.
  * @get_power_status: Called to get the current power status of a slot.
- *	If this field is NULL, the value passed in the struct hotplug_slot_info
- *	will be used when this value is requested by a user.
  * @get_attention_status: Called to get the current attention status of a slot.
- *	If this field is NULL, the value passed in the struct hotplug_slot_info
- *	will be used when this value is requested by a user.
  * @get_latch_status: Called to get the current latch status of a slot.
- *	If this field is NULL, the value passed in the struct hotplug_slot_info
- *	will be used when this value is requested by a user.
  * @get_adapter_status: Called to get see if an adapter is present in the slot or not.
- *	If this field is NULL, the value passed in the struct hotplug_slot_info
- *	will be used when this value is requested by a user.
  * @reset_slot: Optional interface to allow override of a bus reset for the
  *	slot for cases where a secondary bus reset can result in spurious
  *	hotplug events or where a slot can be reset independent of the bus.
@@ -55,27 +47,9 @@ struct hotplug_slot_ops {
 	int (*reset_slot)		(struct hotplug_slot *slot, int probe);
 };
 
-/**
- * struct hotplug_slot_info - used to notify the hotplug pci core of the state of the slot
- * @power_status: if power is enabled or not (1/0)
- * @attention_status: if the attention light is enabled or not (1/0)
- * @latch_status: if the latch (if any) is open or closed (1/0)
- * @adapter_status: if there is a pci board present in the slot or not (1/0)
- *
- * Used to notify the hotplug pci core of the status of a specific slot.
- */
-struct hotplug_slot_info {
-	u8	power_status;
-	u8	attention_status;
-	u8	latch_status;
-	u8	adapter_status;
-};
-
 /**
  * struct hotplug_slot - used to register a physical slot with the hotplug pci core
  * @ops: pointer to the &struct hotplug_slot_ops to be used for this slot
- * @info: pointer to the &struct hotplug_slot_info for the initial values for
- * this slot.
  * @private: used by the hotplug pci controller driver to store whatever it
  * needs.
  * @owner: The module owner of this structure
@@ -83,7 +57,6 @@ struct hotplug_slot_info {
  */
 struct hotplug_slot {
 	const struct hotplug_slot_ops	*ops;
-	struct hotplug_slot_info	*info;
 	void				*private;
 
 	/* Variables below this are for use only by the hotplug pci core. */
@@ -110,9 +83,6 @@ void pci_hp_del(struct hotplug_slot *slot);
 void pci_hp_destroy(struct hotplug_slot *slot);
 void pci_hp_deregister(struct hotplug_slot *slot);
 
-int __must_check pci_hp_change_slot_info(struct hotplug_slot *slot,
-					 struct hotplug_slot_info *info);
-
 /* use a define to avoid include chaining to get THIS_MODULE & friends */
 #define pci_hp_register(slot, pbus, devnr, name) \
 	__pci_hp_register(slot, pbus, devnr, name, THIS_MODULE, KBUILD_MODNAME)
-- 
cgit v1.2.3


From 125450f814418b9f889c9885831467d1b2e25a7d Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sat, 8 Sep 2018 09:59:01 +0200
Subject: PCI: hotplug: Embed hotplug_slot

When the PCI hotplug core and its first user, cpqphp, were introduced in
February 2002 with historic commit a8a2069f432c, cpqphp allocated a slot
struct for its internal use plus a hotplug_slot struct to be registered
with the hotplug core and linked the two with pointers:
https://git.kernel.org/tglx/history/c/a8a2069f432c

Nowadays, the predominant pattern in the tree is to embed ("subclass")
such structures in one another and cast to the containing struct with
container_of().  But it wasn't until July 2002 that container_of() was
introduced with historic commit ec4f214232cf:
https://git.kernel.org/tglx/history/c/ec4f214232cf

pnv_php, introduced in 2016, did the right thing and embedded struct
hotplug_slot in its internal struct pnv_php_slot, but all other drivers
cargo-culted cpqphp's design and linked separate structs with pointers.

Embedding structs is preferrable to linking them with pointers because
it requires fewer allocations, thereby reducing overhead and simplifying
error paths.  Casting an embedded struct to the containing struct
becomes a cheap subtraction rather than a dereference.  And having fewer
pointers reduces the risk of them pointing nowhere either accidentally
or due to an attack.

Convert all drivers to embed struct hotplug_slot in their internal slot
struct.  The "private" pointer in struct hotplug_slot thereby becomes
unused, so drop it.

Signed-off-by: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Tyrel Datwyler <tyreld@linux.vnet.ibm.com>  # drivers/pci/hotplug/rpa*
Acked-by: Sebastian Ott <sebott@linux.ibm.com>        # drivers/pci/hotplug/s390*
Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com> # drivers/platform/x86
Cc: Len Brown <lenb@kernel.org>
Cc: Scott Murray <scott@spiteful.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Oliver OHalloran <oliveroh@au1.ibm.com>
Cc: Gavin Shan <gwshan@linux.vnet.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Corentin Chary <corentin.chary@gmail.com>
Cc: Darren Hart <dvhart@infradead.org>
---
 drivers/pci/hotplug/acpiphp.h           |  9 +++-
 drivers/pci/hotplug/acpiphp_core.c      | 28 ++++------
 drivers/pci/hotplug/acpiphp_ibm.c       |  2 +-
 drivers/pci/hotplug/cpci_hotplug.h      |  9 +++-
 drivers/pci/hotplug/cpci_hotplug_core.c | 37 +++++--------
 drivers/pci/hotplug/cpci_hotplug_pci.c  |  6 +--
 drivers/pci/hotplug/cpqphp.h            |  9 +++-
 drivers/pci/hotplug/cpqphp_core.c       | 37 +++++--------
 drivers/pci/hotplug/cpqphp_ctrl.c       |  2 -
 drivers/pci/hotplug/ibmphp.h            |  7 ++-
 drivers/pci/hotplug/ibmphp_core.c       | 92 ++++++++++++++-------------------
 drivers/pci/hotplug/ibmphp_ebda.c       | 37 ++++---------
 drivers/pci/hotplug/pciehp.h            | 11 ++--
 drivers/pci/hotplug/pciehp_core.c       | 37 +++++--------
 drivers/pci/hotplug/pciehp_ctrl.c       |  4 +-
 drivers/pci/hotplug/pciehp_hpc.c        |  8 +--
 drivers/pci/hotplug/pnv_php.c           |  9 ++--
 drivers/pci/hotplug/rpaphp.h            |  7 ++-
 drivers/pci/hotplug/rpaphp_core.c       | 14 ++---
 drivers/pci/hotplug/rpaphp_slot.c       | 15 ++----
 drivers/pci/hotplug/s390_pci_hpc.c      | 30 +++++------
 drivers/pci/hotplug/sgi_hotplug.c       | 52 ++++++++-----------
 drivers/pci/hotplug/shpchp.h            |  6 +--
 drivers/pci/hotplug/shpchp_core.c       | 17 ++----
 drivers/platform/x86/asus-wmi.c         | 26 ++++------
 drivers/platform/x86/eeepc-laptop.c     | 30 +++++------
 include/linux/pci_hotplug.h             |  3 --
 27 files changed, 223 insertions(+), 321 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h
index 8377e736ea69..cf3058404f41 100644
--- a/drivers/pci/hotplug/acpiphp.h
+++ b/drivers/pci/hotplug/acpiphp.h
@@ -33,14 +33,19 @@ struct acpiphp_slot;
  * struct slot - slot information for each *physical* slot
  */
 struct slot {
-	struct hotplug_slot	*hotplug_slot;
+	struct hotplug_slot	hotplug_slot;
 	struct acpiphp_slot	*acpi_slot;
 	unsigned int sun;	/* ACPI _SUN (Slot User Number) value */
 };
 
 static inline const char *slot_name(struct slot *slot)
 {
-	return hotplug_slot_name(slot->hotplug_slot);
+	return hotplug_slot_name(&slot->hotplug_slot);
+}
+
+static inline struct slot *to_slot(struct hotplug_slot *hotplug_slot)
+{
+	return container_of(hotplug_slot, struct slot, hotplug_slot);
 }
 
 /*
diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
index abd4f8d7e16a..c9e2bd40c038 100644
--- a/drivers/pci/hotplug/acpiphp_core.c
+++ b/drivers/pci/hotplug/acpiphp_core.c
@@ -118,7 +118,7 @@ EXPORT_SYMBOL_GPL(acpiphp_unregister_attention);
  */
 static int enable_slot(struct hotplug_slot *hotplug_slot)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 
 	pr_debug("%s - physical_slot = %s\n", __func__, slot_name(slot));
 
@@ -135,7 +135,7 @@ static int enable_slot(struct hotplug_slot *hotplug_slot)
  */
 static int disable_slot(struct hotplug_slot *hotplug_slot)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 
 	pr_debug("%s - physical_slot = %s\n", __func__, slot_name(slot));
 
@@ -179,7 +179,7 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
  */
 static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 
 	pr_debug("%s - physical_slot = %s\n", __func__, slot_name(slot));
 
@@ -225,7 +225,7 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
  */
 static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 
 	pr_debug("%s - physical_slot = %s\n", __func__, slot_name(slot));
 
@@ -245,7 +245,7 @@ static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
  */
 static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 
 	pr_debug("%s - physical_slot = %s\n", __func__, slot_name(slot));
 
@@ -266,12 +266,7 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot,
 	if (!slot)
 		goto error;
 
-	slot->hotplug_slot = kzalloc(sizeof(*slot->hotplug_slot), GFP_KERNEL);
-	if (!slot->hotplug_slot)
-		goto error_slot;
-
-	slot->hotplug_slot->private = slot;
-	slot->hotplug_slot->ops = &acpi_hotplug_slot_ops;
+	slot->hotplug_slot.ops = &acpi_hotplug_slot_ops;
 
 	slot->acpi_slot = acpiphp_slot;
 
@@ -279,20 +274,18 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot,
 	slot->sun = sun;
 	snprintf(name, SLOT_NAME_SIZE, "%u", sun);
 
-	retval = pci_hp_register(slot->hotplug_slot, acpiphp_slot->bus,
+	retval = pci_hp_register(&slot->hotplug_slot, acpiphp_slot->bus,
 				 acpiphp_slot->device, name);
 	if (retval == -EBUSY)
-		goto error_hpslot;
+		goto error_slot;
 	if (retval) {
 		pr_err("pci_hp_register failed with error %d\n", retval);
-		goto error_hpslot;
+		goto error_slot;
 	}
 
 	pr_info("Slot [%s] registered\n", slot_name(slot));
 
 	return 0;
-error_hpslot:
-	kfree(slot->hotplug_slot);
 error_slot:
 	kfree(slot);
 error:
@@ -306,8 +299,7 @@ void acpiphp_unregister_hotplug_slot(struct acpiphp_slot *acpiphp_slot)
 
 	pr_info("Slot [%s] unregistered\n", slot_name(slot));
 
-	pci_hp_deregister(slot->hotplug_slot);
-	kfree(slot->hotplug_slot);
+	pci_hp_deregister(&slot->hotplug_slot);
 	kfree(slot);
 }
 
diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c
index 41713f16ff97..df48b3b03ab4 100644
--- a/drivers/pci/hotplug/acpiphp_ibm.c
+++ b/drivers/pci/hotplug/acpiphp_ibm.c
@@ -41,7 +41,7 @@ MODULE_VERSION(DRIVER_VERSION);
 #define IBM_HARDWARE_ID1 "IBM37D0"
 #define IBM_HARDWARE_ID2 "IBM37D4"
 
-#define hpslot_to_sun(A) (((struct slot *)((A)->private))->sun)
+#define hpslot_to_sun(A) (to_slot(A)->sun)
 
 /* union apci_descriptor - allows access to the
  * various device descriptors that are embedded in the
diff --git a/drivers/pci/hotplug/cpci_hotplug.h b/drivers/pci/hotplug/cpci_hotplug.h
index a35f40a2290c..f33ff2bca414 100644
--- a/drivers/pci/hotplug/cpci_hotplug.h
+++ b/drivers/pci/hotplug/cpci_hotplug.h
@@ -35,7 +35,7 @@ struct slot {
 	unsigned int latch_status:1;
 	unsigned int adapter_status:1;
 	unsigned int extracting;
-	struct hotplug_slot *hotplug_slot;
+	struct hotplug_slot hotplug_slot;
 	struct list_head slot_list;
 };
 
@@ -60,7 +60,12 @@ struct cpci_hp_controller {
 
 static inline const char *slot_name(struct slot *slot)
 {
-	return hotplug_slot_name(slot->hotplug_slot);
+	return hotplug_slot_name(&slot->hotplug_slot);
+}
+
+static inline struct slot *to_slot(struct hotplug_slot *hotplug_slot)
+{
+	return container_of(hotplug_slot, struct slot, hotplug_slot);
 }
 
 int cpci_hp_register_controller(struct cpci_hp_controller *controller);
diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c
index a17fb24c28cd..603eadf3d965 100644
--- a/drivers/pci/hotplug/cpci_hotplug_core.c
+++ b/drivers/pci/hotplug/cpci_hotplug_core.c
@@ -70,7 +70,7 @@ static const struct hotplug_slot_ops cpci_hotplug_slot_ops = {
 static int
 enable_slot(struct hotplug_slot *hotplug_slot)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 	int retval = 0;
 
 	dbg("%s - physical_slot = %s", __func__, slot_name(slot));
@@ -83,7 +83,7 @@ enable_slot(struct hotplug_slot *hotplug_slot)
 static int
 disable_slot(struct hotplug_slot *hotplug_slot)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 	int retval = 0;
 
 	dbg("%s - physical_slot = %s", __func__, slot_name(slot));
@@ -139,7 +139,7 @@ cpci_get_power_status(struct slot *slot)
 static int
 get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 
 	*value = cpci_get_power_status(slot);
 	return 0;
@@ -148,7 +148,7 @@ get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 static int
 get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 
 	*value = cpci_get_attention_status(slot);
 	return 0;
@@ -157,13 +157,13 @@ get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
 static int
 set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
 {
-	return cpci_set_attention_status(hotplug_slot->private, status);
+	return cpci_set_attention_status(to_slot(hotplug_slot), status);
 }
 
 static int
 get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 
 	*value = slot->adapter_status;
 	return 0;
@@ -172,7 +172,7 @@ get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 static int
 get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 
 	*value = slot->latch_status;
 	return 0;
@@ -180,7 +180,6 @@ get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
 
 static void release_slot(struct slot *slot)
 {
-	kfree(slot->hotplug_slot);
 	pci_dev_put(slot->dev);
 	kfree(slot);
 }
@@ -191,7 +190,6 @@ int
 cpci_hp_register_bus(struct pci_bus *bus, u8 first, u8 last)
 {
 	struct slot *slot;
-	struct hotplug_slot *hotplug_slot;
 	char name[SLOT_NAME_SIZE];
 	int status;
 	int i;
@@ -210,28 +208,19 @@ cpci_hp_register_bus(struct pci_bus *bus, u8 first, u8 last)
 			goto error;
 		}
 
-		hotplug_slot =
-			kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL);
-		if (!hotplug_slot) {
-			status = -ENOMEM;
-			goto error_slot;
-		}
-		slot->hotplug_slot = hotplug_slot;
-
 		slot->bus = bus;
 		slot->number = i;
 		slot->devfn = PCI_DEVFN(i, 0);
 
 		snprintf(name, SLOT_NAME_SIZE, "%02x:%02x", bus->number, i);
 
-		hotplug_slot->private = slot;
-		hotplug_slot->ops = &cpci_hotplug_slot_ops;
+		slot->hotplug_slot.ops = &cpci_hotplug_slot_ops;
 
 		dbg("registering slot %s", name);
-		status = pci_hp_register(slot->hotplug_slot, bus, i, name);
+		status = pci_hp_register(&slot->hotplug_slot, bus, i, name);
 		if (status) {
 			err("pci_hp_register failed with error %d", status);
-			goto error_hpslot;
+			goto error_slot;
 		}
 		dbg("slot registered with name: %s", slot_name(slot));
 
@@ -242,8 +231,6 @@ cpci_hp_register_bus(struct pci_bus *bus, u8 first, u8 last)
 		up_write(&list_rwsem);
 	}
 	return 0;
-error_hpslot:
-	kfree(hotplug_slot);
 error_slot:
 	kfree(slot);
 error:
@@ -269,7 +256,7 @@ cpci_hp_unregister_bus(struct pci_bus *bus)
 			slots--;
 
 			dbg("deregistering slot %s", slot_name(slot));
-			pci_hp_deregister(slot->hotplug_slot);
+			pci_hp_deregister(&slot->hotplug_slot);
 			release_slot(slot);
 		}
 	}
@@ -571,7 +558,7 @@ cleanup_slots(void)
 		goto cleanup_null;
 	list_for_each_entry_safe(slot, tmp, &slot_list, slot_list) {
 		list_del(&slot->slot_list);
-		pci_hp_deregister(slot->hotplug_slot);
+		pci_hp_deregister(&slot->hotplug_slot);
 		release_slot(slot);
 	}
 cleanup_null:
diff --git a/drivers/pci/hotplug/cpci_hotplug_pci.c b/drivers/pci/hotplug/cpci_hotplug_pci.c
index 389b8fb50cd9..2c16adb7f4ec 100644
--- a/drivers/pci/hotplug/cpci_hotplug_pci.c
+++ b/drivers/pci/hotplug/cpci_hotplug_pci.c
@@ -194,8 +194,7 @@ int cpci_led_on(struct slot *slot)
 					      slot->devfn,
 					      hs_cap + 2,
 					      hs_csr)) {
-			err("Could not set LOO for slot %s",
-			    hotplug_slot_name(slot->hotplug_slot));
+			err("Could not set LOO for slot %s", slot_name(slot));
 			return -ENODEV;
 		}
 	}
@@ -223,8 +222,7 @@ int cpci_led_off(struct slot *slot)
 					      slot->devfn,
 					      hs_cap + 2,
 					      hs_csr)) {
-			err("Could not clear LOO for slot %s",
-			    hotplug_slot_name(slot->hotplug_slot));
+			err("Could not clear LOO for slot %s", slot_name(slot));
 			return -ENODEV;
 		}
 	}
diff --git a/drivers/pci/hotplug/cpqphp.h b/drivers/pci/hotplug/cpqphp.h
index db78b394a075..77e4e0142fbc 100644
--- a/drivers/pci/hotplug/cpqphp.h
+++ b/drivers/pci/hotplug/cpqphp.h
@@ -260,7 +260,7 @@ struct slot {
 	u8 hp_slot;
 	struct controller *ctrl;
 	void __iomem *p_sm_slot;
-	struct hotplug_slot *hotplug_slot;
+	struct hotplug_slot hotplug_slot;
 };
 
 struct pci_resource {
@@ -445,7 +445,12 @@ extern u8 cpqhp_disk_irq;
 
 static inline const char *slot_name(struct slot *slot)
 {
-	return hotplug_slot_name(slot->hotplug_slot);
+	return hotplug_slot_name(&slot->hotplug_slot);
+}
+
+static inline struct slot *to_slot(struct hotplug_slot *hotplug_slot)
+{
+	return container_of(hotplug_slot, struct slot, hotplug_slot);
 }
 
 /*
diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c
index bb354a7fc112..95b7d60cf119 100644
--- a/drivers/pci/hotplug/cpqphp_core.c
+++ b/drivers/pci/hotplug/cpqphp_core.c
@@ -275,8 +275,7 @@ static int ctrl_slot_cleanup(struct controller *ctrl)
 
 	while (old_slot) {
 		next_slot = old_slot->next;
-		pci_hp_deregister(old_slot->hotplug_slot);
-		kfree(old_slot->hotplug_slot);
+		pci_hp_deregister(&old_slot->hotplug_slot);
 		kfree(old_slot);
 		old_slot = next_slot;
 	}
@@ -418,7 +417,7 @@ cpqhp_set_attention_status(struct controller *ctrl, struct pci_func *func,
 static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
 {
 	struct pci_func *slot_func;
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 	struct controller *ctrl = slot->ctrl;
 	u8 bus;
 	u8 devfn;
@@ -445,7 +444,7 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
 static int process_SI(struct hotplug_slot *hotplug_slot)
 {
 	struct pci_func *slot_func;
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 	struct controller *ctrl = slot->ctrl;
 	u8 bus;
 	u8 devfn;
@@ -477,7 +476,7 @@ static int process_SI(struct hotplug_slot *hotplug_slot)
 static int process_SS(struct hotplug_slot *hotplug_slot)
 {
 	struct pci_func *slot_func;
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 	struct controller *ctrl = slot->ctrl;
 	u8 bus;
 	u8 devfn;
@@ -504,7 +503,7 @@ static int process_SS(struct hotplug_slot *hotplug_slot)
 
 static int hardware_test(struct hotplug_slot *hotplug_slot, u32 value)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 	struct controller *ctrl = slot->ctrl;
 
 	dbg("%s - physical_slot = %s\n", __func__, slot_name(slot));
@@ -515,7 +514,7 @@ static int hardware_test(struct hotplug_slot *hotplug_slot, u32 value)
 
 static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 	struct controller *ctrl = slot->ctrl;
 
 	dbg("%s - physical_slot = %s\n", __func__, slot_name(slot));
@@ -526,7 +525,7 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 
 static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 	struct controller *ctrl = slot->ctrl;
 
 	dbg("%s - physical_slot = %s\n", __func__, slot_name(slot));
@@ -537,7 +536,7 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
 
 static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 	struct controller *ctrl = slot->ctrl;
 
 	dbg("%s - physical_slot = %s\n", __func__, slot_name(slot));
@@ -549,7 +548,7 @@ static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
 
 static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 	struct controller *ctrl = slot->ctrl;
 
 	dbg("%s - physical_slot = %s\n", __func__, slot_name(slot));
@@ -577,7 +576,6 @@ static int ctrl_slot_setup(struct controller *ctrl,
 			void __iomem *smbios_table)
 {
 	struct slot *slot;
-	struct hotplug_slot *hotplug_slot;
 	struct pci_bus *bus = ctrl->pci_bus;
 	u8 number_of_slots;
 	u8 slot_device;
@@ -603,14 +601,6 @@ static int ctrl_slot_setup(struct controller *ctrl,
 			goto error;
 		}
 
-		slot->hotplug_slot = kzalloc(sizeof(*(slot->hotplug_slot)),
-						GFP_KERNEL);
-		if (!slot->hotplug_slot) {
-			result = -ENOMEM;
-			goto error_slot;
-		}
-		hotplug_slot = slot->hotplug_slot;
-
 		slot->ctrl = ctrl;
 		slot->bus = ctrl->bus;
 		slot->device = slot_device;
@@ -659,21 +649,20 @@ static int ctrl_slot_setup(struct controller *ctrl,
 			((read_slot_enable(ctrl) << 2) >> ctrl_slot) & 0x04;
 
 		/* register this slot with the hotplug pci core */
-		hotplug_slot->private = slot;
 		snprintf(name, SLOT_NAME_SIZE, "%u", slot->number);
-		hotplug_slot->ops = &cpqphp_hotplug_slot_ops;
+		slot->hotplug_slot.ops = &cpqphp_hotplug_slot_ops;
 
 		dbg("registering bus %d, dev %d, number %d, ctrl->slot_device_offset %d, slot %d\n",
 				slot->bus, slot->device,
 				slot->number, ctrl->slot_device_offset,
 				slot_number);
-		result = pci_hp_register(hotplug_slot,
+		result = pci_hp_register(&slot->hotplug_slot,
 					 ctrl->pci_dev->bus,
 					 slot->device,
 					 name);
 		if (result) {
 			err("pci_hp_register failed with error %d\n", result);
-			goto error_hpslot;
+			goto error_slot;
 		}
 
 		slot->next = ctrl->slot;
@@ -685,8 +674,6 @@ static int ctrl_slot_setup(struct controller *ctrl,
 	}
 
 	return 0;
-error_hpslot:
-	kfree(hotplug_slot);
 error_slot:
 	kfree(slot);
 error:
diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c
index 9c4826ac6a4f..b7f4e1f099d9 100644
--- a/drivers/pci/hotplug/cpqphp_ctrl.c
+++ b/drivers/pci/hotplug/cpqphp_ctrl.c
@@ -1130,8 +1130,6 @@ static u8 set_controller_speed(struct controller *ctrl, u8 adapter_speed, u8 hp_
 	for (slot = ctrl->slot; slot; slot = slot->next) {
 		if (slot->device == (hp_slot + ctrl->slot_device_offset))
 			continue;
-		if (!slot->hotplug_slot)
-			continue;
 		if (get_presence_status(ctrl, slot) == 0)
 			continue;
 		/* If another adapter is running on the same segment but at a
diff --git a/drivers/pci/hotplug/ibmphp.h b/drivers/pci/hotplug/ibmphp.h
index db387e10581e..b89f850c3a4e 100644
--- a/drivers/pci/hotplug/ibmphp.h
+++ b/drivers/pci/hotplug/ibmphp.h
@@ -698,7 +698,7 @@ struct slot {
 	u8 supported_bus_mode;
 	u8 flag;		/* this is for disable slot and polling */
 	u8 ctlr_index;
-	struct hotplug_slot *hotplug_slot;
+	struct hotplug_slot hotplug_slot;
 	struct controller *ctrl;
 	struct pci_func *func;
 	u8 irq[4];
@@ -742,5 +742,10 @@ int ibmphp_configure_card(struct pci_func *, u8);
 int ibmphp_unconfigure_card(struct slot **, int);
 extern const struct hotplug_slot_ops ibmphp_hotplug_slot_ops;
 
+static inline struct slot *to_slot(struct hotplug_slot *hotplug_slot)
+{
+	return container_of(hotplug_slot, struct slot, hotplug_slot);
+}
+
 #endif				//__IBMPHP_H
 
diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index 96e5b1f544ac..08a58e911fc2 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c
@@ -247,11 +247,8 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 value)
 			break;
 		}
 		if (rc == 0) {
-			pslot = hotplug_slot->private;
-			if (pslot)
-				rc = ibmphp_hpc_writeslot(pslot, cmd);
-			else
-				rc = -ENODEV;
+			pslot = to_slot(hotplug_slot);
+			rc = ibmphp_hpc_writeslot(pslot, cmd);
 		}
 	} else
 		rc = -ENODEV;
@@ -273,19 +270,15 @@ static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
 
 	ibmphp_lock_operations();
 	if (hotplug_slot) {
-		pslot = hotplug_slot->private;
-		if (pslot) {
-			memcpy(&myslot, pslot, sizeof(struct slot));
-			rc = ibmphp_hpc_readslot(pslot, READ_SLOTSTATUS,
-						&(myslot.status));
-			if (!rc)
-				rc = ibmphp_hpc_readslot(pslot,
-						READ_EXTSLOTSTATUS,
-						&(myslot.ext_status));
-			if (!rc)
-				*value = SLOT_ATTN(myslot.status,
-						myslot.ext_status);
-		}
+		pslot = to_slot(hotplug_slot);
+		memcpy(&myslot, pslot, sizeof(struct slot));
+		rc = ibmphp_hpc_readslot(pslot, READ_SLOTSTATUS,
+					 &myslot.status);
+		if (!rc)
+			rc = ibmphp_hpc_readslot(pslot, READ_EXTSLOTSTATUS,
+						 &myslot.ext_status);
+		if (!rc)
+			*value = SLOT_ATTN(myslot.status, myslot.ext_status);
 	}
 
 	ibmphp_unlock_operations();
@@ -303,14 +296,12 @@ static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
 					(ulong) hotplug_slot, (ulong) value);
 	ibmphp_lock_operations();
 	if (hotplug_slot) {
-		pslot = hotplug_slot->private;
-		if (pslot) {
-			memcpy(&myslot, pslot, sizeof(struct slot));
-			rc = ibmphp_hpc_readslot(pslot, READ_SLOTSTATUS,
-						&(myslot.status));
-			if (!rc)
-				*value = SLOT_LATCH(myslot.status);
-		}
+		pslot = to_slot(hotplug_slot);
+		memcpy(&myslot, pslot, sizeof(struct slot));
+		rc = ibmphp_hpc_readslot(pslot, READ_SLOTSTATUS,
+					 &myslot.status);
+		if (!rc)
+			*value = SLOT_LATCH(myslot.status);
 	}
 
 	ibmphp_unlock_operations();
@@ -330,14 +321,12 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 					(ulong) hotplug_slot, (ulong) value);
 	ibmphp_lock_operations();
 	if (hotplug_slot) {
-		pslot = hotplug_slot->private;
-		if (pslot) {
-			memcpy(&myslot, pslot, sizeof(struct slot));
-			rc = ibmphp_hpc_readslot(pslot, READ_SLOTSTATUS,
-						&(myslot.status));
-			if (!rc)
-				*value = SLOT_PWRGD(myslot.status);
-		}
+		pslot = to_slot(hotplug_slot);
+		memcpy(&myslot, pslot, sizeof(struct slot));
+		rc = ibmphp_hpc_readslot(pslot, READ_SLOTSTATUS,
+					 &myslot.status);
+		if (!rc)
+			*value = SLOT_PWRGD(myslot.status);
 	}
 
 	ibmphp_unlock_operations();
@@ -357,18 +346,16 @@ static int get_adapter_present(struct hotplug_slot *hotplug_slot, u8 *value)
 					(ulong) hotplug_slot, (ulong) value);
 	ibmphp_lock_operations();
 	if (hotplug_slot) {
-		pslot = hotplug_slot->private;
-		if (pslot) {
-			memcpy(&myslot, pslot, sizeof(struct slot));
-			rc = ibmphp_hpc_readslot(pslot, READ_SLOTSTATUS,
-						&(myslot.status));
-			if (!rc) {
-				present = SLOT_PRESENT(myslot.status);
-				if (present == HPC_SLOT_EMPTY)
-					*value = 0;
-				else
-					*value = 1;
-			}
+		pslot = to_slot(hotplug_slot);
+		memcpy(&myslot, pslot, sizeof(struct slot));
+		rc = ibmphp_hpc_readslot(pslot, READ_SLOTSTATUS,
+					 &myslot.status);
+		if (!rc) {
+			present = SLOT_PRESENT(myslot.status);
+			if (present == HPC_SLOT_EMPTY)
+				*value = 0;
+			else
+				*value = 1;
 		}
 	}
 
@@ -382,7 +369,7 @@ static int get_max_bus_speed(struct slot *slot)
 	int rc = 0;
 	u8 mode = 0;
 	enum pci_bus_speed speed;
-	struct pci_bus *bus = slot->hotplug_slot->pci_slot->bus;
+	struct pci_bus *bus = slot->hotplug_slot.pci_slot->bus;
 
 	debug("%s - Entry slot[%p]\n", __func__, slot);
 
@@ -582,7 +569,7 @@ static int validate(struct slot *slot_cur, int opn)
  ****************************************************************************/
 int ibmphp_update_slot_info(struct slot *slot_cur)
 {
-	struct pci_bus *bus = slot_cur->hotplug_slot->pci_slot->bus;
+	struct pci_bus *bus = slot_cur->hotplug_slot.pci_slot->bus;
 	u8 bus_speed;
 	u8 mode;
 
@@ -652,7 +639,7 @@ static void free_slots(void)
 
 	list_for_each_entry_safe(slot_cur, next, &ibmphp_slot_head,
 				 ibm_slot_list) {
-		pci_hp_del(slot_cur->hotplug_slot);
+		pci_hp_del(&slot_cur->hotplug_slot);
 		slot_cur->ctrl = NULL;
 		slot_cur->bus_on = NULL;
 
@@ -662,8 +649,7 @@ static void free_slots(void)
 		 */
 		ibmphp_unconfigure_card(&slot_cur, -1);
 
-		pci_hp_destroy(slot_cur->hotplug_slot);
-		kfree(slot_cur->hotplug_slot);
+		pci_hp_destroy(&slot_cur->hotplug_slot);
 		kfree(slot_cur);
 	}
 	debug("%s -- exit\n", __func__);
@@ -985,7 +971,7 @@ static int enable_slot(struct hotplug_slot *hs)
 	ibmphp_lock_operations();
 
 	debug("ENABLING SLOT........\n");
-	slot_cur = hs->private;
+	slot_cur = to_slot(hs);
 
 	rc = validate(slot_cur, ENABLE);
 	if (rc) {
@@ -1146,7 +1132,7 @@ error_power:
 **************************************************************/
 static int ibmphp_disable_slot(struct hotplug_slot *hotplug_slot)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 	int rc;
 
 	ibmphp_lock_operations();
diff --git a/drivers/pci/hotplug/ibmphp_ebda.c b/drivers/pci/hotplug/ibmphp_ebda.c
index c05d066ab0d5..11a2661dc062 100644
--- a/drivers/pci/hotplug/ibmphp_ebda.c
+++ b/drivers/pci/hotplug/ibmphp_ebda.c
@@ -666,10 +666,7 @@ static int fillslotinfo(struct hotplug_slot *hotplug_slot)
 	struct slot *slot;
 	int rc = 0;
 
-	if (!hotplug_slot || !hotplug_slot->private)
-		return -EINVAL;
-
-	slot = hotplug_slot->private;
+	slot = to_slot(hotplug_slot);
 	rc = ibmphp_hpc_readslot(slot, READ_ALLSTAT, NULL);
 	return rc;
 }
@@ -687,7 +684,6 @@ static int __init ebda_rsrc_controller(void)
 	u8 ctlr_id, temp, bus_index;
 	u16 ctlr, slot, bus;
 	u16 slot_num, bus_num, index;
-	struct hotplug_slot *hp_slot_ptr;
 	struct controller *hpc_ptr;
 	struct ebda_hpc_bus *bus_ptr;
 	struct ebda_hpc_slot *slot_ptr;
@@ -746,7 +742,7 @@ static int __init ebda_rsrc_controller(void)
 				bus_info_ptr1 = kzalloc(sizeof(struct bus_info), GFP_KERNEL);
 				if (!bus_info_ptr1) {
 					rc = -ENOMEM;
-					goto error_no_hp_slot;
+					goto error_no_slot;
 				}
 				bus_info_ptr1->slot_min = slot_ptr->slot_num;
 				bus_info_ptr1->slot_max = slot_ptr->slot_num;
@@ -817,7 +813,7 @@ static int __init ebda_rsrc_controller(void)
 						     (hpc_ptr->u.isa_ctlr.io_end - hpc_ptr->u.isa_ctlr.io_start + 1),
 						     "ibmphp")) {
 					rc = -ENODEV;
-					goto error_no_hp_slot;
+					goto error_no_slot;
 				}
 				hpc_ptr->irq = readb(io_mem + addr + 4);
 				addr += 5;
@@ -832,7 +828,7 @@ static int __init ebda_rsrc_controller(void)
 				break;
 			default:
 				rc = -ENODEV;
-				goto error_no_hp_slot;
+				goto error_no_slot;
 		}
 
 		//reorganize chassis' linked list
@@ -845,13 +841,6 @@ static int __init ebda_rsrc_controller(void)
 
 		// register slots with hpc core as well as create linked list of ibm slot
 		for (index = 0; index < hpc_ptr->slot_count; index++) {
-
-			hp_slot_ptr = kzalloc(sizeof(*hp_slot_ptr), GFP_KERNEL);
-			if (!hp_slot_ptr) {
-				rc = -ENOMEM;
-				goto error_no_hp_slot;
-			}
-
 			tmp_slot = kzalloc(sizeof(*tmp_slot), GFP_KERNEL);
 			if (!tmp_slot) {
 				rc = -ENOMEM;
@@ -878,7 +867,6 @@ static int __init ebda_rsrc_controller(void)
 
 			bus_info_ptr1 = ibmphp_find_same_bus_num(hpc_ptr->slots[index].slot_bus_num);
 			if (!bus_info_ptr1) {
-				kfree(tmp_slot);
 				rc = -ENODEV;
 				goto error;
 			}
@@ -888,22 +876,19 @@ static int __init ebda_rsrc_controller(void)
 
 			tmp_slot->ctlr_index = hpc_ptr->slots[index].ctl_index;
 			tmp_slot->number = hpc_ptr->slots[index].slot_num;
-			tmp_slot->hotplug_slot = hp_slot_ptr;
-
-			hp_slot_ptr->private = tmp_slot;
 
-			rc = fillslotinfo(hp_slot_ptr);
+			rc = fillslotinfo(&tmp_slot->hotplug_slot);
 			if (rc)
 				goto error;
 
-			rc = ibmphp_init_devno((struct slot **) &hp_slot_ptr->private);
+			rc = ibmphp_init_devno(&tmp_slot);
 			if (rc)
 				goto error;
-			hp_slot_ptr->ops = &ibmphp_hotplug_slot_ops;
+			tmp_slot->hotplug_slot.ops = &ibmphp_hotplug_slot_ops;
 
 			// end of registering ibm slot with hotplug core
 
-			list_add(&((struct slot *)(hp_slot_ptr->private))->ibm_slot_list, &ibmphp_slot_head);
+			list_add(&tmp_slot->ibm_slot_list, &ibmphp_slot_head);
 		}
 
 		print_bus_info();
@@ -913,7 +898,7 @@ static int __init ebda_rsrc_controller(void)
 
 	list_for_each_entry(tmp_slot, &ibmphp_slot_head, ibm_slot_list) {
 		snprintf(name, SLOT_NAME_SIZE, "%s", create_file_name(tmp_slot));
-		pci_hp_register(tmp_slot->hotplug_slot,
+		pci_hp_register(&tmp_slot->hotplug_slot,
 			pci_find_bus(0, tmp_slot->bus), tmp_slot->device, name);
 	}
 
@@ -922,10 +907,8 @@ static int __init ebda_rsrc_controller(void)
 	return 0;
 
 error:
-	kfree(hp_slot_ptr->private);
+	kfree(tmp_slot);
 error_no_slot:
-	kfree(hp_slot_ptr);
-error_no_hp_slot:
 	free_ebda_hpc(hpc_ptr);
 error_no_hpc:
 	iounmap(io_mem);
diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 3cc88f3e4368..3740f1a759c5 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -88,7 +88,7 @@ do {									\
  *	protects scheduling, execution and cancellation of @button_work
  * @button_work: work item to turn the slot on or off after 5 seconds
  *	in response to an Attention Button press
- * @hotplug_slot: pointer to the structure registered with the PCI hotplug core
+ * @hotplug_slot: structure registered with the PCI hotplug core
  * @reset_lock: prevents access to the Data Link Layer Link Active bit in the
  *	Link Status register and to the Presence Detect State bit in the Slot
  *	Status register during a slot reset which may cause them to flap
@@ -120,7 +120,7 @@ struct controller {
 	struct mutex state_lock;
 	struct delayed_work button_work;
 
-	struct hotplug_slot *hotplug_slot;	/* hotplug core interface */
+	struct hotplug_slot hotplug_slot;	/* hotplug core interface */
 	struct rw_semaphore reset_lock;
 	int request_result;
 	wait_queue_head_t requester;
@@ -207,7 +207,12 @@ int pciehp_get_raw_indicator_status(struct hotplug_slot *h_slot, u8 *status);
 
 static inline const char *slot_name(struct controller *ctrl)
 {
-	return hotplug_slot_name(ctrl->hotplug_slot);
+	return hotplug_slot_name(&ctrl->hotplug_slot);
+}
+
+static inline struct controller *to_ctrl(struct hotplug_slot *hotplug_slot)
+{
+	return container_of(hotplug_slot, struct controller, hotplug_slot);
 }
 
 #endif				/* _PCIEHP_H */
diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c
index ac5baf887c5d..68b20e667764 100644
--- a/drivers/pci/hotplug/pciehp_core.c
+++ b/drivers/pci/hotplug/pciehp_core.c
@@ -51,19 +51,14 @@ static int get_adapter_status(struct hotplug_slot *slot, u8 *value);
 
 static int init_slot(struct controller *ctrl)
 {
-	struct hotplug_slot *hotplug = NULL;
-	struct hotplug_slot_ops *ops = NULL;
+	struct hotplug_slot_ops *ops;
 	char name[SLOT_NAME_SIZE];
-	int retval = -ENOMEM;
-
-	hotplug = kzalloc(sizeof(*hotplug), GFP_KERNEL);
-	if (!hotplug)
-		goto out;
+	int retval;
 
 	/* Setup hotplug slot ops */
 	ops = kzalloc(sizeof(*ops), GFP_KERNEL);
 	if (!ops)
-		goto out;
+		return -ENOMEM;
 
 	ops->enable_slot = pciehp_sysfs_enable_slot;
 	ops->disable_slot = pciehp_sysfs_disable_slot;
@@ -81,30 +76,24 @@ static int init_slot(struct controller *ctrl)
 	}
 
 	/* register this slot with the hotplug pci core */
-	hotplug->private = ctrl;
-	hotplug->ops = ops;
-	ctrl->hotplug_slot = hotplug;
+	ctrl->hotplug_slot.ops = ops;
 	snprintf(name, SLOT_NAME_SIZE, "%u", PSN(ctrl));
 
-	retval = pci_hp_initialize(hotplug,
+	retval = pci_hp_initialize(&ctrl->hotplug_slot,
 				   ctrl->pcie->port->subordinate, 0, name);
-	if (retval)
-		ctrl_err(ctrl, "pci_hp_initialize failed: error %d\n", retval);
-out:
 	if (retval) {
+		ctrl_err(ctrl, "pci_hp_initialize failed: error %d\n", retval);
 		kfree(ops);
-		kfree(hotplug);
 	}
 	return retval;
 }
 
 static void cleanup_slot(struct controller *ctrl)
 {
-	struct hotplug_slot *hotplug_slot = ctrl->hotplug_slot;
+	struct hotplug_slot *hotplug_slot = &ctrl->hotplug_slot;
 
 	pci_hp_destroy(hotplug_slot);
 	kfree(hotplug_slot->ops);
-	kfree(hotplug_slot);
 }
 
 /*
@@ -112,7 +101,7 @@ static void cleanup_slot(struct controller *ctrl)
  */
 static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
 {
-	struct controller *ctrl = hotplug_slot->private;
+	struct controller *ctrl = to_ctrl(hotplug_slot);
 	struct pci_dev *pdev = ctrl->pcie->port;
 
 	pci_config_pm_runtime_get(pdev);
@@ -123,7 +112,7 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 status)
 
 static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	struct controller *ctrl = hotplug_slot->private;
+	struct controller *ctrl = to_ctrl(hotplug_slot);
 	struct pci_dev *pdev = ctrl->pcie->port;
 
 	pci_config_pm_runtime_get(pdev);
@@ -134,7 +123,7 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 
 static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	struct controller *ctrl = hotplug_slot->private;
+	struct controller *ctrl = to_ctrl(hotplug_slot);
 	struct pci_dev *pdev = ctrl->pcie->port;
 
 	pci_config_pm_runtime_get(pdev);
@@ -145,7 +134,7 @@ static int get_latch_status(struct hotplug_slot *hotplug_slot, u8 *value)
 
 static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	struct controller *ctrl = hotplug_slot->private;
+	struct controller *ctrl = to_ctrl(hotplug_slot);
 	struct pci_dev *pdev = ctrl->pcie->port;
 
 	pci_config_pm_runtime_get(pdev);
@@ -223,7 +212,7 @@ static int pciehp_probe(struct pcie_device *dev)
 	}
 
 	/* Publish to user space */
-	rc = pci_hp_add(ctrl->hotplug_slot);
+	rc = pci_hp_add(&ctrl->hotplug_slot);
 	if (rc) {
 		ctrl_err(ctrl, "Publication to user space failed (%d)\n", rc);
 		goto err_out_shutdown_notification;
@@ -246,7 +235,7 @@ static void pciehp_remove(struct pcie_device *dev)
 {
 	struct controller *ctrl = get_service_data(dev);
 
-	pci_hp_del(ctrl->hotplug_slot);
+	pci_hp_del(&ctrl->hotplug_slot);
 	pcie_shutdown_notification(ctrl);
 	cleanup_slot(ctrl);
 	pciehp_release_ctrl(ctrl);
diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c
index 04f7ad9fffe1..3f3df4c29f6e 100644
--- a/drivers/pci/hotplug/pciehp_ctrl.c
+++ b/drivers/pci/hotplug/pciehp_ctrl.c
@@ -348,7 +348,7 @@ static int pciehp_disable_slot(struct controller *ctrl, bool safe_removal)
 
 int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot)
 {
-	struct controller *ctrl = hotplug_slot->private;
+	struct controller *ctrl = to_ctrl(hotplug_slot);
 
 	mutex_lock(&ctrl->state_lock);
 	switch (ctrl->state) {
@@ -386,7 +386,7 @@ int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot)
 
 int pciehp_sysfs_disable_slot(struct hotplug_slot *hotplug_slot)
 {
-	struct controller *ctrl = hotplug_slot->private;
+	struct controller *ctrl = to_ctrl(hotplug_slot);
 
 	mutex_lock(&ctrl->state_lock);
 	switch (ctrl->state) {
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 0289a3ae4d90..7b5f9db60d9a 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -315,7 +315,7 @@ static int pciehp_link_enable(struct controller *ctrl)
 int pciehp_get_raw_indicator_status(struct hotplug_slot *hotplug_slot,
 				    u8 *status)
 {
-	struct controller *ctrl = hotplug_slot->private;
+	struct controller *ctrl = to_ctrl(hotplug_slot);
 	struct pci_dev *pdev = ctrl_dev(ctrl);
 	u16 slot_ctrl;
 
@@ -328,7 +328,7 @@ int pciehp_get_raw_indicator_status(struct hotplug_slot *hotplug_slot,
 
 int pciehp_get_attention_status(struct hotplug_slot *hotplug_slot, u8 *status)
 {
-	struct controller *ctrl = hotplug_slot->private;
+	struct controller *ctrl = to_ctrl(hotplug_slot);
 	struct pci_dev *pdev = ctrl_dev(ctrl);
 	u16 slot_ctrl;
 
@@ -422,7 +422,7 @@ int pciehp_query_power_fault(struct controller *ctrl)
 int pciehp_set_raw_indicator_status(struct hotplug_slot *hotplug_slot,
 				    u8 status)
 {
-	struct controller *ctrl = hotplug_slot->private;
+	struct controller *ctrl = to_ctrl(hotplug_slot);
 	struct pci_dev *pdev = ctrl_dev(ctrl);
 
 	pci_config_pm_runtime_get(pdev);
@@ -758,7 +758,7 @@ void pcie_clear_hotplug_events(struct controller *ctrl)
  */
 int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, int probe)
 {
-	struct controller *ctrl = hotplug_slot->private;
+	struct controller *ctrl = to_ctrl(hotplug_slot);
 	struct pci_dev *pdev = ctrl_dev(ctrl);
 	u16 stat_mask = 0, ctrl_mask = 0;
 	int rc;
diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
index 5bb63430262e..5070620a4f9f 100644
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -336,7 +336,7 @@ static inline struct pnv_php_slot *to_pnv_php_slot(struct hotplug_slot *slot)
 int pnv_php_set_slot_power_state(struct hotplug_slot *slot,
 				 uint8_t state)
 {
-	struct pnv_php_slot *php_slot = slot->private;
+	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
 	struct opal_msg msg;
 	int ret;
 
@@ -368,7 +368,7 @@ EXPORT_SYMBOL_GPL(pnv_php_set_slot_power_state);
 
 static int pnv_php_get_power_state(struct hotplug_slot *slot, u8 *state)
 {
-	struct pnv_php_slot *php_slot = slot->private;
+	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
 	uint8_t power_state = OPAL_PCI_SLOT_POWER_ON;
 	int ret;
 
@@ -390,7 +390,7 @@ static int pnv_php_get_power_state(struct hotplug_slot *slot, u8 *state)
 
 static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state)
 {
-	struct pnv_php_slot *php_slot = slot->private;
+	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
 	uint8_t presence = OPAL_PCI_SLOT_EMPTY;
 	int ret;
 
@@ -521,7 +521,7 @@ static int pnv_php_enable_slot(struct hotplug_slot *slot)
 
 static int pnv_php_disable_slot(struct hotplug_slot *slot)
 {
-	struct pnv_php_slot *php_slot = slot->private;
+	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
 	int ret;
 
 	if (php_slot->state != PNV_PHP_STATE_POPULATED)
@@ -607,7 +607,6 @@ static struct pnv_php_slot *pnv_php_alloc_slot(struct device_node *dn)
 	php_slot->id	                = id;
 	php_slot->power_state_check     = false;
 	php_slot->slot.ops              = &php_slot_ops;
-	php_slot->slot.private          = php_slot;
 
 	INIT_LIST_HEAD(&php_slot->children);
 	INIT_LIST_HEAD(&php_slot->link);
diff --git a/drivers/pci/hotplug/rpaphp.h b/drivers/pci/hotplug/rpaphp.h
index 26a3dd731b5e..bdc954d70869 100644
--- a/drivers/pci/hotplug/rpaphp.h
+++ b/drivers/pci/hotplug/rpaphp.h
@@ -68,12 +68,17 @@ struct slot {
 	struct device_node *dn;
 	struct pci_bus *bus;
 	struct list_head *pci_devs;
-	struct hotplug_slot *hotplug_slot;
+	struct hotplug_slot hotplug_slot;
 };
 
 extern const struct hotplug_slot_ops rpaphp_hotplug_slot_ops;
 extern struct list_head rpaphp_slot_head;
 
+static inline struct slot *to_slot(struct hotplug_slot *hotplug_slot)
+{
+	return container_of(hotplug_slot, struct slot, hotplug_slot);
+}
+
 /* function prototypes */
 
 /* rpaphp_pci.c */
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 898e78dcd311..bcd5d357ca23 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -52,7 +52,7 @@ module_param_named(debug, rpaphp_debug, bool, 0644);
 static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 value)
 {
 	int rc;
-	struct slot *slot = (struct slot *)hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 
 	switch (value) {
 	case 0:
@@ -79,7 +79,7 @@ static int set_attention_status(struct hotplug_slot *hotplug_slot, u8 value)
 static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
 	int retval, level;
-	struct slot *slot = (struct slot *)hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 
 	retval = rtas_get_power_level(slot->power_domain, &level);
 	if (!retval)
@@ -94,14 +94,14 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
  */
 static int get_attention_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	struct slot *slot = (struct slot *)hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 	*value = slot->attention_status;
 	return 0;
 }
 
 static int get_adapter_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	struct slot *slot = (struct slot *)hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 	int rc, state;
 
 	rc = rpaphp_get_sensor_state(slot, &state);
@@ -409,7 +409,7 @@ static void __exit cleanup_slots(void)
 	list_for_each_entry_safe(slot, next, &rpaphp_slot_head,
 				 rpaphp_slot_list) {
 		list_del(&slot->rpaphp_slot_list);
-		pci_hp_deregister(slot->hotplug_slot);
+		pci_hp_deregister(&slot->hotplug_slot);
 		dealloc_slot_struct(slot);
 	}
 	return;
@@ -434,7 +434,7 @@ static void __exit rpaphp_exit(void)
 
 static int enable_slot(struct hotplug_slot *hotplug_slot)
 {
-	struct slot *slot = (struct slot *)hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 	int state;
 	int retval;
 
@@ -464,7 +464,7 @@ static int enable_slot(struct hotplug_slot *hotplug_slot)
 
 static int disable_slot(struct hotplug_slot *hotplug_slot)
 {
-	struct slot *slot = (struct slot *)hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 	if (slot->state == NOT_CONFIGURED)
 		return -EINVAL;
 
diff --git a/drivers/pci/hotplug/rpaphp_slot.c b/drivers/pci/hotplug/rpaphp_slot.c
index 6e2658ce300b..5282aa3e33c5 100644
--- a/drivers/pci/hotplug/rpaphp_slot.c
+++ b/drivers/pci/hotplug/rpaphp_slot.c
@@ -22,7 +22,6 @@
 void dealloc_slot_struct(struct slot *slot)
 {
 	kfree(slot->name);
-	kfree(slot->hotplug_slot);
 	kfree(slot);
 }
 
@@ -34,22 +33,16 @@ struct slot *alloc_slot_struct(struct device_node *dn,
 	slot = kzalloc(sizeof(struct slot), GFP_KERNEL);
 	if (!slot)
 		goto error_nomem;
-	slot->hotplug_slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL);
-	if (!slot->hotplug_slot)
-		goto error_slot;
 	slot->name = kstrdup(drc_name, GFP_KERNEL);
 	if (!slot->name)
-		goto error_hpslot;
+		goto error_slot;
 	slot->dn = dn;
 	slot->index = drc_index;
 	slot->power_domain = power_domain;
-	slot->hotplug_slot->private = slot;
-	slot->hotplug_slot->ops = &rpaphp_hotplug_slot_ops;
+	slot->hotplug_slot.ops = &rpaphp_hotplug_slot_ops;
 
 	return (slot);
 
-error_hpslot:
-	kfree(slot->hotplug_slot);
 error_slot:
 	kfree(slot);
 error_nomem:
@@ -70,7 +63,7 @@ static int is_registered(struct slot *slot)
 int rpaphp_deregister_slot(struct slot *slot)
 {
 	int retval = 0;
-	struct hotplug_slot *php_slot = slot->hotplug_slot;
+	struct hotplug_slot *php_slot = &slot->hotplug_slot;
 
 	 dbg("%s - Entry: deregistering slot=%s\n",
 		__func__, slot->name);
@@ -86,7 +79,7 @@ EXPORT_SYMBOL_GPL(rpaphp_deregister_slot);
 
 int rpaphp_register_slot(struct slot *slot)
 {
-	struct hotplug_slot *php_slot = slot->hotplug_slot;
+	struct hotplug_slot *php_slot = &slot->hotplug_slot;
 	struct device_node *child;
 	u32 my_index;
 	int retval;
diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c
index d04634b0defe..30ee72268790 100644
--- a/drivers/pci/hotplug/s390_pci_hpc.c
+++ b/drivers/pci/hotplug/s390_pci_hpc.c
@@ -32,10 +32,15 @@ static int zpci_fn_configured(enum zpci_state state)
  */
 struct slot {
 	struct list_head slot_list;
-	struct hotplug_slot *hotplug_slot;
+	struct hotplug_slot hotplug_slot;
 	struct zpci_dev *zdev;
 };
 
+static inline struct slot *to_slot(struct hotplug_slot *hotplug_slot)
+{
+	return container_of(hotplug_slot, struct slot, hotplug_slot);
+}
+
 static inline int slot_configure(struct slot *slot)
 {
 	int ret = sclp_pci_configure(slot->zdev->fid);
@@ -60,7 +65,7 @@ static inline int slot_deconfigure(struct slot *slot)
 
 static int enable_slot(struct hotplug_slot *hotplug_slot)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 	int rc;
 
 	if (slot->zdev->state != ZPCI_FN_STATE_STANDBY)
@@ -88,7 +93,7 @@ out_deconfigure:
 
 static int disable_slot(struct hotplug_slot *hotplug_slot)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 	struct pci_dev *pdev;
 	int rc;
 
@@ -110,7 +115,7 @@ static int disable_slot(struct hotplug_slot *hotplug_slot)
 
 static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value)
 {
-	struct slot *slot = hotplug_slot->private;
+	struct slot *slot = to_slot(hotplug_slot);
 
 	switch (slot->zdev->state) {
 	case ZPCI_FN_STATE_STANDBY:
@@ -139,7 +144,6 @@ static const struct hotplug_slot_ops s390_hotplug_slot_ops = {
 
 int zpci_init_slot(struct zpci_dev *zdev)
 {
-	struct hotplug_slot *hotplug_slot;
 	char name[SLOT_NAME_SIZE];
 	struct slot *slot;
 	int rc;
@@ -151,18 +155,11 @@ int zpci_init_slot(struct zpci_dev *zdev)
 	if (!slot)
 		goto error;
 
-	hotplug_slot = kzalloc(sizeof(*hotplug_slot), GFP_KERNEL);
-	if (!hotplug_slot)
-		goto error_hp;
-	hotplug_slot->private = slot;
-
-	slot->hotplug_slot = hotplug_slot;
 	slot->zdev = zdev;
-
-	hotplug_slot->ops = &s390_hotplug_slot_ops;
+	slot->hotplug_slot.ops = &s390_hotplug_slot_ops;
 
 	snprintf(name, SLOT_NAME_SIZE, "%08x", zdev->fid);
-	rc = pci_hp_register(slot->hotplug_slot, zdev->bus,
+	rc = pci_hp_register(&slot->hotplug_slot, zdev->bus,
 			     ZPCI_DEVFN, name);
 	if (rc)
 		goto error_reg;
@@ -171,8 +168,6 @@ int zpci_init_slot(struct zpci_dev *zdev)
 	return 0;
 
 error_reg:
-	kfree(hotplug_slot);
-error_hp:
 	kfree(slot);
 error:
 	return -ENOMEM;
@@ -187,8 +182,7 @@ void zpci_exit_slot(struct zpci_dev *zdev)
 		if (slot->zdev != zdev)
 			continue;
 		list_del(&slot->slot_list);
-		pci_hp_deregister(slot->hotplug_slot);
-		kfree(slot->hotplug_slot);
+		pci_hp_deregister(&slot->hotplug_slot);
 		kfree(slot);
 	}
 }
diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c
index e103826c83e3..231f5bdd3d2d 100644
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -56,7 +56,7 @@ struct slot {
 	int device_num;
 	struct pci_bus *pci_bus;
 	/* this struct for glue internal only */
-	struct hotplug_slot *hotplug_slot;
+	struct hotplug_slot hotplug_slot;
 	struct list_head hp_list;
 	char physical_path[SN_SLOT_NAME_SIZE];
 };
@@ -88,10 +88,15 @@ static const struct hotplug_slot_ops sn_hotplug_slot_ops = {
 
 static DEFINE_MUTEX(sn_hotplug_mutex);
 
+static struct slot *to_slot(struct hotplug_slot *bss_hotplug_slot)
+{
+	return container_of(bss_hotplug_slot, struct slot, hotplug_slot);
+}
+
 static ssize_t path_show(struct pci_slot *pci_slot, char *buf)
 {
 	int retval = -ENOENT;
-	struct slot *slot = pci_slot->hotplug->private;
+	struct slot *slot = to_slot(pci_slot->hotplug);
 
 	if (!slot)
 		return retval;
@@ -156,7 +161,7 @@ static int sn_pci_bus_valid(struct pci_bus *pci_bus)
 	return -EIO;
 }
 
-static int sn_hp_slot_private_alloc(struct hotplug_slot *bss_hotplug_slot,
+static int sn_hp_slot_private_alloc(struct hotplug_slot **bss_hotplug_slot,
 				    struct pci_bus *pci_bus, int device,
 				    char *name)
 {
@@ -168,7 +173,6 @@ static int sn_hp_slot_private_alloc(struct hotplug_slot *bss_hotplug_slot,
 	slot = kzalloc(sizeof(*slot), GFP_KERNEL);
 	if (!slot)
 		return -ENOMEM;
-	bss_hotplug_slot->private = slot;
 
 	slot->device_num = device;
 	slot->pci_bus = pci_bus;
@@ -179,8 +183,8 @@ static int sn_hp_slot_private_alloc(struct hotplug_slot *bss_hotplug_slot,
 
 	sn_generate_path(pci_bus, slot->physical_path);
 
-	slot->hotplug_slot = bss_hotplug_slot;
 	list_add(&slot->hp_list, &sn_hp_list);
+	*bss_hotplug_slot = &slot->hotplug_slot;
 
 	return 0;
 }
@@ -192,10 +196,9 @@ static struct hotplug_slot *sn_hp_destroy(void)
 	struct hotplug_slot *bss_hotplug_slot = NULL;
 
 	list_for_each_entry(slot, &sn_hp_list, hp_list) {
-		bss_hotplug_slot = slot->hotplug_slot;
+		bss_hotplug_slot = &slot->hotplug_slot;
 		pci_slot = bss_hotplug_slot->pci_slot;
-		list_del(&((struct slot *)bss_hotplug_slot->private)->
-			 hp_list);
+		list_del(&slot->hp_list);
 		sysfs_remove_file(&pci_slot->kobj,
 				  &sn_slot_path_attr.attr);
 		break;
@@ -227,7 +230,7 @@ static void sn_bus_free_data(struct pci_dev *dev)
 static int sn_slot_enable(struct hotplug_slot *bss_hotplug_slot,
 			  int device_num, char **ssdt)
 {
-	struct slot *slot = bss_hotplug_slot->private;
+	struct slot *slot = to_slot(bss_hotplug_slot);
 	struct pcibus_info *pcibus_info;
 	struct pcibr_slot_enable_resp resp;
 	int rc;
@@ -267,7 +270,7 @@ static int sn_slot_enable(struct hotplug_slot *bss_hotplug_slot,
 static int sn_slot_disable(struct hotplug_slot *bss_hotplug_slot,
 			   int device_num, int action)
 {
-	struct slot *slot = bss_hotplug_slot->private;
+	struct slot *slot = to_slot(bss_hotplug_slot);
 	struct pcibus_info *pcibus_info;
 	struct pcibr_slot_disable_resp resp;
 	int rc;
@@ -323,7 +326,7 @@ static int sn_slot_disable(struct hotplug_slot *bss_hotplug_slot,
  */
 static int enable_slot(struct hotplug_slot *bss_hotplug_slot)
 {
-	struct slot *slot = bss_hotplug_slot->private;
+	struct slot *slot = to_slot(bss_hotplug_slot);
 	struct pci_bus *new_bus = NULL;
 	struct pci_dev *dev;
 	int num_funcs;
@@ -469,7 +472,7 @@ static int enable_slot(struct hotplug_slot *bss_hotplug_slot)
 
 static int disable_slot(struct hotplug_slot *bss_hotplug_slot)
 {
-	struct slot *slot = bss_hotplug_slot->private;
+	struct slot *slot = to_slot(bss_hotplug_slot);
 	struct pci_dev *dev, *temp;
 	int rc;
 	acpi_handle ssdt_hdl = NULL;
@@ -571,7 +574,7 @@ static int disable_slot(struct hotplug_slot *bss_hotplug_slot)
 static inline int get_power_status(struct hotplug_slot *bss_hotplug_slot,
 				   u8 *value)
 {
-	struct slot *slot = bss_hotplug_slot->private;
+	struct slot *slot = to_slot(bss_hotplug_slot);
 	struct pcibus_info *pcibus_info;
 	u32 power;
 
@@ -585,8 +588,7 @@ static inline int get_power_status(struct hotplug_slot *bss_hotplug_slot,
 
 static void sn_release_slot(struct hotplug_slot *bss_hotplug_slot)
 {
-	kfree(bss_hotplug_slot->private);
-	kfree(bss_hotplug_slot);
+	kfree(to_slot(bss_hotplug_slot));
 }
 
 static int sn_hotplug_slot_register(struct pci_bus *pci_bus)
@@ -606,14 +608,7 @@ static int sn_hotplug_slot_register(struct pci_bus *pci_bus)
 		if (sn_pci_slot_valid(pci_bus, device) != 1)
 			continue;
 
-		bss_hotplug_slot = kzalloc(sizeof(*bss_hotplug_slot),
-					   GFP_KERNEL);
-		if (!bss_hotplug_slot) {
-			rc = -ENOMEM;
-			goto alloc_err;
-		}
-
-		if (sn_hp_slot_private_alloc(bss_hotplug_slot,
+		if (sn_hp_slot_private_alloc(&bss_hotplug_slot,
 					     pci_bus, device, name)) {
 			rc = -ENOMEM;
 			goto alloc_err;
@@ -628,7 +623,7 @@ static int sn_hotplug_slot_register(struct pci_bus *pci_bus)
 		rc = sysfs_create_file(&pci_slot->kobj,
 				       &sn_slot_path_attr.attr);
 		if (rc)
-			goto register_err;
+			goto alloc_err;
 	}
 	pci_dbg(pci_bus->self, "Registered bus with hotplug\n");
 	return rc;
@@ -637,14 +632,11 @@ register_err:
 	pci_dbg(pci_bus->self, "bus failed to register with err = %d\n",
 		rc);
 
-alloc_err:
-	if (rc == -ENOMEM)
-		pci_dbg(pci_bus->self, "Memory allocation error\n");
-
 	/* destroy THIS element */
-	if (bss_hotplug_slot)
-		sn_release_slot(bss_hotplug_slot);
+	sn_hp_destroy();
+	sn_release_slot(bss_hotplug_slot);
 
+alloc_err:
 	/* destroy anything else on the list */
 	while ((bss_hotplug_slot = sn_hp_destroy())) {
 		pci_hp_deregister(bss_hotplug_slot);
diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h
index a7bb816e6f25..f7f13ee5d06e 100644
--- a/drivers/pci/hotplug/shpchp.h
+++ b/drivers/pci/hotplug/shpchp.h
@@ -73,7 +73,7 @@ struct slot {
 	u8 pwr_save;
 	struct controller *ctrl;
 	const struct hpc_ops *hpc_ops;
-	struct hotplug_slot *hotplug_slot;
+	struct hotplug_slot hotplug_slot;
 	struct list_head	slot_list;
 	struct delayed_work work;	/* work for button event */
 	struct mutex lock;
@@ -171,7 +171,7 @@ int shpc_init(struct controller *ctrl, struct pci_dev *pdev);
 
 static inline const char *slot_name(struct slot *slot)
 {
-	return hotplug_slot_name(slot->hotplug_slot);
+	return hotplug_slot_name(&slot->hotplug_slot);
 }
 
 struct ctrl_reg {
@@ -209,7 +209,7 @@ enum ctrl_offsets {
 
 static inline struct slot *get_slot(struct hotplug_slot *hotplug_slot)
 {
-	return hotplug_slot->private;
+	return container_of(hotplug_slot, struct slot, hotplug_slot);
 }
 
 static inline struct slot *shpchp_find_slot(struct controller *ctrl, u8 device)
diff --git a/drivers/pci/hotplug/shpchp_core.c b/drivers/pci/hotplug/shpchp_core.c
index b7181b7e7b98..81a918d47895 100644
--- a/drivers/pci/hotplug/shpchp_core.c
+++ b/drivers/pci/hotplug/shpchp_core.c
@@ -76,12 +76,7 @@ static int init_slots(struct controller *ctrl)
 			goto error;
 		}
 
-		hotplug_slot = kzalloc(sizeof(*hotplug_slot), GFP_KERNEL);
-		if (!hotplug_slot) {
-			retval = -ENOMEM;
-			goto error_slot;
-		}
-		slot->hotplug_slot = hotplug_slot;
+		hotplug_slot = &slot->hotplug_slot;
 
 		slot->hp_slot = i;
 		slot->ctrl = ctrl;
@@ -93,14 +88,13 @@ static int init_slots(struct controller *ctrl)
 		slot->wq = alloc_workqueue("shpchp-%d", 0, 0, slot->number);
 		if (!slot->wq) {
 			retval = -ENOMEM;
-			goto error_hpslot;
+			goto error_slot;
 		}
 
 		mutex_init(&slot->lock);
 		INIT_DELAYED_WORK(&slot->work, shpchp_queue_pushbutton_work);
 
 		/* register this slot with the hotplug pci core */
-		hotplug_slot->private = slot;
 		snprintf(name, SLOT_NAME_SIZE, "%d", slot->number);
 		hotplug_slot->ops = &shpchp_hotplug_slot_ops;
 
@@ -108,7 +102,7 @@ static int init_slots(struct controller *ctrl)
 			 pci_domain_nr(ctrl->pci_dev->subordinate),
 			 slot->bus, slot->device, slot->hp_slot, slot->number,
 			 ctrl->slot_device_offset);
-		retval = pci_hp_register(slot->hotplug_slot,
+		retval = pci_hp_register(hotplug_slot,
 				ctrl->pci_dev->subordinate, slot->device, name);
 		if (retval) {
 			ctrl_err(ctrl, "pci_hp_register failed with error %d\n",
@@ -127,8 +121,6 @@ static int init_slots(struct controller *ctrl)
 	return 0;
 error_slotwq:
 	destroy_workqueue(slot->wq);
-error_hpslot:
-	kfree(hotplug_slot);
 error_slot:
 	kfree(slot);
 error:
@@ -143,8 +135,7 @@ void cleanup_slots(struct controller *ctrl)
 		list_del(&slot->slot_list);
 		cancel_delayed_work(&slot->work);
 		destroy_workqueue(slot->wq);
-		pci_hp_deregister(slot->hotplug_slot);
-		kfree(slot->hotplug_slot);
+		pci_hp_deregister(&slot->hotplug_slot);
 		kfree(slot);
 	}
 }
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 019b037319e3..93ee2d5466f8 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -254,7 +254,7 @@ struct asus_wmi {
 	int asus_hwmon_num_fans;
 	int asus_hwmon_pwm;
 
-	struct hotplug_slot *hotplug_slot;
+	struct hotplug_slot hotplug_slot;
 	struct mutex hotplug_lock;
 	struct mutex wmi_lock;
 	struct workqueue_struct *hotplug_workqueue;
@@ -753,7 +753,7 @@ static void asus_rfkill_hotplug(struct asus_wmi *asus)
 	if (asus->wlan.rfkill)
 		rfkill_set_sw_state(asus->wlan.rfkill, blocked);
 
-	if (asus->hotplug_slot) {
+	if (asus->hotplug_slot.ops) {
 		bus = pci_find_bus(0, 1);
 		if (!bus) {
 			pr_warn("Unable to find PCI bus 1?\n");
@@ -858,7 +858,8 @@ static void asus_unregister_rfkill_notifier(struct asus_wmi *asus, char *node)
 static int asus_get_adapter_status(struct hotplug_slot *hotplug_slot,
 				   u8 *value)
 {
-	struct asus_wmi *asus = hotplug_slot->private;
+	struct asus_wmi *asus = container_of(hotplug_slot,
+					     struct asus_wmi, hotplug_slot);
 	int result = asus_wmi_get_devstate_simple(asus, ASUS_WMI_DEVID_WLAN);
 
 	if (result < 0)
@@ -898,14 +899,9 @@ static int asus_setup_pci_hotplug(struct asus_wmi *asus)
 
 	INIT_WORK(&asus->hotplug_work, asus_hotplug_work);
 
-	asus->hotplug_slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL);
-	if (!asus->hotplug_slot)
-		goto error_slot;
+	asus->hotplug_slot.ops = &asus_hotplug_slot_ops;
 
-	asus->hotplug_slot->private = asus;
-	asus->hotplug_slot->ops = &asus_hotplug_slot_ops;
-
-	ret = pci_hp_register(asus->hotplug_slot, bus, 0, "asus-wifi");
+	ret = pci_hp_register(&asus->hotplug_slot, bus, 0, "asus-wifi");
 	if (ret) {
 		pr_err("Unable to register hotplug slot - %d\n", ret);
 		goto error_register;
@@ -914,9 +910,7 @@ static int asus_setup_pci_hotplug(struct asus_wmi *asus)
 	return 0;
 
 error_register:
-	kfree(asus->hotplug_slot);
-	asus->hotplug_slot = NULL;
-error_slot:
+	asus->hotplug_slot.ops = NULL;
 	destroy_workqueue(asus->hotplug_workqueue);
 error_workqueue:
 	return ret;
@@ -1044,10 +1038,8 @@ static void asus_wmi_rfkill_exit(struct asus_wmi *asus)
 	 * asus_unregister_rfkill_notifier()
 	 */
 	asus_rfkill_hotplug(asus);
-	if (asus->hotplug_slot) {
-		pci_hp_deregister(asus->hotplug_slot);
-		kfree(asus->hotplug_slot);
-	}
+	if (asus->hotplug_slot.ops)
+		pci_hp_deregister(&asus->hotplug_slot);
 	if (asus->hotplug_workqueue)
 		destroy_workqueue(asus->hotplug_workqueue);
 
diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 028b20f82962..e6946a9beb5a 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c
@@ -177,7 +177,7 @@ struct eeepc_laptop {
 	struct rfkill *wwan3g_rfkill;
 	struct rfkill *wimax_rfkill;
 
-	struct hotplug_slot *hotplug_slot;
+	struct hotplug_slot hotplug_slot;
 	struct mutex hotplug_lock;
 
 	struct led_classdev tpd_led;
@@ -582,7 +582,7 @@ static void eeepc_rfkill_hotplug(struct eeepc_laptop *eeepc, acpi_handle handle)
 	mutex_lock(&eeepc->hotplug_lock);
 	pci_lock_rescan_remove();
 
-	if (!eeepc->hotplug_slot)
+	if (!eeepc->hotplug_slot.ops)
 		goto out_unlock;
 
 	port = acpi_get_pci_dev(handle);
@@ -715,8 +715,11 @@ static void eeepc_unregister_rfkill_notifier(struct eeepc_laptop *eeepc,
 static int eeepc_get_adapter_status(struct hotplug_slot *hotplug_slot,
 				    u8 *value)
 {
-	struct eeepc_laptop *eeepc = hotplug_slot->private;
-	int val = get_acpi(eeepc, CM_ASL_WLAN);
+	struct eeepc_laptop *eeepc;
+	int val;
+
+	eeepc = container_of(hotplug_slot, struct eeepc_laptop, hotplug_slot);
+	val = get_acpi(eeepc, CM_ASL_WLAN);
 
 	if (val == 1 || val == 0)
 		*value = val;
@@ -741,14 +744,9 @@ static int eeepc_setup_pci_hotplug(struct eeepc_laptop *eeepc)
 		return -ENODEV;
 	}
 
-	eeepc->hotplug_slot = kzalloc(sizeof(struct hotplug_slot), GFP_KERNEL);
-	if (!eeepc->hotplug_slot)
-		goto error_slot;
+	eeepc->hotplug_slot.ops = &eeepc_hotplug_slot_ops;
 
-	eeepc->hotplug_slot->private = eeepc;
-	eeepc->hotplug_slot->ops = &eeepc_hotplug_slot_ops;
-
-	ret = pci_hp_register(eeepc->hotplug_slot, bus, 0, "eeepc-wifi");
+	ret = pci_hp_register(&eeepc->hotplug_slot, bus, 0, "eeepc-wifi");
 	if (ret) {
 		pr_err("Unable to register hotplug slot - %d\n", ret);
 		goto error_register;
@@ -757,9 +755,7 @@ static int eeepc_setup_pci_hotplug(struct eeepc_laptop *eeepc)
 	return 0;
 
 error_register:
-	kfree(eeepc->hotplug_slot);
-	eeepc->hotplug_slot = NULL;
-error_slot:
+	eeepc->hotplug_slot.ops = NULL;
 	return ret;
 }
 
@@ -820,10 +816,8 @@ static void eeepc_rfkill_exit(struct eeepc_laptop *eeepc)
 		eeepc->wlan_rfkill = NULL;
 	}
 
-	if (eeepc->hotplug_slot) {
-		pci_hp_deregister(eeepc->hotplug_slot);
-		kfree(eeepc->hotplug_slot);
-	}
+	if (eeepc->hotplug_slot.ops)
+		pci_hp_deregister(&eeepc->hotplug_slot);
 
 	if (eeepc->bluetooth_rfkill) {
 		rfkill_unregister(eeepc->bluetooth_rfkill);
diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h
index 6f07a4e1de8d..7acc9f91e72b 100644
--- a/include/linux/pci_hotplug.h
+++ b/include/linux/pci_hotplug.h
@@ -50,14 +50,11 @@ struct hotplug_slot_ops {
 /**
  * struct hotplug_slot - used to register a physical slot with the hotplug pci core
  * @ops: pointer to the &struct hotplug_slot_ops to be used for this slot
- * @private: used by the hotplug pci controller driver to store whatever it
- * needs.
  * @owner: The module owner of this structure
  * @mod_name: The module name (KBUILD_MODNAME) of this structure
  */
 struct hotplug_slot {
 	const struct hotplug_slot_ops	*ops;
-	void				*private;
 
 	/* Variables below this are for use only by the hotplug pci core. */
 	struct list_head		slot_list;
-- 
cgit v1.2.3


From 8fce3331702316d4bcfeb0771c09ac75d2192bbc Mon Sep 17 00:00:00 2001
From: Jose Abreu <Jose.Abreu@synopsys.com>
Date: Mon, 17 Sep 2018 09:22:56 +0100
Subject: net: stmmac: Rework coalesce timer and fix multi-queue races

This follows David Miller advice and tries to fix coalesce timer in
multi-queue scenarios.

We are now using per-queue coalesce values and per-queue TX timer.

Coalesce timer default values was changed to 1ms and the coalesce frames
to 25.

Tested in B2B setup between XGMAC2 and GMAC5.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Fixes: 	ce736788e8a ("net: stmmac: adding multiple buffers for TX")
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Neil Armstrong <narmstrong@baylibre.com>
Cc: Jerome Brunet <jbrunet@baylibre.com>
Cc: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/common.h      |   4 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |  14 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 233 ++++++++++++----------
 include/linux/stmmac.h                            |   1 +
 4 files changed, 146 insertions(+), 106 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 1854f270ad66..b1b305f8f414 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -258,10 +258,10 @@ struct stmmac_safety_stats {
 #define MAX_DMA_RIWT		0xff
 #define MIN_DMA_RIWT		0x20
 /* Tx coalesce parameters */
-#define STMMAC_COAL_TX_TIMER	40000
+#define STMMAC_COAL_TX_TIMER	1000
 #define STMMAC_MAX_COAL_TX_TICK	100000
 #define STMMAC_TX_MAX_FRAMES	256
-#define STMMAC_TX_FRAMES	64
+#define STMMAC_TX_FRAMES	25
 
 /* Packets types */
 enum packets_types {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index c0a855b7ab3b..63e1064b27a2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -48,6 +48,8 @@ struct stmmac_tx_info {
 
 /* Frequently used values are kept adjacent for cache effect */
 struct stmmac_tx_queue {
+	u32 tx_count_frames;
+	struct timer_list txtimer;
 	u32 queue_index;
 	struct stmmac_priv *priv_data;
 	struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp;
@@ -73,7 +75,14 @@ struct stmmac_rx_queue {
 	u32 rx_zeroc_thresh;
 	dma_addr_t dma_rx_phy;
 	u32 rx_tail_addr;
+};
+
+struct stmmac_channel {
 	struct napi_struct napi ____cacheline_aligned_in_smp;
+	struct stmmac_priv *priv_data;
+	u32 index;
+	int has_rx;
+	int has_tx;
 };
 
 struct stmmac_tc_entry {
@@ -109,14 +118,12 @@ struct stmmac_pps_cfg {
 
 struct stmmac_priv {
 	/* Frequently used values are kept adjacent for cache effect */
-	u32 tx_count_frames;
 	u32 tx_coal_frames;
 	u32 tx_coal_timer;
 
 	int tx_coalesce;
 	int hwts_tx_en;
 	bool tx_path_in_lpi_mode;
-	struct timer_list txtimer;
 	bool tso;
 
 	unsigned int dma_buf_sz;
@@ -137,6 +144,9 @@ struct stmmac_priv {
 	/* TX Queue */
 	struct stmmac_tx_queue tx_queue[MTL_MAX_TX_QUEUES];
 
+	/* Generic channel for NAPI */
+	struct stmmac_channel channel[STMMAC_CH_MAX];
+
 	bool oldlink;
 	int speed;
 	int oldduplex;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 9f458bb16f2a..ab9cc0143ff2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -148,12 +148,14 @@ static void stmmac_verify_args(void)
 static void stmmac_disable_all_queues(struct stmmac_priv *priv)
 {
 	u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
+	u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
+	u32 maxq = max(rx_queues_cnt, tx_queues_cnt);
 	u32 queue;
 
-	for (queue = 0; queue < rx_queues_cnt; queue++) {
-		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	for (queue = 0; queue < maxq; queue++) {
+		struct stmmac_channel *ch = &priv->channel[queue];
 
-		napi_disable(&rx_q->napi);
+		napi_disable(&ch->napi);
 	}
 }
 
@@ -164,12 +166,14 @@ static void stmmac_disable_all_queues(struct stmmac_priv *priv)
 static void stmmac_enable_all_queues(struct stmmac_priv *priv)
 {
 	u32 rx_queues_cnt = priv->plat->rx_queues_to_use;
+	u32 tx_queues_cnt = priv->plat->tx_queues_to_use;
+	u32 maxq = max(rx_queues_cnt, tx_queues_cnt);
 	u32 queue;
 
-	for (queue = 0; queue < rx_queues_cnt; queue++) {
-		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	for (queue = 0; queue < maxq; queue++) {
+		struct stmmac_channel *ch = &priv->channel[queue];
 
-		napi_enable(&rx_q->napi);
+		napi_enable(&ch->napi);
 	}
 }
 
@@ -1843,18 +1847,18 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
  * @queue: TX queue index
  * Description: it reclaims the transmit resources after transmission completes.
  */
-static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
+static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
 {
 	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
 	unsigned int bytes_compl = 0, pkts_compl = 0;
-	unsigned int entry;
+	unsigned int entry, count = 0;
 
-	netif_tx_lock(priv->dev);
+	__netif_tx_lock_bh(netdev_get_tx_queue(priv->dev, queue));
 
 	priv->xstats.tx_clean++;
 
 	entry = tx_q->dirty_tx;
-	while (entry != tx_q->cur_tx) {
+	while ((entry != tx_q->cur_tx) && (count < budget)) {
 		struct sk_buff *skb = tx_q->tx_skbuff[entry];
 		struct dma_desc *p;
 		int status;
@@ -1870,6 +1874,8 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
 		if (unlikely(status & tx_dma_own))
 			break;
 
+		count++;
+
 		/* Make sure descriptor fields are read after reading
 		 * the own bit.
 		 */
@@ -1937,7 +1943,10 @@ static void stmmac_tx_clean(struct stmmac_priv *priv, u32 queue)
 		stmmac_enable_eee_mode(priv);
 		mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
 	}
-	netif_tx_unlock(priv->dev);
+
+	__netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue));
+
+	return count;
 }
 
 /**
@@ -2020,6 +2029,33 @@ static bool stmmac_safety_feat_interrupt(struct stmmac_priv *priv)
 	return false;
 }
 
+static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan)
+{
+	int status = stmmac_dma_interrupt_status(priv, priv->ioaddr,
+						 &priv->xstats, chan);
+	struct stmmac_channel *ch = &priv->channel[chan];
+	bool needs_work = false;
+
+	if ((status & handle_rx) && ch->has_rx) {
+		needs_work = true;
+	} else {
+		status &= ~handle_rx;
+	}
+
+	if ((status & handle_tx) && ch->has_tx) {
+		needs_work = true;
+	} else {
+		status &= ~handle_tx;
+	}
+
+	if (needs_work && napi_schedule_prep(&ch->napi)) {
+		stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
+		__napi_schedule(&ch->napi);
+	}
+
+	return status;
+}
+
 /**
  * stmmac_dma_interrupt - DMA ISR
  * @priv: driver private structure
@@ -2034,57 +2070,14 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 	u32 channels_to_check = tx_channel_count > rx_channel_count ?
 				tx_channel_count : rx_channel_count;
 	u32 chan;
-	bool poll_scheduled = false;
 	int status[max_t(u32, MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES)];
 
 	/* Make sure we never check beyond our status buffer. */
 	if (WARN_ON_ONCE(channels_to_check > ARRAY_SIZE(status)))
 		channels_to_check = ARRAY_SIZE(status);
 
-	/* Each DMA channel can be used for rx and tx simultaneously, yet
-	 * napi_struct is embedded in struct stmmac_rx_queue rather than in a
-	 * stmmac_channel struct.
-	 * Because of this, stmmac_poll currently checks (and possibly wakes)
-	 * all tx queues rather than just a single tx queue.
-	 */
 	for (chan = 0; chan < channels_to_check; chan++)
-		status[chan] = stmmac_dma_interrupt_status(priv, priv->ioaddr,
-				&priv->xstats, chan);
-
-	for (chan = 0; chan < rx_channel_count; chan++) {
-		if (likely(status[chan] & handle_rx)) {
-			struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
-
-			if (likely(napi_schedule_prep(&rx_q->napi))) {
-				stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
-				__napi_schedule(&rx_q->napi);
-				poll_scheduled = true;
-			}
-		}
-	}
-
-	/* If we scheduled poll, we already know that tx queues will be checked.
-	 * If we didn't schedule poll, see if any DMA channel (used by tx) has a
-	 * completed transmission, if so, call stmmac_poll (once).
-	 */
-	if (!poll_scheduled) {
-		for (chan = 0; chan < tx_channel_count; chan++) {
-			if (status[chan] & handle_tx) {
-				/* It doesn't matter what rx queue we choose
-				 * here. We use 0 since it always exists.
-				 */
-				struct stmmac_rx_queue *rx_q =
-					&priv->rx_queue[0];
-
-				if (likely(napi_schedule_prep(&rx_q->napi))) {
-					stmmac_disable_dma_irq(priv,
-							priv->ioaddr, chan);
-					__napi_schedule(&rx_q->napi);
-				}
-				break;
-			}
-		}
-	}
+		status[chan] = stmmac_napi_check(priv, chan);
 
 	for (chan = 0; chan < tx_channel_count; chan++) {
 		if (unlikely(status[chan] & tx_hard_error_bump_tc)) {
@@ -2233,6 +2226,13 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 	return ret;
 }
 
+static void stmmac_tx_timer_arm(struct stmmac_priv *priv, u32 queue)
+{
+	struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue];
+
+	mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer));
+}
+
 /**
  * stmmac_tx_timer - mitigation sw timer for tx.
  * @data: data pointer
@@ -2241,13 +2241,14 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
  */
 static void stmmac_tx_timer(struct timer_list *t)
 {
-	struct stmmac_priv *priv = from_timer(priv, t, txtimer);
-	u32 tx_queues_count = priv->plat->tx_queues_to_use;
-	u32 queue;
+	struct stmmac_tx_queue *tx_q = from_timer(tx_q, t, txtimer);
+	struct stmmac_priv *priv = tx_q->priv_data;
+	struct stmmac_channel *ch;
+
+	ch = &priv->channel[tx_q->queue_index];
 
-	/* let's scan all the tx queues */
-	for (queue = 0; queue < tx_queues_count; queue++)
-		stmmac_tx_clean(priv, queue);
+	if (likely(napi_schedule_prep(&ch->napi)))
+		__napi_schedule(&ch->napi);
 }
 
 /**
@@ -2260,11 +2261,17 @@ static void stmmac_tx_timer(struct timer_list *t)
  */
 static void stmmac_init_tx_coalesce(struct stmmac_priv *priv)
 {
+	u32 tx_channel_count = priv->plat->tx_queues_to_use;
+	u32 chan;
+
 	priv->tx_coal_frames = STMMAC_TX_FRAMES;
 	priv->tx_coal_timer = STMMAC_COAL_TX_TIMER;
-	timer_setup(&priv->txtimer, stmmac_tx_timer, 0);
-	priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer);
-	add_timer(&priv->txtimer);
+
+	for (chan = 0; chan < tx_channel_count; chan++) {
+		struct stmmac_tx_queue *tx_q = &priv->tx_queue[chan];
+
+		timer_setup(&tx_q->txtimer, stmmac_tx_timer, 0);
+	}
 }
 
 static void stmmac_set_rings_length(struct stmmac_priv *priv)
@@ -2592,6 +2599,7 @@ static void stmmac_hw_teardown(struct net_device *dev)
 static int stmmac_open(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
+	u32 chan;
 	int ret;
 
 	stmmac_check_ether_addr(priv);
@@ -2688,7 +2696,9 @@ irq_error:
 	if (dev->phydev)
 		phy_stop(dev->phydev);
 
-	del_timer_sync(&priv->txtimer);
+	for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
+		del_timer_sync(&priv->tx_queue[chan].txtimer);
+
 	stmmac_hw_teardown(dev);
 init_error:
 	free_dma_desc_resources(priv);
@@ -2708,6 +2718,7 @@ dma_desc_error:
 static int stmmac_release(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
+	u32 chan;
 
 	if (priv->eee_enabled)
 		del_timer_sync(&priv->eee_ctrl_timer);
@@ -2722,7 +2733,8 @@ static int stmmac_release(struct net_device *dev)
 
 	stmmac_disable_all_queues(priv);
 
-	del_timer_sync(&priv->txtimer);
+	for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++)
+		del_timer_sync(&priv->tx_queue[chan].txtimer);
 
 	/* Free the IRQ lines */
 	free_irq(dev->irq, dev);
@@ -2936,14 +2948,13 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	priv->xstats.tx_tso_nfrags += nfrags;
 
 	/* Manage tx mitigation */
-	priv->tx_count_frames += nfrags + 1;
-	if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
-		mod_timer(&priv->txtimer,
-			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
-	} else {
-		priv->tx_count_frames = 0;
+	tx_q->tx_count_frames += nfrags + 1;
+	if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
 		stmmac_set_tx_ic(priv, desc);
 		priv->xstats.tx_set_ic_bit++;
+		tx_q->tx_count_frames = 0;
+	} else {
+		stmmac_tx_timer_arm(priv, queue);
 	}
 
 	skb_tx_timestamp(skb);
@@ -3146,14 +3157,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * This approach takes care about the fragments: desc is the first
 	 * element in case of no SG.
 	 */
-	priv->tx_count_frames += nfrags + 1;
-	if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
-		mod_timer(&priv->txtimer,
-			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
-	} else {
-		priv->tx_count_frames = 0;
+	tx_q->tx_count_frames += nfrags + 1;
+	if (priv->tx_coal_frames <= tx_q->tx_count_frames) {
 		stmmac_set_tx_ic(priv, desc);
 		priv->xstats.tx_set_ic_bit++;
+		tx_q->tx_count_frames = 0;
+	} else {
+		stmmac_tx_timer_arm(priv, queue);
 	}
 
 	skb_tx_timestamp(skb);
@@ -3199,6 +3209,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len);
 
 	stmmac_enable_dma_transmission(priv, priv->ioaddr);
+
 	stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
 
 	return NETDEV_TX_OK;
@@ -3319,6 +3330,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
 static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 {
 	struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	struct stmmac_channel *ch = &priv->channel[queue];
 	unsigned int entry = rx_q->cur_rx;
 	int coe = priv->hw->rx_csum;
 	unsigned int next_entry;
@@ -3491,7 +3503,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
 			else
 				skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-			napi_gro_receive(&rx_q->napi, skb);
+			napi_gro_receive(&ch->napi, skb);
 
 			priv->dev->stats.rx_packets++;
 			priv->dev->stats.rx_bytes += frame_len;
@@ -3514,27 +3526,33 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
  *  Description :
  *  To look at the incoming frames and clear the tx resources.
  */
-static int stmmac_poll(struct napi_struct *napi, int budget)
+static int stmmac_napi_poll(struct napi_struct *napi, int budget)
 {
-	struct stmmac_rx_queue *rx_q =
-		container_of(napi, struct stmmac_rx_queue, napi);
-	struct stmmac_priv *priv = rx_q->priv_data;
-	u32 tx_count = priv->plat->tx_queues_to_use;
-	u32 chan = rx_q->queue_index;
-	int work_done = 0;
-	u32 queue;
+	struct stmmac_channel *ch =
+		container_of(napi, struct stmmac_channel, napi);
+	struct stmmac_priv *priv = ch->priv_data;
+	int work_done = 0, work_rem = budget;
+	u32 chan = ch->index;
 
 	priv->xstats.napi_poll++;
 
-	/* check all the queues */
-	for (queue = 0; queue < tx_count; queue++)
-		stmmac_tx_clean(priv, queue);
+	if (ch->has_tx) {
+		int done = stmmac_tx_clean(priv, work_rem, chan);
 
-	work_done = stmmac_rx(priv, budget, rx_q->queue_index);
-	if (work_done < budget) {
-		napi_complete_done(napi, work_done);
-		stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
+		work_done += done;
+		work_rem -= done;
+	}
+
+	if (ch->has_rx) {
+		int done = stmmac_rx(priv, work_rem, chan);
+
+		work_done += done;
+		work_rem -= done;
 	}
+
+	if (work_done < budget && napi_complete_done(napi, work_done))
+		stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
+
 	return work_done;
 }
 
@@ -4198,8 +4216,8 @@ int stmmac_dvr_probe(struct device *device,
 {
 	struct net_device *ndev = NULL;
 	struct stmmac_priv *priv;
+	u32 queue, maxq;
 	int ret = 0;
-	u32 queue;
 
 	ndev = alloc_etherdev_mqs(sizeof(struct stmmac_priv),
 				  MTL_MAX_TX_QUEUES,
@@ -4322,11 +4340,22 @@ int stmmac_dvr_probe(struct device *device,
 			 "Enable RX Mitigation via HW Watchdog Timer\n");
 	}
 
-	for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) {
-		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	/* Setup channels NAPI */
+	maxq = max(priv->plat->rx_queues_to_use, priv->plat->tx_queues_to_use);
 
-		netif_napi_add(ndev, &rx_q->napi, stmmac_poll,
-			       (8 * priv->plat->rx_queues_to_use));
+	for (queue = 0; queue < maxq; queue++) {
+		struct stmmac_channel *ch = &priv->channel[queue];
+
+		ch->priv_data = priv;
+		ch->index = queue;
+
+		if (queue < priv->plat->rx_queues_to_use)
+			ch->has_rx = true;
+		if (queue < priv->plat->tx_queues_to_use)
+			ch->has_tx = true;
+
+		netif_napi_add(ndev, &ch->napi, stmmac_napi_poll,
+			       NAPI_POLL_WEIGHT);
 	}
 
 	mutex_init(&priv->lock);
@@ -4372,10 +4401,10 @@ error_netdev_register:
 	    priv->hw->pcs != STMMAC_PCS_RTBI)
 		stmmac_mdio_unregister(ndev);
 error_mdio_register:
-	for (queue = 0; queue < priv->plat->rx_queues_to_use; queue++) {
-		struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+	for (queue = 0; queue < maxq; queue++) {
+		struct stmmac_channel *ch = &priv->channel[queue];
 
-		netif_napi_del(&rx_q->napi);
+		netif_napi_del(&ch->napi);
 	}
 error_hw_init:
 	destroy_workqueue(priv->wq);
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index c43e9a01b892..7ddfc65586b0 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -30,6 +30,7 @@
 
 #define MTL_MAX_RX_QUEUES	8
 #define MTL_MAX_TX_QUEUES	8
+#define STMMAC_CH_MAX		8
 
 #define STMMAC_RX_COE_NONE	0
 #define STMMAC_RX_COE_TYPE1	1
-- 
cgit v1.2.3


From 14d73416792afa84f6a7245ee474d2432069da56 Mon Sep 17 00:00:00 2001
From: Li RongQing <lirongqing@baidu.com>
Date: Mon, 17 Sep 2018 18:46:55 +0800
Subject: veth: rename pcpu_vstats as pcpu_lstats

struct pcpu_vstats and pcpu_lstats have same members and
usage, and pcpu_lstats is used in many files, so rename
pcpu_vstats as pcpu_lstats to reduce duplicate definition

Signed-off-by: Zhang Yu <zhangyu31@baidu.com>
Signed-off-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/veth.c        | 22 ++++++++--------------
 include/linux/netdevice.h |  1 -
 2 files changed, 8 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 8fc64b67f01e..224c56a4e2b1 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -37,12 +37,6 @@
 #define VETH_XDP_TX		BIT(0)
 #define VETH_XDP_REDIR		BIT(1)
 
-struct pcpu_vstats {
-	u64			packets;
-	u64			bytes;
-	struct u64_stats_sync	syncp;
-};
-
 struct veth_rq {
 	struct napi_struct	xdp_napi;
 	struct net_device	*dev;
@@ -217,7 +211,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	skb_tx_timestamp(skb);
 	if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) {
-		struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);
+		struct pcpu_lstats *stats = this_cpu_ptr(dev->lstats);
 
 		u64_stats_update_begin(&stats->syncp);
 		stats->bytes += length;
@@ -236,7 +230,7 @@ drop:
 	return NETDEV_TX_OK;
 }
 
-static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev)
+static u64 veth_stats_one(struct pcpu_lstats *result, struct net_device *dev)
 {
 	struct veth_priv *priv = netdev_priv(dev);
 	int cpu;
@@ -244,7 +238,7 @@ static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev)
 	result->packets = 0;
 	result->bytes = 0;
 	for_each_possible_cpu(cpu) {
-		struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu);
+		struct pcpu_lstats *stats = per_cpu_ptr(dev->lstats, cpu);
 		u64 packets, bytes;
 		unsigned int start;
 
@@ -264,7 +258,7 @@ static void veth_get_stats64(struct net_device *dev,
 {
 	struct veth_priv *priv = netdev_priv(dev);
 	struct net_device *peer;
-	struct pcpu_vstats one;
+	struct pcpu_lstats one;
 
 	tot->tx_dropped = veth_stats_one(&one, dev);
 	tot->tx_bytes = one.bytes;
@@ -830,13 +824,13 @@ static int veth_dev_init(struct net_device *dev)
 {
 	int err;
 
-	dev->vstats = netdev_alloc_pcpu_stats(struct pcpu_vstats);
-	if (!dev->vstats)
+	dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
+	if (!dev->lstats)
 		return -ENOMEM;
 
 	err = veth_alloc_queues(dev);
 	if (err) {
-		free_percpu(dev->vstats);
+		free_percpu(dev->lstats);
 		return err;
 	}
 
@@ -846,7 +840,7 @@ static int veth_dev_init(struct net_device *dev)
 static void veth_dev_free(struct net_device *dev)
 {
 	veth_free_queues(dev);
-	free_percpu(dev->vstats);
+	free_percpu(dev->lstats);
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index baed5d5088c5..1cbbf77a685f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2000,7 +2000,6 @@ struct net_device {
 		struct pcpu_lstats __percpu		*lstats;
 		struct pcpu_sw_netstats __percpu	*tstats;
 		struct pcpu_dstats __percpu		*dstats;
-		struct pcpu_vstats __percpu		*vstats;
 	};
 
 #if IS_ENABLED(CONFIG_GARP)
-- 
cgit v1.2.3


From efc463adbccf709c5dbaf6cfbf84b7e94b62810a Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 16 Apr 2018 14:18:26 -0500
Subject: signal: Simplify tracehook_report_syscall_exit

Replace user_single_step_siginfo with user_single_step_report
that allocates siginfo structure on the stack and sends it.

This allows tracehook_report_syscall_exit to become a simple
if statement that calls user_single_step_report or ptrace_report_syscall
depending on the value of step.

Update the default helper function now called user_single_step_report
to explicitly set si_code to SI_USER and to set si_uid and si_pid to 0.
The default helper has always been doing this (using memset) but it
was far from obvious.

The powerpc helper can now just call force_sig_fault.
The x86 helper can now just call send_sigtrap.

Unfortunately the default implementation of user_single_step_report
can not use force_sig_fault as it does not use a SIGTRAP si_code.
So it has to carefully setup the siginfo and use use force_sig_info.

The net result is code that is easier to understand and simpler
to maintain.

Ref: 85ec7fd9f8e5 ("ptrace: introduce user_single_step_siginfo() helper")
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/powerpc/include/asm/ptrace.h |  2 +-
 arch/powerpc/kernel/traps.c       |  7 ++-----
 arch/x86/include/asm/ptrace.h     |  2 +-
 arch/x86/kernel/ptrace.c          | 11 +++++------
 include/linux/ptrace.h            | 17 +++++++++++------
 include/linux/tracehook.h         | 13 ++++---------
 6 files changed, 24 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 447cbd1bee99..5b480e1d5909 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -149,7 +149,7 @@ do {									      \
 
 #define arch_has_single_step()	(1)
 #define arch_has_block_step()	(!cpu_has_feature(CPU_FTR_601))
-#define ARCH_HAS_USER_SINGLE_STEP_INFO
+#define ARCH_HAS_USER_SINGLE_STEP_REPORT
 
 /*
  * kprobe-based event tracer support
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index c85adb858271..f651fa91cdc9 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -307,12 +307,9 @@ void die(const char *str, struct pt_regs *regs, long err)
 }
 NOKPROBE_SYMBOL(die);
 
-void user_single_step_siginfo(struct task_struct *tsk,
-				struct pt_regs *regs, siginfo_t *info)
+void user_single_step_report(struct pt_regs *regs)
 {
-	info->si_signo = SIGTRAP;
-	info->si_code = TRAP_TRACE;
-	info->si_addr = (void __user *)regs->nip;
+	force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)regs->nip, current);
 }
 
 static void show_signal_msg(int signr, struct pt_regs *regs, int code,
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 6de1fd3d0097..e353f08b7fe2 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -263,7 +263,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
 #define arch_has_block_step()	(boot_cpu_data.x86 >= 6)
 #endif
 
-#define ARCH_HAS_USER_SINGLE_STEP_INFO
+#define ARCH_HAS_USER_SINGLE_STEP_REPORT
 
 /*
  * When hitting ptrace_stop(), we cannot return using SYSRET because
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e2ee403865eb..94bd6e89129a 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1382,12 +1382,6 @@ static void fill_sigtrap_info(struct task_struct *tsk,
 	info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
 }
 
-void user_single_step_siginfo(struct task_struct *tsk,
-				struct pt_regs *regs,
-				struct siginfo *info)
-{
-	fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info);
-}
 
 void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
 					 int error_code, int si_code)
@@ -1399,3 +1393,8 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
 	/* Send us the fake SIGTRAP */
 	force_sig_info(SIGTRAP, &info, tsk);
 }
+
+void user_single_step_report(struct pt_regs *regs)
+{
+	send_sigtrap(current, regs, 0, TRAP_BRKPT);
+}
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 4f36431c380b..1de2235511c8 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -336,14 +336,19 @@ static inline void user_enable_block_step(struct task_struct *task)
 extern void user_enable_block_step(struct task_struct *);
 #endif	/* arch_has_block_step */
 
-#ifdef ARCH_HAS_USER_SINGLE_STEP_INFO
-extern void user_single_step_siginfo(struct task_struct *tsk,
-				struct pt_regs *regs, siginfo_t *info);
+#ifdef ARCH_HAS_USER_SINGLE_STEP_REPORT
+extern void user_single_step_report(struct pt_regs *regs);
 #else
-static inline void user_single_step_siginfo(struct task_struct *tsk,
-				struct pt_regs *regs, siginfo_t *info)
+static inline void user_single_step_report(struct pt_regs *regs)
 {
-	info->si_signo = SIGTRAP;
+	siginfo_t info;
+	clear_siginfo(&info);
+	info.si_signo = SIGTRAP;
+	info.si_errno = 0;
+	info.si_code = SI_USER;
+	info.si_pid = 0;
+	info.si_uid = 0;
+	force_sig_info(info.si_signo, &info, current);
 }
 #endif
 
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 05589a3e37f4..40b0b4c1bf7b 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -123,15 +123,10 @@ static inline __must_check int tracehook_report_syscall_entry(
  */
 static inline void tracehook_report_syscall_exit(struct pt_regs *regs, int step)
 {
-	if (step) {
-		siginfo_t info;
-		clear_siginfo(&info);
-		user_single_step_siginfo(current, regs, &info);
-		force_sig_info(SIGTRAP, &info, current);
-		return;
-	}
-
-	ptrace_report_syscall(regs);
+	if (step)
+		user_single_step_report(regs);
+	else
+		ptrace_report_syscall(regs);
 }
 
 /**
-- 
cgit v1.2.3


From 2dfd184abd38fd72d80715fa8b00c9de78490200 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 18 Sep 2018 16:20:18 -0400
Subject: flow_dissector: fix build failure without CONFIG_NET

If boolean CONFIG_BPF_SYSCALL is enabled, kernel/bpf/syscall.c will
call flow_dissector functions from net/core/flow_dissector.c.

This causes this build failure if CONFIG_NET is disabled:

    kernel/bpf/syscall.o: In function `__x64_sys_bpf':
    syscall.c:(.text+0x3278): undefined reference to
    `skb_flow_dissector_bpf_prog_attach'
    syscall.c:(.text+0x3310): undefined reference to
    `skb_flow_dissector_bpf_prog_detach'
    kernel/bpf/syscall.o:(.rodata+0x3f0): undefined reference to
    `flow_dissector_prog_ops'
    kernel/bpf/verifier.o:(.rodata+0x250): undefined reference to
    `flow_dissector_verifier_ops'

Analogous to other optional BPF program types in syscall.c, add stubs
if the relevant functions are not compiled and move the BPF_PROG_TYPE
definition in the #ifdef CONFIG_NET block.

Fixes: d58e468b1112 ("flow_dissector: implements flow dissector BPF hook")
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf_types.h |  2 +-
 include/linux/skbuff.h    | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 22083712dd18..c9bd6fb765b0 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -16,6 +16,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
+BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector)
 #endif
 #ifdef CONFIG_BPF_EVENTS
 BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
@@ -32,7 +33,6 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
 #ifdef CONFIG_INET
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
 #endif
-BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector)
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index ce0e863f02a2..76be85ea392a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1194,10 +1194,23 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 			     const struct flow_dissector_key *key,
 			     unsigned int key_count);
 
+#ifdef CONFIG_NET
 int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
 				       struct bpf_prog *prog);
 
 int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr);
+#else
+static inline int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
+						     struct bpf_prog *prog)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
+{
+	return -EOPNOTSUPP;
+}
+#endif
 
 bool __skb_flow_dissect(const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
-- 
cgit v1.2.3


From eb7c8743d6cf489e30091e6656fd4d3306621e9a Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 5 Sep 2018 16:17:14 +0530
Subject: OPP: Pass index to _of_init_opp_table()

This is a preparatory patch required for the next commit which will
start using OPP table's node pointer in _of_init_opp_table(), which
requires the index in order to read the OPP table's phandle.

This commit adds the index argument in the call chains in order to get
it delivered to _of_init_opp_table().

Tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c     | 19 +++++++++++++++----
 drivers/opp/of.c       | 12 +++++++-----
 drivers/opp/opp.h      |  4 ++--
 include/linux/pm_opp.h |  6 ++++++
 4 files changed, 30 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 9f8aa31265fe..332748adc262 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -785,7 +785,7 @@ struct opp_device *_add_opp_dev(const struct device *dev,
 	return opp_dev;
 }
 
-static struct opp_table *_allocate_opp_table(struct device *dev)
+static struct opp_table *_allocate_opp_table(struct device *dev, int index)
 {
 	struct opp_table *opp_table;
 	struct opp_device *opp_dev;
@@ -808,7 +808,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev)
 		return NULL;
 	}
 
-	_of_init_opp_table(opp_table, dev);
+	_of_init_opp_table(opp_table, dev, index);
 
 	/* Find clk for the device */
 	opp_table->clk = clk_get(dev, NULL);
@@ -833,7 +833,7 @@ void _get_opp_table_kref(struct opp_table *opp_table)
 	kref_get(&opp_table->kref);
 }
 
-struct opp_table *dev_pm_opp_get_opp_table(struct device *dev)
+static struct opp_table *_opp_get_opp_table(struct device *dev, int index)
 {
 	struct opp_table *opp_table;
 
@@ -844,15 +844,26 @@ struct opp_table *dev_pm_opp_get_opp_table(struct device *dev)
 	if (!IS_ERR(opp_table))
 		goto unlock;
 
-	opp_table = _allocate_opp_table(dev);
+	opp_table = _allocate_opp_table(dev, index);
 
 unlock:
 	mutex_unlock(&opp_table_lock);
 
 	return opp_table;
 }
+
+struct opp_table *dev_pm_opp_get_opp_table(struct device *dev)
+{
+	return _opp_get_opp_table(dev, 0);
+}
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_table);
 
+struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *dev,
+						   int index)
+{
+	return _opp_get_opp_table(dev, index);
+}
+
 static void _opp_table_kref_release(struct kref *kref)
 {
 	struct opp_table *opp_table = container_of(kref, struct opp_table, kref);
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 86222586f27b..1a9e1242a2a7 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -52,7 +52,8 @@ static struct opp_table *_managed_opp(const struct device_node *np)
 	return managed_table;
 }
 
-void _of_init_opp_table(struct opp_table *opp_table, struct device *dev)
+void _of_init_opp_table(struct opp_table *opp_table, struct device *dev,
+			int index)
 {
 	struct device_node *np;
 
@@ -378,7 +379,8 @@ free_opp:
 }
 
 /* Initializes OPP tables based on new bindings */
-static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
+static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np,
+				int index)
 {
 	struct device_node *np;
 	struct opp_table *opp_table;
@@ -393,7 +395,7 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
 		goto put_opp_table;
 	}
 
-	opp_table = dev_pm_opp_get_opp_table(dev);
+	opp_table = dev_pm_opp_get_opp_table_indexed(dev, index);
 	if (!opp_table)
 		return -ENOMEM;
 
@@ -526,7 +528,7 @@ int dev_pm_opp_of_add_table(struct device *dev)
 		return _of_add_opp_table_v1(dev);
 	}
 
-	ret = _of_add_opp_table_v2(dev, opp_np);
+	ret = _of_add_opp_table_v2(dev, opp_np, 0);
 	of_node_put(opp_np);
 
 	return ret;
@@ -574,7 +576,7 @@ again:
 		return -ENODEV;
 	}
 
-	ret = _of_add_opp_table_v2(dev, opp_np);
+	ret = _of_add_opp_table_v2(dev, opp_np, index);
 	of_node_put(opp_np);
 
 	return ret;
diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h
index 88e9f47aadf1..b235e76fc8cc 100644
--- a/drivers/opp/opp.h
+++ b/drivers/opp/opp.h
@@ -200,9 +200,9 @@ void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of, in
 struct opp_table *_add_opp_table(struct device *dev);
 
 #ifdef CONFIG_OF
-void _of_init_opp_table(struct opp_table *opp_table, struct device *dev);
+void _of_init_opp_table(struct opp_table *opp_table, struct device *dev, int index);
 #else
-static inline void _of_init_opp_table(struct opp_table *opp_table, struct device *dev) {}
+static inline void _of_init_opp_table(struct opp_table *opp_table, struct device *dev, int index) {}
 #endif
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 099b31960dec..5d399eeef172 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -79,6 +79,7 @@ struct dev_pm_set_opp_data {
 #if defined(CONFIG_PM_OPP)
 
 struct opp_table *dev_pm_opp_get_opp_table(struct device *dev);
+struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *dev, int index);
 void dev_pm_opp_put_opp_table(struct opp_table *opp_table);
 
 unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp);
@@ -136,6 +137,11 @@ static inline struct opp_table *dev_pm_opp_get_opp_table(struct device *dev)
 	return ERR_PTR(-ENOTSUPP);
 }
 
+static inline struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *dev, int index)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
+
 static inline void dev_pm_opp_put_opp_table(struct opp_table *opp_table) {}
 
 static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
-- 
cgit v1.2.3


From 53dd9dce6979bc54d64a3a09a2fb20187a025be7 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Thu, 13 Sep 2018 15:16:22 -0500
Subject: libfdt: Ensure INT_MAX is defined in libfdt_env.h

The next update of libfdt has a new dependency on INT_MAX. Update the
instances of libfdt_env.h in the kernel to either include the necessary
header with the definition or define it locally.

Cc: Russell King <linux@armlinux.org.uk>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linuxppc-dev@lists.ozlabs.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 arch/arm/boot/compressed/libfdt_env.h | 2 ++
 arch/powerpc/boot/libfdt_env.h        | 2 ++
 include/linux/libfdt_env.h            | 1 +
 3 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h
index 07437816e098..b36c0289a308 100644
--- a/arch/arm/boot/compressed/libfdt_env.h
+++ b/arch/arm/boot/compressed/libfdt_env.h
@@ -6,6 +6,8 @@
 #include <linux/string.h>
 #include <asm/byteorder.h>
 
+#define INT_MAX			((int)(~0U>>1))
+
 typedef __be16 fdt16_t;
 typedef __be32 fdt32_t;
 typedef __be64 fdt64_t;
diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h
index 2a0c8b1bf147..2abc8e83b95e 100644
--- a/arch/powerpc/boot/libfdt_env.h
+++ b/arch/powerpc/boot/libfdt_env.h
@@ -5,6 +5,8 @@
 #include <types.h>
 #include <string.h>
 
+#define INT_MAX			((int)(~0U>>1))
+
 #include "of.h"
 
 typedef unsigned long uintptr_t;
diff --git a/include/linux/libfdt_env.h b/include/linux/libfdt_env.h
index c6ac1fe7ec68..edb0f0c30904 100644
--- a/include/linux/libfdt_env.h
+++ b/include/linux/libfdt_env.h
@@ -2,6 +2,7 @@
 #ifndef LIBFDT_ENV_H
 #define LIBFDT_ENV_H
 
+#include <linux/kernel.h>	/* For INT_MAX */
 #include <linux/string.h>
 
 #include <asm/byteorder.h>
-- 
cgit v1.2.3


From 822f312d47f0200dc0999c9f006fe94aa43bd0bd Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 12 Sep 2018 15:33:45 +0200
Subject: kvm: x86: make kvm_{load|put}_guest_fpu() static

The functions
	kvm_load_guest_fpu()
	kvm_put_guest_fpu()

are only used locally, make them static. This requires also that both
functions are moved because they are used before their implementation.
Those functions were exported (via EXPORT_SYMBOL) before commit
e5bb40251a920 ("KVM: Drop kvm_{load,put}_guest_fpu() exports").

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c       | 46 +++++++++++++++++++++++-----------------------
 include/linux/kvm_host.h |  2 --
 2 files changed, 23 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9d0fda9056de..6f4789398876 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7835,6 +7835,29 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+/* Swap (qemu) user FPU context for the guest FPU context. */
+static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
+{
+	preempt_disable();
+	copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
+	/* PKRU is separately restored in kvm_x86_ops->run.  */
+	__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
+				~XFEATURE_MASK_PKRU);
+	preempt_enable();
+	trace_kvm_fpu(1);
+}
+
+/* When vcpu_run ends, restore user space FPU context. */
+static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
+{
+	preempt_disable();
+	copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
+	copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
+	preempt_enable();
+	++vcpu->stat.fpu_reload;
+	trace_kvm_fpu(0);
+}
+
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -8412,29 +8435,6 @@ static void fx_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.cr0 |= X86_CR0_ET;
 }
 
-/* Swap (qemu) user FPU context for the guest FPU context. */
-void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
-{
-	preempt_disable();
-	copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
-	/* PKRU is separately restored in kvm_x86_ops->run.  */
-	__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
-				~XFEATURE_MASK_PKRU);
-	preempt_enable();
-	trace_kvm_fpu(1);
-}
-
-/* When vcpu_run ends, restore user space FPU context. */
-void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
-{
-	preempt_disable();
-	copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
-	copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
-	preempt_enable();
-	++vcpu->stat.fpu_reload;
-	trace_kvm_fpu(0);
-}
-
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 0205aee44ded..c926698040e0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -733,8 +733,6 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool usermode_vcpu_not_eligible);
-void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
-void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 void kvm_reload_remote_mmus(struct kvm *kvm);
-- 
cgit v1.2.3


From f3ecc0ff0457eae93503792c6fc35921fa8a6204 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 19 Aug 2018 14:53:20 +0200
Subject: dma-mapping: move the dma_coherent flag to struct device

Various architectures support both coherent and non-coherent dma on a
per-device basis.  Move the dma_noncoherent flag from the mips archdata
field to struct device proper to prepare the infrastructure for reuse on
other architectures.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Paul Burton <paul.burton@mips.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/Kconfig                     |  1 +
 arch/mips/include/asm/Kbuild          |  1 +
 arch/mips/include/asm/device.h        | 19 -------------------
 arch/mips/include/asm/dma-coherence.h |  6 ++++++
 arch/mips/include/asm/dma-mapping.h   |  2 +-
 arch/mips/mm/dma-noncoherent.c        | 32 ++++++--------------------------
 include/linux/device.h                |  7 +++++++
 include/linux/dma-noncoherent.h       | 16 ++++++++++++++++
 kernel/dma/Kconfig                    |  3 +++
 9 files changed, 41 insertions(+), 46 deletions(-)
 delete mode 100644 arch/mips/include/asm/device.h

(limited to 'include/linux')

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 0b25180028b8..54c52bd0d9d3 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1106,6 +1106,7 @@ config ARCH_SUPPORTS_UPROBES
 	bool
 
 config DMA_MAYBE_COHERENT
+	select ARCH_HAS_DMA_COHERENCE_H
 	select DMA_NONCOHERENT
 	bool
 
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 58351e48421e..9a81e72119da 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -1,6 +1,7 @@
 # MIPS headers
 generic-(CONFIG_GENERIC_CSUM) += checksum.h
 generic-y += current.h
+generic-y += device.h
 generic-y += dma-contiguous.h
 generic-y += emergency-restart.h
 generic-y += export.h
diff --git a/arch/mips/include/asm/device.h b/arch/mips/include/asm/device.h
deleted file mode 100644
index 6aa796f1081a..000000000000
--- a/arch/mips/include/asm/device.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Arch specific extensions to struct device
- *
- * This file is released under the GPLv2
- */
-#ifndef _ASM_MIPS_DEVICE_H
-#define _ASM_MIPS_DEVICE_H
-
-struct dev_archdata {
-#ifdef CONFIG_DMA_PERDEV_COHERENT
-	/* Non-zero if DMA is coherent with CPU caches */
-	bool dma_coherent;
-#endif
-};
-
-struct pdev_archdata {
-};
-
-#endif /* _ASM_MIPS_DEVICE_H*/
diff --git a/arch/mips/include/asm/dma-coherence.h b/arch/mips/include/asm/dma-coherence.h
index 8eda48748ed5..5eaa1fcc878a 100644
--- a/arch/mips/include/asm/dma-coherence.h
+++ b/arch/mips/include/asm/dma-coherence.h
@@ -20,6 +20,12 @@ enum coherent_io_user_state {
 #elif defined(CONFIG_DMA_MAYBE_COHERENT)
 extern enum coherent_io_user_state coherentio;
 extern int hw_coherentio;
+
+static inline bool dev_is_dma_coherent(struct device *dev)
+{
+	return coherentio == IO_COHERENCE_ENABLED ||
+		(coherentio == IO_COHERENCE_DEFAULT && hw_coherentio);
+}
 #else
 #ifdef CONFIG_DMA_NONCOHERENT
 #define coherentio	IO_COHERENCE_DISABLED
diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h
index e81c4e97ff1a..40d825c779de 100644
--- a/arch/mips/include/asm/dma-mapping.h
+++ b/arch/mips/include/asm/dma-mapping.h
@@ -25,7 +25,7 @@ static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base,
 				      bool coherent)
 {
 #ifdef CONFIG_DMA_PERDEV_COHERENT
-	dev->archdata.dma_coherent = coherent;
+	dev->dma_coherent = coherent;
 #endif
 }
 
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index 2aca1236af36..d408ac51f56c 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -14,26 +14,6 @@
 #include <asm/dma-coherence.h>
 #include <asm/io.h>
 
-#ifdef CONFIG_DMA_PERDEV_COHERENT
-static inline int dev_is_coherent(struct device *dev)
-{
-	return dev->archdata.dma_coherent;
-}
-#else
-static inline int dev_is_coherent(struct device *dev)
-{
-	switch (coherentio) {
-	default:
-	case IO_COHERENCE_DEFAULT:
-		return hw_coherentio;
-	case IO_COHERENCE_ENABLED:
-		return 1;
-	case IO_COHERENCE_DISABLED:
-		return 0;
-	}
-}
-#endif /* CONFIG_DMA_PERDEV_COHERENT */
-
 /*
  * The affected CPUs below in 'cpu_needs_post_dma_flush()' can speculatively
  * fill random cachelines with stale data at any time, requiring an extra
@@ -49,7 +29,7 @@ static inline int dev_is_coherent(struct device *dev)
  */
 static inline bool cpu_needs_post_dma_flush(struct device *dev)
 {
-	if (dev_is_coherent(dev))
+	if (dev_is_dma_coherent(dev))
 		return false;
 
 	switch (boot_cpu_type()) {
@@ -76,7 +56,7 @@ void *arch_dma_alloc(struct device *dev, size_t size,
 	if (!ret)
 		return NULL;
 
-	if (!dev_is_coherent(dev) && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
+	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
 		dma_cache_wback_inv((unsigned long) ret, size);
 		ret = (void *)UNCAC_ADDR(ret);
 	}
@@ -87,7 +67,7 @@ void *arch_dma_alloc(struct device *dev, size_t size,
 void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
 		dma_addr_t dma_addr, unsigned long attrs)
 {
-	if (!(attrs & DMA_ATTR_NON_CONSISTENT) && !dev_is_coherent(dev))
+	if (!(attrs & DMA_ATTR_NON_CONSISTENT) && !dev_is_dma_coherent(dev))
 		cpu_addr = (void *)CAC_ADDR((unsigned long)cpu_addr);
 	dma_direct_free(dev, size, cpu_addr, dma_addr, attrs);
 }
@@ -103,7 +83,7 @@ int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 	unsigned long pfn;
 	int ret = -ENXIO;
 
-	if (!dev_is_coherent(dev))
+	if (!dev_is_dma_coherent(dev))
 		addr = CAC_ADDR(addr);
 
 	pfn = page_to_pfn(virt_to_page((void *)addr));
@@ -187,7 +167,7 @@ static inline void dma_sync_phys(phys_addr_t paddr, size_t size,
 void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
 		size_t size, enum dma_data_direction dir)
 {
-	if (!dev_is_coherent(dev))
+	if (!dev_is_dma_coherent(dev))
 		dma_sync_phys(paddr, size, dir);
 }
 
@@ -203,6 +183,6 @@ void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 {
 	BUG_ON(direction == DMA_NONE);
 
-	if (!dev_is_coherent(dev))
+	if (!dev_is_dma_coherent(dev))
 		dma_sync_virt(vaddr, size, direction);
 }
diff --git a/include/linux/device.h b/include/linux/device.h
index 8f882549edee..983506789402 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -927,6 +927,8 @@ struct dev_links_info {
  * @offline:	Set after successful invocation of bus type's .offline().
  * @of_node_reused: Set if the device-tree node is shared with an ancestor
  *              device.
+ * @dma_coherent: this particular device is dma coherent, even if the
+ *		architecture supports non-coherent devices.
  *
  * At the lowest level, every device in a Linux system is represented by an
  * instance of struct device. The device structure contains the information
@@ -1016,6 +1018,11 @@ struct device {
 	bool			offline_disabled:1;
 	bool			offline:1;
 	bool			of_node_reused:1;
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
+    defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
+    defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
+	bool			dma_coherent:1;
+#endif
 };
 
 static inline struct device *kobj_to_dev(struct kobject *kobj)
diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
index a0aa00cc909d..ce9732506ef4 100644
--- a/include/linux/dma-noncoherent.h
+++ b/include/linux/dma-noncoherent.h
@@ -4,6 +4,22 @@
 
 #include <linux/dma-mapping.h>
 
+#ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H
+#include <asm/dma-coherence.h>
+#elif defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
+	defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
+	defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
+static inline bool dev_is_dma_coherent(struct device *dev)
+{
+	return dev->dma_coherent;
+}
+#else
+static inline bool dev_is_dma_coherent(struct device *dev)
+{
+	return true;
+}
+#endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */
+
 void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		gfp_t gfp, unsigned long attrs);
 void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 1b1d63b3634b..79476749f196 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -13,6 +13,9 @@ config NEED_DMA_MAP_STATE
 config ARCH_DMA_ADDR_T_64BIT
 	def_bool 64BIT || PHYS_ADDR_T_64BIT
 
+config ARCH_HAS_DMA_COHERENCE_H
+	bool
+
 config HAVE_GENERIC_DMA_COHERENT
 	bool
 
-- 
cgit v1.2.3


From bc3ec75de5452db59b683487867ba562b950708a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 8 Sep 2018 11:22:43 +0200
Subject: dma-mapping: merge direct and noncoherent ops

All the cache maintainance is already stubbed out when not enabled,
but merging the two allows us to nicely handle the case where
cache maintainance is required for some devices, but not others.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Paul Burton <paul.burton@mips.com> # MIPS parts
---
 arch/arc/Kconfig                     |   2 +-
 arch/arc/mm/dma.c                    |  16 ++---
 arch/arm/mm/dma-mapping-nommu.c      |   5 +-
 arch/c6x/Kconfig                     |   2 +-
 arch/hexagon/Kconfig                 |   2 +-
 arch/m68k/Kconfig                    |   2 +-
 arch/microblaze/Kconfig              |   2 +-
 arch/mips/Kconfig                    |   1 -
 arch/mips/include/asm/dma-mapping.h  |   2 -
 arch/mips/jazz/jazzdma.c             |   6 +-
 arch/mips/mm/dma-noncoherent.c       |  29 +++------
 arch/nds32/Kconfig                   |   2 +-
 arch/nios2/Kconfig                   |   2 +-
 arch/openrisc/Kconfig                |   2 +-
 arch/parisc/Kconfig                  |   2 +-
 arch/parisc/kernel/setup.c           |   2 +-
 arch/sh/Kconfig                      |   3 +-
 arch/sparc/Kconfig                   |   2 +-
 arch/sparc/include/asm/dma-mapping.h |   4 +-
 arch/x86/kernel/amd_gart_64.c        |   6 +-
 arch/xtensa/Kconfig                  |   2 +-
 include/asm-generic/dma-mapping.h    |   9 ---
 include/linux/dma-direct.h           |   4 ++
 include/linux/dma-mapping.h          |   1 -
 include/linux/dma-noncoherent.h      |   5 --
 kernel/dma/Kconfig                   |   9 +--
 kernel/dma/Makefile                  |   1 -
 kernel/dma/direct.c                  | 121 +++++++++++++++++++++++++++++++++--
 kernel/dma/noncoherent.c             | 106 ------------------------------
 29 files changed, 160 insertions(+), 192 deletions(-)
 delete mode 100644 kernel/dma/noncoherent.c

(limited to 'include/linux')

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index b4441b0764d7..ca03694d518a 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -17,7 +17,7 @@ config ARC
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
-	select DMA_NONCOHERENT_OPS
+	select DMA_DIRECT_OPS
 	select DMA_NONCOHERENT_MMAP
 	select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC)
 	select GENERIC_CLOCKEVENTS
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index c75d5c3470e3..535ed4a068ef 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -167,7 +167,7 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
 }
 
 /*
- * Plug in coherent or noncoherent dma ops
+ * Plug in direct dma map ops.
  */
 void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 			const struct iommu_ops *iommu, bool coherent)
@@ -175,13 +175,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 	/*
 	 * IOC hardware snoops all DMA traffic keeping the caches consistent
 	 * with memory - eliding need for any explicit cache maintenance of
-	 * DMA buffers - so we can use dma_direct cache ops.
+	 * DMA buffers.
 	 */
-	if (is_isa_arcv2() && ioc_enable && coherent) {
-		set_dma_ops(dev, &dma_direct_ops);
-		dev_info(dev, "use dma_direct_ops cache ops\n");
-	} else {
-		set_dma_ops(dev, &dma_noncoherent_ops);
-		dev_info(dev, "use dma_noncoherent_ops cache ops\n");
-	}
+	if (is_isa_arcv2() && ioc_enable && coherent)
+		dev->dma_coherent = true;
+
+	dev_info(dev, "use %sncoherent DMA ops\n",
+		 dev->dma_coherent ? "" : "non");
 }
diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
index aa7aba302e76..0ad156f9985b 100644
--- a/arch/arm/mm/dma-mapping-nommu.c
+++ b/arch/arm/mm/dma-mapping-nommu.c
@@ -47,7 +47,8 @@ static void *arm_nommu_dma_alloc(struct device *dev, size_t size,
 	 */
 
 	if (attrs & DMA_ATTR_NON_CONSISTENT)
-		return dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
+		return dma_direct_alloc_pages(dev, size, dma_handle, gfp,
+				attrs);
 
 	ret = dma_alloc_from_global_coherent(size, dma_handle);
 
@@ -70,7 +71,7 @@ static void arm_nommu_dma_free(struct device *dev, size_t size,
 			       unsigned long attrs)
 {
 	if (attrs & DMA_ATTR_NON_CONSISTENT) {
-		dma_direct_free(dev, size, cpu_addr, dma_addr, attrs);
+		dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
 	} else {
 		int ret = dma_release_from_global_coherent(get_order(size),
 							   cpu_addr);
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index a641b0bf1611..f65a084607fd 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -9,7 +9,7 @@ config C6X
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select CLKDEV_LOOKUP
-	select DMA_NONCOHERENT_OPS
+	select DMA_DIRECT_OPS
 	select GENERIC_ATOMIC64
 	select GENERIC_IRQ_SHOW
 	select HAVE_ARCH_TRACEHOOK
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 6cee842a9b44..3ef46522e89f 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -30,7 +30,7 @@ config HEXAGON
 	select GENERIC_CLOCKEVENTS_BROADCAST
 	select MODULES_USE_ELF_RELA
 	select GENERIC_CPU_DEVICES
-	select DMA_NONCOHERENT_OPS
+	select DMA_DIRECT_OPS
 	---help---
 	  Qualcomm Hexagon is a processor architecture designed for high
 	  performance and low power across a wide variety of applications.
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 070553791e97..c7b2a8d60a41 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -26,7 +26,7 @@ config M68K
 	select MODULES_USE_ELF_RELA
 	select OLD_SIGSUSPEND3
 	select OLD_SIGACTION
-	select DMA_NONCOHERENT_OPS if HAS_DMA
+	select DMA_DIRECT_OPS if HAS_DMA
 	select HAVE_MEMBLOCK
 	select ARCH_DISCARD_MEMBLOCK
 	select NO_BOOTMEM
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index ace5c5bf1836..0f48ab6a8070 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -11,7 +11,7 @@ config MICROBLAZE
 	select TIMER_OF
 	select CLONE_BACKWARDS3
 	select COMMON_CLK
-	select DMA_NONCOHERENT_OPS
+	select DMA_DIRECT_OPS
 	select DMA_NONCOHERENT_MMAP
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 54c52bd0d9d3..96da6e3396e1 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1121,7 +1121,6 @@ config DMA_NONCOHERENT
 	select NEED_DMA_MAP_STATE
 	select DMA_NONCOHERENT_MMAP
 	select DMA_NONCOHERENT_CACHE_SYNC
-	select DMA_NONCOHERENT_OPS
 
 config SYS_HAS_EARLY_PRINTK
 	bool
diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h
index 40d825c779de..b4c477eb46ce 100644
--- a/arch/mips/include/asm/dma-mapping.h
+++ b/arch/mips/include/asm/dma-mapping.h
@@ -12,8 +12,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 	return &jazz_dma_ops;
 #elif defined(CONFIG_SWIOTLB)
 	return &swiotlb_dma_ops;
-#elif defined(CONFIG_DMA_NONCOHERENT_OPS)
-	return &dma_noncoherent_ops;
 #else
 	return &dma_direct_ops;
 #endif
diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c
index d31bc2f01208..bb49dfa1a9a3 100644
--- a/arch/mips/jazz/jazzdma.c
+++ b/arch/mips/jazz/jazzdma.c
@@ -564,13 +564,13 @@ static void *jazz_dma_alloc(struct device *dev, size_t size,
 {
 	void *ret;
 
-	ret = dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
+	ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
 	if (!ret)
 		return NULL;
 
 	*dma_handle = vdma_alloc(virt_to_phys(ret), size);
 	if (*dma_handle == VDMA_ERROR) {
-		dma_direct_free(dev, size, ret, *dma_handle, attrs);
+		dma_direct_free_pages(dev, size, ret, *dma_handle, attrs);
 		return NULL;
 	}
 
@@ -587,7 +587,7 @@ static void jazz_dma_free(struct device *dev, size_t size, void *vaddr,
 	vdma_free(dma_handle);
 	if (!(attrs & DMA_ATTR_NON_CONSISTENT))
 		vaddr = (void *)CAC_ADDR((unsigned long)vaddr);
-	return dma_direct_free(dev, size, vaddr, dma_handle, attrs);
+	dma_direct_free_pages(dev, size, vaddr, dma_handle, attrs);
 }
 
 static dma_addr_t jazz_dma_map_page(struct device *dev, struct page *page,
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index d408ac51f56c..b01b9a3e424f 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -29,9 +29,6 @@
  */
 static inline bool cpu_needs_post_dma_flush(struct device *dev)
 {
-	if (dev_is_dma_coherent(dev))
-		return false;
-
 	switch (boot_cpu_type()) {
 	case CPU_R10000:
 	case CPU_R12000:
@@ -52,11 +49,8 @@ void *arch_dma_alloc(struct device *dev, size_t size,
 {
 	void *ret;
 
-	ret = dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
-	if (!ret)
-		return NULL;
-
-	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
+	ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
+	if (!ret && !(attrs & DMA_ATTR_NON_CONSISTENT)) {
 		dma_cache_wback_inv((unsigned long) ret, size);
 		ret = (void *)UNCAC_ADDR(ret);
 	}
@@ -67,9 +61,9 @@ void *arch_dma_alloc(struct device *dev, size_t size,
 void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
 		dma_addr_t dma_addr, unsigned long attrs)
 {
-	if (!(attrs & DMA_ATTR_NON_CONSISTENT) && !dev_is_dma_coherent(dev))
+	if (!(attrs & DMA_ATTR_NON_CONSISTENT))
 		cpu_addr = (void *)CAC_ADDR((unsigned long)cpu_addr);
-	dma_direct_free(dev, size, cpu_addr, dma_addr, attrs);
+	dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
 }
 
 int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
@@ -78,16 +72,11 @@ int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 {
 	unsigned long user_count = vma_pages(vma);
 	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	unsigned long addr = (unsigned long)cpu_addr;
+	unsigned long addr = CAC_ADDR((unsigned long)cpu_addr);
 	unsigned long off = vma->vm_pgoff;
-	unsigned long pfn;
+	unsigned long pfn = page_to_pfn(virt_to_page((void *)addr));
 	int ret = -ENXIO;
 
-	if (!dev_is_dma_coherent(dev))
-		addr = CAC_ADDR(addr);
-
-	pfn = page_to_pfn(virt_to_page((void *)addr));
-
 	if (attrs & DMA_ATTR_WRITE_COMBINE)
 		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 	else
@@ -167,8 +156,7 @@ static inline void dma_sync_phys(phys_addr_t paddr, size_t size,
 void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
 		size_t size, enum dma_data_direction dir)
 {
-	if (!dev_is_dma_coherent(dev))
-		dma_sync_phys(paddr, size, dir);
+	dma_sync_phys(paddr, size, dir);
 }
 
 void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
@@ -183,6 +171,5 @@ void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 {
 	BUG_ON(direction == DMA_NONE);
 
-	if (!dev_is_dma_coherent(dev))
-		dma_sync_virt(vaddr, size, direction);
+	dma_sync_virt(vaddr, size, direction);
 }
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 7068f341133d..56992330026a 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -11,7 +11,7 @@ config NDS32
 	select CLKSRC_MMIO
 	select CLONE_BACKWARDS
 	select COMMON_CLK
-	select DMA_NONCOHERENT_OPS
+	select DMA_DIRECT_OPS
 	select GENERIC_ATOMIC64
 	select GENERIC_CPU_DEVICES
 	select GENERIC_CLOCKEVENTS
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index f4ad1138e6b9..03965692fbfe 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -4,7 +4,7 @@ config NIOS2
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_NO_SWAP
-	select DMA_NONCOHERENT_OPS
+	select DMA_DIRECT_OPS
 	select TIMER_OF
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index e0081e734827..a655ae280637 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -7,7 +7,7 @@
 config OPENRISC
 	def_bool y
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
-	select DMA_NONCOHERENT_OPS
+	select DMA_DIRECT_OPS
 	select OF
 	select OF_EARLY_FLATTREE
 	select IRQ_DOMAIN
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 8e6d83f79e72..f1cd12afd943 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -186,7 +186,7 @@ config PA11
 	depends on PA7000 || PA7100LC || PA7200 || PA7300LC
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
-	select DMA_NONCOHERENT_OPS
+	select DMA_DIRECT_OPS
 	select DMA_NONCOHERENT_CACHE_SYNC
 
 config PREFETCH
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 4e87c35c22b7..755e89ec828a 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -102,7 +102,7 @@ void __init dma_ops_init(void)
 	case pcxl: /* falls through */
 	case pcxs:
 	case pcxt:
-		hppa_dma_ops = &dma_noncoherent_ops;
+		hppa_dma_ops = &dma_direct_ops;
 		break;
 	default:
 		break;
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 1fb7b6d72baf..475d786a65b0 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -7,6 +7,7 @@ config SUPERH
 	select ARCH_NO_COHERENT_DMA_MMAP if !MMU
 	select HAVE_PATA_PLATFORM
 	select CLKDEV_LOOKUP
+	select DMA_DIRECT_OPS
 	select HAVE_IDE if HAS_IOPORT_MAP
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
@@ -158,13 +159,11 @@ config SWAP_IO_SPACE
 	bool
 
 config DMA_COHERENT
-	select DMA_DIRECT_OPS
 	bool
 
 config DMA_NONCOHERENT
 	def_bool !DMA_COHERENT
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
-	select DMA_NONCOHERENT_OPS
 
 config PGTABLE_LEVELS
 	default 3 if X2TLB
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index e6f2a38d2e61..7e2aa59fcc29 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -51,7 +51,7 @@ config SPARC
 config SPARC32
 	def_bool !64BIT
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
-	select DMA_NONCOHERENT_OPS
+	select DMA_DIRECT_OPS
 	select GENERIC_ATOMIC64
 	select CLZ_TAB
 	select HAVE_UID16
diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h
index e17566376934..b0bb2fcaf1c9 100644
--- a/arch/sparc/include/asm/dma-mapping.h
+++ b/arch/sparc/include/asm/dma-mapping.h
@@ -14,11 +14,11 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
 #ifdef CONFIG_SPARC_LEON
 	if (sparc_cpu_model == sparc_leon)
-		return &dma_noncoherent_ops;
+		return &dma_direct_ops;
 #endif
 #if defined(CONFIG_SPARC32) && defined(CONFIG_PCI)
 	if (bus == &pci_bus_type)
-		return &dma_noncoherent_ops;
+		return &dma_direct_ops;
 #endif
 	return dma_ops;
 }
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index f299d8a479bb..3f9d1b4019bb 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -482,7 +482,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
 {
 	void *vaddr;
 
-	vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs);
+	vaddr = dma_direct_alloc_pages(dev, size, dma_addr, flag, attrs);
 	if (!vaddr ||
 	    !force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24))
 		return vaddr;
@@ -494,7 +494,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr,
 		goto out_free;
 	return vaddr;
 out_free:
-	dma_direct_free(dev, size, vaddr, *dma_addr, attrs);
+	dma_direct_free_pages(dev, size, vaddr, *dma_addr, attrs);
 	return NULL;
 }
 
@@ -504,7 +504,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr,
 		   dma_addr_t dma_addr, unsigned long attrs)
 {
 	gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0);
-	dma_direct_free(dev, size, vaddr, dma_addr, attrs);
+	dma_direct_free_pages(dev, size, vaddr, dma_addr, attrs);
 }
 
 static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr)
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 04d038f3b6fa..516694937b7a 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -12,7 +12,7 @@ config XTENSA
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
-	select DMA_NONCOHERENT_OPS
+	select DMA_DIRECT_OPS
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_IRQ_SHOW
diff --git a/include/asm-generic/dma-mapping.h b/include/asm-generic/dma-mapping.h
index ad2868263867..880a292d792f 100644
--- a/include/asm-generic/dma-mapping.h
+++ b/include/asm-generic/dma-mapping.h
@@ -4,16 +4,7 @@
 
 static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
 {
-	/*
-	 * Use the non-coherent ops if available.  If an architecture wants a
-	 * more fine-grained selection of operations it will have to implement
-	 * get_arch_dma_ops itself or use the per-device dma_ops.
-	 */
-#ifdef CONFIG_DMA_NONCOHERENT_OPS
-	return &dma_noncoherent_ops;
-#else
 	return &dma_direct_ops;
-#endif
 }
 
 #endif /* _ASM_GENERIC_DMA_MAPPING_H */
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index 8d9f33febde5..86a59ba5a7f3 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -59,6 +59,10 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		gfp_t gfp, unsigned long attrs);
 void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
 		dma_addr_t dma_addr, unsigned long attrs);
+void *dma_direct_alloc_pages(struct device *dev, size_t size,
+		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs);
+void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
+		dma_addr_t dma_addr, unsigned long attrs);
 dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size, enum dma_data_direction dir,
 		unsigned long attrs);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index eafd6f318e78..8f2001181cd1 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -136,7 +136,6 @@ struct dma_map_ops {
 };
 
 extern const struct dma_map_ops dma_direct_ops;
-extern const struct dma_map_ops dma_noncoherent_ops;
 extern const struct dma_map_ops dma_virt_ops;
 
 #define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL<<(n))-1))
diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
index ce9732506ef4..3f503025a0cd 100644
--- a/include/linux/dma-noncoherent.h
+++ b/include/linux/dma-noncoherent.h
@@ -24,14 +24,9 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		gfp_t gfp, unsigned long attrs);
 void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
 		dma_addr_t dma_addr, unsigned long attrs);
-
-#ifdef CONFIG_DMA_NONCOHERENT_MMAP
 int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 		void *cpu_addr, dma_addr_t dma_addr, size_t size,
 		unsigned long attrs);
-#else
-#define arch_dma_mmap NULL
-#endif /* CONFIG_DMA_NONCOHERENT_MMAP */
 
 #ifdef CONFIG_DMA_NONCOHERENT_CACHE_SYNC
 void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size,
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 79476749f196..5617c9a76208 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -33,18 +33,13 @@ config DMA_DIRECT_OPS
 	bool
 	depends on HAS_DMA
 
-config DMA_NONCOHERENT_OPS
-	bool
-	depends on HAS_DMA
-	select DMA_DIRECT_OPS
-
 config DMA_NONCOHERENT_MMAP
 	bool
-	depends on DMA_NONCOHERENT_OPS
+	depends on DMA_DIRECT_OPS
 
 config DMA_NONCOHERENT_CACHE_SYNC
 	bool
-	depends on DMA_NONCOHERENT_OPS
+	depends on DMA_DIRECT_OPS
 
 config DMA_VIRT_OPS
 	bool
diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile
index 6de44e4eb454..7d581e4eea4a 100644
--- a/kernel/dma/Makefile
+++ b/kernel/dma/Makefile
@@ -4,7 +4,6 @@ obj-$(CONFIG_HAS_DMA)			+= mapping.o
 obj-$(CONFIG_DMA_CMA)			+= contiguous.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += coherent.o
 obj-$(CONFIG_DMA_DIRECT_OPS)		+= direct.o
-obj-$(CONFIG_DMA_NONCOHERENT_OPS)	+= noncoherent.o
 obj-$(CONFIG_DMA_VIRT_OPS)		+= virt.o
 obj-$(CONFIG_DMA_API_DEBUG)		+= debug.o
 obj-$(CONFIG_SWIOTLB)			+= swiotlb.o
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index de87b0282e74..09e85f6aa4ba 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -1,13 +1,15 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * DMA operations that map physical memory directly without using an IOMMU or
- * flushing caches.
+ * Copyright (C) 2018 Christoph Hellwig.
+ *
+ * DMA operations that map physical memory directly without using an IOMMU.
  */
 #include <linux/export.h>
 #include <linux/mm.h>
 #include <linux/dma-direct.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-contiguous.h>
+#include <linux/dma-noncoherent.h>
 #include <linux/pfn.h>
 #include <linux/set_memory.h>
 
@@ -58,8 +60,8 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
 	return addr + size - 1 <= dev->coherent_dma_mask;
 }
 
-void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		gfp_t gfp, unsigned long attrs)
+void *dma_direct_alloc_pages(struct device *dev, size_t size,
+		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
 {
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	int page_order = get_order(size);
@@ -124,7 +126,7 @@ again:
  * NOTE: this function must never look at the dma_addr argument, because we want
  * to be able to use it as a helper for iommu implementations as well.
  */
-void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
+void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
 		dma_addr_t dma_addr, unsigned long attrs)
 {
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
@@ -136,14 +138,106 @@ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
 		free_pages((unsigned long)cpu_addr, page_order);
 }
 
+void *dma_direct_alloc(struct device *dev, size_t size,
+		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+{
+	if (!dev_is_dma_coherent(dev))
+		return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
+	return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
+}
+
+void dma_direct_free(struct device *dev, size_t size,
+		void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
+{
+	if (!dev_is_dma_coherent(dev))
+		arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
+	else
+		dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
+}
+
+static int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
+		void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		unsigned long attrs)
+{
+	if (!dev_is_dma_coherent(dev) &&
+	    IS_ENABLED(CONFIG_DMA_NONCOHERENT_MMAP))
+		return arch_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
+	return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+}
+
+static void dma_direct_sync_single_for_device(struct device *dev,
+		dma_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+	if (dev_is_dma_coherent(dev))
+		return;
+	arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir);
+}
+
+static void dma_direct_sync_sg_for_device(struct device *dev,
+		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	if (dev_is_dma_coherent(dev))
+		return;
+
+	for_each_sg(sgl, sg, nents, i)
+		arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
+}
+
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
+    defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
+static void dma_direct_sync_single_for_cpu(struct device *dev,
+		dma_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+	if (dev_is_dma_coherent(dev))
+		return;
+	arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir);
+	arch_sync_dma_for_cpu_all(dev);
+}
+
+static void dma_direct_sync_sg_for_cpu(struct device *dev,
+		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	if (dev_is_dma_coherent(dev))
+		return;
+
+	for_each_sg(sgl, sg, nents, i)
+		arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
+	arch_sync_dma_for_cpu_all(dev);
+}
+
+static void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
+		size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		dma_direct_sync_single_for_cpu(dev, addr, size, dir);
+}
+
+static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
+		int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		dma_direct_sync_sg_for_cpu(dev, sgl, nents, dir);
+}
+#endif
+
 dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
 		unsigned long offset, size_t size, enum dma_data_direction dir,
 		unsigned long attrs)
 {
-	dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;
+	phys_addr_t phys = page_to_phys(page) + offset;
+	dma_addr_t dma_addr = phys_to_dma(dev, phys);
 
 	if (!check_addr(dev, dma_addr, size, __func__))
 		return DIRECT_MAPPING_ERROR;
+
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		dma_direct_sync_single_for_device(dev, dma_addr, size, dir);
 	return dma_addr;
 }
 
@@ -162,6 +256,8 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
 		sg_dma_len(sg) = sg->length;
 	}
 
+	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+		dma_direct_sync_sg_for_device(dev, sgl, nents, dir);
 	return nents;
 }
 
@@ -197,9 +293,22 @@ int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr)
 const struct dma_map_ops dma_direct_ops = {
 	.alloc			= dma_direct_alloc,
 	.free			= dma_direct_free,
+	.mmap			= dma_direct_mmap,
 	.map_page		= dma_direct_map_page,
 	.map_sg			= dma_direct_map_sg,
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE)
+	.sync_single_for_device	= dma_direct_sync_single_for_device,
+	.sync_sg_for_device	= dma_direct_sync_sg_for_device,
+#endif
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
+    defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
+	.sync_single_for_cpu	= dma_direct_sync_single_for_cpu,
+	.sync_sg_for_cpu	= dma_direct_sync_sg_for_cpu,
+	.unmap_page		= dma_direct_unmap_page,
+	.unmap_sg		= dma_direct_unmap_sg,
+#endif
 	.dma_supported		= dma_direct_supported,
 	.mapping_error		= dma_direct_mapping_error,
+	.cache_sync		= arch_dma_cache_sync,
 };
 EXPORT_SYMBOL(dma_direct_ops);
diff --git a/kernel/dma/noncoherent.c b/kernel/dma/noncoherent.c
deleted file mode 100644
index 031fe235d958..000000000000
--- a/kernel/dma/noncoherent.c
+++ /dev/null
@@ -1,106 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2018 Christoph Hellwig.
- *
- * DMA operations that map physical memory directly without providing cache
- * coherence.
- */
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/dma-direct.h>
-#include <linux/dma-noncoherent.h>
-#include <linux/scatterlist.h>
-
-static void dma_noncoherent_sync_single_for_device(struct device *dev,
-		dma_addr_t addr, size_t size, enum dma_data_direction dir)
-{
-	arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir);
-}
-
-static void dma_noncoherent_sync_sg_for_device(struct device *dev,
-		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
-{
-	struct scatterlist *sg;
-	int i;
-
-	for_each_sg(sgl, sg, nents, i)
-		arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
-}
-
-static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page,
-		unsigned long offset, size_t size, enum dma_data_direction dir,
-		unsigned long attrs)
-{
-	dma_addr_t addr;
-
-	addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
-	if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		arch_sync_dma_for_device(dev, page_to_phys(page) + offset,
-				size, dir);
-	return addr;
-}
-
-static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl,
-		int nents, enum dma_data_direction dir, unsigned long attrs)
-{
-	nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs);
-	if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir);
-	return nents;
-}
-
-#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
-    defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
-static void dma_noncoherent_sync_single_for_cpu(struct device *dev,
-		dma_addr_t addr, size_t size, enum dma_data_direction dir)
-{
-	arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir);
-	arch_sync_dma_for_cpu_all(dev);
-}
-
-static void dma_noncoherent_sync_sg_for_cpu(struct device *dev,
-		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
-{
-	struct scatterlist *sg;
-	int i;
-
-	for_each_sg(sgl, sg, nents, i)
-		arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
-	arch_sync_dma_for_cpu_all(dev);
-}
-
-static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr,
-		size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir);
-}
-
-static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl,
-		int nents, enum dma_data_direction dir, unsigned long attrs)
-{
-	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-		dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir);
-}
-#endif
-
-const struct dma_map_ops dma_noncoherent_ops = {
-	.alloc			= arch_dma_alloc,
-	.free			= arch_dma_free,
-	.mmap			= arch_dma_mmap,
-	.sync_single_for_device	= dma_noncoherent_sync_single_for_device,
-	.sync_sg_for_device	= dma_noncoherent_sync_sg_for_device,
-	.map_page		= dma_noncoherent_map_page,
-	.map_sg			= dma_noncoherent_map_sg,
-#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
-    defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
-	.sync_single_for_cpu	= dma_noncoherent_sync_single_for_cpu,
-	.sync_sg_for_cpu	= dma_noncoherent_sync_sg_for_cpu,
-	.unmap_page		= dma_noncoherent_unmap_page,
-	.unmap_sg		= dma_noncoherent_unmap_sg,
-#endif
-	.dma_supported		= dma_direct_supported,
-	.mapping_error		= dma_direct_mapping_error,
-	.cache_sync		= arch_dma_cache_sync,
-};
-EXPORT_SYMBOL(dma_noncoherent_ops);
-- 
cgit v1.2.3


From 58b0440663ec11372befb8ead0ee7099d8878590 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 11 Sep 2018 08:55:28 +0200
Subject: dma-mapping: consolidate the dma mmap implementations

The only functional differences (modulo a few missing fixes in the arch
code) is that architectures without coherent caches need a hook to
convert a virtual or dma address into a pfn, given that we don't have
the kernel linear mapping available for the otherwise easy virt_to_page
call.  As a side effect we can support mmap of the per-device coherent
area even on architectures not providing the callback, and we make
previous dangerous default methods dma_common_mmap actually save for
non-coherent architectures by rejecting it without the right helper.

In addition to that we need a hook so that some architectures can
override the protection bits when mmaping a dma coherent allocations.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Paul Burton <paul.burton@mips.com> # MIPS parts
---
 arch/arc/Kconfig                      |  2 +-
 arch/arc/mm/dma.c                     | 25 +++----------------------
 arch/arm/mm/dma-mapping-nommu.c       |  2 +-
 arch/microblaze/Kconfig               |  2 +-
 arch/microblaze/include/asm/pgtable.h |  2 --
 arch/microblaze/kernel/dma.c          | 22 ----------------------
 arch/microblaze/mm/consistent.c       |  3 ++-
 arch/mips/Kconfig                     |  3 ++-
 arch/mips/jazz/jazzdma.c              |  1 -
 arch/mips/mm/dma-noncoherent.c        | 32 +++++++++-----------------------
 drivers/xen/swiotlb-xen.c             |  2 +-
 include/linux/dma-mapping.h           |  5 +++--
 include/linux/dma-noncoherent.h       | 10 ++++++++--
 kernel/dma/Kconfig                    | 10 ++++++----
 kernel/dma/direct.c                   | 11 -----------
 kernel/dma/mapping.c                  | 32 +++++++++++++++++++++-----------
 16 files changed, 58 insertions(+), 106 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index ca03694d518a..3d9bdecfa52d 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -9,6 +9,7 @@
 config ARC
 	def_bool y
 	select ARC_TIMERS
+	select ARCH_HAS_DMA_COHERENT_TO_PFN
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
@@ -18,7 +19,6 @@ config ARC
 	select CLONE_BACKWARDS
 	select COMMON_CLK
 	select DMA_DIRECT_OPS
-	select DMA_NONCOHERENT_MMAP
 	select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC)
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_FIND_FIRST_BIT
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index 535ed4a068ef..db203ff69ccf 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -84,29 +84,10 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 	__free_pages(page, get_order(size));
 }
 
-int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
-		void *cpu_addr, dma_addr_t dma_addr, size_t size,
-		unsigned long attrs)
+long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
+		dma_addr_t dma_addr)
 {
-	unsigned long user_count = vma_pages(vma);
-	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	unsigned long pfn = __phys_to_pfn(dma_addr);
-	unsigned long off = vma->vm_pgoff;
-	int ret = -ENXIO;
-
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
-		return ret;
-
-	if (off < count && user_count <= (count - off)) {
-		ret = remap_pfn_range(vma, vma->vm_start,
-				      pfn + off,
-				      user_count << PAGE_SHIFT,
-				      vma->vm_page_prot);
-	}
-
-	return ret;
+	return __phys_to_pfn(dma_addr);
 }
 
 /*
diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c
index 0ad156f9985b..712416ecd8e6 100644
--- a/arch/arm/mm/dma-mapping-nommu.c
+++ b/arch/arm/mm/dma-mapping-nommu.c
@@ -91,7 +91,7 @@ static int arm_nommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 	if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret))
 		return ret;
 
-	return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+	return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
 }
 
 
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 0f48ab6a8070..164a4857737a 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -1,6 +1,7 @@
 config MICROBLAZE
 	def_bool y
 	select ARCH_NO_SWAP
+	select ARCH_HAS_DMA_COHERENT_TO_PFN if MMU
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
@@ -12,7 +13,6 @@ config MICROBLAZE
 	select CLONE_BACKWARDS3
 	select COMMON_CLK
 	select DMA_DIRECT_OPS
-	select DMA_NONCOHERENT_MMAP
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CPU_DEVICES
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index 7b650ab14fa0..f64ebb9c9a41 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -553,8 +553,6 @@ void __init *early_get_page(void);
 
 extern unsigned long ioremap_bot, ioremap_base;
 
-unsigned long consistent_virt_to_pfn(void *vaddr);
-
 void setup_memory(void);
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c
index 71032cf64669..a89c2d4ed5ff 100644
--- a/arch/microblaze/kernel/dma.c
+++ b/arch/microblaze/kernel/dma.c
@@ -42,25 +42,3 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
 {
 	__dma_sync(dev, paddr, size, dir);
 }
-
-int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
-		void *cpu_addr, dma_addr_t handle, size_t size,
-		unsigned long attrs)
-{
-#ifdef CONFIG_MMU
-	unsigned long user_count = vma_pages(vma);
-	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-	unsigned long off = vma->vm_pgoff;
-	unsigned long pfn;
-
-	if (off >= count || user_count > (count - off))
-		return -ENXIO;
-
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-	pfn = consistent_virt_to_pfn(cpu_addr);
-	return remap_pfn_range(vma, vma->vm_start, pfn + off,
-			       vma->vm_end - vma->vm_start, vma->vm_page_prot);
-#else
-	return -ENXIO;
-#endif
-}
diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c
index c9a278ac795a..d801cc5f5b95 100644
--- a/arch/microblaze/mm/consistent.c
+++ b/arch/microblaze/mm/consistent.c
@@ -165,7 +165,8 @@ static pte_t *consistent_virt_to_pte(void *vaddr)
 	return pte_offset_kernel(pmd_offset(pgd_offset_k(addr), addr), addr);
 }
 
-unsigned long consistent_virt_to_pfn(void *vaddr)
+long arch_dma_coherent_to_pfn(struct device *dev, void *vaddr,
+		dma_addr_t dma_addr)
 {
 	pte_t *ptep = consistent_virt_to_pte(vaddr);
 
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 96da6e3396e1..77c022e56e6e 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1116,10 +1116,11 @@ config DMA_PERDEV_COHERENT
 
 config DMA_NONCOHERENT
 	bool
+	select ARCH_HAS_DMA_MMAP_PGPROT
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select NEED_DMA_MAP_STATE
-	select DMA_NONCOHERENT_MMAP
+	select ARCH_HAS_DMA_COHERENT_TO_PFN
 	select DMA_NONCOHERENT_CACHE_SYNC
 
 config SYS_HAS_EARLY_PRINTK
diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c
index bb49dfa1a9a3..0a0aaf39fd16 100644
--- a/arch/mips/jazz/jazzdma.c
+++ b/arch/mips/jazz/jazzdma.c
@@ -682,7 +682,6 @@ static int jazz_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 const struct dma_map_ops jazz_dma_ops = {
 	.alloc			= jazz_dma_alloc,
 	.free			= jazz_dma_free,
-	.mmap			= arch_dma_mmap,
 	.map_page		= jazz_dma_map_page,
 	.unmap_page		= jazz_dma_unmap_page,
 	.map_sg			= jazz_dma_map_sg,
diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c
index b01b9a3e424f..e6c9485cadcf 100644
--- a/arch/mips/mm/dma-noncoherent.c
+++ b/arch/mips/mm/dma-noncoherent.c
@@ -66,33 +66,19 @@ void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
 	dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
 }
 
-int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
-		void *cpu_addr, dma_addr_t dma_addr, size_t size,
-		unsigned long attrs)
+long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
+		dma_addr_t dma_addr)
 {
-	unsigned long user_count = vma_pages(vma);
-	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	unsigned long addr = CAC_ADDR((unsigned long)cpu_addr);
-	unsigned long off = vma->vm_pgoff;
-	unsigned long pfn = page_to_pfn(virt_to_page((void *)addr));
-	int ret = -ENXIO;
+	return page_to_pfn(virt_to_page((void *)addr));
+}
 
+pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
+		unsigned long attrs)
+{
 	if (attrs & DMA_ATTR_WRITE_COMBINE)
-		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-	else
-		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
-		return ret;
-
-	if (off < count && user_count <= (count - off)) {
-		ret = remap_pfn_range(vma, vma->vm_start,
-				      pfn + off,
-				      user_count << PAGE_SHIFT,
-				      vma->vm_page_prot);
-	}
-
-	return ret;
+		return pgprot_writecombine(prot);
+	return pgprot_noncached(prot);
 }
 
 static inline void dma_sync_virt(void *addr, size_t size,
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index a6f9ba85dc4b..470757ddddea 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -662,7 +662,7 @@ xen_swiotlb_dma_mmap(struct device *dev, struct vm_area_struct *vma,
 		return xen_get_dma_ops(dev)->mmap(dev, vma, cpu_addr,
 						    dma_addr, size, attrs);
 #endif
-	return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+	return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
 }
 
 /*
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 8f2001181cd1..c3378d4e0d57 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -444,7 +444,8 @@ dma_cache_sync(struct device *dev, void *vaddr, size_t size,
 }
 
 extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
-			   void *cpu_addr, dma_addr_t dma_addr, size_t size);
+		void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		unsigned long attrs);
 
 void *dma_common_contiguous_remap(struct page *page, size_t size,
 			unsigned long vm_flags,
@@ -476,7 +477,7 @@ dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr,
 	BUG_ON(!ops);
 	if (ops->mmap)
 		return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
-	return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
+	return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
 }
 
 #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0)
diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h
index 3f503025a0cd..9051b055beec 100644
--- a/include/linux/dma-noncoherent.h
+++ b/include/linux/dma-noncoherent.h
@@ -24,9 +24,15 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		gfp_t gfp, unsigned long attrs);
 void arch_dma_free(struct device *dev, size_t size, void *cpu_addr,
 		dma_addr_t dma_addr, unsigned long attrs);
-int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma,
-		void *cpu_addr, dma_addr_t dma_addr, size_t size,
+long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
+		dma_addr_t dma_addr);
+
+#ifdef CONFIG_ARCH_HAS_DMA_MMAP_PGPROT
+pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
 		unsigned long attrs);
+#else
+# define arch_dma_mmap_pgprot(dev, prot, attrs)	pgprot_noncached(prot)
+#endif
 
 #ifdef CONFIG_DMA_NONCOHERENT_CACHE_SYNC
 void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size,
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 5617c9a76208..645c7a2ecde8 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -29,13 +29,15 @@ config ARCH_HAS_SYNC_DMA_FOR_CPU
 config ARCH_HAS_SYNC_DMA_FOR_CPU_ALL
 	bool
 
-config DMA_DIRECT_OPS
+config ARCH_HAS_DMA_COHERENT_TO_PFN
 	bool
-	depends on HAS_DMA
 
-config DMA_NONCOHERENT_MMAP
+config ARCH_HAS_DMA_MMAP_PGPROT
 	bool
-	depends on DMA_DIRECT_OPS
+
+config DMA_DIRECT_OPS
+	bool
+	depends on HAS_DMA
 
 config DMA_NONCOHERENT_CACHE_SYNC
 	bool
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 09e85f6aa4ba..c954f0a6dc62 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -155,16 +155,6 @@ void dma_direct_free(struct device *dev, size_t size,
 		dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
 }
 
-static int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
-		void *cpu_addr, dma_addr_t dma_addr, size_t size,
-		unsigned long attrs)
-{
-	if (!dev_is_dma_coherent(dev) &&
-	    IS_ENABLED(CONFIG_DMA_NONCOHERENT_MMAP))
-		return arch_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs);
-	return dma_common_mmap(dev, vma, cpu_addr, dma_addr, size);
-}
-
 static void dma_direct_sync_single_for_device(struct device *dev,
 		dma_addr_t addr, size_t size, enum dma_data_direction dir)
 {
@@ -293,7 +283,6 @@ int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr)
 const struct dma_map_ops dma_direct_ops = {
 	.alloc			= dma_direct_alloc,
 	.free			= dma_direct_free,
-	.mmap			= dma_direct_mmap,
 	.map_page		= dma_direct_map_page,
 	.map_sg			= dma_direct_map_sg,
 #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE)
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 3540cb399bd2..42fd73aca305 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -7,7 +7,7 @@
  */
 
 #include <linux/acpi.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-noncoherent.h>
 #include <linux/export.h>
 #include <linux/gfp.h>
 #include <linux/of_device.h>
@@ -220,27 +220,37 @@ EXPORT_SYMBOL(dma_common_get_sgtable);
  * Create userspace mapping for the DMA-coherent memory.
  */
 int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
-		    void *cpu_addr, dma_addr_t dma_addr, size_t size)
+		void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		unsigned long attrs)
 {
-	int ret = -ENXIO;
 #ifndef CONFIG_ARCH_NO_COHERENT_DMA_MMAP
 	unsigned long user_count = vma_pages(vma);
 	unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	unsigned long off = vma->vm_pgoff;
+	unsigned long pfn;
+	int ret = -ENXIO;
 
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs);
 
 	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;
 
-	if (off < count && user_count <= (count - off))
-		ret = remap_pfn_range(vma, vma->vm_start,
-				      page_to_pfn(virt_to_page(cpu_addr)) + off,
-				      user_count << PAGE_SHIFT,
-				      vma->vm_page_prot);
-#endif	/* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */
+	if (off >= count || user_count > count - off)
+		return -ENXIO;
+
+	if (!dev_is_dma_coherent(dev)) {
+		if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN))
+			return -ENXIO;
+		pfn = arch_dma_coherent_to_pfn(dev, cpu_addr, dma_addr);
+	} else {
+		pfn = page_to_pfn(virt_to_page(cpu_addr));
+	}
 
-	return ret;
+	return remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
+			user_count << PAGE_SHIFT, vma->vm_page_prot);
+#else
+	return -ENXIO;
+#endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */
 }
 EXPORT_SYMBOL(dma_common_mmap);
 
-- 
cgit v1.2.3


From 9406a49fd1f4379409ed87b29fdaa259b0441912 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 23 Aug 2018 09:39:38 +0200
Subject: dma-mapping: support non-coherent devices in dma_common_get_sgtable

We can use the arch_dma_coherent_to_pfn hook to provide a ->get_sgtable
implementation.  Note that this isn't an endorsement of this interface
(which is a horrible bad idea), but it is required to move arm64 over
to the generic code without a loss of functionality.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/xen/swiotlb-xen.c   |  2 +-
 include/linux/dma-mapping.h |  7 ++++---
 kernel/dma/mapping.c        | 23 ++++++++++++++++-------
 3 files changed, 21 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 470757ddddea..28819a0e61d0 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -689,7 +689,7 @@ xen_swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
 							   handle, size, attrs);
 	}
 #endif
-	return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size);
+	return dma_common_get_sgtable(dev, sgt, cpu_addr, handle, size, attrs);
 }
 
 static int xen_swiotlb_mapping_error(struct device *dev, dma_addr_t dma_addr)
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index c3378d4e0d57..bd81e74cca7b 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -483,8 +483,8 @@ dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr,
 #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0)
 
 int
-dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
-		       void *cpu_addr, dma_addr_t dma_addr, size_t size);
+dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr,
+		dma_addr_t dma_addr, size_t size, unsigned long attrs);
 
 static inline int
 dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr,
@@ -496,7 +496,8 @@ dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr,
 	if (ops->get_sgtable)
 		return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
 					attrs);
-	return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size);
+	return dma_common_get_sgtable(dev, sgt, cpu_addr, dma_addr, size,
+			attrs);
 }
 
 #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0)
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index 42fd73aca305..58dec7a92b7b 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -202,17 +202,26 @@ EXPORT_SYMBOL(dmam_release_declared_memory);
  * Create scatter-list for the already allocated DMA buffer.
  */
 int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
-		 void *cpu_addr, dma_addr_t handle, size_t size)
+		 void *cpu_addr, dma_addr_t dma_addr, size_t size,
+		 unsigned long attrs)
 {
-	struct page *page = virt_to_page(cpu_addr);
+	struct page *page;
 	int ret;
 
-	ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
-	if (unlikely(ret))
-		return ret;
+	if (!dev_is_dma_coherent(dev)) {
+		if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_COHERENT_TO_PFN))
+			return -ENXIO;
 
-	sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
-	return 0;
+		page = pfn_to_page(arch_dma_coherent_to_pfn(dev, cpu_addr,
+				dma_addr));
+	} else {
+		page = virt_to_page(cpu_addr);
+	}
+
+	ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+	if (!ret)
+		sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
+	return ret;
 }
 EXPORT_SYMBOL(dma_common_get_sgtable);
 
-- 
cgit v1.2.3


From 16caf1fa37db4722d8d8c7bc26177279949d75a6 Mon Sep 17 00:00:00 2001
From: Loic Poulain <loic.poulain@linaro.org>
Date: Tue, 4 Sep 2018 17:18:55 +0200
Subject: usb: chipidea: Add dynamic pinctrl selection

Some hardware implementations require to configure pins differently
according to the USB role (host/device), this can be an update of the
pins routing or a simple GPIO value change.

This patch introduces new optional "host" and "device" pinctrls.
If these pinctrls are defined by the device, they are respectively
selected on host/device role start.

If a default pinctrl exist, it is restored on host/device role stop.

Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Signed-off-by: Peter Chen <peter.chen@nxp.com>
---
 drivers/usb/chipidea/core.c  | 19 +++++++++++++++++++
 drivers/usb/chipidea/host.c  |  9 +++++++++
 drivers/usb/chipidea/udc.c   |  9 +++++++++
 include/linux/usb/chipidea.h |  6 ++++++
 4 files changed, 43 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index 85fc6db48e44..7bfcbb23c2a4 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -53,6 +53,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
+#include <linux/pinctrl/consumer.h>
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
 #include <linux/usb/otg.h>
@@ -723,6 +724,24 @@ static int ci_get_platdata(struct device *dev,
 		else
 			cable->connected = false;
 	}
+
+	platdata->pctl = devm_pinctrl_get(dev);
+	if (!IS_ERR(platdata->pctl)) {
+		struct pinctrl_state *p;
+
+		p = pinctrl_lookup_state(platdata->pctl, "default");
+		if (!IS_ERR(p))
+			platdata->pins_default = p;
+
+		p = pinctrl_lookup_state(platdata->pctl, "host");
+		if (!IS_ERR(p))
+			platdata->pins_host = p;
+
+		p = pinctrl_lookup_state(platdata->pctl, "device");
+		if (!IS_ERR(p))
+			platdata->pins_device = p;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c
index 4638d9b066be..d858a82c4f44 100644
--- a/drivers/usb/chipidea/host.c
+++ b/drivers/usb/chipidea/host.c
@@ -13,6 +13,7 @@
 #include <linux/usb/hcd.h>
 #include <linux/usb/chipidea.h>
 #include <linux/regulator/consumer.h>
+#include <linux/pinctrl/consumer.h>
 
 #include "../host/ehci.h"
 
@@ -153,6 +154,10 @@ static int host_start(struct ci_hdrc *ci)
 		}
 	}
 
+	if (ci->platdata->pins_host)
+		pinctrl_select_state(ci->platdata->pctl,
+				     ci->platdata->pins_host);
+
 	ret = usb_add_hcd(hcd, 0, 0);
 	if (ret) {
 		goto disable_reg;
@@ -197,6 +202,10 @@ static void host_stop(struct ci_hdrc *ci)
 	}
 	ci->hcd = NULL;
 	ci->otg.host = NULL;
+
+	if (ci->platdata->pins_host && ci->platdata->pins_default)
+		pinctrl_select_state(ci->platdata->pctl,
+				     ci->platdata->pins_default);
 }
 
 
diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
index 9852ec5e6e01..829e947cabf5 100644
--- a/drivers/usb/chipidea/udc.c
+++ b/drivers/usb/chipidea/udc.c
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
+#include <linux/pinctrl/consumer.h>
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
 #include <linux/usb/otg-fsm.h>
@@ -1965,6 +1966,10 @@ void ci_hdrc_gadget_destroy(struct ci_hdrc *ci)
 
 static int udc_id_switch_for_device(struct ci_hdrc *ci)
 {
+	if (ci->platdata->pins_device)
+		pinctrl_select_state(ci->platdata->pctl,
+				     ci->platdata->pins_device);
+
 	if (ci->is_otg)
 		/* Clear and enable BSV irq */
 		hw_write_otgsc(ci, OTGSC_BSVIS | OTGSC_BSVIE,
@@ -1983,6 +1988,10 @@ static void udc_id_switch_for_host(struct ci_hdrc *ci)
 		hw_write_otgsc(ci, OTGSC_BSVIE | OTGSC_BSVIS, OTGSC_BSVIS);
 
 	ci->vbus_active = 0;
+
+	if (ci->platdata->pins_device && ci->platdata->pins_default)
+		pinctrl_select_state(ci->platdata->pctl,
+				     ci->platdata->pins_default);
 }
 
 /**
diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h
index 07f99362bc90..63758c399e4e 100644
--- a/include/linux/usb/chipidea.h
+++ b/include/linux/usb/chipidea.h
@@ -77,6 +77,12 @@ struct ci_hdrc_platform_data {
 	struct ci_hdrc_cable		vbus_extcon;
 	struct ci_hdrc_cable		id_extcon;
 	u32			phy_clkgate_delay_us;
+
+	/* pins */
+	struct pinctrl *pctl;
+	struct pinctrl_state *pins_default;
+	struct pinctrl_state *pins_host;
+	struct pinctrl_state *pins_device;
 };
 
 /* Default offset of capability registers */
-- 
cgit v1.2.3


From 457b42f0aa1c397745ada6083bf81b9479ab19fe Mon Sep 17 00:00:00 2001
From: Liu Xiang <liu.xiang6@zte.com.cn>
Date: Sun, 22 Jul 2018 21:33:56 +0800
Subject: power: supply: bq27xxx: Add support for BQ27411

According to the datasheet, bq27411 is similar to bq27421.

Signed-off-by: Liu Xiang <liu.xiang6@zte.com.cn>
Reviewed-by: Andrew F. Davis <afd@ti.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 Documentation/devicetree/bindings/power/supply/bq27xxx.txt | 1 +
 drivers/power/supply/bq27xxx_battery.c                     | 9 +++++++++
 drivers/power/supply/bq27xxx_battery_i2c.c                 | 2 ++
 include/linux/power/bq27xxx_battery.h                      | 1 +
 4 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/power/supply/bq27xxx.txt b/Documentation/devicetree/bindings/power/supply/bq27xxx.txt
index 37994fdb18ca..4fa8e08df2b6 100644
--- a/Documentation/devicetree/bindings/power/supply/bq27xxx.txt
+++ b/Documentation/devicetree/bindings/power/supply/bq27xxx.txt
@@ -23,6 +23,7 @@ Required properties:
  * "ti,bq27546" - BQ27546
  * "ti,bq27742" - BQ27742
  * "ti,bq27545" - BQ27545
+ * "ti,bq27411" - BQ27411
  * "ti,bq27421" - BQ27421
  * "ti,bq27425" - BQ27425
  * "ti,bq27426" - BQ27426
diff --git a/drivers/power/supply/bq27xxx_battery.c b/drivers/power/supply/bq27xxx_battery.c
index f022e1b550df..6dbbe95844a3 100644
--- a/drivers/power/supply/bq27xxx_battery.c
+++ b/drivers/power/supply/bq27xxx_battery.c
@@ -432,6 +432,7 @@ static u8
 		[BQ27XXX_REG_AP] = 0x18,
 		BQ27XXX_DM_REG_ROWS,
 	};
+#define bq27411_regs bq27421_regs
 #define bq27425_regs bq27421_regs
 #define bq27426_regs bq27421_regs
 #define bq27441_regs bq27421_regs
@@ -665,6 +666,7 @@ static enum power_supply_property bq27421_props[] = {
 	POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN,
 	POWER_SUPPLY_PROP_MANUFACTURER,
 };
+#define bq27411_props bq27421_props
 #define bq27425_props bq27421_props
 #define bq27426_props bq27421_props
 #define bq27441_props bq27421_props
@@ -725,6 +727,12 @@ static struct bq27xxx_dm_reg bq27545_dm_regs[] = {
 #define bq27545_dm_regs 0
 #endif
 
+static struct bq27xxx_dm_reg bq27411_dm_regs[] = {
+	[BQ27XXX_DM_DESIGN_CAPACITY]   = { 82, 10, 2,    0, 32767 },
+	[BQ27XXX_DM_DESIGN_ENERGY]     = { 82, 12, 2,    0, 32767 },
+	[BQ27XXX_DM_TERMINATE_VOLTAGE] = { 82, 16, 2, 2800,  3700 },
+};
+
 static struct bq27xxx_dm_reg bq27421_dm_regs[] = {
 	[BQ27XXX_DM_DESIGN_CAPACITY]   = { 82, 10, 2,    0,  8000 },
 	[BQ27XXX_DM_DESIGN_ENERGY]     = { 82, 12, 2,    0, 32767 },
@@ -802,6 +810,7 @@ static struct {
 	[BQ27546]   = BQ27XXX_DATA(bq27546,   0         , BQ27XXX_O_OTDC),
 	[BQ27742]   = BQ27XXX_DATA(bq27742,   0         , BQ27XXX_O_OTDC),
 	[BQ27545]   = BQ27XXX_DATA(bq27545,   0x04143672, BQ27XXX_O_OTDC),
+	[BQ27411]   = BQ27XXX_DATA(bq27411,   0x80008000, BQ27XXX_O_UTOT | BQ27XXX_O_CFGUP | BQ27XXX_O_RAM),
 	[BQ27421]   = BQ27XXX_DATA(bq27421,   0x80008000, BQ27XXX_O_UTOT | BQ27XXX_O_CFGUP | BQ27XXX_O_RAM),
 	[BQ27425]   = BQ27XXX_DATA(bq27425,   0x04143672, BQ27XXX_O_UTOT | BQ27XXX_O_CFGUP),
 	[BQ27426]   = BQ27XXX_DATA(bq27426,   0x80008000, BQ27XXX_O_UTOT | BQ27XXX_O_CFGUP | BQ27XXX_O_RAM),
diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c
index 40069128ad44..2677c38a8a42 100644
--- a/drivers/power/supply/bq27xxx_battery_i2c.c
+++ b/drivers/power/supply/bq27xxx_battery_i2c.c
@@ -247,6 +247,7 @@ static const struct i2c_device_id bq27xxx_i2c_id_table[] = {
 	{ "bq27546", BQ27546 },
 	{ "bq27742", BQ27742 },
 	{ "bq27545", BQ27545 },
+	{ "bq27411", BQ27411 },
 	{ "bq27421", BQ27421 },
 	{ "bq27425", BQ27425 },
 	{ "bq27426", BQ27426 },
@@ -279,6 +280,7 @@ static const struct of_device_id bq27xxx_battery_i2c_of_match_table[] = {
 	{ .compatible = "ti,bq27546" },
 	{ .compatible = "ti,bq27742" },
 	{ .compatible = "ti,bq27545" },
+	{ .compatible = "ti,bq27411" },
 	{ .compatible = "ti,bq27421" },
 	{ .compatible = "ti,bq27425" },
 	{ .compatible = "ti,bq27426" },
diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h
index d6355f49fbae..507c5e214c42 100644
--- a/include/linux/power/bq27xxx_battery.h
+++ b/include/linux/power/bq27xxx_battery.h
@@ -24,6 +24,7 @@ enum bq27xxx_chip {
 	BQ27546,
 	BQ27742,
 	BQ27545, /* bq27545 */
+	BQ27411,
 	BQ27421, /* bq27421, bq27441, bq27621 */
 	BQ27425,
 	BQ27426,
-- 
cgit v1.2.3


From bd93227897007bac09c44fe67626035303905900 Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Wed, 22 Aug 2018 22:43:00 +0200
Subject: USB: EHCI: ehci-mv: remove private_init

It's unused.

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ehci-mv.c           | 4 ----
 include/linux/platform_data/mv_usb.h | 1 -
 2 files changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/host/ehci-mv.c b/drivers/usb/host/ehci-mv.c
index 77a4ab1dcd07..705d1b43b2dd 100644
--- a/drivers/usb/host/ehci-mv.c
+++ b/drivers/usb/host/ehci-mv.c
@@ -203,9 +203,6 @@ static int mv_ehci_probe(struct platform_device *pdev)
 		device_wakeup_enable(hcd->self.controller);
 	}
 
-	if (pdata->private_init)
-		pdata->private_init(ehci_mv->op_regs, ehci_mv->phy_regs);
-
 	dev_info(&pdev->dev,
 		 "successful find EHCI device with regs 0x%p irq %d"
 		 " working in %s mode\n", hcd->regs, hcd->irq,
@@ -260,7 +257,6 @@ static const struct platform_device_id ehci_id_table[] = {
 static void mv_ehci_shutdown(struct platform_device *pdev)
 {
 	struct usb_hcd *hcd = platform_get_drvdata(pdev);
-	struct ehci_hcd_mv *ehci_mv = hcd_to_ehci_hcd_mv(hcd);
 
 	if (!hcd->rh_registered)
 		return;
diff --git a/include/linux/platform_data/mv_usb.h b/include/linux/platform_data/mv_usb.h
index 98b7925f1a2d..c0f624aca81c 100644
--- a/include/linux/platform_data/mv_usb.h
+++ b/include/linux/platform_data/mv_usb.h
@@ -48,6 +48,5 @@ struct mv_usb_platform_data {
 	int	(*phy_init)(void __iomem *regbase);
 	void	(*phy_deinit)(void __iomem *regbase);
 	int	(*set_vbus)(unsigned int vbus);
-	int     (*private_init)(void __iomem *opregs, void __iomem *phyregs);
 };
 #endif
-- 
cgit v1.2.3


From cd7753d371388e712e3ee52b693459f9b71aaac2 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Thu, 20 Sep 2018 14:23:40 +0300
Subject: drivers: base: Helpers for adding device connection descriptions

Introducing helpers for adding and removing multiple device
connection descriptions at once.

Acked-by: Hans de Goede <hdegoede@redhat.com>
Tested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 8f882549edee..3f1066a9e1c3 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -773,6 +773,30 @@ struct device *device_connection_find(struct device *dev, const char *con_id);
 void device_connection_add(struct device_connection *con);
 void device_connection_remove(struct device_connection *con);
 
+/**
+ * device_connections_add - Add multiple device connections at once
+ * @cons: Zero terminated array of device connection descriptors
+ */
+static inline void device_connections_add(struct device_connection *cons)
+{
+	struct device_connection *c;
+
+	for (c = cons; c->endpoint[0]; c++)
+		device_connection_add(c);
+}
+
+/**
+ * device_connections_remove - Remove multiple device connections at once
+ * @cons: Zero terminated array of device connection descriptors
+ */
+static inline void device_connections_remove(struct device_connection *cons)
+{
+	struct device_connection *c;
+
+	for (c = cons; c->endpoint[0]; c++)
+		device_connection_remove(c);
+}
+
 /**
  * enum device_link_state - Device link states.
  * @DL_STATE_NONE: The presence of the drivers is not being tracked.
-- 
cgit v1.2.3


From b1078c355d76769b5ddefc67d143fbd9b6e52c05 Mon Sep 17 00:00:00 2001
From: Michal Simek <michal.simek@xilinx.com>
Date: Thu, 20 Sep 2018 13:41:52 +0200
Subject: of: base: Introduce of_alias_get_alias_list() to check alias IDs

The function travels the lookup table to record alias ids for the given
device match structures and alias stem.
This function will be used by serial drivers to check if requested alias
is allocated or free to use.

Signed-off-by: Michal Simek <michal.simek@xilinx.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/of/base.c  | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/of.h | 10 ++++++++++
 2 files changed, 62 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index 74eaedd5b860..33011b88ed3f 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -16,6 +16,7 @@
 
 #define pr_fmt(fmt)	"OF: " fmt
 
+#include <linux/bitmap.h>
 #include <linux/console.h>
 #include <linux/ctype.h>
 #include <linux/cpu.h>
@@ -1942,6 +1943,57 @@ int of_alias_get_id(struct device_node *np, const char *stem)
 }
 EXPORT_SYMBOL_GPL(of_alias_get_id);
 
+/**
+ * of_alias_get_alias_list - Get alias list for the given device driver
+ * @matches:	Array of OF device match structures to search in
+ * @stem:	Alias stem of the given device_node
+ * @bitmap:	Bitmap field pointer
+ * @nbits:	Maximum number of alias ID which can be recorded it bitmap
+ *
+ * The function travels the lookup table to record alias ids for the given
+ * device match structures and alias stem.
+ *
+ * Return:	0 or -ENOSYS when !CONFIG_OF
+ */
+int of_alias_get_alias_list(const struct of_device_id *matches,
+			     const char *stem, unsigned long *bitmap,
+			     unsigned int nbits)
+{
+	struct alias_prop *app;
+
+	/* Zero bitmap field to make sure that all the time it is clean */
+	bitmap_zero(bitmap, nbits);
+
+	mutex_lock(&of_mutex);
+	pr_debug("%s: Looking for stem: %s\n", __func__, stem);
+	list_for_each_entry(app, &aliases_lookup, link) {
+		pr_debug("%s: stem: %s, id: %d\n",
+			 __func__, app->stem, app->id);
+
+		if (strcmp(app->stem, stem) != 0) {
+			pr_debug("%s: stem comparison doesn't passed %s\n",
+				 __func__, app->stem);
+			continue;
+		}
+
+		if (app->id >= nbits) {
+			pr_debug("%s: ID %d greater then bitmap field %d\n",
+				__func__, app->id, nbits);
+			continue;
+		}
+
+		if (of_match_node(matches, app->np)) {
+			pr_debug("%s: Allocated ID %d\n", __func__, app->id);
+			set_bit(app->id, bitmap);
+		}
+		/* Alias exist but it not compatible with matches */
+	}
+	mutex_unlock(&of_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(of_alias_get_alias_list);
+
 /**
  * of_alias_get_highest_id - Get highest alias id for the given stem
  * @stem:	Alias stem to be examined
diff --git a/include/linux/of.h b/include/linux/of.h
index 99b0ebf49632..d51457b40725 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -392,6 +392,9 @@ extern int of_phandle_iterator_args(struct of_phandle_iterator *it,
 extern void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align));
 extern int of_alias_get_id(struct device_node *np, const char *stem);
 extern int of_alias_get_highest_id(const char *stem);
+extern int of_alias_get_alias_list(const struct of_device_id *matches,
+				   const char *stem, unsigned long *bitmap,
+				   unsigned int nbits);
 
 extern int of_machine_is_compatible(const char *compat);
 
@@ -893,6 +896,13 @@ static inline int of_alias_get_highest_id(const char *stem)
 	return -ENOSYS;
 }
 
+static inline int of_alias_get_alias_list(const struct of_device_id *matches,
+					  const char *stem, unsigned long *bitmap,
+					  unsigned int nbits)
+{
+	return -ENOSYS;
+}
+
 static inline int of_machine_is_compatible(const char *compat)
 {
 	return 0;
-- 
cgit v1.2.3


From d124b44f09cab67fc6da4a4513417e3e54b01efc Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Tue, 18 Sep 2018 18:55:41 +0200
Subject: Compiler Attributes: naked was fixed in gcc 4.6

Commit 9c695203a7dd ("compiler-gcc.h: gcc-4.5 needs noclone
and noinline on __naked functions") added noinline and noclone
as a workaround for a gcc 4.5 bug, which was resolved in 4.6.0.

Since now the minimum gcc supported version is 4.6,
we can clean it up.

See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=44290
and https://godbolt.org/z/h6NMIL

Fixes: 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive")
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Eli Friedman <efriedma@codeaurora.org>
Cc: Christopher Li <sparse@chrisli.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Joe Perches <joe@perches.com>
Cc: Dominique Martinet <asmadeus@codewreck.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-sparse@vger.kernel.org
Tested-by: Stefan Agner <stefan@agner.ch>
Reviewed-by: Stefan Agner <stefan@agner.ch>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/compiler-gcc.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 763bbad1e258..25d3dd6b2702 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -84,14 +84,8 @@
  * to trace naked functions because then mcount is called without
  * stack and frame pointer being set up and there is no chance to
  * restore the lr register to the value before mcount was called.
- *
- * The asm() bodies of naked functions often depend on standard calling
- * conventions, therefore they must be noinline and noclone.
- *
- * GCC 4.[56] currently fail to enforce this, so we must do so ourselves.
- * See GCC PR44290.
  */
-#define __naked		__attribute__((naked)) noinline __noclone notrace
+#define __naked		__attribute__((naked)) notrace
 
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
-- 
cgit v1.2.3


From ae596de1a0c8c2c924dc99d23c026259372ab234 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Tue, 18 Sep 2018 18:55:42 +0200
Subject: Compiler Attributes: naked can be shared

The naked attribute is supported by at least gcc >= 4.6 (for ARM,
which is the only current user), gcc >= 8 (for x86), clang >= 3.1
and icc >= 13. See https://godbolt.org/z/350Dyc

Therefore, move it out of compiler-gcc.h so that the definition
is shared by all compilers.

This also fixes Clang support for ARM32 --- 815f0ddb346c
("include/linux/compiler*.h: make compiler-*.h mutually exclusive").

Fixes: 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive")
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Eli Friedman <efriedma@codeaurora.org>
Cc: Christopher Li <sparse@chrisli.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: Joe Perches <joe@perches.com>
Cc: Dominique Martinet <asmadeus@codewreck.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-sparse@vger.kernel.org
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Stefan Agner <stefan@agner.ch>
Reviewed-by: Stefan Agner <stefan@agner.ch>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/compiler-gcc.h   | 8 --------
 include/linux/compiler_types.h | 8 ++++++++
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 25d3dd6b2702..4d36b27214fd 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -79,14 +79,6 @@
 #define __noretpoline __attribute__((indirect_branch("keep")))
 #endif
 
-/*
- * it doesn't make sense on ARM (currently the only user of __naked)
- * to trace naked functions because then mcount is called without
- * stack and frame pointer being set up and there is no chance to
- * restore the lr register to the value before mcount was called.
- */
-#define __naked		__attribute__((naked)) notrace
-
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
 #define __optimize(level)	__attribute__((__optimize__(level)))
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 3525c179698c..db192becfec4 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -226,6 +226,14 @@ struct ftrace_likely_data {
 #define notrace			__attribute__((no_instrument_function))
 #endif
 
+/*
+ * it doesn't make sense on ARM (currently the only user of __naked)
+ * to trace naked functions because then mcount is called without
+ * stack and frame pointer being set up and there is no chance to
+ * restore the lr register to the value before mcount was called.
+ */
+#define __naked			__attribute__((naked)) notrace
+
 #define __compiler_offsetof(a, b)	__builtin_offsetof(a, b)
 
 /*
-- 
cgit v1.2.3


From 329357723f5e8e2d7fc3144fb79936b51ce63b76 Mon Sep 17 00:00:00 2001
From: "Andrew F. Davis" <afd@ti.com>
Date: Fri, 31 Aug 2018 14:13:22 -0500
Subject: gpio: davinci: Remove unused member of davinci_gpio_controller

This was added as part of the patch in the fixes below, but was
not needed or used, remove this here.

Fixes: 8e11047b8f3c ("gpio: davinci: Add support for multiple GPIO controllers")
Tested-by: Keerthy <j-keerthy@ti.com>
Acked-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Andrew F. Davis <afd@ti.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 include/linux/platform_data/gpio-davinci.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h
index 57a5a35e0073..b8e4957e7568 100644
--- a/include/linux/platform_data/gpio-davinci.h
+++ b/include/linux/platform_data/gpio-davinci.h
@@ -43,7 +43,6 @@ struct davinci_gpio_controller {
 	void __iomem		*regs[MAX_REGS_BANKS];
 	int			gpio_unbanked;
 	int			irqs[MAX_INT_PER_BANK];
-	unsigned int		base;
 };
 
 /*
-- 
cgit v1.2.3


From c36219d9d8df11641d28c6bd0698459485b1709b Mon Sep 17 00:00:00 2001
From: "Andrew F. Davis" <afd@ti.com>
Date: Fri, 31 Aug 2018 14:13:25 -0500
Subject: gpio: davinci: Remove unneeded GPIO macro

This macro does nothing and has only one user, remove it.

Signed-off-by: Andrew F. Davis <afd@ti.com>
Tested-by: Keerthy <j-keerthy@ti.com>
Acked-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-davinci/board-neuros-osd2.c  | 8 ++++----
 include/linux/platform_data/gpio-davinci.h | 5 -----
 2 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-davinci/board-neuros-osd2.c b/arch/arm/mach-davinci/board-neuros-osd2.c
index 353f9e5a1454..efdaa27241c5 100644
--- a/arch/arm/mach-davinci/board-neuros-osd2.c
+++ b/arch/arm/mach-davinci/board-neuros-osd2.c
@@ -130,10 +130,10 @@ static struct platform_device davinci_fb_device = {
 };
 
 static const struct gpio_led ntosd2_leds[] = {
-	{ .name = "led1_green", .gpio = GPIO(10), },
-	{ .name = "led1_red",   .gpio = GPIO(11), },
-	{ .name = "led2_green", .gpio = GPIO(12), },
-	{ .name = "led2_red",   .gpio = GPIO(13), },
+	{ .name = "led1_green", .gpio = 10, },
+	{ .name = "led1_red",   .gpio = 11, },
+	{ .name = "led2_green", .gpio = 12, },
+	{ .name = "led2_red",   .gpio = 13, },
 };
 
 static struct gpio_led_platform_data ntosd2_leds_data = {
diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h
index b8e4957e7568..47695b342883 100644
--- a/include/linux/platform_data/gpio-davinci.h
+++ b/include/linux/platform_data/gpio-davinci.h
@@ -45,11 +45,6 @@ struct davinci_gpio_controller {
 	int			irqs[MAX_INT_PER_BANK];
 };
 
-/*
- * basic gpio routines
- */
-#define	GPIO(X)		(X)	/* 0 <= X <= (DAVINCI_N_GPIO - 1) */
-
 /* Convert GPIO signal to GPIO pin number */
 #define GPIO_TO_PIN(bank, gpio)	(16 * (bank) + (gpio))
 
-- 
cgit v1.2.3


From 79b73ff9b2a3ef312d8fa30894fd963c80ded466 Mon Sep 17 00:00:00 2001
From: "Andrew F. Davis" <afd@ti.com>
Date: Fri, 31 Aug 2018 14:13:26 -0500
Subject: gpio: davinci: Move driver local definitions to driver

These defines, structs and inline functions are used only internally by
the driver, they do not belong in platform_data. Move them.

Signed-off-by: Andrew F. Davis <afd@ti.com>
Tested-by: Keerthy <j-keerthy@ti.com>
Acked-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-davinci.c                | 28 ++++++++++++++++++++++++++++
 include/linux/platform_data/gpio-davinci.h | 28 ----------------------------
 2 files changed, 28 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index 121a7948f785..5c1564fcc24e 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -9,6 +9,7 @@
  * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
  */
+
 #include <linux/gpio/driver.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
@@ -24,6 +25,12 @@
 #include <linux/platform_device.h>
 #include <linux/platform_data/gpio-davinci.h>
 #include <linux/irqchip/chained_irq.h>
+#include <linux/spinlock.h>
+
+#include <asm-generic/gpio.h>
+
+#define MAX_REGS_BANKS 5
+#define MAX_INT_PER_BANK 32
 
 struct davinci_gpio_regs {
 	u32	dir;
@@ -45,6 +52,27 @@ typedef struct irq_chip *(*gpio_get_irq_chip_cb_t)(unsigned int irq);
 static void __iomem *gpio_base;
 static unsigned int offset_array[5] = {0x10, 0x38, 0x60, 0x88, 0xb0};
 
+struct davinci_gpio_irq_data {
+	void __iomem			*regs;
+	struct davinci_gpio_controller	*chip;
+	int				bank_num;
+};
+
+struct davinci_gpio_controller {
+	struct gpio_chip	chip;
+	struct irq_domain	*irq_domain;
+	/* Serialize access to GPIO registers */
+	spinlock_t		lock;
+	void __iomem		*regs[MAX_REGS_BANKS];
+	int			gpio_unbanked;
+	int			irqs[MAX_INT_PER_BANK];
+};
+
+static inline u32 __gpio_mask(unsigned gpio)
+{
+	return 1 << (gpio % 32);
+}
+
 static inline struct davinci_gpio_regs __iomem *irq2regs(struct irq_data *d)
 {
 	struct davinci_gpio_regs __iomem *g;
diff --git a/include/linux/platform_data/gpio-davinci.h b/include/linux/platform_data/gpio-davinci.h
index 47695b342883..f92a47e18034 100644
--- a/include/linux/platform_data/gpio-davinci.h
+++ b/include/linux/platform_data/gpio-davinci.h
@@ -16,40 +16,12 @@
 #ifndef __DAVINCI_GPIO_PLATFORM_H
 #define __DAVINCI_GPIO_PLATFORM_H
 
-#include <linux/io.h>
-#include <linux/spinlock.h>
-
-#include <asm-generic/gpio.h>
-
-#define MAX_REGS_BANKS		5
-#define MAX_INT_PER_BANK 32
-
 struct davinci_gpio_platform_data {
 	u32	ngpio;
 	u32	gpio_unbanked;
 };
 
-struct davinci_gpio_irq_data {
-	void __iomem			*regs;
-	struct davinci_gpio_controller	*chip;
-	int				bank_num;
-};
-
-struct davinci_gpio_controller {
-	struct gpio_chip	chip;
-	struct irq_domain	*irq_domain;
-	/* Serialize access to GPIO registers */
-	spinlock_t		lock;
-	void __iomem		*regs[MAX_REGS_BANKS];
-	int			gpio_unbanked;
-	int			irqs[MAX_INT_PER_BANK];
-};
-
 /* Convert GPIO signal to GPIO pin number */
 #define GPIO_TO_PIN(bank, gpio)	(16 * (bank) + (gpio))
 
-static inline u32 __gpio_mask(unsigned gpio)
-{
-	return 1 << (gpio % 32);
-}
 #endif
-- 
cgit v1.2.3


From 60489f085574157c343fc62a32f997fe7346a659 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 20 Sep 2018 09:31:10 +0200
Subject: spi: spi-mem: Add missing description for data.nbytes field

Add a description for spi_mem_op.data.nbytes to the kerneldoc header.

Fixes: c36ff266dc82 ("spi: Extend the core to ease integration of SPI memory controllers")
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi-mem.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index 62722fb7472d..0cfbb1ad8d96 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -79,6 +79,8 @@ enum spi_mem_data_dir {
  * @dummy.buswidth: number of IO lanes used to transmit the dummy bytes
  * @data.buswidth: number of IO lanes used to send/receive the data
  * @data.dir: direction of the transfer
+ * @data.nbytes: number of data bytes to send/receive. Can be zero if the
+ *		 operation does not involve transferring data
  * @data.buf.in: input buffer
  * @data.buf.out: output buffer
  */
-- 
cgit v1.2.3


From c949a8e8b43f2c75567269bcc9a50d704ae3c420 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 20 Sep 2018 09:31:11 +0200
Subject: spi: spi-mem: Move the DMA-able constraint doc to the kerneldoc
 header

We'd better have that documented in the kerneldoc header, so that it's
exposed to the doc generated by Sphinx.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/spi/spi-mem.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index 0cfbb1ad8d96..7195fbc234aa 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -81,8 +81,8 @@ enum spi_mem_data_dir {
  * @data.dir: direction of the transfer
  * @data.nbytes: number of data bytes to send/receive. Can be zero if the
  *		 operation does not involve transferring data
- * @data.buf.in: input buffer
- * @data.buf.out: output buffer
+ * @data.buf.in: input buffer (must be DMA-able)
+ * @data.buf.out: output buffer (must be DMA-able)
  */
 struct spi_mem_op {
 	struct {
@@ -105,7 +105,6 @@ struct spi_mem_op {
 		u8 buswidth;
 		enum spi_mem_data_dir dir;
 		unsigned int nbytes;
-		/* buf.{in,out} must be DMA-able. */
 		union {
 			void *in;
 			const void *out;
-- 
cgit v1.2.3


From 26683316c92ab2d1b330a3a38548328c9b136ad1 Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Date: Mon, 10 Sep 2018 19:47:22 +0200
Subject: ARM: OMAP1: ams-delta-fiq: Use <linux/platform_data/gpio-omap.h>

Instead of defining symbols already defined in
linux/platform_data/gpio-omap.h, use that header file.

Since we include the header into an assembler code, prevent C only bits
from being read in.

Signed-off-by: Janusz Krzysztofik <jmkrzyszt@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap1/ams-delta-fiq-handler.S | 12 +++---------
 include/linux/platform_data/gpio-omap.h     |  4 ++++
 2 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap1/ams-delta-fiq-handler.S b/arch/arm/mach-omap1/ams-delta-fiq-handler.S
index ddc27638ba2a..e3faa0274b56 100644
--- a/arch/arm/mach-omap1/ams-delta-fiq-handler.S
+++ b/arch/arm/mach-omap1/ams-delta-fiq-handler.S
@@ -15,6 +15,7 @@
 
 #include <linux/linkage.h>
 #include <linux/platform_data/ams-delta-fiq.h>
+#include <linux/platform_data/gpio-omap.h>
 
 #include <asm/assembler.h>
 #include <mach/board-ams-delta.h>
@@ -24,17 +25,10 @@
 #include "soc.h"
 
 /*
- * GPIO related definitions, copied from arch/arm/plat-omap/gpio.c.
- * Unfortunately, those were not placed in a separate header file.
+ * OMAP1510 GPIO related symbol copied from arch/arm/mach-omap1/gpio15xx.c.
+ * Unfortunately, it was not placed in a separate header file.
  */
 #define OMAP1510_GPIO_BASE		0xFFFCE000
-#define OMAP1510_GPIO_DATA_INPUT	0x00
-#define OMAP1510_GPIO_DATA_OUTPUT	0x04
-#define OMAP1510_GPIO_DIR_CONTROL	0x08
-#define OMAP1510_GPIO_INT_CONTROL	0x0c
-#define OMAP1510_GPIO_INT_MASK		0x10
-#define OMAP1510_GPIO_INT_STATUS	0x14
-#define OMAP1510_GPIO_PIN_CONTROL	0x18
 
 /* GPIO register bitmasks */
 #define KEYBRD_DATA_MASK		(0x1 << AMS_DELTA_GPIO_PIN_KEYBRD_DATA)
diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h
index 8612855691b2..ed071f76b642 100644
--- a/include/linux/platform_data/gpio-omap.h
+++ b/include/linux/platform_data/gpio-omap.h
@@ -24,8 +24,10 @@
 #ifndef __ASM_ARCH_OMAP_GPIO_H
 #define __ASM_ARCH_OMAP_GPIO_H
 
+#ifndef __ASSEMBLER__
 #include <linux/io.h>
 #include <linux/platform_device.h>
+#endif
 
 #define OMAP1_MPUIO_BASE			0xfffb5000
 
@@ -157,6 +159,7 @@
 #define OMAP_MPUIO(nr)		(OMAP_MAX_GPIO_LINES + (nr))
 #define OMAP_GPIO_IS_MPUIO(nr)	((nr) >= OMAP_MAX_GPIO_LINES)
 
+#ifndef __ASSEMBLER__
 struct omap_gpio_reg_offs {
 	u16 revision;
 	u16 direction;
@@ -215,5 +218,6 @@ static inline void omap2_gpio_resume_after_idle(void)
 {
 }
 #endif
+#endif /* __ASSEMBLER__ */
 
 #endif
-- 
cgit v1.2.3


From e9158b35ef9afb3bf24e0404c8fd4cd723eafa5b Mon Sep 17 00:00:00 2001
From: Horia Geantă <horia.geanta@nxp.com>
Date: Wed, 12 Sep 2018 11:59:26 +0300
Subject: bus: fsl-mc: add support for dpseci device type
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Horia Geantă <horia.geanta@nxp.com>
Acked-by: Laurentiu Tudor <laurentiu.tudor@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/bus/fsl-mc/fsl-mc-bus.c | 5 +++++
 include/linux/fsl/mc.h          | 6 ++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c
index 5d8266c6571f..4552b06fe601 100644
--- a/drivers/bus/fsl-mc/fsl-mc-bus.c
+++ b/drivers/bus/fsl-mc/fsl-mc-bus.c
@@ -188,6 +188,10 @@ struct device_type fsl_mc_bus_dprtc_type = {
 	.name = "fsl_mc_bus_dprtc"
 };
 
+struct device_type fsl_mc_bus_dpseci_type = {
+	.name = "fsl_mc_bus_dpseci"
+};
+
 static struct device_type *fsl_mc_get_device_type(const char *type)
 {
 	static const struct {
@@ -203,6 +207,7 @@ static struct device_type *fsl_mc_get_device_type(const char *type)
 		{ &fsl_mc_bus_dpmcp_type, "dpmcp" },
 		{ &fsl_mc_bus_dpmac_type, "dpmac" },
 		{ &fsl_mc_bus_dprtc_type, "dprtc" },
+		{ &fsl_mc_bus_dpseci_type, "dpseci" },
 		{ NULL, NULL }
 	};
 	int i;
diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h
index f27cb14088a4..5160f06ffbac 100644
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -405,6 +405,7 @@ extern struct device_type fsl_mc_bus_dpcon_type;
 extern struct device_type fsl_mc_bus_dpmcp_type;
 extern struct device_type fsl_mc_bus_dpmac_type;
 extern struct device_type fsl_mc_bus_dprtc_type;
+extern struct device_type fsl_mc_bus_dpseci_type;
 
 static inline bool is_fsl_mc_bus_dprc(const struct fsl_mc_device *mc_dev)
 {
@@ -451,6 +452,11 @@ static inline bool is_fsl_mc_bus_dprtc(const struct fsl_mc_device *mc_dev)
 	return mc_dev->dev.type == &fsl_mc_bus_dprtc_type;
 }
 
+static inline bool is_fsl_mc_bus_dpseci(const struct fsl_mc_device *mc_dev)
+{
+	return mc_dev->dev.type == &fsl_mc_bus_dpseci_type;
+}
+
 /*
  * Data Path Buffer Pool (DPBP) API
  * Contains initialization APIs and runtime control APIs for DPBP
-- 
cgit v1.2.3


From c785896b21dd8e156326ff660050b0074d3431df Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Sat, 15 Sep 2018 21:38:24 -0700
Subject: cpufeature: avoid warning when compiling with clang

The table id (second) argument to MODULE_DEVICE_TABLE is often
referenced otherwise. This is not the case for CPU features. This
leads to warnings when building the kernel with Clang:
  arch/arm/crypto/aes-ce-glue.c:450:1: warning: variable
    'cpu_feature_match_AES' is not needed and will not be emitted
    [-Wunneeded-internal-declaration]
  module_cpu_feature_match(AES, aes_init);
  ^

Avoid warnings by using __maybe_unused, similar to commit 1f318a8bafcf
("modules: mark __inittest/__exittest as __maybe_unused").

Fixes: 67bad2fdb754 ("cpu: add generic support for CPU feature based module autoloading")
Signed-off-by: Stefan Agner <stefan@agner.ch>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/cpufeature.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/cpufeature.h b/include/linux/cpufeature.h
index 986c06c88d81..84d3c81b5978 100644
--- a/include/linux/cpufeature.h
+++ b/include/linux/cpufeature.h
@@ -45,7 +45,7 @@
  * 'asm/cpufeature.h' of your favorite architecture.
  */
 #define module_cpu_feature_match(x, __initfunc)			\
-static struct cpu_feature const cpu_feature_match_ ## x[] =	\
+static struct cpu_feature const __maybe_unused cpu_feature_match_ ## x[] = \
 	{ { .feature = cpu_feature(x) }, { } };			\
 MODULE_DEVICE_TABLE(cpu, cpu_feature_match_ ## x);		\
 								\
-- 
cgit v1.2.3


From 5d773ff41a7cdf0ef6cc6647435d59f0cf53e7b1 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Mon, 17 Sep 2018 13:30:46 +0300
Subject: net/mlx5: Rename incorrect naming in IFC file

Remove a trailing underscore from the multicast/unicast names.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/qp.c                     | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 2 +-
 include/linux/mlx5/mlx5_ifc.h                       | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 6cba2a02d11b..daf1eb84cd31 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1279,7 +1279,7 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
 
 	if (dev->rep)
 		MLX5_SET(tirc, tirc, self_lb_block,
-			 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+			 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
 
 	err = mlx5_core_create_tir(dev->mdev, in, inlen, &rq->tirn);
 
@@ -1582,7 +1582,7 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 create_tir:
 	if (dev->rep)
 		MLX5_SET(tirc, tirc, self_lb_block,
-			 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+			 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
 
 	err = mlx5_core_create_tir(dev->mdev, in, inlen, &qp->rss_qp.tirn);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index db3278cc052b..3078491cc0d0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -153,7 +153,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
 
 	if (enable_uc_lb)
 		MLX5_SET(modify_tir_in, in, ctx.self_lb_block,
-			 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+			 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
 
 	MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 3a4a2e0567e9..4c7a1d25d73b 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -2559,8 +2559,8 @@ enum {
 };
 
 enum {
-	MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_    = 0x1,
-	MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST_  = 0x2,
+	MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST    = 0x1,
+	MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST  = 0x2,
 };
 
 struct mlx5_ifc_tirc_bits {
-- 
cgit v1.2.3


From b57e99b4b8b0ebdf9707424e7ddc0c392bdc5fe6 Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Fri, 21 Sep 2018 16:44:34 -0700
Subject: block: use nanosecond resolution for iostat

Klaus Kusche reported that the I/O busy time in /proc/diskstats was not
updating properly on 4.18. This is because we started using ktime to
track elapsed time, and we convert nanoseconds to jiffies when we update
the partition counter. However, this gets rounded down, so any I/Os that
take less than a jiffy are not accounted for. Previously in this case,
the value of jiffies would sometimes increment while we were doing I/O,
so at least some I/Os were accounted for.

Let's convert the stats to use nanoseconds internally. We still report
milliseconds as before, now more accurately than ever. The value is
still truncated to 32 bits for backwards compatibility.

Fixes: 522a777566f5 ("block: consolidate struct request timestamp fields")
Cc: stable@vger.kernel.org
Reported-by: Klaus Kusche <klaus.kusche@computerix.info>
Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c               | 2 +-
 block/blk-core.c          | 4 +---
 block/genhd.c             | 6 +++---
 block/partition-generic.c | 6 +++---
 include/linux/genhd.h     | 5 ++++-
 5 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 8c680a776171..0093bed81c0e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1684,7 +1684,7 @@ void generic_end_io_acct(struct request_queue *q, int req_op,
 	const int sgrp = op_stat_group(req_op);
 	int cpu = part_stat_lock();
 
-	part_stat_add(cpu, part, ticks[sgrp], duration);
+	part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration));
 	part_round_stats(q, cpu, part);
 	part_dec_in_flight(q, part, op_is_write(req_op));
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 4dbc93f43b38..cff0a60ee200 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2733,17 +2733,15 @@ void blk_account_io_done(struct request *req, u64 now)
 	 * containing request is enough.
 	 */
 	if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
-		unsigned long duration;
 		const int sgrp = op_stat_group(req_op(req));
 		struct hd_struct *part;
 		int cpu;
 
-		duration = nsecs_to_jiffies(now - req->start_time_ns);
 		cpu = part_stat_lock();
 		part = req->part;
 
 		part_stat_inc(cpu, part, ios[sgrp]);
-		part_stat_add(cpu, part, ticks[sgrp], duration);
+		part_stat_add(cpu, part, nsecs[sgrp], now - req->start_time_ns);
 		part_round_stats(req->q, cpu, part);
 		part_dec_in_flight(req->q, part, rq_data_dir(req));
 
diff --git a/block/genhd.c b/block/genhd.c
index 8cc719a37b32..be5bab20b2ab 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1343,18 +1343,18 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 			   part_stat_read(hd, ios[STAT_READ]),
 			   part_stat_read(hd, merges[STAT_READ]),
 			   part_stat_read(hd, sectors[STAT_READ]),
-			   jiffies_to_msecs(part_stat_read(hd, ticks[STAT_READ])),
+			   (unsigned int)part_stat_read_msecs(hd, STAT_READ),
 			   part_stat_read(hd, ios[STAT_WRITE]),
 			   part_stat_read(hd, merges[STAT_WRITE]),
 			   part_stat_read(hd, sectors[STAT_WRITE]),
-			   jiffies_to_msecs(part_stat_read(hd, ticks[STAT_WRITE])),
+			   (unsigned int)part_stat_read_msecs(hd, STAT_WRITE),
 			   inflight[0],
 			   jiffies_to_msecs(part_stat_read(hd, io_ticks)),
 			   jiffies_to_msecs(part_stat_read(hd, time_in_queue)),
 			   part_stat_read(hd, ios[STAT_DISCARD]),
 			   part_stat_read(hd, merges[STAT_DISCARD]),
 			   part_stat_read(hd, sectors[STAT_DISCARD]),
-			   jiffies_to_msecs(part_stat_read(hd, ticks[STAT_DISCARD]))
+			   (unsigned int)part_stat_read_msecs(hd, STAT_DISCARD)
 			);
 	}
 	disk_part_iter_exit(&piter);
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 5a8975a1201c..d3d14e81fb12 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -136,18 +136,18 @@ ssize_t part_stat_show(struct device *dev,
 		part_stat_read(p, ios[STAT_READ]),
 		part_stat_read(p, merges[STAT_READ]),
 		(unsigned long long)part_stat_read(p, sectors[STAT_READ]),
-		jiffies_to_msecs(part_stat_read(p, ticks[STAT_READ])),
+		(unsigned int)part_stat_read_msecs(p, STAT_READ),
 		part_stat_read(p, ios[STAT_WRITE]),
 		part_stat_read(p, merges[STAT_WRITE]),
 		(unsigned long long)part_stat_read(p, sectors[STAT_WRITE]),
-		jiffies_to_msecs(part_stat_read(p, ticks[STAT_WRITE])),
+		(unsigned int)part_stat_read_msecs(p, STAT_WRITE),
 		inflight[0],
 		jiffies_to_msecs(part_stat_read(p, io_ticks)),
 		jiffies_to_msecs(part_stat_read(p, time_in_queue)),
 		part_stat_read(p, ios[STAT_DISCARD]),
 		part_stat_read(p, merges[STAT_DISCARD]),
 		(unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
-		jiffies_to_msecs(part_stat_read(p, ticks[STAT_DISCARD])));
+		(unsigned int)part_stat_read_msecs(p, STAT_DISCARD));
 }
 
 ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 57864422a2c8..25c08c6c7f99 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -83,10 +83,10 @@ struct partition {
 } __attribute__((packed));
 
 struct disk_stats {
+	u64 nsecs[NR_STAT_GROUPS];
 	unsigned long sectors[NR_STAT_GROUPS];
 	unsigned long ios[NR_STAT_GROUPS];
 	unsigned long merges[NR_STAT_GROUPS];
-	unsigned long ticks[NR_STAT_GROUPS];
 	unsigned long io_ticks;
 	unsigned long time_in_queue;
 };
@@ -354,6 +354,9 @@ static inline void free_part_stats(struct hd_struct *part)
 
 #endif /* CONFIG_SMP */
 
+#define part_stat_read_msecs(part, which)				\
+	div_u64(part_stat_read(part, nsecs[which]), NSEC_PER_MSEC)
+
 #define part_stat_read_accum(part, field)				\
 	(part_stat_read(part, field[STAT_READ]) +			\
 	 part_stat_read(part, field[STAT_WRITE]) +			\
-- 
cgit v1.2.3


From 27e6fa996c534c32702aa4d32db0ffa383acd050 Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Tue, 11 Sep 2018 14:41:26 -0400
Subject: blkcg: fix ref count issue with bio_blkcg using task_css

The accessor function bio_blkcg either returns the blkcg associated with
the bio or finds one in the current context. This can cause an issue
when trying to associate a bio with a blkcg. Particularly, it's the
third case that is problematic:

	return css_to_blkcg(task_css(current, io_cgrp_id));

As the above may race against task migration and the cgroup exiting, it
is not always ok to take a reference on the blkcg returned from
bio_blkcg.

This patch adds association ahead of calling bio_blkcg rather than
after. This makes association a required and explicit step along the
code paths for calling bio_blkcg. blk_get_rl is modified as well to get
a reference to the blkcg it may use and blk_put_rl will always put the
reference back. Association is also moved above the bio_blkcg call to
ensure it will not return NULL in blk-iolatency.

BFQ and CFQ utilize this flaw, but due to the complexity, I do not want
to address this in this series. I've created a private version of the
function with notes not to use it describing the flaw. Hopefully soon,
that code can be cleaned up.

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-cgroup.c         |   4 +-
 block/bfq-iosched.c        |   2 +-
 block/bio.c                |  10 ++++-
 block/blk-iolatency.c      |   2 +-
 block/cfq-iosched.c        |   4 +-
 include/linux/blk-cgroup.h | 101 +++++++++++++++++++++++++++++++++++++++++----
 6 files changed, 107 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 9fe5952d117d..d9a7916ff0ab 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
 	uint64_t serial_nr;
 
 	rcu_read_lock();
-	serial_nr = bio_blkcg(bio)->css.serial_nr;
+	serial_nr = __bio_blkcg(bio)->css.serial_nr;
 
 	/*
 	 * Check whether blkcg has changed.  The condition may trigger
@@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
 	if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
 		goto out;
 
-	bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
+	bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
 	/*
 	 * Update blkg_path for bfq_log_* functions. We cache this
 	 * path, and update it here, for the following
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index c0b1db3afb81..1a1b80dfd69d 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4359,7 +4359,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
 
 	rcu_read_lock();
 
-	bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
+	bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
 	if (!bfqg) {
 		bfqq = &bfqd->oom_bfqq;
 		goto out;
diff --git a/block/bio.c b/block/bio.c
index 81c05ee51d6c..083f1c9cde0a 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1988,13 +1988,19 @@ int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
  *
  * This function takes an extra reference of @blkcg_css which will be put
  * when @bio is released.  The caller must own @bio and is responsible for
- * synchronizing calls to this function.
+ * synchronizing calls to this function.  If @blkcg_css is NULL, a call to
+ * blkcg_get_css finds the current css from the kthread or task.
  */
 int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
 {
 	if (unlikely(bio->bi_css))
 		return -EBUSY;
-	css_get(blkcg_css);
+
+	if (blkcg_css)
+		css_get(blkcg_css);
+	else
+		blkcg_css = blkcg_get_css();
+
 	bio->bi_css = blkcg_css;
 	return 0;
 }
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index a6f21527e6c7..82450c37f2aa 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -401,8 +401,8 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
 		return;
 
 	rcu_read_lock();
+	bio_associate_blkcg(bio, NULL);
 	blkcg = bio_blkcg(bio);
-	bio_associate_blkcg(bio, &blkcg->css);
 	blkg = blkg_lookup(blkcg, q);
 	if (unlikely(!blkg)) {
 		if (!lock)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 2eb87444b157..d219e9a1af65 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3753,7 +3753,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
 	uint64_t serial_nr;
 
 	rcu_read_lock();
-	serial_nr = bio_blkcg(bio)->css.serial_nr;
+	serial_nr = __bio_blkcg(bio)->css.serial_nr;
 	rcu_read_unlock();
 
 	/*
@@ -3818,7 +3818,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
 	struct cfq_group *cfqg;
 
 	rcu_read_lock();
-	cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio));
+	cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio));
 	if (!cfqg) {
 		cfqq = &cfqd->oom_cfqq;
 		goto out;
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 6d766a19f2bb..24067a1f8b36 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -230,22 +230,100 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 		   char *input, struct blkg_conf_ctx *ctx);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
 
+/**
+ * blkcg_css - find the current css
+ *
+ * Find the css associated with either the kthread or the current task.
+ * This may return a dying css, so it is up to the caller to use tryget logic
+ * to confirm it is alive and well.
+ */
+static inline struct cgroup_subsys_state *blkcg_css(void)
+{
+	struct cgroup_subsys_state *css;
+
+	css = kthread_blkcg();
+	if (css)
+		return css;
+	return task_css(current, io_cgrp_id);
+}
+
+/**
+ * blkcg_get_css - find and get a reference to the css
+ *
+ * Find the css associated with either the kthread or the current task.
+ * This takes a reference on the blkcg which will need to be managed by the
+ * caller.
+ */
+static inline struct cgroup_subsys_state *blkcg_get_css(void)
+{
+	struct cgroup_subsys_state *css;
+
+	rcu_read_lock();
+
+	css = kthread_blkcg();
+	if (css) {
+		css_get(css);
+	} else {
+		/*
+		 * This is a bit complicated.  It is possible task_css is seeing
+		 * an old css pointer here.  This is caused by the current
+		 * thread migrating away from this cgroup and this cgroup dying.
+		 * css_tryget() will fail when trying to take a ref on a cgroup
+		 * that's ref count has hit 0.
+		 *
+		 * Therefore, if it does fail, this means current must have
+		 * been swapped away already and this is waiting for it to
+		 * propagate on the polling cpu.  Hence the use of cpu_relax().
+		 */
+		while (true) {
+			css = task_css(current, io_cgrp_id);
+			if (likely(css_tryget(css)))
+				break;
+			cpu_relax();
+		}
+	}
+
+	rcu_read_unlock();
+
+	return css;
+}
 
 static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
 {
 	return css ? container_of(css, struct blkcg, css) : NULL;
 }
 
-static inline struct blkcg *bio_blkcg(struct bio *bio)
+/**
+ * __bio_blkcg - internal version of bio_blkcg for bfq and cfq
+ *
+ * DO NOT USE.
+ * There is a flaw using this version of the function.  In particular, this was
+ * used in a broken paradigm where association was called on the given css.  It
+ * is possible though that the returned css from task_css() is in the process
+ * of dying due to migration of the current task.  So it is improper to assume
+ * *_get() is going to succeed.  Both BFQ and CFQ rely on this logic and will
+ * take additional work to handle more gracefully.
+ */
+static inline struct blkcg *__bio_blkcg(struct bio *bio)
 {
-	struct cgroup_subsys_state *css;
+	if (bio && bio->bi_css)
+		return css_to_blkcg(bio->bi_css);
+	return css_to_blkcg(blkcg_css());
+}
 
+/**
+ * bio_blkcg - grab the blkcg associated with a bio
+ * @bio: target bio
+ *
+ * This returns the blkcg associated with a bio, NULL if not associated.
+ * Callers are expected to either handle NULL or know association has been
+ * done prior to calling this.
+ */
+static inline struct blkcg *bio_blkcg(struct bio *bio)
+{
 	if (bio && bio->bi_css)
 		return css_to_blkcg(bio->bi_css);
-	css = kthread_blkcg();
-	if (css)
-		return css_to_blkcg(css);
-	return css_to_blkcg(task_css(current, io_cgrp_id));
+	return NULL;
 }
 
 static inline bool blk_cgroup_congested(void)
@@ -534,6 +612,10 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
 	rcu_read_lock();
 
 	blkcg = bio_blkcg(bio);
+	if (blkcg)
+		css_get(&blkcg->css);
+	else
+		blkcg = css_to_blkcg(blkcg_get_css());
 
 	/* bypass blkg lookup and use @q->root_rl directly for root */
 	if (blkcg == &blkcg_root)
@@ -565,6 +647,8 @@ root_rl:
  */
 static inline void blk_put_rl(struct request_list *rl)
 {
+	/* an additional ref is always taken for rl */
+	css_put(&rl->blkg->blkcg->css);
 	if (rl->blkg->blkcg != &blkcg_root)
 		blkg_put(rl->blkg);
 }
@@ -805,10 +889,10 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
 	bool throtl = false;
 
 	rcu_read_lock();
-	blkcg = bio_blkcg(bio);
 
 	/* associate blkcg if bio hasn't attached one */
-	bio_associate_blkcg(bio, &blkcg->css);
+	bio_associate_blkcg(bio, NULL);
+	blkcg = bio_blkcg(bio);
 
 	blkg = blkg_lookup(blkcg, q);
 	if (unlikely(!blkg)) {
@@ -930,6 +1014,7 @@ static inline int blkcg_activate_policy(struct request_queue *q,
 static inline void blkcg_deactivate_policy(struct request_queue *q,
 					   const struct blkcg_policy *pol) { }
 
+static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
 static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
 
 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
-- 
cgit v1.2.3


From 49f4c2dc2b5066e9211101c59cc0828e81d41614 Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Tue, 11 Sep 2018 14:41:27 -0400
Subject: blkcg: update blkg_lookup_create to do locking

To know when to create a blkg, the general pattern is to do a
blkg_lookup and if that fails, lock and then do a lookup again and if
that fails finally create. It doesn't make much sense for everyone who
wants to do creation to write this themselves.

This changes blkg_lookup_create to do locking and implement this
pattern. The old blkg_lookup_create is renamed to __blkg_lookup_create.
If a call site wants to do its own error handling or already owns the
queue lock, they can use __blkg_lookup_create. This will be used in
upcoming patches.

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Liu Bo <bo.liu@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c         | 31 ++++++++++++++++++++++++++++---
 block/blk-iolatency.c      |  2 +-
 include/linux/blk-cgroup.h |  4 +++-
 3 files changed, 32 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c19f9078da1e..cd0d97bed83d 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -259,7 +259,7 @@ err_free_blkg:
 }
 
 /**
- * blkg_lookup_create - lookup blkg, try to create one if not there
+ * __blkg_lookup_create - lookup blkg, try to create one if not there
  * @blkcg: blkcg of interest
  * @q: request_queue of interest
  *
@@ -272,8 +272,8 @@ err_free_blkg:
  * value on error.  If @q is dead, returns ERR_PTR(-EINVAL).  If @q is not
  * dead and bypassing, returns ERR_PTR(-EBUSY).
  */
-struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
-				    struct request_queue *q)
+struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
+				      struct request_queue *q)
 {
 	struct blkcg_gq *blkg;
 
@@ -310,6 +310,31 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 	}
 }
 
+/**
+ * blkg_lookup_create - find or create a blkg
+ * @blkcg: target block cgroup
+ * @q: target request_queue
+ *
+ * This looks up or creates the blkg representing the unique pair
+ * of the blkcg and the request_queue.
+ */
+struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
+				    struct request_queue *q)
+{
+	struct blkcg_gq *blkg = blkg_lookup(blkcg, q);
+	unsigned long flags;
+
+	if (unlikely(!blkg)) {
+		spin_lock_irqsave(q->queue_lock, flags);
+
+		blkg = __blkg_lookup_create(blkcg, q);
+
+		spin_unlock_irqrestore(q->queue_lock, flags);
+	}
+
+	return blkg;
+}
+
 static void blkg_destroy(struct blkcg_gq *blkg)
 {
 	struct blkcg *blkcg = blkg->blkcg;
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 82450c37f2aa..ffde3ab9f84c 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -407,7 +407,7 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
 	if (unlikely(!blkg)) {
 		if (!lock)
 			spin_lock_irq(q->queue_lock);
-		blkg = blkg_lookup_create(blkcg, q);
+		blkg = __blkg_lookup_create(blkcg, q);
 		if (IS_ERR(blkg))
 			blkg = NULL;
 		if (!lock)
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 24067a1f8b36..cc0f238530f6 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -184,6 +184,8 @@ extern struct cgroup_subsys_state * const blkcg_root_css;
 
 struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
 				      struct request_queue *q, bool update_hint);
+struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
+				      struct request_queue *q);
 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 				    struct request_queue *q);
 int blkcg_init_queue(struct request_queue *q);
@@ -897,7 +899,7 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
 	blkg = blkg_lookup(blkcg, q);
 	if (unlikely(!blkg)) {
 		spin_lock_irq(q->queue_lock);
-		blkg = blkg_lookup_create(blkcg, q);
+		blkg = __blkg_lookup_create(blkcg, q);
 		if (IS_ERR(blkg))
 			blkg = NULL;
 		spin_unlock_irq(q->queue_lock);
-- 
cgit v1.2.3


From 07b05bcc3213ac9f8c28c9d835b4bf3d5798cc60 Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Tue, 11 Sep 2018 14:41:28 -0400
Subject: blkcg: convert blkg_lookup_create to find closest blkg

There are several scenarios where blkg_lookup_create can fail. Examples
include the blkcg dying, request_queue is dying, or simply being OOM. At
the end of the day, most handle this by simply falling back to the
q->root_blkg and calling it a day.

This patch implements the notion of closest blkg. During
blkg_lookup_create, if it fails to create, return the closest blkg
found or the q->root_blkg. blkg_try_get_closest is introduced and used
during association so a bio is always attached to a blkg.

Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c                | 17 ++++++++++-------
 block/blk-cgroup.c         | 25 +++++++++++++++++--------
 include/linux/blk-cgroup.h | 14 ++++++++++++++
 3 files changed, 41 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 083f1c9cde0a..bfd41e8b53a8 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -2007,21 +2007,24 @@ int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
 EXPORT_SYMBOL_GPL(bio_associate_blkcg);
 
 /**
- * bio_associate_blkg - associate a bio with the specified blkg
+ * bio_associate_blkg - associate a bio with the a blkg
  * @bio: target bio
  * @blkg: the blkg to associate
  *
- * Associate @bio with the blkg specified by @blkg.  This is the queue specific
- * blkcg information associated with the @bio, a reference will be taken on the
- * @blkg and will be freed when the bio is freed.
+ * This tries to associate @bio with the specified blkg.  Association failure
+ * is handled by walking up the blkg tree.  Therefore, the blkg associated can
+ * be anything between @blkg and the root_blkg.  This situation only happens
+ * when a cgroup is dying and then the remaining bios will spill to the closest
+ * alive blkg.
+ *
+ * A reference will be taken on the @blkg and will be released when @bio is
+ * freed.
  */
 int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
 {
 	if (unlikely(bio->bi_blkg))
 		return -EBUSY;
-	if (!blkg_try_get(blkg))
-		return -ENODEV;
-	bio->bi_blkg = blkg;
+	bio->bi_blkg = blkg_try_get_closest(blkg);
 	return 0;
 }
 
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index cd0d97bed83d..e9e3a955f61a 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -268,9 +268,8 @@ err_free_blkg:
  * that all non-root blkg's have access to the parent blkg.  This function
  * should be called under RCU read lock and @q->queue_lock.
  *
- * Returns pointer to the looked up or created blkg on success, ERR_PTR()
- * value on error.  If @q is dead, returns ERR_PTR(-EINVAL).  If @q is not
- * dead and bypassing, returns ERR_PTR(-EBUSY).
+ * Returns the blkg or the closest blkg if blkg_create fails as it walks
+ * down from root.
  */
 struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
 				      struct request_queue *q)
@@ -285,7 +284,7 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
 	 * we shouldn't allow anything to go through for a bypassing queue.
 	 */
 	if (unlikely(blk_queue_bypass(q)))
-		return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
+		return q->root_blkg;
 
 	blkg = __blkg_lookup(blkcg, q, true);
 	if (blkg)
@@ -293,19 +292,29 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
 
 	/*
 	 * Create blkgs walking down from blkcg_root to @blkcg, so that all
-	 * non-root blkgs have access to their parents.
+	 * non-root blkgs have access to their parents.  Returns the closest
+	 * blkg to the intended blkg should blkg_create() fail.
 	 */
 	while (true) {
 		struct blkcg *pos = blkcg;
 		struct blkcg *parent = blkcg_parent(blkcg);
-
-		while (parent && !__blkg_lookup(parent, q, false)) {
+		struct blkcg_gq *ret_blkg = q->root_blkg;
+
+		while (parent) {
+			blkg = __blkg_lookup(parent, q, false);
+			if (blkg) {
+				/* remember closest blkg */
+				ret_blkg = blkg;
+				break;
+			}
 			pos = parent;
 			parent = blkcg_parent(parent);
 		}
 
 		blkg = blkg_create(pos, q, NULL);
-		if (pos == blkcg || IS_ERR(blkg))
+		if (IS_ERR(blkg))
+			return ret_blkg;
+		if (pos == blkcg)
 			return blkg;
 	}
 }
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index cc0f238530f6..1fbff1bbb651 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -549,6 +549,20 @@ static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
 	return NULL;
 }
 
+/**
+ * blkg_try_get_closest - try and get a blkg ref on the closet blkg
+ * @blkg: blkg to get
+ *
+ * This walks up the blkg tree to find the closest non-dying blkg and returns
+ * the blkg that it did association with as it may not be the passed in blkg.
+ */
+static inline struct blkcg_gq *blkg_try_get_closest(struct blkcg_gq *blkg)
+{
+	while (!atomic_inc_not_zero(&blkg->refcnt))
+		blkg = blkg->parent;
+
+	return blkg;
+}
 
 void __blkg_release_rcu(struct rcu_head *rcu);
 
-- 
cgit v1.2.3


From a7b39b4e961c4e2b3ed837803a7441a65c90ce33 Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Tue, 11 Sep 2018 14:41:29 -0400
Subject: blkcg: always associate a bio with a blkg

Previously, blkg's were only assigned as needed by blk-iolatency and
blk-throttle. bio->css was also always being associated while blkg was
being looked up and then thrown away in blkcg_bio_issue_check.

This patch begins the cleanup of bio->css and bio->bi_blkg by always
associating a blkg in blkcg_bio_issue_check. This tries to create the
blkg, but if it is not possible, falls back to using the root_blkg of
the request_queue. Therefore, a bio will always be associated with a
blkg. The duplicate association logic is removed from blk-throttle and
blk-iolatency.

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c                | 38 ++++++++++++++++++++++++++++++++++++++
 block/blk-iolatency.c      | 24 ++----------------------
 block/blk-throttle.c       |  5 +----
 include/linux/bio.h        |  3 +++
 include/linux/blk-cgroup.h | 16 ++--------------
 5 files changed, 46 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index bfd41e8b53a8..748d7132f172 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -2028,6 +2028,41 @@ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
 	return 0;
 }
 
+/**
+ * bio_associate_create_blkg - associate a bio with a blkg from q
+ * @q: request_queue where bio is going
+ * @bio: target bio
+ *
+ * Associate @bio with the blkg found from the bio's css and the request_queue.
+ * If one is not found, bio_lookup_blkg creates the blkg.
+ */
+int bio_associate_create_blkg(struct request_queue *q, struct bio *bio)
+{
+	struct blkcg *blkcg;
+	struct blkcg_gq *blkg;
+	int ret = 0;
+
+	/* someone has already associated this bio with a blkg */
+	if (bio->bi_blkg)
+		return ret;
+
+	rcu_read_lock();
+
+	bio_associate_blkcg(bio, NULL);
+	blkcg = bio_blkcg(bio);
+
+	if (!blkcg->css.parent) {
+		ret = bio_associate_blkg(bio, q->root_blkg);
+	} else {
+		blkg = blkg_lookup_create(blkcg, q);
+
+		ret = bio_associate_blkg(bio, blkg);
+	}
+
+	rcu_read_unlock();
+	return ret;
+}
+
 /**
  * bio_disassociate_task - undo bio_associate_current()
  * @bio: target bio
@@ -2057,6 +2092,9 @@ void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
 {
 	if (src->bi_css)
 		WARN_ON(bio_associate_blkcg(dst, src->bi_css));
+
+	if (src->bi_blkg)
+		bio_associate_blkg(dst, src->bi_blkg);
 }
 EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
 #endif /* CONFIG_BLK_CGROUP */
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index ffde3ab9f84c..7337fbc7f850 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -392,34 +392,14 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
 				     spinlock_t *lock)
 {
 	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
-	struct blkcg *blkcg;
-	struct blkcg_gq *blkg;
-	struct request_queue *q = rqos->q;
+	struct blkcg_gq *blkg = bio->bi_blkg;
 	bool issue_as_root = bio_issue_as_root_blkg(bio);
 
 	if (!blk_iolatency_enabled(blkiolat))
 		return;
 
-	rcu_read_lock();
-	bio_associate_blkcg(bio, NULL);
-	blkcg = bio_blkcg(bio);
-	blkg = blkg_lookup(blkcg, q);
-	if (unlikely(!blkg)) {
-		if (!lock)
-			spin_lock_irq(q->queue_lock);
-		blkg = __blkg_lookup_create(blkcg, q);
-		if (IS_ERR(blkg))
-			blkg = NULL;
-		if (!lock)
-			spin_unlock_irq(q->queue_lock);
-	}
-	if (!blkg)
-		goto out;
-
 	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
-	bio_associate_blkg(bio, blkg);
-out:
-	rcu_read_unlock();
+
 	while (blkg && blkg->parent) {
 		struct iolatency_grp *iolat = blkg_to_lat(blkg);
 		if (!iolat) {
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index db1a3a2ae006..e62ae502891b 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2118,9 +2118,6 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
 static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
 {
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-	/* fallback to root_blkg if we fail to get a blkg ref */
-	if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV))
-		bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg);
 	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
 #endif
 }
@@ -2129,7 +2126,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 		    struct bio *bio)
 {
 	struct throtl_qnode *qn = NULL;
-	struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
+	struct throtl_grp *tg = blkg_to_tg(blkg);
 	struct throtl_service_queue *sq;
 	bool rw = bio_data_dir(bio);
 	bool throttled = false;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 14b4fa266357..829cd0bb407d 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -542,11 +542,14 @@ static inline int bio_associate_blkcg_from_page(struct bio *bio,
 #ifdef CONFIG_BLK_CGROUP
 int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
 int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
+int bio_associate_create_blkg(struct request_queue *q, struct bio *bio);
 void bio_disassociate_task(struct bio *bio);
 void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
 #else	/* CONFIG_BLK_CGROUP */
 static inline int bio_associate_blkcg(struct bio *bio,
 			struct cgroup_subsys_state *blkcg_css) { return 0; }
+static inline int bio_associate_create_blkg(struct request_queue *q,
+					    struct bio *bio) { return 0; }
 static inline void bio_disassociate_task(struct bio *bio) { }
 static inline void bio_clone_blkcg_association(struct bio *dst,
 			struct bio *src) { }
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 1fbff1bbb651..6e33ad1d92b4 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -900,29 +900,17 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg
 static inline bool blkcg_bio_issue_check(struct request_queue *q,
 					 struct bio *bio)
 {
-	struct blkcg *blkcg;
 	struct blkcg_gq *blkg;
 	bool throtl = false;
 
 	rcu_read_lock();
 
-	/* associate blkcg if bio hasn't attached one */
-	bio_associate_blkcg(bio, NULL);
-	blkcg = bio_blkcg(bio);
-
-	blkg = blkg_lookup(blkcg, q);
-	if (unlikely(!blkg)) {
-		spin_lock_irq(q->queue_lock);
-		blkg = __blkg_lookup_create(blkcg, q);
-		if (IS_ERR(blkg))
-			blkg = NULL;
-		spin_unlock_irq(q->queue_lock);
-	}
+	bio_associate_create_blkg(q, bio);
+	blkg = bio->bi_blkg;
 
 	throtl = blk_throtl_bio(q, blkg, bio);
 
 	if (!throtl) {
-		blkg = blkg ?: q->root_blkg;
 		/*
 		 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
 		 * is a split bio and we would have already accounted for the
-- 
cgit v1.2.3


From 5bf9a1f3b4efef7e463105dde8bba4d2397909c2 Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Tue, 11 Sep 2018 14:41:30 -0400
Subject: blkcg: consolidate bio_issue_init to be a part of core

bio_issue_init among other things initializes the timestamp for an IO.
Rather than have this logic handled by policies, this consolidates it to
be on the init paths (normal, clone, bounce clone).

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Liu Bo <bo.liu@linux.alibaba.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c                | 2 ++
 block/blk-iolatency.c      | 2 --
 block/blk-throttle.c       | 8 --------
 block/bounce.c             | 2 ++
 include/linux/blk-cgroup.h | 9 +++++++++
 5 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 748d7132f172..80c948da061c 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -610,6 +610,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 	bio->bi_io_vec = bio_src->bi_io_vec;
 
 	bio_clone_blkcg_association(bio, bio_src);
+
+	blkcg_bio_issue_init(bio);
 }
 EXPORT_SYMBOL(__bio_clone_fast);
 
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 7337fbc7f850..2d848b2f8b87 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -398,8 +398,6 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
 	if (!blk_iolatency_enabled(blkiolat))
 		return;
 
-	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
-
 	while (blkg && blkg->parent) {
 		struct iolatency_grp *iolat = blkg_to_lat(blkg);
 		if (!iolat) {
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index e62ae502891b..4bda70e8db48 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2115,13 +2115,6 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
 }
 #endif
 
-static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
-{
-#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
-#endif
-}
-
 bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 		    struct bio *bio)
 {
@@ -2145,7 +2138,6 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 	if (unlikely(blk_queue_bypass(q)))
 		goto out_unlock;
 
-	blk_throtl_assoc_bio(tg, bio);
 	blk_throtl_update_idletime(tg);
 
 	sq = &tg->service_queue;
diff --git a/block/bounce.c b/block/bounce.c
index bc63b3a2d18c..7a08703b1204 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -259,6 +259,8 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
 
 	bio_clone_blkcg_association(bio, bio_src);
 
+	blkcg_bio_issue_init(bio);
+
 	return bio;
 }
 
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 6e33ad1d92b4..a6b6e741a75e 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -897,6 +897,12 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg
 				  struct bio *bio) { return false; }
 #endif
 
+
+static inline void blkcg_bio_issue_init(struct bio *bio)
+{
+	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
+}
+
 static inline bool blkcg_bio_issue_check(struct request_queue *q,
 					 struct bio *bio)
 {
@@ -922,6 +928,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
 		blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
 	}
 
+	blkcg_bio_issue_init(bio);
+
 	rcu_read_unlock();
 	return !throtl;
 }
@@ -1034,6 +1042,7 @@ static inline void blk_put_rl(struct request_list *rl) { }
 static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
 static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
 
+static inline void blkcg_bio_issue_init(struct bio *bio) { }
 static inline bool blkcg_bio_issue_check(struct request_queue *q,
 					 struct bio *bio) { return true; }
 
-- 
cgit v1.2.3


From 74b7c02a9bc124ee3df0d77880ee26db0a325516 Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Tue, 11 Sep 2018 14:41:31 -0400
Subject: blkcg: associate a blkg for pages being evicted by swap

A prior patch in this series added blkg association to bios issued by
cgroups. There are two other paths that we want to attribute work back
to the appropriate cgroup: swap and writeback. Here we modify the way
swap tags bios to include the blkg. Writeback will be tackle in the next
patch.

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 83 +++++++++++++++++++++++++++++++++++++----------------
 include/linux/bio.h | 11 +++++--
 mm/page_io.c        |  2 +-
 3 files changed, 68 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 80c948da061c..387480de6992 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1956,30 +1956,6 @@ EXPORT_SYMBOL(bioset_init_from_src);
 
 #ifdef CONFIG_BLK_CGROUP
 
-#ifdef CONFIG_MEMCG
-/**
- * bio_associate_blkcg_from_page - associate a bio with the page's blkcg
- * @bio: target bio
- * @page: the page to lookup the blkcg from
- *
- * Associate @bio with the blkcg from @page's owning memcg.  This works like
- * every other associate function wrt references.
- */
-int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
-{
-	struct cgroup_subsys_state *blkcg_css;
-
-	if (unlikely(bio->bi_css))
-		return -EBUSY;
-	if (!page->mem_cgroup)
-		return 0;
-	blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
-				     &io_cgrp_subsys);
-	bio->bi_css = blkcg_css;
-	return 0;
-}
-#endif /* CONFIG_MEMCG */
-
 /**
  * bio_associate_blkcg - associate a bio with the specified blkcg
  * @bio: target bio
@@ -2030,6 +2006,65 @@ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
 	return 0;
 }
 
+static int __bio_associate_blkg_from_css(struct bio *bio,
+					 struct cgroup_subsys_state *css)
+{
+	struct blkcg_gq *blkg;
+
+	rcu_read_lock();
+
+	blkg = blkg_lookup_create(css_to_blkcg(css), bio->bi_disk->queue);
+
+	rcu_read_unlock();
+
+	return bio_associate_blkg(bio, blkg);
+}
+
+/**
+ * bio_associate_blkg_from_css - associate a bio with a specified css
+ * @bio: target bio
+ * @css: target css
+ *
+ * Associate @bio with the blkg found by combining the css's blkg and the
+ * request_queue of the @bio.  This takes a reference on the css that will
+ * be put upon freeing of @bio.
+ */
+int bio_associate_blkg_from_css(struct bio *bio,
+				struct cgroup_subsys_state *css)
+{
+	css_get(css);
+	bio->bi_css = css;
+	return __bio_associate_blkg_from_css(bio, css);
+}
+EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
+
+#ifdef CONFIG_MEMCG
+/**
+ * bio_associate_blkg_from_page - associate a bio with the page's blkg
+ * @bio: target bio
+ * @page: the page to lookup the blkcg from
+ *
+ * Associate @bio with the blkg from @page's owning memcg and the respective
+ * request_queue.  This works like every other associate function wrt
+ * references.
+ *
+ * Note: this must be called after bio has an associated device.
+ */
+int bio_associate_blkg_from_page(struct bio *bio, struct page *page)
+{
+	struct cgroup_subsys_state *css;
+
+	if (unlikely(bio->bi_css))
+		return -EBUSY;
+	if (!page->mem_cgroup)
+		return 0;
+	css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
+	bio->bi_css = css;
+
+	return __bio_associate_blkg_from_css(bio, css);
+}
+#endif /* CONFIG_MEMCG */
+
 /**
  * bio_associate_create_blkg - associate a bio with a blkg from q
  * @q: request_queue where bio is going
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 829cd0bb407d..c73a870ebc0e 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -533,21 +533,26 @@ do {						\
 	disk_devt((bio)->bi_disk)
 
 #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
-int bio_associate_blkcg_from_page(struct bio *bio, struct page *page);
+int bio_associate_blkg_from_page(struct bio *bio, struct page *page);
 #else
-static inline int bio_associate_blkcg_from_page(struct bio *bio,
-						struct page *page) {  return 0; }
+static inline int bio_associate_blkg_from_page(struct bio *bio,
+					       struct page *page) { return 0; }
 #endif
 
 #ifdef CONFIG_BLK_CGROUP
 int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
 int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
+int bio_associate_blkg_from_css(struct bio *bio,
+				struct cgroup_subsys_state *css);
 int bio_associate_create_blkg(struct request_queue *q, struct bio *bio);
 void bio_disassociate_task(struct bio *bio);
 void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
 #else	/* CONFIG_BLK_CGROUP */
 static inline int bio_associate_blkcg(struct bio *bio,
 			struct cgroup_subsys_state *blkcg_css) { return 0; }
+static inline int bio_associate_blkg_from_css(struct bio *bio,
+					      struct cgroup_subsys_state *css)
+{ return 0; }
 static inline int bio_associate_create_blkg(struct request_queue *q,
 					    struct bio *bio) { return 0; }
 static inline void bio_disassociate_task(struct bio *bio) { }
diff --git a/mm/page_io.c b/mm/page_io.c
index aafd19ec1db4..573d3663d846 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -339,7 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		goto out;
 	}
 	bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
-	bio_associate_blkcg_from_page(bio, page);
+	bio_associate_blkg_from_page(bio, page);
 	count_swpout_vm_event(page);
 	set_page_writeback(page);
 	unlock_page(page);
-- 
cgit v1.2.3


From bdc2491708c47601603918a9a20acddef6e1d814 Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Tue, 11 Sep 2018 14:41:32 -0400
Subject: blkcg: associate writeback bios with a blkg

One of the goals of this series is to remove a separate reference to
the css of the bio. This can and should be accessed via bio_blkcg. In
this patch, the wbc_init_bio call is changed such that it must be called
after a queue has been associated with the bio.

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/admin-guide/cgroup-v2.rst |  8 +++++---
 fs/buffer.c                             | 10 +++++-----
 fs/ext4/page-io.c                       |  2 +-
 include/linux/writeback.h               |  5 +++--
 4 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 184193bcb262..caf36105a1c7 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1857,8 +1857,10 @@ following two functions.
 
   wbc_init_bio(@wbc, @bio)
 	Should be called for each bio carrying writeback data and
-	associates the bio with the inode's owner cgroup.  Can be
-	called anytime between bio allocation and submission.
+	associates the bio with the inode's owner cgroup and the
+	corresponding request queue.  This must be called after
+	a queue (device) has been associated with the bio and
+	before submission.
 
   wbc_account_io(@wbc, @page, @bytes)
 	Should be called for each data segment being written out.
@@ -1877,7 +1879,7 @@ the configuration, the bio may be executed at a lower priority and if
 the writeback session is holding shared resources, e.g. a journal
 entry, may lead to priority inversion.  There is no one easy solution
 for the problem.  Filesystems can try to work around specific problem
-cases by skipping wbc_init_bio() or using bio_associate_blkcg()
+cases by skipping wbc_init_bio() or using bio_associate_create_blkg()
 directly.
 
 
diff --git a/fs/buffer.c b/fs/buffer.c
index 6f1ae3ac9789..109f55196866 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3060,11 +3060,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 	 */
 	bio = bio_alloc(GFP_NOIO, 1);
 
-	if (wbc) {
-		wbc_init_bio(wbc, bio);
-		wbc_account_io(wbc, bh->b_page, bh->b_size);
-	}
-
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio_set_dev(bio, bh->b_bdev);
 	bio->bi_write_hint = write_hint;
@@ -3084,6 +3079,11 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 		op_flags |= REQ_PRIO;
 	bio_set_op_attrs(bio, op, op_flags);
 
+	if (wbc) {
+		wbc_init_bio(wbc, bio);
+		wbc_account_io(wbc, bh->b_page, bh->b_size);
+	}
+
 	submit_bio(bio);
 	return 0;
 }
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index db7590178dfc..2aa62d58d8dd 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -374,13 +374,13 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
 	bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
 	if (!bio)
 		return -ENOMEM;
-	wbc_init_bio(io->io_wbc, bio);
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio_set_dev(bio, bh->b_bdev);
 	bio->bi_end_io = ext4_end_bio;
 	bio->bi_private = ext4_get_io_end(io->io_end);
 	io->io_bio = bio;
 	io->io_next_block = bh->b_blocknr;
+	wbc_init_bio(io->io_wbc, bio);
 	return 0;
 }
 
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index fdfd04e348f6..738a0c24874f 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -246,7 +246,8 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
  *
  * @bio is a part of the writeback in progress controlled by @wbc.  Perform
  * writeback specific initialization.  This is used to apply the cgroup
- * writeback context.
+ * writeback context.  Must be called after the bio has been associated with
+ * a device.
  */
 static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
 {
@@ -257,7 +258,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
 	 * regular writeback instead of writing things out itself.
 	 */
 	if (wbc->wb)
-		bio_associate_blkcg(bio, wbc->wb->blkcg_css);
+		bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css);
 }
 
 #else	/* CONFIG_CGROUP_WRITEBACK */
-- 
cgit v1.2.3


From c839e7a03f92bafd71fd145b470dcdc7f43f2d4c Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Tue, 11 Sep 2018 14:41:33 -0400
Subject: blkcg: remove bio->bi_css and instead use bio->bi_blkg

Prior patches ensured that all bios are now associated with some blkg.
This now makes bio->bi_css unnecessary as blkg maintains a reference to
the blkcg already.

This patch removes the field bi_css and transfers corresponding uses to
access via bi_blkg.

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c                | 56 +++++++++-------------------------------------
 block/bounce.c             |  2 +-
 drivers/block/loop.c       |  5 +++--
 drivers/md/raid0.c         |  2 +-
 include/linux/bio.h        |  9 +++-----
 include/linux/blk-cgroup.h |  8 +++----
 include/linux/blk_types.h  |  1 -
 kernel/trace/blktrace.c    |  4 ++--
 8 files changed, 25 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 387480de6992..71cfe3720ea7 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -609,7 +609,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 	bio->bi_iter = bio_src->bi_iter;
 	bio->bi_io_vec = bio_src->bi_io_vec;
 
-	bio_clone_blkcg_association(bio, bio_src);
+	bio_clone_blkg_association(bio, bio_src);
 
 	blkcg_bio_issue_init(bio);
 }
@@ -1956,34 +1956,6 @@ EXPORT_SYMBOL(bioset_init_from_src);
 
 #ifdef CONFIG_BLK_CGROUP
 
-/**
- * bio_associate_blkcg - associate a bio with the specified blkcg
- * @bio: target bio
- * @blkcg_css: css of the blkcg to associate
- *
- * Associate @bio with the blkcg specified by @blkcg_css.  Block layer will
- * treat @bio as if it were issued by a task which belongs to the blkcg.
- *
- * This function takes an extra reference of @blkcg_css which will be put
- * when @bio is released.  The caller must own @bio and is responsible for
- * synchronizing calls to this function.  If @blkcg_css is NULL, a call to
- * blkcg_get_css finds the current css from the kthread or task.
- */
-int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
-{
-	if (unlikely(bio->bi_css))
-		return -EBUSY;
-
-	if (blkcg_css)
-		css_get(blkcg_css);
-	else
-		blkcg_css = blkcg_get_css();
-
-	bio->bi_css = blkcg_css;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bio_associate_blkcg);
-
 /**
  * bio_associate_blkg - associate a bio with the a blkg
  * @bio: target bio
@@ -2033,7 +2005,6 @@ int bio_associate_blkg_from_css(struct bio *bio,
 				struct cgroup_subsys_state *css)
 {
 	css_get(css);
-	bio->bi_css = css;
 	return __bio_associate_blkg_from_css(bio, css);
 }
 EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
@@ -2054,12 +2025,11 @@ int bio_associate_blkg_from_page(struct bio *bio, struct page *page)
 {
 	struct cgroup_subsys_state *css;
 
-	if (unlikely(bio->bi_css))
+	if (unlikely(bio->bi_blkg))
 		return -EBUSY;
 	if (!page->mem_cgroup)
 		return 0;
 	css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
-	bio->bi_css = css;
 
 	return __bio_associate_blkg_from_css(bio, css);
 }
@@ -2085,8 +2055,7 @@ int bio_associate_create_blkg(struct request_queue *q, struct bio *bio)
 
 	rcu_read_lock();
 
-	bio_associate_blkcg(bio, NULL);
-	blkcg = bio_blkcg(bio);
+	blkcg = css_to_blkcg(blkcg_get_css());
 
 	if (!blkcg->css.parent) {
 		ret = bio_associate_blkg(bio, q->root_blkg);
@@ -2110,30 +2079,27 @@ void bio_disassociate_task(struct bio *bio)
 		put_io_context(bio->bi_ioc);
 		bio->bi_ioc = NULL;
 	}
-	if (bio->bi_css) {
-		css_put(bio->bi_css);
-		bio->bi_css = NULL;
-	}
 	if (bio->bi_blkg) {
+		/* a ref is always taken on css */
+		css_put(&bio_blkcg(bio)->css);
 		blkg_put(bio->bi_blkg);
 		bio->bi_blkg = NULL;
 	}
 }
 
 /**
- * bio_clone_blkcg_association - clone blkcg association from src to dst bio
+ * bio_clone_blkg_association - clone blkg association from src to dst bio
  * @dst: destination bio
  * @src: source bio
  */
-void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
+void bio_clone_blkg_association(struct bio *dst, struct bio *src)
 {
-	if (src->bi_css)
-		WARN_ON(bio_associate_blkcg(dst, src->bi_css));
-
-	if (src->bi_blkg)
+	if (src->bi_blkg) {
+		css_get(&bio_blkcg(src)->css);
 		bio_associate_blkg(dst, src->bi_blkg);
+	}
 }
-EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
+EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
 #endif /* CONFIG_BLK_CGROUP */
 
 static void __init biovec_init_slabs(void)
diff --git a/block/bounce.c b/block/bounce.c
index 7a08703b1204..b30071ac4ec6 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -257,7 +257,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
 		}
 	}
 
-	bio_clone_blkcg_association(bio, bio_src);
+	bio_clone_blkg_association(bio, bio_src);
 
 	blkcg_bio_issue_init(bio);
 
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ea9debf59b22..abad6d15f956 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -77,6 +77,7 @@
 #include <linux/falloc.h>
 #include <linux/uio.h>
 #include <linux/ioprio.h>
+#include <linux/blk-cgroup.h>
 
 #include "loop.h"
 
@@ -1760,8 +1761,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	/* always use the first bio's css */
 #ifdef CONFIG_BLK_CGROUP
-	if (cmd->use_aio && rq->bio && rq->bio->bi_css) {
-		cmd->css = rq->bio->bi_css;
+	if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) {
+		cmd->css = &bio_blkcg(rq->bio)->css;
 		css_get(cmd->css);
 	} else
 #endif
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index ac1cffd2a09b..f3fb5bb8c82a 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -542,7 +542,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
 		    !discard_bio)
 			continue;
 		bio_chain(discard_bio, bio);
-		bio_clone_blkcg_association(discard_bio, bio);
+		bio_clone_blkg_association(discard_bio, bio);
 		if (mddev->gendisk)
 			trace_block_bio_remap(bdev_get_queue(rdev->bdev),
 				discard_bio, disk_devt(mddev->gendisk),
diff --git a/include/linux/bio.h b/include/linux/bio.h
index c73a870ebc0e..e973876625a8 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -540,24 +540,21 @@ static inline int bio_associate_blkg_from_page(struct bio *bio,
 #endif
 
 #ifdef CONFIG_BLK_CGROUP
-int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
 int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
 int bio_associate_blkg_from_css(struct bio *bio,
 				struct cgroup_subsys_state *css);
 int bio_associate_create_blkg(struct request_queue *q, struct bio *bio);
 void bio_disassociate_task(struct bio *bio);
-void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
+void bio_clone_blkg_association(struct bio *dst, struct bio *src);
 #else	/* CONFIG_BLK_CGROUP */
-static inline int bio_associate_blkcg(struct bio *bio,
-			struct cgroup_subsys_state *blkcg_css) { return 0; }
 static inline int bio_associate_blkg_from_css(struct bio *bio,
 					      struct cgroup_subsys_state *css)
 { return 0; }
 static inline int bio_associate_create_blkg(struct request_queue *q,
 					    struct bio *bio) { return 0; }
 static inline void bio_disassociate_task(struct bio *bio) { }
-static inline void bio_clone_blkcg_association(struct bio *dst,
-			struct bio *src) { }
+static inline void bio_clone_blkg_association(struct bio *dst,
+					      struct bio *src) { }
 #endif	/* CONFIG_BLK_CGROUP */
 
 #ifdef CONFIG_HIGHMEM
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index a6b6e741a75e..c41cfcc2b4d8 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -308,8 +308,8 @@ static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
  */
 static inline struct blkcg *__bio_blkcg(struct bio *bio)
 {
-	if (bio && bio->bi_css)
-		return css_to_blkcg(bio->bi_css);
+	if (bio && bio->bi_blkg)
+		return bio->bi_blkg->blkcg;
 	return css_to_blkcg(blkcg_css());
 }
 
@@ -323,8 +323,8 @@ static inline struct blkcg *__bio_blkcg(struct bio *bio)
  */
 static inline struct blkcg *bio_blkcg(struct bio *bio)
 {
-	if (bio && bio->bi_css)
-		return css_to_blkcg(bio->bi_css);
+	if (bio && bio->bi_blkg)
+		return bio->bi_blkg->blkcg;
 	return NULL;
 }
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index f6dfb30737d8..9578c7ab1eb6 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -178,7 +178,6 @@ struct bio {
 	 * release.  Read comment on top of bio_associate_current().
 	 */
 	struct io_context	*bi_ioc;
-	struct cgroup_subsys_state *bi_css;
 	struct blkcg_gq		*bi_blkg;
 	struct bio_issue	bi_issue;
 #endif
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 2868d85f1fb1..fac0ddf8a8e2 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
 	if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
 		return NULL;
 
-	if (!bio->bi_css)
+	if (!bio->bi_blkg)
 		return NULL;
-	return cgroup_get_kernfs_id(bio->bi_css->cgroup);
+	return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup);
 }
 #else
 static union kernfs_node_id *
-- 
cgit v1.2.3


From f0fcb3ec89f37167810e660b0595d9a6155d9807 Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Tue, 11 Sep 2018 14:41:34 -0400
Subject: blkcg: remove additional reference to the css

The previous patch in this series removed carrying around a pointer to
the css in blkg. However, the blkg association logic still relied on
taking a reference on the css to ensure we wouldn't fail in getting a
reference for the blkg.

Here the implicit dependency on the css is removed. The association
continues to rely on the tryget logic walking up the blkg tree. This
streamlines the three ways that association can happen: normal, swap,
and writeback.

Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c                | 62 +++++++++++++++++++++++++++-------------------
 include/linux/blk-cgroup.h | 52 +++-----------------------------------
 include/linux/cgroup.h     |  2 ++
 kernel/cgroup/cgroup.c     | 48 ++++++++++++++++++++++++++++-------
 4 files changed, 81 insertions(+), 83 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 71cfe3720ea7..c39251e69447 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1978,18 +1978,30 @@ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
 	return 0;
 }
 
+/**
+ * __bio_associate_blkg_from_css - internal blkg association function
+ *
+ * This in the core association function that all association paths rely on.
+ * A blkg reference is taken which is released upon freeing of the bio.
+ */
 static int __bio_associate_blkg_from_css(struct bio *bio,
 					 struct cgroup_subsys_state *css)
 {
+	struct request_queue *q = bio->bi_disk->queue;
 	struct blkcg_gq *blkg;
+	int ret;
 
 	rcu_read_lock();
 
-	blkg = blkg_lookup_create(css_to_blkcg(css), bio->bi_disk->queue);
+	if (!css || !css->parent)
+		blkg = q->root_blkg;
+	else
+		blkg = blkg_lookup_create(css_to_blkcg(css), q);
 
-	rcu_read_unlock();
+	ret = bio_associate_blkg(bio, blkg);
 
-	return bio_associate_blkg(bio, blkg);
+	rcu_read_unlock();
+	return ret;
 }
 
 /**
@@ -1998,13 +2010,14 @@ static int __bio_associate_blkg_from_css(struct bio *bio,
  * @css: target css
  *
  * Associate @bio with the blkg found by combining the css's blkg and the
- * request_queue of the @bio.  This takes a reference on the css that will
- * be put upon freeing of @bio.
+ * request_queue of the @bio.  This falls back to the queue's root_blkg if
+ * the association fails with the css.
  */
 int bio_associate_blkg_from_css(struct bio *bio,
 				struct cgroup_subsys_state *css)
 {
-	css_get(css);
+	if (unlikely(bio->bi_blkg))
+		return -EBUSY;
 	return __bio_associate_blkg_from_css(bio, css);
 }
 EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
@@ -2016,22 +2029,29 @@ EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
  * @page: the page to lookup the blkcg from
  *
  * Associate @bio with the blkg from @page's owning memcg and the respective
- * request_queue.  This works like every other associate function wrt
- * references.
+ * request_queue.  If cgroup_e_css returns NULL, fall back to the queue's
+ * root_blkg.
  *
  * Note: this must be called after bio has an associated device.
  */
 int bio_associate_blkg_from_page(struct bio *bio, struct page *page)
 {
 	struct cgroup_subsys_state *css;
+	int ret;
 
 	if (unlikely(bio->bi_blkg))
 		return -EBUSY;
 	if (!page->mem_cgroup)
 		return 0;
-	css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
 
-	return __bio_associate_blkg_from_css(bio, css);
+	rcu_read_lock();
+
+	css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
+
+	ret = __bio_associate_blkg_from_css(bio, css);
+
+	rcu_read_unlock();
+	return ret;
 }
 #endif /* CONFIG_MEMCG */
 
@@ -2041,12 +2061,12 @@ int bio_associate_blkg_from_page(struct bio *bio, struct page *page)
  * @bio: target bio
  *
  * Associate @bio with the blkg found from the bio's css and the request_queue.
- * If one is not found, bio_lookup_blkg creates the blkg.
+ * If one is not found, bio_lookup_blkg creates the blkg.  This falls back to
+ * the queue's root_blkg if association fails.
  */
 int bio_associate_create_blkg(struct request_queue *q, struct bio *bio)
 {
-	struct blkcg *blkcg;
-	struct blkcg_gq *blkg;
+	struct cgroup_subsys_state *css;
 	int ret = 0;
 
 	/* someone has already associated this bio with a blkg */
@@ -2055,15 +2075,9 @@ int bio_associate_create_blkg(struct request_queue *q, struct bio *bio)
 
 	rcu_read_lock();
 
-	blkcg = css_to_blkcg(blkcg_get_css());
+	css = blkcg_css();
 
-	if (!blkcg->css.parent) {
-		ret = bio_associate_blkg(bio, q->root_blkg);
-	} else {
-		blkg = blkg_lookup_create(blkcg, q);
-
-		ret = bio_associate_blkg(bio, blkg);
-	}
+	ret = __bio_associate_blkg_from_css(bio, css);
 
 	rcu_read_unlock();
 	return ret;
@@ -2080,8 +2094,6 @@ void bio_disassociate_task(struct bio *bio)
 		bio->bi_ioc = NULL;
 	}
 	if (bio->bi_blkg) {
-		/* a ref is always taken on css */
-		css_put(&bio_blkcg(bio)->css);
 		blkg_put(bio->bi_blkg);
 		bio->bi_blkg = NULL;
 	}
@@ -2094,10 +2106,8 @@ void bio_disassociate_task(struct bio *bio)
  */
 void bio_clone_blkg_association(struct bio *dst, struct bio *src)
 {
-	if (src->bi_blkg) {
-		css_get(&bio_blkcg(src)->css);
+	if (src->bi_blkg)
 		bio_associate_blkg(dst, src->bi_blkg);
-	}
 }
 EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
 #endif /* CONFIG_BLK_CGROUP */
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index c41cfcc2b4d8..2951ea3541b1 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -249,47 +249,6 @@ static inline struct cgroup_subsys_state *blkcg_css(void)
 	return task_css(current, io_cgrp_id);
 }
 
-/**
- * blkcg_get_css - find and get a reference to the css
- *
- * Find the css associated with either the kthread or the current task.
- * This takes a reference on the blkcg which will need to be managed by the
- * caller.
- */
-static inline struct cgroup_subsys_state *blkcg_get_css(void)
-{
-	struct cgroup_subsys_state *css;
-
-	rcu_read_lock();
-
-	css = kthread_blkcg();
-	if (css) {
-		css_get(css);
-	} else {
-		/*
-		 * This is a bit complicated.  It is possible task_css is seeing
-		 * an old css pointer here.  This is caused by the current
-		 * thread migrating away from this cgroup and this cgroup dying.
-		 * css_tryget() will fail when trying to take a ref on a cgroup
-		 * that's ref count has hit 0.
-		 *
-		 * Therefore, if it does fail, this means current must have
-		 * been swapped away already and this is waiting for it to
-		 * propagate on the polling cpu.  Hence the use of cpu_relax().
-		 */
-		while (true) {
-			css = task_css(current, io_cgrp_id);
-			if (likely(css_tryget(css)))
-				break;
-			cpu_relax();
-		}
-	}
-
-	rcu_read_unlock();
-
-	return css;
-}
-
 static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
 {
 	return css ? container_of(css, struct blkcg, css) : NULL;
@@ -628,10 +587,8 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
 	rcu_read_lock();
 
 	blkcg = bio_blkcg(bio);
-	if (blkcg)
-		css_get(&blkcg->css);
-	else
-		blkcg = css_to_blkcg(blkcg_get_css());
+	if (!blkcg)
+		blkcg = css_to_blkcg(blkcg_css());
 
 	/* bypass blkg lookup and use @q->root_rl directly for root */
 	if (blkcg == &blkcg_root)
@@ -646,7 +603,8 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
 	if (unlikely(!blkg))
 		goto root_rl;
 
-	blkg_get(blkg);
+	if (!blkg_try_get(blkg))
+		goto root_rl;
 	rcu_read_unlock();
 	return &blkg->rl;
 root_rl:
@@ -663,8 +621,6 @@ root_rl:
  */
 static inline void blk_put_rl(struct request_list *rl)
 {
-	/* an additional ref is always taken for rl */
-	css_put(&rl->blkg->blkcg->css);
 	if (rl->blkg->blkcg != &blkcg_root)
 		blkg_put(rl->blkg);
 }
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 32c553556bbd..b8bcbdeb2eac 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -93,6 +93,8 @@ extern struct css_set init_css_set;
 
 bool css_has_online_children(struct cgroup_subsys_state *css);
 struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
+struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup,
+					 struct cgroup_subsys *ss);
 struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
 					     struct cgroup_subsys *ss);
 struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index aae10baf1902..48fb22e49467 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -492,7 +492,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
 }
 
 /**
- * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
+ * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
  * @cgrp: the cgroup of interest
  * @ss: the subsystem of interest (%NULL returns @cgrp->self)
  *
@@ -501,8 +501,8 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
  * enabled.  If @ss is associated with the hierarchy @cgrp is on, this
  * function is guaranteed to return non-NULL css.
  */
-static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
-						struct cgroup_subsys *ss)
+static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
+							struct cgroup_subsys *ss)
 {
 	lockdep_assert_held(&cgroup_mutex);
 
@@ -522,6 +522,35 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
 	return cgroup_css(cgrp, ss);
 }
 
+/**
+ * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
+ * @cgrp: the cgroup of interest
+ * @ss: the subsystem of interest
+ *
+ * Find and get the effective css of @cgrp for @ss.  The effective css is
+ * defined as the matching css of the nearest ancestor including self which
+ * has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
+ * the root css is returned, so this function always returns a valid css.
+ *
+ * The returned css is not guaranteed to be online, and therefore it is the
+ * callers responsiblity to tryget a reference for it.
+ */
+struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
+					 struct cgroup_subsys *ss)
+{
+	struct cgroup_subsys_state *css;
+
+	do {
+		css = cgroup_css(cgrp, ss);
+
+		if (css)
+			return css;
+		cgrp = cgroup_parent(cgrp);
+	} while (cgrp);
+
+	return init_css_set.subsys[ss->id];
+}
+
 /**
  * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
  * @cgrp: the cgroup of interest
@@ -604,10 +633,11 @@ EXPORT_SYMBOL_GPL(of_css);
  *
  * Should be called under cgroup_[tree_]mutex.
  */
-#define for_each_e_css(css, ssid, cgrp)					\
-	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	\
-		if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
-			;						\
+#define for_each_e_css(css, ssid, cgrp)					    \
+	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	    \
+		if (!((css) = cgroup_e_css_by_mask(cgrp,		    \
+						   cgroup_subsys[(ssid)]))) \
+			;						    \
 		else
 
 /**
@@ -1006,7 +1036,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
 			 * @ss is in this hierarchy, so we want the
 			 * effective css from @cgrp.
 			 */
-			template[i] = cgroup_e_css(cgrp, ss);
+			template[i] = cgroup_e_css_by_mask(cgrp, ss);
 		} else {
 			/*
 			 * @ss is not in this hierarchy, so we don't want
@@ -3019,7 +3049,7 @@ static int cgroup_apply_control(struct cgroup *cgrp)
 		return ret;
 
 	/*
-	 * At this point, cgroup_e_css() results reflect the new csses
+	 * At this point, cgroup_e_css_by_mask() results reflect the new csses
 	 * making the following cgroup_update_dfl_csses() properly update
 	 * css associations of all tasks in the subtree.
 	 */
-- 
cgit v1.2.3


From e2b0989954ae7c80609f77e7ce203bea6d2c54e1 Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Tue, 11 Sep 2018 14:41:35 -0400
Subject: blkcg: cleanup and make blk_get_rl use blkg_lookup_create

blk_get_rl is responsible for identifying which request_list a request
should be allocated to. Try get logic was added earlier, but
semantically the logic was not changed.

This patch makes better use of the bio already having a reference to the
blkg in the hot path. The cold path uses a better fallback of
blkg_lookup_create rather than just blkg_lookup and then falling back to
the q->root_rl. If lookup_create fails with anything but -ENODEV, it
falls back to q->root_rl.

A clarifying comment is added to explain why q->root_rl is used rather
than the root blkg's rl.

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-cgroup.h | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 2951ea3541b1..d2f7f1b00fcf 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -586,28 +586,36 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
 
 	rcu_read_lock();
 
-	blkcg = bio_blkcg(bio);
-	if (!blkcg)
-		blkcg = css_to_blkcg(blkcg_css());
+	if (bio && bio->bi_blkg) {
+		blkcg = bio->bi_blkg->blkcg;
+		if (blkcg == &blkcg_root)
+			goto rl_use_root;
+
+		blkg_get(bio->bi_blkg);
+		rcu_read_unlock();
+		return &bio->bi_blkg->rl;
+	}
 
-	/* bypass blkg lookup and use @q->root_rl directly for root */
+	blkcg = css_to_blkcg(blkcg_css());
 	if (blkcg == &blkcg_root)
-		goto root_rl;
+		goto rl_use_root;
 
-	/*
-	 * Try to use blkg->rl.  blkg lookup may fail under memory pressure
-	 * or if either the blkcg or queue is going away.  Fall back to
-	 * root_rl in such cases.
-	 */
 	blkg = blkg_lookup(blkcg, q);
 	if (unlikely(!blkg))
-		goto root_rl;
+		blkg = __blkg_lookup_create(blkcg, q);
 
 	if (!blkg_try_get(blkg))
-		goto root_rl;
+		goto rl_use_root;
+
 	rcu_read_unlock();
 	return &blkg->rl;
-root_rl:
+
+	/*
+	 * Each blkg has its own request_list, however, the root blkcg
+	 * uses the request_queue's root_rl.  This is to avoid most
+	 * overhead for the root blkcg.
+	 */
+rl_use_root:
 	rcu_read_unlock();
 	return &q->root_rl;
 }
-- 
cgit v1.2.3


From b3b9f24f5fcc099c41f7dc1d02350635830888e5 Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Tue, 11 Sep 2018 14:41:36 -0400
Subject: blkcg: change blkg reference counting to use percpu_ref

Now that every bio is associated with a blkg, this puts the use of
blkg_get, blkg_try_get, and blkg_put on the hot path. This switches over
the refcnt in blkg to use percpu_ref.

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c         | 64 ++++++++++++++++++++++++++++------------------
 include/linux/blk-cgroup.h | 15 ++++-------
 2 files changed, 44 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e9e3a955f61a..ab3676e1e15e 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -84,6 +84,37 @@ static void blkg_free(struct blkcg_gq *blkg)
 	kfree(blkg);
 }
 
+static void __blkg_release(struct rcu_head *rcu)
+{
+	struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
+
+	percpu_ref_exit(&blkg->refcnt);
+
+	/* release the blkcg and parent blkg refs this blkg has been holding */
+	css_put(&blkg->blkcg->css);
+	if (blkg->parent)
+		blkg_put(blkg->parent);
+
+	wb_congested_put(blkg->wb_congested);
+
+	blkg_free(blkg);
+}
+
+/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid.  For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+static void blkg_release(struct percpu_ref *ref)
+{
+	struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);
+
+	call_rcu(&blkg->rcu_head, __blkg_release);
+}
+
 /**
  * blkg_alloc - allocate a blkg
  * @blkcg: block cgroup the new blkg is associated with
@@ -110,7 +141,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
 	blkg->q = q;
 	INIT_LIST_HEAD(&blkg->q_node);
 	blkg->blkcg = blkcg;
-	atomic_set(&blkg->refcnt, 1);
 
 	/* root blkg uses @q->root_rl, init rl only for !root blkgs */
 	if (blkcg != &blkcg_root) {
@@ -217,6 +247,11 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
 		blkg_get(blkg->parent);
 	}
 
+	ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0,
+			      GFP_NOWAIT | __GFP_NOWARN);
+	if (ret)
+		goto err_cancel_ref;
+
 	/* invoke per-policy init */
 	for (i = 0; i < BLKCG_MAX_POLS; i++) {
 		struct blkcg_policy *pol = blkcg_policy[i];
@@ -249,6 +284,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
 	blkg_put(blkg);
 	return ERR_PTR(ret);
 
+err_cancel_ref:
+	percpu_ref_exit(&blkg->refcnt);
 err_put_congested:
 	wb_congested_put(wb_congested);
 err_put_css:
@@ -387,7 +424,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
 	 * Put the reference taken at the time of creation so that when all
 	 * queues are gone, group can be destroyed.
 	 */
-	blkg_put(blkg);
+	percpu_ref_kill(&blkg->refcnt);
 }
 
 /**
@@ -414,29 +451,6 @@ static void blkg_destroy_all(struct request_queue *q)
 	q->root_rl.blkg = NULL;
 }
 
-/*
- * A group is RCU protected, but having an rcu lock does not mean that one
- * can access all the fields of blkg and assume these are valid.  For
- * example, don't try to follow throtl_data and request queue links.
- *
- * Having a reference to blkg under an rcu allows accesses to only values
- * local to groups like group stats and group rate limits.
- */
-void __blkg_release_rcu(struct rcu_head *rcu_head)
-{
-	struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
-
-	/* release the blkcg and parent blkg refs this blkg has been holding */
-	css_put(&blkg->blkcg->css);
-	if (blkg->parent)
-		blkg_put(blkg->parent);
-
-	wb_congested_put(blkg->wb_congested);
-
-	blkg_free(blkg);
-}
-EXPORT_SYMBOL_GPL(__blkg_release_rcu);
-
 /*
  * The next function used by blk_queue_for_each_rl().  It's a bit tricky
  * because the root blkg uses @q->root_rl instead of its own rl.
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index d2f7f1b00fcf..7ff5d8ba8c7a 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -126,7 +126,7 @@ struct blkcg_gq {
 	struct request_list		rl;
 
 	/* reference count */
-	atomic_t			refcnt;
+	struct percpu_ref		refcnt;
 
 	/* is this blkg online? protected by both blkcg and q locks */
 	bool				online;
@@ -490,8 +490,7 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
  */
 static inline void blkg_get(struct blkcg_gq *blkg)
 {
-	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
-	atomic_inc(&blkg->refcnt);
+	percpu_ref_get(&blkg->refcnt);
 }
 
 /**
@@ -503,7 +502,7 @@ static inline void blkg_get(struct blkcg_gq *blkg)
  */
 static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
 {
-	if (atomic_inc_not_zero(&blkg->refcnt))
+	if (percpu_ref_tryget(&blkg->refcnt))
 		return blkg;
 	return NULL;
 }
@@ -517,23 +516,19 @@ static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
  */
 static inline struct blkcg_gq *blkg_try_get_closest(struct blkcg_gq *blkg)
 {
-	while (!atomic_inc_not_zero(&blkg->refcnt))
+	while (!percpu_ref_tryget(&blkg->refcnt))
 		blkg = blkg->parent;
 
 	return blkg;
 }
 
-void __blkg_release_rcu(struct rcu_head *rcu);
-
 /**
  * blkg_put - put a blkg reference
  * @blkg: blkg to put
  */
 static inline void blkg_put(struct blkcg_gq *blkg)
 {
-	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
-	if (atomic_dec_and_test(&blkg->refcnt))
-		call_rcu(&blkg->rcu_head, __blkg_release_rcu);
+	percpu_ref_put(&blkg->refcnt);
 }
 
 /**
-- 
cgit v1.2.3


From 101246ec02b54adf6a77180a01ccbe310add2c32 Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Tue, 11 Sep 2018 14:41:37 -0400
Subject: blkcg: rename blkg_try_get to blkg_tryget

blkg reference counting now uses percpu_ref rather than atomic_t. Let's
make this consistent with css_tryget. This renames blkg_try_get to
blkg_tryget and now returns a bool rather than the blkg or NULL.

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c                |  2 +-
 block/blk-cgroup.c         |  3 +--
 block/blk-iolatency.c      |  2 +-
 include/linux/blk-cgroup.h | 14 ++++++--------
 4 files changed, 9 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index c39251e69447..1cd47f218200 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1974,7 +1974,7 @@ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
 {
 	if (unlikely(bio->bi_blkg))
 		return -EBUSY;
-	bio->bi_blkg = blkg_try_get_closest(blkg);
+	bio->bi_blkg = blkg_tryget_closest(blkg);
 	return 0;
 }
 
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index ab3676e1e15e..76136bea7a7f 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1794,8 +1794,7 @@ void blkcg_maybe_throttle_current(void)
 	blkg = blkg_lookup(blkcg, q);
 	if (!blkg)
 		goto out;
-	blkg = blkg_try_get(blkg);
-	if (!blkg)
+	if (!blkg_tryget(blkg))
 		goto out;
 	rcu_read_unlock();
 
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 2d848b2f8b87..27c14f8d2576 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -625,7 +625,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
 		 * We could be exiting, don't access the pd unless we have a
 		 * ref on the blkg.
 		 */
-		if (!blkg_try_get(blkg))
+		if (!blkg_tryget(blkg))
 			continue;
 
 		iolat = blkg_to_lat(blkg);
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 7ff5d8ba8c7a..b7fd08013de2 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -494,27 +494,25 @@ static inline void blkg_get(struct blkcg_gq *blkg)
 }
 
 /**
- * blkg_try_get - try and get a blkg reference
+ * blkg_tryget - try and get a blkg reference
  * @blkg: blkg to get
  *
  * This is for use when doing an RCU lookup of the blkg.  We may be in the midst
  * of freeing this blkg, so we can only use it if the refcnt is not zero.
  */
-static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
+static inline bool blkg_tryget(struct blkcg_gq *blkg)
 {
-	if (percpu_ref_tryget(&blkg->refcnt))
-		return blkg;
-	return NULL;
+	return percpu_ref_tryget(&blkg->refcnt);
 }
 
 /**
- * blkg_try_get_closest - try and get a blkg ref on the closet blkg
+ * blkg_tryget_closest - try and get a blkg ref on the closet blkg
  * @blkg: blkg to get
  *
  * This walks up the blkg tree to find the closest non-dying blkg and returns
  * the blkg that it did association with as it may not be the passed in blkg.
  */
-static inline struct blkcg_gq *blkg_try_get_closest(struct blkcg_gq *blkg)
+static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
 {
 	while (!percpu_ref_tryget(&blkg->refcnt))
 		blkg = blkg->parent;
@@ -599,7 +597,7 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
 	if (unlikely(!blkg))
 		blkg = __blkg_lookup_create(blkcg, q);
 
-	if (!blkg_try_get(blkg))
+	if (!blkg_tryget(blkg))
 		goto rl_use_root;
 
 	rcu_read_unlock();
-- 
cgit v1.2.3


From 9799ccb0e984a5c1311b22a212e7ff96e8b736de Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 21 Sep 2018 08:51:49 -0700
Subject: tcp: add tcp_wstamp_ns socket field

TCP will soon provide earliest departure time on TX skbs.
It needs to track this in a new variable.

tcp_mstamp_refresh() needs to update this variable, and
became too big to stay an inline.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  2 ++
 include/net/tcp.h     | 12 +-----------
 net/ipv4/tcp_output.c | 16 ++++++++++++++++
 3 files changed, 19 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 263e37271afd..848f5b25e178 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -248,6 +248,8 @@ struct tcp_sock {
 		syn_smc:1;	/* SYN includes SMC */
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
+	u64	tcp_wstamp_ns;	/* departure time for next sent data packet */
+
 /* RTT measurement */
 	u64	tcp_mstamp;	/* most recent packet received/sent */
 	u32	srtt_us;	/* smoothed round trip time << 3 in usecs */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0ca5ea10dc06..370198fdc65d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -752,17 +752,7 @@ static inline u32 tcp_time_stamp_raw(void)
 	return div_u64(tcp_clock_ns(), NSEC_PER_SEC / TCP_TS_HZ);
 }
 
-
-/* Refresh 1us clock of a TCP socket,
- * ensuring monotically increasing values.
- */
-static inline void tcp_mstamp_refresh(struct tcp_sock *tp)
-{
-	u64 val = tcp_clock_us();
-
-	if (val > tp->tcp_mstamp)
-		tp->tcp_mstamp = val;
-}
+void tcp_mstamp_refresh(struct tcp_sock *tp);
 
 static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
 {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b95aa72d8823..5a8105e84f7c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -45,6 +45,22 @@
 
 #include <trace/events/tcp.h>
 
+/* Refresh clocks of a TCP socket,
+ * ensuring monotically increasing values.
+ */
+void tcp_mstamp_refresh(struct tcp_sock *tp)
+{
+	u64 val = tcp_clock_ns();
+
+	/* departure time for next data packet */
+	if (val > tp->tcp_wstamp_ns)
+		tp->tcp_wstamp_ns = val;
+
+	val = div_u64(val, NSEC_PER_USEC);
+	if (val > tp->tcp_mstamp)
+		tp->tcp_mstamp = val;
+}
+
 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			   int push_one, gfp_t gfp);
 
-- 
cgit v1.2.3


From d3edd06ea8ea9e03de6567fda80b8be57e21a537 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 21 Sep 2018 08:51:50 -0700
Subject: tcp: provide earliest departure time in skb->tstamp

Switch internal TCP skb->skb_mstamp to skb->skb_mstamp_ns,
from usec units to nsec units.

Do not clear skb->tstamp before entering IP stacks in TX,
so that qdisc or devices can implement pacing based on the
earliest departure time instead of socket sk->sk_pacing_rate

Packets are fed with tcp_wstamp_ns, and following patch
will update tcp_wstamp_ns when both TCP and sch_fq switch to
the earliest departure time mechanism.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 +-
 include/net/tcp.h      |  6 +++---
 net/ipv4/syncookies.c  |  2 +-
 net/ipv4/tcp.c         |  2 +-
 net/ipv4/tcp_output.c  | 13 ++++++-------
 net/ipv4/tcp_timer.c   |  2 +-
 6 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e3a53ca4a9b5..86f337e9a81d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -689,7 +689,7 @@ struct sk_buff {
 
 	union {
 		ktime_t		tstamp;
-		u64		skb_mstamp;
+		u64		skb_mstamp_ns; /* earliest departure time */
 	};
 	/*
 	 * This is the control buffer. It is free to use for every
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 370198fdc65d..ff15d8e0d525 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -761,13 +761,13 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0)
 
 static inline u32 tcp_skb_timestamp(const struct sk_buff *skb)
 {
-	return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ);
+	return div_u64(skb->skb_mstamp_ns, NSEC_PER_SEC / TCP_TS_HZ);
 }
 
 /* provide the departure time in us unit */
 static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb)
 {
-	return skb->skb_mstamp;
+	return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC);
 }
 
 
@@ -813,7 +813,7 @@ struct tcp_skb_cb {
 #define TCPCB_SACKED_RETRANS	0x02	/* SKB retransmitted		*/
 #define TCPCB_LOST		0x04	/* SKB is lost			*/
 #define TCPCB_TAGBITS		0x07	/* All tag bits			*/
-#define TCPCB_REPAIRED		0x10	/* SKB repaired (no skb_mstamp)	*/
+#define TCPCB_REPAIRED		0x10	/* SKB repaired (no skb_mstamp_ns)	*/
 #define TCPCB_EVER_RETRANS	0x80	/* Ever retransmitted frame	*/
 #define TCPCB_RETRANS		(TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \
 				TCPCB_REPAIRED)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index c3387dfd725b..606f868d9f3f 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -88,7 +88,7 @@ u64 cookie_init_timestamp(struct request_sock *req)
 		ts <<= TSBITS;
 		ts |= options;
 	}
-	return (u64)ts * (USEC_PER_SEC / TCP_TS_HZ);
+	return (u64)ts * (NSEC_PER_SEC / TCP_TS_HZ);
 }
 
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 67670fac7c8d..69c236943f56 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1295,7 +1295,7 @@ new_segment:
 			copy = size_goal;
 
 			/* All packets are restored as if they have
-			 * already been sent. skb_mstamp isn't set to
+			 * already been sent. skb_mstamp_ns isn't set to
 			 * avoid wrong rtt estimation.
 			 */
 			if (tp->repair)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5a8105e84f7c..957f7a0e21c0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1014,7 +1014,7 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
 
 static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
 {
-	skb->skb_mstamp = tp->tcp_mstamp;
+	skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
 	list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
 }
 
@@ -1061,7 +1061,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 		if (unlikely(!skb))
 			return -ENOBUFS;
 	}
-	skb->skb_mstamp = tp->tcp_mstamp;
+	skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
 
 	inet = inet_sk(sk);
 	tcb = TCP_SKB_CB(skb);
@@ -1165,8 +1165,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 	skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
 	skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
 
-	/* Our usage of tstamp should remain private */
-	skb->tstamp = 0;
+	/* Leave earliest departure time in skb->tstamp (skb->skb_mstamp_ns) */
 
 	/* Cleanup our debris for IP stacks */
 	memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
@@ -3221,10 +3220,10 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 	memset(&opts, 0, sizeof(opts));
 #ifdef CONFIG_SYN_COOKIES
 	if (unlikely(req->cookie_ts))
-		skb->skb_mstamp = cookie_init_timestamp(req);
+		skb->skb_mstamp_ns = cookie_init_timestamp(req);
 	else
 #endif
-		skb->skb_mstamp = tcp_clock_us();
+		skb->skb_mstamp_ns = tcp_clock_ns();
 
 #ifdef CONFIG_TCP_MD5SIG
 	rcu_read_lock();
@@ -3440,7 +3439,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
 
 	err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation);
 
-	syn->skb_mstamp = syn_data->skb_mstamp;
+	syn->skb_mstamp_ns = syn_data->skb_mstamp_ns;
 
 	/* Now full SYN+DATA was cloned and sent (or not),
 	 * remove the SYN from the original skb (syn_data)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 7fdf222a0bdf..61023d50cd60 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -360,7 +360,7 @@ static void tcp_probe_timer(struct sock *sk)
 	 */
 	start_ts = tcp_skb_timestamp(skb);
 	if (!start_ts)
-		skb->skb_mstamp = tp->tcp_mstamp;
+		skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
 	else if (icsk->icsk_user_timeout &&
 		 (s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout)
 		goto abort;
-- 
cgit v1.2.3


From c798c88f3962ddff89c7aa818986caeecd46ab4c Mon Sep 17 00:00:00 2001
From: Fan Wu <wufan@codeaurora.org>
Date: Wed, 19 Sep 2018 01:59:00 +0000
Subject: EDAC, ghes: Use CPER module handles to locate DIMMs

Use SMBIOS module handle type 17, on platforms which provide valid
ones, to locate the corresponding DIMM and thus have per-DIMM error
counter updates.

Signed-off-by: Fan Wu <wufan@codeaurora.org>
[ Massage commit message. ]
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tyler Baicar <baicar.tyler@gmail.com>
Reviewed-by: James Morse <james.morse@arm.com>
Tested-by: Toshi Kani <toshi.kani@hpe.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: baicar.tyler@gmail.com
Cc: john.garry@huawei.com
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: shiju.jose@huawei.com
Cc: tanxiaofei@huawei.com
Cc: wanghuiqiang@huawei.com
Link: http://lkml.kernel.org/r/1537322340-1860-1-git-send-email-wufan@codeaurora.org
---
 drivers/edac/ghes_edac.c | 23 +++++++++++++++++++++++
 include/linux/edac.h     |  2 ++
 2 files changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c
index 473aeec4b1da..49396bf6ad88 100644
--- a/drivers/edac/ghes_edac.c
+++ b/drivers/edac/ghes_edac.c
@@ -81,6 +81,18 @@ static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg)
 		(*num_dimm)++;
 }
 
+static int get_dimm_smbios_index(u16 handle)
+{
+	struct mem_ctl_info *mci = ghes_pvt->mci;
+	int i;
+
+	for (i = 0; i < mci->tot_dimms; i++) {
+		if (mci->dimms[i]->smbios_handle == handle)
+			return i;
+	}
+	return -1;
+}
+
 static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
 {
 	struct ghes_edac_dimm_fill *dimm_fill = arg;
@@ -177,6 +189,8 @@ static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg)
 				entry->total_width, entry->data_width);
 		}
 
+		dimm->smbios_handle = entry->handle;
+
 		dimm_fill->count++;
 	}
 }
@@ -327,12 +341,21 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 		p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos);
 	if (mem_err->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
 		const char *bank = NULL, *device = NULL;
+		int index = -1;
+
 		dmi_memdev_name(mem_err->mem_dev_handle, &bank, &device);
 		if (bank != NULL && device != NULL)
 			p += sprintf(p, "DIMM location:%s %s ", bank, device);
 		else
 			p += sprintf(p, "DIMM DMI handle: 0x%.4x ",
 				     mem_err->mem_dev_handle);
+
+		index = get_dimm_smbios_index(mem_err->mem_dev_handle);
+		if (index >= 0) {
+			e->top_layer = index;
+			e->enable_per_layer_report = true;
+		}
+
 	}
 	if (p > e->location)
 		*(p - 1) = '\0';
diff --git a/include/linux/edac.h b/include/linux/edac.h
index bffb97828ed6..a45ce1f84bfc 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -451,6 +451,8 @@ struct dimm_info {
 	u32 nr_pages;			/* number of pages on this dimm */
 
 	unsigned csrow, cschannel;	/* Points to the old API data */
+
+	u16 smbios_handle;              /* Handle for SMBIOS type 17 */
 };
 
 /**
-- 
cgit v1.2.3


From 65a272642ce13aab911f68279c8994cde3282b0a Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Fri, 18 May 2018 15:47:50 -0700
Subject: soc: qcom: geni: Make version macros simpler

This macro doesn't work, because it hides a local variable inside of the
macro to hold the version and that variable name is called 'ver' and
'version' sometimes.

Let's change this to be more explicit. Introduce three macros for the
major, minor, and step of the version, and require callers to pass the
version in to get the part of the version out. This way we don't hide
local variables inside macros and things are less evil overall.

Cc: Karthikeyan Ramasubramanian <kramasub@codeaurora.org>
Cc: Sagar Dharia <sdharia@codeaurora.org>
Cc: Girish Mahadevan <girishm@codeaurora.org>
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Andy Gross <andy.gross@linaro.org>
---
 include/linux/qcom-geni-se.h | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/qcom-geni-se.h b/include/linux/qcom-geni-se.h
index 5d6144977828..3bcd67fd5548 100644
--- a/include/linux/qcom-geni-se.h
+++ b/include/linux/qcom-geni-se.h
@@ -225,19 +225,14 @@ struct geni_se {
 #define HW_VER_MINOR_SHFT		16
 #define HW_VER_STEP_MASK		GENMASK(15, 0)
 
+#define GENI_SE_VERSION_MAJOR(ver) ((ver & HW_VER_MAJOR_MASK) >> HW_VER_MAJOR_SHFT)
+#define GENI_SE_VERSION_MINOR(ver) ((ver & HW_VER_MINOR_MASK) >> HW_VER_MINOR_SHFT)
+#define GENI_SE_VERSION_STEP(ver) (ver & HW_VER_STEP_MASK)
+
 #if IS_ENABLED(CONFIG_QCOM_GENI_SE)
 
 u32 geni_se_get_qup_hw_version(struct geni_se *se);
 
-#define geni_se_get_wrapper_version(se, major, minor, step) do { \
-	u32 ver; \
-\
-	ver = geni_se_get_qup_hw_version(se); \
-	major = (ver & HW_VER_MAJOR_MASK) >> HW_VER_MAJOR_SHFT; \
-	minor = (ver & HW_VER_MINOR_MASK) >> HW_VER_MINOR_SHFT; \
-	step = version & HW_VER_STEP_MASK; \
-} while (0)
-
 /**
  * geni_se_read_proto() - Read the protocol configured for a serial engine
  * @se:		Pointer to the concerned serial engine.
-- 
cgit v1.2.3


From ac3d9dd034e565df2c034ab2ca71f0a9f69153c1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 21 Sep 2018 15:27:38 -0700
Subject: netpoll: make ndo_poll_controller() optional

As diagnosed by Song Liu, ndo_poll_controller() can
be very dangerous on loaded hosts, since the cpu
calling ndo_poll_controller() might steal all NAPI
contexts (for all RX/TX queues of the NIC). This capture
can last for unlimited amount of time, since one
cpu is generally not able to drain all the queues under load.

It seems that all networking drivers that do use NAPI
for their TX completions, should not provide a ndo_poll_controller().

NAPI drivers have netpoll support already handled
in core networking stack, since netpoll_poll_dev()
uses poll_napi(dev) to iterate through registered
NAPI contexts for a device.

This patch allows netpoll_poll_dev() to process NAPI
contexts even for drivers not providing ndo_poll_controller(),
allowing for following patches in NAPI drivers.

Also we export netpoll_poll_dev() so that it can be called
by bonding/team drivers in following patches.

Reported-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Tested-by: Song Liu <songliubraving@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h |  5 +++--
 net/core/netpoll.c      | 19 +++++++------------
 2 files changed, 10 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 67662d01130a..3ef82d3a78db 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -49,8 +49,9 @@ struct netpoll_info {
 };
 
 #ifdef CONFIG_NETPOLL
-extern void netpoll_poll_disable(struct net_device *dev);
-extern void netpoll_poll_enable(struct net_device *dev);
+void netpoll_poll_dev(struct net_device *dev);
+void netpoll_poll_disable(struct net_device *dev);
+void netpoll_poll_enable(struct net_device *dev);
 #else
 static inline void netpoll_poll_disable(struct net_device *dev) { return; }
 static inline void netpoll_poll_enable(struct net_device *dev) { return; }
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 57557a6a950c..3219a2932463 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -187,16 +187,16 @@ static void poll_napi(struct net_device *dev)
 	}
 }
 
-static void netpoll_poll_dev(struct net_device *dev)
+void netpoll_poll_dev(struct net_device *dev)
 {
-	const struct net_device_ops *ops;
 	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
+	const struct net_device_ops *ops;
 
 	/* Don't do any rx activity if the dev_lock mutex is held
 	 * the dev_open/close paths use this to block netpoll activity
 	 * while changing device state
 	 */
-	if (down_trylock(&ni->dev_lock))
+	if (!ni || down_trylock(&ni->dev_lock))
 		return;
 
 	if (!netif_running(dev)) {
@@ -205,13 +205,8 @@ static void netpoll_poll_dev(struct net_device *dev)
 	}
 
 	ops = dev->netdev_ops;
-	if (!ops->ndo_poll_controller) {
-		up(&ni->dev_lock);
-		return;
-	}
-
-	/* Process pending work on NIC */
-	ops->ndo_poll_controller(dev);
+	if (ops->ndo_poll_controller)
+		ops->ndo_poll_controller(dev);
 
 	poll_napi(dev);
 
@@ -219,6 +214,7 @@ static void netpoll_poll_dev(struct net_device *dev)
 
 	zap_completion_queue();
 }
+EXPORT_SYMBOL(netpoll_poll_dev);
 
 void netpoll_poll_disable(struct net_device *dev)
 {
@@ -613,8 +609,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
 	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
 	INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);
 
-	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
-	    !ndev->netdev_ops->ndo_poll_controller) {
+	if (ndev->priv_flags & IFF_DISABLE_NETPOLL) {
 		np_err(np, "%s doesn't support polling, aborting\n",
 		       np->dev_name);
 		err = -ENOTSUPP;
-- 
cgit v1.2.3


From 1cc33161a83d20b5462b1e93f95d3ce6388079ee Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Date: Mon, 20 Aug 2018 10:12:47 +0530
Subject: uprobes: Support SDT markers having reference count (semaphore)

Userspace Statically Defined Tracepoints[1] are dtrace style markers
inside userspace applications. Applications like PostgreSQL, MySQL,
Pthread, Perl, Python, Java, Ruby, Node.js, libvirt, QEMU, glib etc
have these markers embedded in them. These markers are added by developer
at important places in the code. Each marker source expands to a single
nop instruction in the compiled code but there may be additional
overhead for computing the marker arguments which expands to couple of
instructions. In case the overhead is more, execution of it can be
omitted by runtime if() condition when no one is tracing on the marker:

    if (reference_counter > 0) {
        Execute marker instructions;
    }

Default value of reference counter is 0. Tracer has to increment the
reference counter before tracing on a marker and decrement it when
done with the tracing.

Implement the reference counter logic in core uprobe. User will be
able to use it from trace_uprobe as well as from kernel module. New
trace_uprobe definition with reference counter will now be:

    <path>:<offset>[(ref_ctr_offset)]

where ref_ctr_offset is an optional field. For kernel module, new
variant of uprobe_register() has been introduced:

    uprobe_register_refctr(inode, offset, ref_ctr_offset, consumer)

No new variant for uprobe_unregister() because it's assumed to have
only one reference counter for one uprobe.

[1] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation

Note: 'reference counter' is called as 'semaphore' in original Dtrace
(or Systemtap, bcc and even in ELF) documentation and code. But the
term 'semaphore' is misleading in this context. This is just a counter
used to hold number of tracers tracing on a marker. This is not really
used for any synchronization. So we are calling it a 'reference counter'
in kernel / perf code.

Link: http://lkml.kernel.org/r/20180820044250.11659-2-ravi.bangoria@linux.ibm.com

Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
[Only trace_uprobe.c]
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Song Liu <songliubraving@fb.com>
Tested-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/uprobes.h     |   5 +
 kernel/events/uprobes.c     | 259 ++++++++++++++++++++++++++++++++++++++++++--
 kernel/trace/trace.c        |   2 +-
 kernel/trace/trace_uprobe.c |  38 ++++++-
 4 files changed, 293 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index bb9d2084af03..103a48a48872 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -123,6 +123,7 @@ extern unsigned long uprobe_get_swbp_addr(struct pt_regs *regs);
 extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs);
 extern int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
+extern int uprobe_register_refctr(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc);
 extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
 extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_mmap(struct vm_area_struct *vma);
@@ -160,6 +161,10 @@ uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
 {
 	return -ENOSYS;
 }
+static inline int uprobe_register_refctr(struct inode *inode, loff_t offset, loff_t ref_ctr_offset, struct uprobe_consumer *uc)
+{
+	return -ENOSYS;
+}
 static inline int
 uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add)
 {
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 3207a4d26849..934feb39f6be 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -73,6 +73,7 @@ struct uprobe {
 	struct uprobe_consumer	*consumers;
 	struct inode		*inode;		/* Also hold a ref to inode */
 	loff_t			offset;
+	loff_t			ref_ctr_offset;
 	unsigned long		flags;
 
 	/*
@@ -88,6 +89,15 @@ struct uprobe {
 	struct arch_uprobe	arch;
 };
 
+struct delayed_uprobe {
+	struct list_head list;
+	struct uprobe *uprobe;
+	struct mm_struct *mm;
+};
+
+static DEFINE_MUTEX(delayed_uprobe_lock);
+static LIST_HEAD(delayed_uprobe_list);
+
 /*
  * Execute out of line area: anonymous executable mapping installed
  * by the probed task to execute the copy of the original instruction
@@ -282,6 +292,166 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
 	return 1;
 }
 
+static struct delayed_uprobe *
+delayed_uprobe_check(struct uprobe *uprobe, struct mm_struct *mm)
+{
+	struct delayed_uprobe *du;
+
+	list_for_each_entry(du, &delayed_uprobe_list, list)
+		if (du->uprobe == uprobe && du->mm == mm)
+			return du;
+	return NULL;
+}
+
+static int delayed_uprobe_add(struct uprobe *uprobe, struct mm_struct *mm)
+{
+	struct delayed_uprobe *du;
+
+	if (delayed_uprobe_check(uprobe, mm))
+		return 0;
+
+	du  = kzalloc(sizeof(*du), GFP_KERNEL);
+	if (!du)
+		return -ENOMEM;
+
+	du->uprobe = uprobe;
+	du->mm = mm;
+	list_add(&du->list, &delayed_uprobe_list);
+	return 0;
+}
+
+static void delayed_uprobe_delete(struct delayed_uprobe *du)
+{
+	if (WARN_ON(!du))
+		return;
+	list_del(&du->list);
+	kfree(du);
+}
+
+static void delayed_uprobe_remove(struct uprobe *uprobe, struct mm_struct *mm)
+{
+	struct list_head *pos, *q;
+	struct delayed_uprobe *du;
+
+	if (!uprobe && !mm)
+		return;
+
+	list_for_each_safe(pos, q, &delayed_uprobe_list) {
+		du = list_entry(pos, struct delayed_uprobe, list);
+
+		if (uprobe && du->uprobe != uprobe)
+			continue;
+		if (mm && du->mm != mm)
+			continue;
+
+		delayed_uprobe_delete(du);
+	}
+}
+
+static bool valid_ref_ctr_vma(struct uprobe *uprobe,
+			      struct vm_area_struct *vma)
+{
+	unsigned long vaddr = offset_to_vaddr(vma, uprobe->ref_ctr_offset);
+
+	return uprobe->ref_ctr_offset &&
+		vma->vm_file &&
+		file_inode(vma->vm_file) == uprobe->inode &&
+		(vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE &&
+		vma->vm_start <= vaddr &&
+		vma->vm_end > vaddr;
+}
+
+static struct vm_area_struct *
+find_ref_ctr_vma(struct uprobe *uprobe, struct mm_struct *mm)
+{
+	struct vm_area_struct *tmp;
+
+	for (tmp = mm->mmap; tmp; tmp = tmp->vm_next)
+		if (valid_ref_ctr_vma(uprobe, tmp))
+			return tmp;
+
+	return NULL;
+}
+
+static int
+__update_ref_ctr(struct mm_struct *mm, unsigned long vaddr, short d)
+{
+	void *kaddr;
+	struct page *page;
+	struct vm_area_struct *vma;
+	int ret;
+	short *ptr;
+
+	if (!vaddr || !d)
+		return -EINVAL;
+
+	ret = get_user_pages_remote(NULL, mm, vaddr, 1,
+			FOLL_WRITE, &page, &vma, NULL);
+	if (unlikely(ret <= 0)) {
+		/*
+		 * We are asking for 1 page. If get_user_pages_remote() fails,
+		 * it may return 0, in that case we have to return error.
+		 */
+		return ret == 0 ? -EBUSY : ret;
+	}
+
+	kaddr = kmap_atomic(page);
+	ptr = kaddr + (vaddr & ~PAGE_MASK);
+
+	if (unlikely(*ptr + d < 0)) {
+		pr_warn("ref_ctr going negative. vaddr: 0x%lx, "
+			"curr val: %d, delta: %d\n", vaddr, *ptr, d);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	*ptr += d;
+	ret = 0;
+out:
+	kunmap_atomic(kaddr);
+	put_page(page);
+	return ret;
+}
+
+static void update_ref_ctr_warn(struct uprobe *uprobe,
+				struct mm_struct *mm, short d)
+{
+	pr_warn("ref_ctr %s failed for inode: 0x%lx offset: "
+		"0x%llx ref_ctr_offset: 0x%llx of mm: 0x%pK\n",
+		d > 0 ? "increment" : "decrement", uprobe->inode->i_ino,
+		(unsigned long long) uprobe->offset,
+		(unsigned long long) uprobe->ref_ctr_offset, mm);
+}
+
+static int update_ref_ctr(struct uprobe *uprobe, struct mm_struct *mm,
+			  short d)
+{
+	struct vm_area_struct *rc_vma;
+	unsigned long rc_vaddr;
+	int ret = 0;
+
+	rc_vma = find_ref_ctr_vma(uprobe, mm);
+
+	if (rc_vma) {
+		rc_vaddr = offset_to_vaddr(rc_vma, uprobe->ref_ctr_offset);
+		ret = __update_ref_ctr(mm, rc_vaddr, d);
+		if (ret)
+			update_ref_ctr_warn(uprobe, mm, d);
+
+		if (d > 0)
+			return ret;
+	}
+
+	mutex_lock(&delayed_uprobe_lock);
+	if (d > 0)
+		ret = delayed_uprobe_add(uprobe, mm);
+	else
+		delayed_uprobe_remove(uprobe, mm);
+	mutex_unlock(&delayed_uprobe_lock);
+
+	return ret;
+}
+
 /*
  * NOTE:
  * Expect the breakpoint instruction to be the smallest size instruction for
@@ -302,9 +472,13 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
 int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
 			unsigned long vaddr, uprobe_opcode_t opcode)
 {
+	struct uprobe *uprobe;
 	struct page *old_page, *new_page;
 	struct vm_area_struct *vma;
-	int ret;
+	int ret, is_register, ref_ctr_updated = 0;
+
+	is_register = is_swbp_insn(&opcode);
+	uprobe = container_of(auprobe, struct uprobe, arch);
 
 retry:
 	/* Read the page with vaddr into memory */
@@ -317,6 +491,15 @@ retry:
 	if (ret <= 0)
 		goto put_old;
 
+	/* We are going to replace instruction, update ref_ctr. */
+	if (!ref_ctr_updated && uprobe->ref_ctr_offset) {
+		ret = update_ref_ctr(uprobe, mm, is_register ? 1 : -1);
+		if (ret)
+			goto put_old;
+
+		ref_ctr_updated = 1;
+	}
+
 	ret = anon_vma_prepare(vma);
 	if (ret)
 		goto put_old;
@@ -337,6 +520,11 @@ put_old:
 
 	if (unlikely(ret == -EAGAIN))
 		goto retry;
+
+	/* Revert back reference counter if instruction update failed. */
+	if (ret && is_register && ref_ctr_updated)
+		update_ref_ctr(uprobe, mm, -1);
+
 	return ret;
 }
 
@@ -378,8 +566,15 @@ static struct uprobe *get_uprobe(struct uprobe *uprobe)
 
 static void put_uprobe(struct uprobe *uprobe)
 {
-	if (atomic_dec_and_test(&uprobe->ref))
+	if (atomic_dec_and_test(&uprobe->ref)) {
+		/*
+		 * If application munmap(exec_vma) before uprobe_unregister()
+		 * gets called, we don't get a chance to remove uprobe from
+		 * delayed_uprobe_list from remove_breakpoint(). Do it here.
+		 */
+		delayed_uprobe_remove(uprobe, NULL);
 		kfree(uprobe);
+	}
 }
 
 static int match_uprobe(struct uprobe *l, struct uprobe *r)
@@ -484,7 +679,8 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
 	return u;
 }
 
-static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
+static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset,
+				   loff_t ref_ctr_offset)
 {
 	struct uprobe *uprobe, *cur_uprobe;
 
@@ -494,6 +690,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 
 	uprobe->inode = inode;
 	uprobe->offset = offset;
+	uprobe->ref_ctr_offset = ref_ctr_offset;
 	init_rwsem(&uprobe->register_rwsem);
 	init_rwsem(&uprobe->consumer_rwsem);
 
@@ -895,7 +1092,7 @@ EXPORT_SYMBOL_GPL(uprobe_unregister);
  * else return 0 (success)
  */
 static int __uprobe_register(struct inode *inode, loff_t offset,
-			     struct uprobe_consumer *uc)
+			     loff_t ref_ctr_offset, struct uprobe_consumer *uc)
 {
 	struct uprobe *uprobe;
 	int ret;
@@ -912,7 +1109,7 @@ static int __uprobe_register(struct inode *inode, loff_t offset,
 		return -EINVAL;
 
  retry:
-	uprobe = alloc_uprobe(inode, offset);
+	uprobe = alloc_uprobe(inode, offset, ref_ctr_offset);
 	if (!uprobe)
 		return -ENOMEM;
 	/*
@@ -938,10 +1135,17 @@ static int __uprobe_register(struct inode *inode, loff_t offset,
 int uprobe_register(struct inode *inode, loff_t offset,
 		    struct uprobe_consumer *uc)
 {
-	return __uprobe_register(inode, offset, uc);
+	return __uprobe_register(inode, offset, 0, uc);
 }
 EXPORT_SYMBOL_GPL(uprobe_register);
 
+int uprobe_register_refctr(struct inode *inode, loff_t offset,
+			   loff_t ref_ctr_offset, struct uprobe_consumer *uc)
+{
+	return __uprobe_register(inode, offset, ref_ctr_offset, uc);
+}
+EXPORT_SYMBOL_GPL(uprobe_register_refctr);
+
 /*
  * uprobe_apply - unregister an already registered probe.
  * @inode: the file in which the probe has to be removed.
@@ -1060,6 +1264,35 @@ static void build_probe_list(struct inode *inode,
 	spin_unlock(&uprobes_treelock);
 }
 
+/* @vma contains reference counter, not the probed instruction. */
+static int delayed_ref_ctr_inc(struct vm_area_struct *vma)
+{
+	struct list_head *pos, *q;
+	struct delayed_uprobe *du;
+	unsigned long vaddr;
+	int ret = 0, err = 0;
+
+	mutex_lock(&delayed_uprobe_lock);
+	list_for_each_safe(pos, q, &delayed_uprobe_list) {
+		du = list_entry(pos, struct delayed_uprobe, list);
+
+		if (du->mm != vma->vm_mm ||
+		    !valid_ref_ctr_vma(du->uprobe, vma))
+			continue;
+
+		vaddr = offset_to_vaddr(vma, du->uprobe->ref_ctr_offset);
+		ret = __update_ref_ctr(vma->vm_mm, vaddr, 1);
+		if (ret) {
+			update_ref_ctr_warn(du->uprobe, vma->vm_mm, 1);
+			if (!err)
+				err = ret;
+		}
+		delayed_uprobe_delete(du);
+	}
+	mutex_unlock(&delayed_uprobe_lock);
+	return err;
+}
+
 /*
  * Called from mmap_region/vma_adjust with mm->mmap_sem acquired.
  *
@@ -1072,7 +1305,15 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	struct uprobe *uprobe, *u;
 	struct inode *inode;
 
-	if (no_uprobe_events() || !valid_vma(vma, true))
+	if (no_uprobe_events())
+		return 0;
+
+	if (vma->vm_file &&
+	    (vma->vm_flags & (VM_WRITE|VM_SHARED)) == VM_WRITE &&
+	    test_bit(MMF_HAS_UPROBES, &vma->vm_mm->flags))
+		delayed_ref_ctr_inc(vma);
+
+	if (!valid_vma(vma, true))
 		return 0;
 
 	inode = file_inode(vma->vm_file);
@@ -1246,6 +1487,10 @@ void uprobe_clear_state(struct mm_struct *mm)
 {
 	struct xol_area *area = mm->uprobes_state.xol_area;
 
+	mutex_lock(&delayed_uprobe_lock);
+	delayed_uprobe_remove(NULL, mm);
+	mutex_unlock(&delayed_uprobe_lock);
+
 	if (!area)
 		return;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bf6f1d70484d..147be8523560 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4621,7 +4621,7 @@ static const char readme_msg[] =
   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
 #endif
 #ifdef CONFIG_UPROBE_EVENTS
-	"\t    place: <path>:<offset>\n"
+  "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
 #endif
 	"\t     args: <name>=fetcharg[:type]\n"
 	"\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index e696667da29a..7b85172beab6 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -47,6 +47,7 @@ struct trace_uprobe {
 	struct inode			*inode;
 	char				*filename;
 	unsigned long			offset;
+	unsigned long			ref_ctr_offset;
 	unsigned long			nhit;
 	struct trace_probe		tp;
 };
@@ -352,10 +353,10 @@ end:
 static int create_trace_uprobe(int argc, char **argv)
 {
 	struct trace_uprobe *tu;
-	char *arg, *event, *group, *filename;
+	char *arg, *event, *group, *filename, *rctr, *rctr_end;
 	char buf[MAX_EVENT_NAME_LEN];
 	struct path path;
-	unsigned long offset;
+	unsigned long offset, ref_ctr_offset;
 	bool is_delete, is_return;
 	int i, ret;
 
@@ -364,6 +365,7 @@ static int create_trace_uprobe(int argc, char **argv)
 	is_return = false;
 	event = NULL;
 	group = NULL;
+	ref_ctr_offset = 0;
 
 	/* argc must be >= 1 */
 	if (argv[0][0] == '-')
@@ -438,6 +440,26 @@ static int create_trace_uprobe(int argc, char **argv)
 		goto fail_address_parse;
 	}
 
+	/* Parse reference counter offset if specified. */
+	rctr = strchr(arg, '(');
+	if (rctr) {
+		rctr_end = strchr(rctr, ')');
+		if (rctr > rctr_end || *(rctr_end + 1) != 0) {
+			ret = -EINVAL;
+			pr_info("Invalid reference counter offset.\n");
+			goto fail_address_parse;
+		}
+
+		*rctr++ = '\0';
+		*rctr_end = '\0';
+		ret = kstrtoul(rctr, 0, &ref_ctr_offset);
+		if (ret) {
+			pr_info("Invalid reference counter offset.\n");
+			goto fail_address_parse;
+		}
+	}
+
+	/* Parse uprobe offset. */
 	ret = kstrtoul(arg, 0, &offset);
 	if (ret)
 		goto fail_address_parse;
@@ -472,6 +494,7 @@ static int create_trace_uprobe(int argc, char **argv)
 		goto fail_address_parse;
 	}
 	tu->offset = offset;
+	tu->ref_ctr_offset = ref_ctr_offset;
 	tu->path = path;
 	tu->filename = kstrdup(filename, GFP_KERNEL);
 
@@ -590,6 +613,9 @@ static int probes_seq_show(struct seq_file *m, void *v)
 			trace_event_name(&tu->tp.call), tu->filename,
 			(int)(sizeof(void *) * 2), tu->offset);
 
+	if (tu->ref_ctr_offset)
+		seq_printf(m, "(0x%lx)", tu->ref_ctr_offset);
+
 	for (i = 0; i < tu->tp.nr_args; i++)
 		seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm);
 
@@ -905,7 +931,13 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file,
 
 	tu->consumer.filter = filter;
 	tu->inode = d_real_inode(tu->path.dentry);
-	ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
+	if (tu->ref_ctr_offset) {
+		ret = uprobe_register_refctr(tu->inode, tu->offset,
+				tu->ref_ctr_offset, &tu->consumer);
+	} else {
+		ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
+	}
+
 	if (ret)
 		goto err_buffer;
 
-- 
cgit v1.2.3


From a725356b6659469d182d662f22d770d83d3bc7b5 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Tue, 18 Sep 2018 16:34:34 +0300
Subject: vfs: swap names of {do,vfs}_clone_file_range()

Commit 031a072a0b8a ("vfs: call vfs_clone_file_range() under freeze
protection") created a wrapper do_clone_file_range() around
vfs_clone_file_range() moving the freeze protection to former, so
overlayfs could call the latter.

The more common vfs practice is to call do_xxx helpers from vfs_xxx
helpers, where freeze protecction is taken in the vfs_xxx helper, so
this anomality could be a source of confusion.

It seems that commit 8ede205541ff ("ovl: add reflink/copyfile/dedup
support") may have fallen a victim to this confusion -
ovl_clone_file_range() calls the vfs_clone_file_range() helper in the
hope of getting freeze protection on upper fs, but in fact results in
overlayfs allowing to bypass upper fs freeze protection.

Swap the names of the two helpers to conform to common vfs practice
and call the correct helpers from overlayfs and nfsd.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
---
 fs/ioctl.c             |  2 +-
 fs/nfsd/vfs.c          |  3 ++-
 fs/overlayfs/copy_up.c |  2 +-
 fs/overlayfs/file.c    |  2 +-
 fs/read_write.c        | 17 +++++++++++++++--
 include/linux/fs.h     | 17 +++--------------
 6 files changed, 23 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ioctl.c b/fs/ioctl.c
index 3212c29235ce..2005529af560 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -230,7 +230,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
 	ret = -EXDEV;
 	if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
 		goto fdput;
-	ret = do_clone_file_range(src_file.file, off, dst_file, destoff, olen);
+	ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen);
 fdput:
 	fdput(src_file);
 	return ret;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 55a099e47ba2..b53e76391e52 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -541,7 +541,8 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
 __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
 		u64 dst_pos, u64 count)
 {
-	return nfserrno(do_clone_file_range(src, src_pos, dst, dst_pos, count));
+	return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
+					     count));
 }
 
 ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 296037afecdb..1cc797a08a5b 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -141,7 +141,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 	}
 
 	/* Try to use clone_file_range to clone up within the same fs */
-	error = vfs_clone_file_range(old_file, 0, new_file, 0, len);
+	error = do_clone_file_range(old_file, 0, new_file, 0, len);
 	if (!error)
 		goto out;
 	/* Couldn't clone, so now we try to copy the data */
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 5d1b4b38f743..986313da0c88 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -461,7 +461,7 @@ static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in,
 		break;
 
 	case OVL_CLONE:
-		ret = do_clone_file_range(real_in.file, pos_in,
+		ret = vfs_clone_file_range(real_in.file, pos_in,
 					   real_out.file, pos_out, len);
 		break;
 
diff --git a/fs/read_write.c b/fs/read_write.c
index 39b4a21dd933..8a2737f0d61d 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1818,8 +1818,8 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 }
 EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
 
-int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-		struct file *file_out, loff_t pos_out, u64 len)
+int do_clone_file_range(struct file *file_in, loff_t pos_in,
+			struct file *file_out, loff_t pos_out, u64 len)
 {
 	struct inode *inode_in = file_inode(file_in);
 	struct inode *inode_out = file_inode(file_out);
@@ -1866,6 +1866,19 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
 
 	return ret;
 }
+EXPORT_SYMBOL(do_clone_file_range);
+
+int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+			 struct file *file_out, loff_t pos_out, u64 len)
+{
+	int ret;
+
+	file_start_write(file_out);
+	ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len);
+	file_end_write(file_out);
+
+	return ret;
+}
 EXPORT_SYMBOL(vfs_clone_file_range);
 
 /*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6c0b4a1c22ff..897eae8faee1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1828,8 +1828,10 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
 extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 				      struct inode *inode_out, loff_t pos_out,
 				      u64 *len, bool is_dedupe);
+extern int do_clone_file_range(struct file *file_in, loff_t pos_in,
+			       struct file *file_out, loff_t pos_out, u64 len);
 extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-		struct file *file_out, loff_t pos_out, u64 len);
+				struct file *file_out, loff_t pos_out, u64 len);
 extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 					 struct inode *dest, loff_t destoff,
 					 loff_t len, bool *is_same);
@@ -2773,19 +2775,6 @@ static inline void file_end_write(struct file *file)
 	__sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE);
 }
 
-static inline int do_clone_file_range(struct file *file_in, loff_t pos_in,
-				      struct file *file_out, loff_t pos_out,
-				      u64 len)
-{
-	int ret;
-
-	file_start_write(file_out);
-	ret = vfs_clone_file_range(file_in, pos_in, file_out, pos_out, len);
-	file_end_write(file_out);
-
-	return ret;
-}
-
 /*
  * get_write_access() gets write permission for a file.
  * put_write_access() releases this write permission.
-- 
cgit v1.2.3


From ec0daae685b20f3bee196719f64d79d1cc40457e Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Thu, 20 Sep 2018 12:35:30 -0700
Subject: gpio: omap: Add level wakeup handling for omap4 based SoCs

I noticed that unlike omap2 and 3 based SoCs, omap4 based SoCs keep
the GPIO clocks enabled for GPIO level interrupts with wakeup enabled.
This blocks deeper idle states as the whole domain will stay busy.

The GPIO functional clock seems to stay enabled if the wakeup register
is enabled and a level interrupt is triggered. In that case the only
way to have the GPIO module idle is to reset it. It is possible this
has gone unnoticed with OSWR (Open SWitch Retention) and off mode
during idle resetting GPIO context most GPIO instances in the earlier
Android trees for example.

Looks like the way to deal with this is to have omap4 based SoCs
only set wake for the duration of idle for level interrupts, and clear
level registers for the idle. With level interrupts we can do this as
the level interrupt from device will be still there on resume.

I've taken the long path to fixing this to avoid yet more hard to
read code. I've set up a quirks flag, and a struct for function
pointers so we can use these to clean up other quirk handling easier
in the later patches. The current level quirk handling is moved to
the new functions.

Cc: Aaro Koskinen <aaro.koskinen@iki.fi>
Cc: Ladislav Michl <ladis@linux-mips.org>
Cc: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Acked-by: Grygorii Strashko <grygorii.strashko@ti.com>
Tested-by: Keerthy <j-keerthy@ti.com>

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-omap.c                | 151 +++++++++++++++++++++++++-------
 include/linux/platform_data/gpio-omap.h |   2 +
 2 files changed, 120 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index e81008678a38..d0051ef53cc4 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -31,6 +31,8 @@
 #define OFF_MODE	1
 #define OMAP4_GPIO_DEBOUNCINGTIME_MASK 0xFF
 
+#define OMAP_GPIO_QUIRK_DEFERRED_WKUP_EN	BIT(1)
+
 static LIST_HEAD(omap_gpio_list);
 
 struct gpio_regs {
@@ -48,6 +50,13 @@ struct gpio_regs {
 	u32 debounce_en;
 };
 
+struct gpio_bank;
+
+struct gpio_omap_funcs {
+	void (*idle_enable_level_quirk)(struct gpio_bank *bank);
+	void (*idle_disable_level_quirk)(struct gpio_bank *bank);
+};
+
 struct gpio_bank {
 	struct list_head node;
 	void __iomem *base;
@@ -55,6 +64,7 @@ struct gpio_bank {
 	u32 non_wakeup_gpios;
 	u32 enabled_non_wakeup_gpios;
 	struct gpio_regs context;
+	struct gpio_omap_funcs funcs;
 	u32 saved_datain;
 	u32 level_mask;
 	u32 toggle_mask;
@@ -75,6 +85,7 @@ struct gpio_bank {
 	int context_loss_count;
 	int power_mode;
 	bool workaround_enabled;
+	u32 quirks;
 
 	void (*set_dataout)(struct gpio_bank *bank, unsigned gpio, int enable);
 	void (*set_dataout_multiple)(struct gpio_bank *bank,
@@ -368,9 +379,18 @@ static inline void omap_set_gpio_trigger(struct gpio_bank *bank, int gpio,
 			readl_relaxed(bank->base + bank->regs->fallingdetect);
 
 	if (likely(!(bank->non_wakeup_gpios & gpio_bit))) {
-		omap_gpio_rmw(base, bank->regs->wkup_en, gpio_bit, trigger != 0);
-		bank->context.wake_en =
-			readl_relaxed(bank->base + bank->regs->wkup_en);
+		/* Defer wkup_en register update until we idle? */
+		if (bank->quirks & OMAP_GPIO_QUIRK_DEFERRED_WKUP_EN) {
+			if (trigger)
+				bank->context.wake_en |= gpio_bit;
+			else
+				bank->context.wake_en &= ~gpio_bit;
+		} else {
+			omap_gpio_rmw(base, bank->regs->wkup_en, gpio_bit,
+				      trigger != 0);
+			bank->context.wake_en =
+				readl_relaxed(bank->base + bank->regs->wkup_en);
+		}
 	}
 
 	/* This part needs to be executed always for OMAP{34xx, 44xx} */
@@ -899,6 +919,82 @@ static void omap_gpio_unmask_irq(struct irq_data *d)
 	raw_spin_unlock_irqrestore(&bank->lock, flags);
 }
 
+/*
+ * Only edges can generate a wakeup event to the PRCM.
+ *
+ * Therefore, ensure any wake-up capable GPIOs have
+ * edge-detection enabled before going idle to ensure a wakeup
+ * to the PRCM is generated on a GPIO transition. (c.f. 34xx
+ * NDA TRM 25.5.3.1)
+ *
+ * The normal values will be restored upon ->runtime_resume()
+ * by writing back the values saved in bank->context.
+ */
+static void __maybe_unused
+omap2_gpio_enable_level_quirk(struct gpio_bank *bank)
+{
+	u32 wake_low, wake_hi;
+
+	/* Enable additional edge detection for level gpios for idle */
+	wake_low = bank->context.leveldetect0 & bank->context.wake_en;
+	if (wake_low)
+		writel_relaxed(wake_low | bank->context.fallingdetect,
+			       bank->base + bank->regs->fallingdetect);
+
+	wake_hi = bank->context.leveldetect1 & bank->context.wake_en;
+	if (wake_hi)
+		writel_relaxed(wake_hi | bank->context.risingdetect,
+			       bank->base + bank->regs->risingdetect);
+}
+
+static void __maybe_unused
+omap2_gpio_disable_level_quirk(struct gpio_bank *bank)
+{
+	/* Disable edge detection for level gpios after idle */
+	writel_relaxed(bank->context.fallingdetect,
+		       bank->base + bank->regs->fallingdetect);
+	writel_relaxed(bank->context.risingdetect,
+		       bank->base + bank->regs->risingdetect);
+}
+
+/*
+ * On omap4 and later SoC variants a level interrupt with wkup_en
+ * enabled blocks the GPIO functional clock from idling until the GPIO
+ * instance has been reset. To avoid that, we must set wkup_en only for
+ * idle for level interrupts, and clear level registers for the duration
+ * of idle. The level interrupts will be still there on wakeup by their
+ * nature.
+ */
+static void __maybe_unused
+omap4_gpio_enable_level_quirk(struct gpio_bank *bank)
+{
+	/* Update wake register for idle, edge bits might be already set */
+	writel_relaxed(bank->context.wake_en,
+		       bank->base + bank->regs->wkup_en);
+
+	/* Clear level registers for idle */
+	writel_relaxed(0, bank->base + bank->regs->leveldetect0);
+	writel_relaxed(0, bank->base + bank->regs->leveldetect1);
+}
+
+static void __maybe_unused
+omap4_gpio_disable_level_quirk(struct gpio_bank *bank)
+{
+	/* Restore level registers after idle */
+	writel_relaxed(bank->context.leveldetect0,
+		       bank->base + bank->regs->leveldetect0);
+	writel_relaxed(bank->context.leveldetect1,
+		       bank->base + bank->regs->leveldetect1);
+
+	/* Clear saved wkup_en for level, it will be set for next idle again */
+	bank->context.wake_en &= ~(bank->context.leveldetect0 |
+				   bank->context.leveldetect1);
+
+	/* Update wake with only edge configuration */
+	writel_relaxed(bank->context.wake_en,
+		       bank->base + bank->regs->wkup_en);
+}
+
 /*---------------------------------------------------------------------*/
 
 static int omap_mpuio_suspend_noirq(struct device *dev)
@@ -1270,6 +1366,7 @@ static int omap_gpio_probe(struct platform_device *pdev)
 	bank->chip.parent = dev;
 	bank->chip.owner = THIS_MODULE;
 	bank->dbck_flag = pdata->dbck_flag;
+	bank->quirks = pdata->quirks;
 	bank->stride = pdata->bank_stride;
 	bank->width = pdata->bank_width;
 	bank->is_mpuio = pdata->is_mpuio;
@@ -1278,6 +1375,7 @@ static int omap_gpio_probe(struct platform_device *pdev)
 #ifdef CONFIG_OF_GPIO
 	bank->chip.of_node = of_node_get(node);
 #endif
+
 	if (node) {
 		if (!of_property_read_bool(node, "ti,gpio-always-on"))
 			bank->loses_context = true;
@@ -1298,6 +1396,18 @@ static int omap_gpio_probe(struct platform_device *pdev)
 				omap_set_gpio_dataout_mask_multiple;
 	}
 
+	if (bank->quirks & OMAP_GPIO_QUIRK_DEFERRED_WKUP_EN) {
+		bank->funcs.idle_enable_level_quirk =
+			omap4_gpio_enable_level_quirk;
+		bank->funcs.idle_disable_level_quirk =
+			omap4_gpio_disable_level_quirk;
+	} else {
+		bank->funcs.idle_enable_level_quirk =
+			omap2_gpio_enable_level_quirk;
+		bank->funcs.idle_disable_level_quirk =
+			omap2_gpio_disable_level_quirk;
+	}
+
 	raw_spin_lock_init(&bank->lock);
 	raw_spin_lock_init(&bank->wa_lock);
 
@@ -1372,29 +1482,11 @@ static int omap_gpio_runtime_suspend(struct device *dev)
 	struct gpio_bank *bank = platform_get_drvdata(pdev);
 	u32 l1 = 0, l2 = 0;
 	unsigned long flags;
-	u32 wake_low, wake_hi;
 
 	raw_spin_lock_irqsave(&bank->lock, flags);
 
-	/*
-	 * Only edges can generate a wakeup event to the PRCM.
-	 *
-	 * Therefore, ensure any wake-up capable GPIOs have
-	 * edge-detection enabled before going idle to ensure a wakeup
-	 * to the PRCM is generated on a GPIO transition. (c.f. 34xx
-	 * NDA TRM 25.5.3.1)
-	 *
-	 * The normal values will be restored upon ->runtime_resume()
-	 * by writing back the values saved in bank->context.
-	 */
-	wake_low = bank->context.leveldetect0 & bank->context.wake_en;
-	if (wake_low)
-		writel_relaxed(wake_low | bank->context.fallingdetect,
-			     bank->base + bank->regs->fallingdetect);
-	wake_hi = bank->context.leveldetect1 & bank->context.wake_en;
-	if (wake_hi)
-		writel_relaxed(wake_hi | bank->context.risingdetect,
-			     bank->base + bank->regs->risingdetect);
+	if (bank->funcs.idle_enable_level_quirk)
+		bank->funcs.idle_enable_level_quirk(bank);
 
 	if (!bank->enabled_non_wakeup_gpios)
 		goto update_gpio_context_count;
@@ -1459,16 +1551,8 @@ static int omap_gpio_runtime_resume(struct device *dev)
 
 	omap_gpio_dbck_enable(bank);
 
-	/*
-	 * In ->runtime_suspend(), level-triggered, wakeup-enabled
-	 * GPIOs were set to edge trigger also in order to be able to
-	 * generate a PRCM wakeup.  Here we restore the
-	 * pre-runtime_suspend() values for edge triggering.
-	 */
-	writel_relaxed(bank->context.fallingdetect,
-		     bank->base + bank->regs->fallingdetect);
-	writel_relaxed(bank->context.risingdetect,
-		     bank->base + bank->regs->risingdetect);
+	if (bank->funcs.idle_disable_level_quirk)
+		bank->funcs.idle_disable_level_quirk(bank);
 
 	if (bank->loses_context) {
 		if (!bank->get_context_loss_count) {
@@ -1706,6 +1790,7 @@ static const struct omap_gpio_platform_data omap4_pdata = {
 	.regs = &omap4_gpio_regs,
 	.bank_width = 32,
 	.dbck_flag = true,
+	.quirks = OMAP_GPIO_QUIRK_DEFERRED_WKUP_EN,
 };
 
 static const struct of_device_id omap_gpio_match[] = {
diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h
index 8612855691b2..d952151b6e33 100644
--- a/include/linux/platform_data/gpio-omap.h
+++ b/include/linux/platform_data/gpio-omap.h
@@ -197,6 +197,8 @@ struct omap_gpio_platform_data {
 	bool is_mpuio;		/* whether the bank is of type MPUIO */
 	u32 non_wakeup_gpios;
 
+	u32 quirks;		/* Version specific quirks mask */
+
 	struct omap_gpio_reg_offs *regs;
 
 	/* Return context loss count due to PM states changing */
-- 
cgit v1.2.3


From b764a5863fd834836e65e258303e3a4a81c20f38 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Thu, 20 Sep 2018 12:35:31 -0700
Subject: gpio: omap: Remove custom PM calls and use cpu_pm instead

For a long time the gpio-omap custom PM calls have been annoying me so
let's replace them with cpu_pm instead. This will enable GPIO PM for
deeper idle states on omap4. And we can handle GPIO PM for omap2/3/4
in the same way.

Note that with this patch we are also slightly changing GPIO PM to be
less aggressive for omap3 and only will idle GPIO when PER context
may be lost.

For omap2, we don't need to save context and don't want to remove any
triggering so let's add a quirk flag for that.

Let's do this all in a single patch to avoid a situation where old
custom calls still are used with new code.

Cc: Aaro Koskinen <aaro.koskinen@iki.fi>
Cc: Keerthy <j-keerthy@ti.com>
Cc: Ladislav Michl <ladis@linux-mips.org>
Cc: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Acked-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 arch/arm/mach-omap2/pm24xx.c            |   7 +-
 arch/arm/mach-omap2/pm34xx.c            |  14 +--
 drivers/gpio/gpio-omap.c                | 178 +++++++++++++++++++-------------
 include/linux/platform_data/gpio-omap.h |  13 ---
 4 files changed, 116 insertions(+), 96 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c
index 2a1a4180d5d0..1298b53ac263 100644
--- a/arch/arm/mach-omap2/pm24xx.c
+++ b/arch/arm/mach-omap2/pm24xx.c
@@ -18,6 +18,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/cpu_pm.h>
 #include <linux/suspend.h>
 #include <linux/sched.h>
 #include <linux/proc_fs.h>
@@ -29,8 +30,6 @@
 #include <linux/clk-provider.h>
 #include <linux/irq.h>
 #include <linux/time.h>
-#include <linux/gpio.h>
-#include <linux/platform_data/gpio-omap.h>
 
 #include <asm/fncpy.h>
 
@@ -87,7 +86,7 @@ static int omap2_enter_full_retention(void)
 	l = omap_ctrl_readl(OMAP2_CONTROL_DEVCONF0) | OMAP24XX_USBSTANDBYCTRL;
 	omap_ctrl_writel(l, OMAP2_CONTROL_DEVCONF0);
 
-	omap2_gpio_prepare_for_idle(0);
+	cpu_cluster_pm_enter();
 
 	/* One last check for pending IRQs to avoid extra latency due
 	 * to sleeping unnecessarily. */
@@ -100,7 +99,7 @@ static int omap2_enter_full_retention(void)
 			   OMAP_SDRC_REGADDR(SDRC_POWER));
 
 no_sleep:
-	omap2_gpio_resume_after_idle();
+	cpu_cluster_pm_exit();
 
 	clk_enable(osc_ck);
 
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index 36c55547137c..1a90050361f1 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -18,19 +18,18 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/cpu_pm.h>
 #include <linux/pm.h>
 #include <linux/suspend.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/list.h>
 #include <linux/err.h>
-#include <linux/gpio.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/omap-dma.h>
 #include <linux/omap-gpmc.h>
-#include <linux/platform_data/gpio-omap.h>
 
 #include <trace/events/power.h>
 
@@ -197,7 +196,6 @@ void omap_sram_idle(void)
 	int mpu_next_state = PWRDM_POWER_ON;
 	int per_next_state = PWRDM_POWER_ON;
 	int core_next_state = PWRDM_POWER_ON;
-	int per_going_off;
 	u32 sdrc_pwr = 0;
 
 	mpu_next_state = pwrdm_read_next_pwrst(mpu_pwrdm);
@@ -227,10 +225,8 @@ void omap_sram_idle(void)
 	pwrdm_pre_transition(NULL);
 
 	/* PER */
-	if (per_next_state < PWRDM_POWER_ON) {
-		per_going_off = (per_next_state == PWRDM_POWER_OFF) ? 1 : 0;
-		omap2_gpio_prepare_for_idle(per_going_off);
-	}
+	if (per_next_state == PWRDM_POWER_OFF)
+		cpu_cluster_pm_enter();
 
 	/* CORE */
 	if (core_next_state < PWRDM_POWER_ON) {
@@ -295,8 +291,8 @@ void omap_sram_idle(void)
 	pwrdm_post_transition(NULL);
 
 	/* PER */
-	if (per_next_state < PWRDM_POWER_ON)
-		omap2_gpio_resume_after_idle();
+	if (per_next_state == PWRDM_POWER_OFF)
+		cpu_cluster_pm_exit();
 }
 
 static void omap3_pm_idle(void)
diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c
index d0051ef53cc4..3d021f648c5d 100644
--- a/drivers/gpio/gpio-omap.c
+++ b/drivers/gpio/gpio-omap.c
@@ -19,6 +19,7 @@
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/io.h>
+#include <linux/cpu_pm.h>
 #include <linux/device.h>
 #include <linux/pm_runtime.h>
 #include <linux/pm.h>
@@ -28,9 +29,9 @@
 #include <linux/bitops.h>
 #include <linux/platform_data/gpio-omap.h>
 
-#define OFF_MODE	1
 #define OMAP4_GPIO_DEBOUNCINGTIME_MASK 0xFF
 
+#define OMAP_GPIO_QUIRK_IDLE_REMOVE_TRIGGER	BIT(2)
 #define OMAP_GPIO_QUIRK_DEFERRED_WKUP_EN	BIT(1)
 
 static LIST_HEAD(omap_gpio_list);
@@ -72,6 +73,8 @@ struct gpio_bank {
 	raw_spinlock_t wa_lock;
 	struct gpio_chip chip;
 	struct clk *dbck;
+	struct notifier_block nb;
+	unsigned int is_suspended:1;
 	u32 mod_usage;
 	u32 irq_usage;
 	u32 dbck_enable_mask;
@@ -83,7 +86,6 @@ struct gpio_bank {
 	int stride;
 	u32 width;
 	int context_loss_count;
-	int power_mode;
 	bool workaround_enabled;
 	u32 quirks;
 
@@ -1314,6 +1316,38 @@ static int omap_gpio_chip_init(struct gpio_bank *bank, struct irq_chip *irqc)
 	return ret;
 }
 
+static void omap_gpio_idle(struct gpio_bank *bank, bool may_lose_context);
+static void omap_gpio_unidle(struct gpio_bank *bank);
+
+static int gpio_omap_cpu_notifier(struct notifier_block *nb,
+				  unsigned long cmd, void *v)
+{
+	struct gpio_bank *bank;
+	struct device *dev;
+	unsigned long flags;
+
+	bank = container_of(nb, struct gpio_bank, nb);
+	dev = bank->chip.parent;
+
+	raw_spin_lock_irqsave(&bank->lock, flags);
+	switch (cmd) {
+	case CPU_CLUSTER_PM_ENTER:
+		if (bank->is_suspended)
+			break;
+		omap_gpio_idle(bank, true);
+		break;
+	case CPU_CLUSTER_PM_ENTER_FAILED:
+	case CPU_CLUSTER_PM_EXIT:
+		if (bank->is_suspended)
+			break;
+		omap_gpio_unidle(bank);
+		break;
+	}
+	raw_spin_unlock_irqrestore(&bank->lock, flags);
+
+	return NOTIFY_OK;
+}
+
 static const struct of_device_id omap_gpio_match[];
 
 static int omap_gpio_probe(struct platform_device *pdev)
@@ -1401,7 +1435,7 @@ static int omap_gpio_probe(struct platform_device *pdev)
 			omap4_gpio_enable_level_quirk;
 		bank->funcs.idle_disable_level_quirk =
 			omap4_gpio_disable_level_quirk;
-	} else {
+	} else if (bank->quirks & OMAP_GPIO_QUIRK_IDLE_REMOVE_TRIGGER) {
 		bank->funcs.idle_enable_level_quirk =
 			omap2_gpio_enable_level_quirk;
 		bank->funcs.idle_disable_level_quirk =
@@ -1451,6 +1485,12 @@ static int omap_gpio_probe(struct platform_device *pdev)
 
 	omap_gpio_show_rev(bank);
 
+	if (bank->funcs.idle_enable_level_quirk &&
+	    bank->funcs.idle_disable_level_quirk) {
+		bank->nb.notifier_call = gpio_omap_cpu_notifier;
+		cpu_pm_register_notifier(&bank->nb);
+	}
+
 	pm_runtime_put(dev);
 
 	list_add_tail(&bank->node, &omap_gpio_list);
@@ -1462,6 +1502,8 @@ static int omap_gpio_remove(struct platform_device *pdev)
 {
 	struct gpio_bank *bank = platform_get_drvdata(pdev);
 
+	if (bank->nb.notifier_call)
+		cpu_pm_unregister_notifier(&bank->nb);
 	list_del(&bank->node);
 	gpiochip_remove(&bank->chip);
 	pm_runtime_disable(&pdev->dev);
@@ -1471,19 +1513,12 @@ static int omap_gpio_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_ARCH_OMAP2PLUS
-
-#if defined(CONFIG_PM)
 static void omap_gpio_restore_context(struct gpio_bank *bank);
 
-static int omap_gpio_runtime_suspend(struct device *dev)
+static void omap_gpio_idle(struct gpio_bank *bank, bool may_lose_context)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct gpio_bank *bank = platform_get_drvdata(pdev);
+	struct device *dev = bank->chip.parent;
 	u32 l1 = 0, l2 = 0;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&bank->lock, flags);
 
 	if (bank->funcs.idle_enable_level_quirk)
 		bank->funcs.idle_enable_level_quirk(bank);
@@ -1491,10 +1526,9 @@ static int omap_gpio_runtime_suspend(struct device *dev)
 	if (!bank->enabled_non_wakeup_gpios)
 		goto update_gpio_context_count;
 
-	if (bank->power_mode != OFF_MODE) {
-		bank->power_mode = 0;
+	if (!may_lose_context)
 		goto update_gpio_context_count;
-	}
+
 	/*
 	 * If going to OFF, remove triggering for all
 	 * non-wakeup GPIOs.  Otherwise spurious IRQs will be
@@ -1519,23 +1553,16 @@ update_gpio_context_count:
 				bank->get_context_loss_count(dev);
 
 	omap_gpio_dbck_disable(bank);
-	raw_spin_unlock_irqrestore(&bank->lock, flags);
-
-	return 0;
 }
 
 static void omap_gpio_init_context(struct gpio_bank *p);
 
-static int omap_gpio_runtime_resume(struct device *dev)
+static void omap_gpio_unidle(struct gpio_bank *bank)
 {
-	struct platform_device *pdev = to_platform_device(dev);
-	struct gpio_bank *bank = platform_get_drvdata(pdev);
+	struct device *dev = bank->chip.parent;
 	u32 l = 0, gen, gen0, gen1;
-	unsigned long flags;
 	int c;
 
-	raw_spin_lock_irqsave(&bank->lock, flags);
-
 	/*
 	 * On the first resume during the probe, the context has not
 	 * been initialised and so initialise it now. Also initialise
@@ -1562,16 +1589,13 @@ static int omap_gpio_runtime_resume(struct device *dev)
 			if (c != bank->context_loss_count) {
 				omap_gpio_restore_context(bank);
 			} else {
-				raw_spin_unlock_irqrestore(&bank->lock, flags);
-				return 0;
+				return;
 			}
 		}
 	}
 
-	if (!bank->workaround_enabled) {
-		raw_spin_unlock_irqrestore(&bank->lock, flags);
-		return 0;
-	}
+	if (!bank->workaround_enabled)
+		return;
 
 	l = readl_relaxed(bank->base + bank->regs->datain);
 
@@ -1624,41 +1648,8 @@ static int omap_gpio_runtime_resume(struct device *dev)
 	}
 
 	bank->workaround_enabled = false;
-	raw_spin_unlock_irqrestore(&bank->lock, flags);
-
-	return 0;
 }
-#endif /* CONFIG_PM */
-
-#if IS_BUILTIN(CONFIG_GPIO_OMAP)
-void omap2_gpio_prepare_for_idle(int pwr_mode)
-{
-	struct gpio_bank *bank;
 
-	list_for_each_entry(bank, &omap_gpio_list, node) {
-		if (!BANK_USED(bank) || !bank->loses_context)
-			continue;
-
-		bank->power_mode = pwr_mode;
-
-		pm_runtime_put_sync_suspend(bank->chip.parent);
-	}
-}
-
-void omap2_gpio_resume_after_idle(void)
-{
-	struct gpio_bank *bank;
-
-	list_for_each_entry(bank, &omap_gpio_list, node) {
-		if (!BANK_USED(bank) || !bank->loses_context)
-			continue;
-
-		pm_runtime_get_sync(bank->chip.parent);
-	}
-}
-#endif
-
-#if defined(CONFIG_PM)
 static void omap_gpio_init_context(struct gpio_bank *p)
 {
 	struct omap_gpio_reg_offs *regs = p->regs;
@@ -1715,17 +1706,57 @@ static void omap_gpio_restore_context(struct gpio_bank *bank)
 	writel_relaxed(bank->context.irqenable2,
 				bank->base + bank->regs->irqenable2);
 }
-#endif /* CONFIG_PM */
-#else
-#define omap_gpio_runtime_suspend NULL
-#define omap_gpio_runtime_resume NULL
-static inline void omap_gpio_init_context(struct gpio_bank *p) {}
-#endif
 
+static int __maybe_unused omap_gpio_runtime_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct gpio_bank *bank = platform_get_drvdata(pdev);
+	unsigned long flags;
+	int error = 0;
+
+	raw_spin_lock_irqsave(&bank->lock, flags);
+	/* Must be idled only by CPU_CLUSTER_PM_ENTER? */
+	if (bank->irq_usage) {
+		error = -EBUSY;
+		goto unlock;
+	}
+	omap_gpio_idle(bank, true);
+	bank->is_suspended = true;
+unlock:
+	raw_spin_unlock_irqrestore(&bank->lock, flags);
+
+	return error;
+}
+
+static int __maybe_unused omap_gpio_runtime_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct gpio_bank *bank = platform_get_drvdata(pdev);
+	unsigned long flags;
+	int error = 0;
+
+	raw_spin_lock_irqsave(&bank->lock, flags);
+	/* Must be unidled only by CPU_CLUSTER_PM_ENTER? */
+	if (bank->irq_usage) {
+		error = -EBUSY;
+		goto unlock;
+	}
+	omap_gpio_unidle(bank);
+	bank->is_suspended = false;
+unlock:
+	raw_spin_unlock_irqrestore(&bank->lock, flags);
+
+	return error;
+}
+
+#ifdef CONFIG_ARCH_OMAP2PLUS
 static const struct dev_pm_ops gpio_pm_ops = {
 	SET_RUNTIME_PM_OPS(omap_gpio_runtime_suspend, omap_gpio_runtime_resume,
 									NULL)
 };
+#else
+static const struct dev_pm_ops gpio_pm_ops;
+#endif	/* CONFIG_ARCH_OMAP2PLUS */
 
 #if defined(CONFIG_OF)
 static struct omap_gpio_reg_offs omap2_gpio_regs = {
@@ -1774,6 +1805,11 @@ static struct omap_gpio_reg_offs omap4_gpio_regs = {
 	.fallingdetect =	OMAP4_GPIO_FALLINGDETECT,
 };
 
+/*
+ * Note that omap2 does not currently support idle modes with context loss so
+ * no need to add OMAP_GPIO_QUIRK_IDLE_REMOVE_TRIGGER quirk flag to save
+ * and restore context.
+ */
 static const struct omap_gpio_platform_data omap2_pdata = {
 	.regs = &omap2_gpio_regs,
 	.bank_width = 32,
@@ -1784,13 +1820,15 @@ static const struct omap_gpio_platform_data omap3_pdata = {
 	.regs = &omap2_gpio_regs,
 	.bank_width = 32,
 	.dbck_flag = true,
+	.quirks = OMAP_GPIO_QUIRK_IDLE_REMOVE_TRIGGER,
 };
 
 static const struct omap_gpio_platform_data omap4_pdata = {
 	.regs = &omap4_gpio_regs,
 	.bank_width = 32,
 	.dbck_flag = true,
-	.quirks = OMAP_GPIO_QUIRK_DEFERRED_WKUP_EN,
+	.quirks = OMAP_GPIO_QUIRK_IDLE_REMOVE_TRIGGER |
+		  OMAP_GPIO_QUIRK_DEFERRED_WKUP_EN,
 };
 
 static const struct of_device_id omap_gpio_match[] = {
diff --git a/include/linux/platform_data/gpio-omap.h b/include/linux/platform_data/gpio-omap.h
index d952151b6e33..8485c6a9a383 100644
--- a/include/linux/platform_data/gpio-omap.h
+++ b/include/linux/platform_data/gpio-omap.h
@@ -205,17 +205,4 @@ struct omap_gpio_platform_data {
 	int (*get_context_loss_count)(struct device *dev);
 };
 
-#if IS_BUILTIN(CONFIG_GPIO_OMAP)
-extern void omap2_gpio_prepare_for_idle(int off_mode);
-extern void omap2_gpio_resume_after_idle(void);
-#else
-static inline void omap2_gpio_prepare_for_idle(int off_mode)
-{
-}
-
-static inline void omap2_gpio_resume_after_idle(void)
-{
-}
-#endif
-
 #endif
-- 
cgit v1.2.3


From 463659a08d7999d5461fa45b35b17686189a70ca Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hansverk@cisco.com>
Date: Thu, 13 Sep 2018 07:47:29 -0400
Subject: media: hdmi.h: rename ADOBE_RGB to OPRGB and ADOBE_YCC to OPYCC

These names have been renamed in the CTA-861 standard due to trademark
issues. Replace them here as well so they are in sync with the standard.

Signed-off-by: Hans Verkuil <hansverk@cisco.com>
Cc: stable@vger.kernel.org
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
---
 drivers/media/i2c/adv7511.c               | 4 ++--
 drivers/media/v4l2-core/v4l2-dv-timings.c | 4 ++--
 drivers/video/hdmi.c                      | 8 ++++----
 include/linux/hdmi.h                      | 4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/i2c/adv7511.c b/drivers/media/i2c/adv7511.c
index a1f73d998495..f3899cc84e27 100644
--- a/drivers/media/i2c/adv7511.c
+++ b/drivers/media/i2c/adv7511.c
@@ -1357,8 +1357,8 @@ static int adv7511_set_fmt(struct v4l2_subdev *sd,
 	switch (format->format.colorspace) {
 	case V4L2_COLORSPACE_OPRGB:
 		c = HDMI_COLORIMETRY_EXTENDED;
-		ec = y ? HDMI_EXTENDED_COLORIMETRY_ADOBE_YCC_601 :
-			 HDMI_EXTENDED_COLORIMETRY_ADOBE_RGB;
+		ec = y ? HDMI_EXTENDED_COLORIMETRY_OPYCC_601 :
+			 HDMI_EXTENDED_COLORIMETRY_OPRGB;
 		break;
 	case V4L2_COLORSPACE_SMPTE170M:
 		c = y ? HDMI_COLORIMETRY_ITU_601 : HDMI_COLORIMETRY_NONE;
diff --git a/drivers/media/v4l2-core/v4l2-dv-timings.c b/drivers/media/v4l2-core/v4l2-dv-timings.c
index 19aabd1fcd2b..4f23e939ead0 100644
--- a/drivers/media/v4l2-core/v4l2-dv-timings.c
+++ b/drivers/media/v4l2-core/v4l2-dv-timings.c
@@ -877,7 +877,7 @@ v4l2_hdmi_rx_colorimetry(const struct hdmi_avi_infoframe *avi,
 		switch (avi->colorimetry) {
 		case HDMI_COLORIMETRY_EXTENDED:
 			switch (avi->extended_colorimetry) {
-			case HDMI_EXTENDED_COLORIMETRY_ADOBE_RGB:
+			case HDMI_EXTENDED_COLORIMETRY_OPRGB:
 				c.colorspace = V4L2_COLORSPACE_OPRGB;
 				c.xfer_func = V4L2_XFER_FUNC_OPRGB;
 				break;
@@ -948,7 +948,7 @@ v4l2_hdmi_rx_colorimetry(const struct hdmi_avi_infoframe *avi,
 				c.ycbcr_enc = V4L2_YCBCR_ENC_601;
 				c.xfer_func = V4L2_XFER_FUNC_SRGB;
 				break;
-			case HDMI_EXTENDED_COLORIMETRY_ADOBE_YCC_601:
+			case HDMI_EXTENDED_COLORIMETRY_OPYCC_601:
 				c.colorspace = V4L2_COLORSPACE_OPRGB;
 				c.ycbcr_enc = V4L2_YCBCR_ENC_601;
 				c.xfer_func = V4L2_XFER_FUNC_OPRGB;
diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c
index 38716eb50408..8a3e8f61b991 100644
--- a/drivers/video/hdmi.c
+++ b/drivers/video/hdmi.c
@@ -592,10 +592,10 @@ hdmi_extended_colorimetry_get_name(enum hdmi_extended_colorimetry ext_col)
 		return "xvYCC 709";
 	case HDMI_EXTENDED_COLORIMETRY_S_YCC_601:
 		return "sYCC 601";
-	case HDMI_EXTENDED_COLORIMETRY_ADOBE_YCC_601:
-		return "Adobe YCC 601";
-	case HDMI_EXTENDED_COLORIMETRY_ADOBE_RGB:
-		return "Adobe RGB";
+	case HDMI_EXTENDED_COLORIMETRY_OPYCC_601:
+		return "opYCC 601";
+	case HDMI_EXTENDED_COLORIMETRY_OPRGB:
+		return "opRGB";
 	case HDMI_EXTENDED_COLORIMETRY_BT2020_CONST_LUM:
 		return "BT.2020 Constant Luminance";
 	case HDMI_EXTENDED_COLORIMETRY_BT2020:
diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h
index d271ff23984f..4f3febc0f971 100644
--- a/include/linux/hdmi.h
+++ b/include/linux/hdmi.h
@@ -101,8 +101,8 @@ enum hdmi_extended_colorimetry {
 	HDMI_EXTENDED_COLORIMETRY_XV_YCC_601,
 	HDMI_EXTENDED_COLORIMETRY_XV_YCC_709,
 	HDMI_EXTENDED_COLORIMETRY_S_YCC_601,
-	HDMI_EXTENDED_COLORIMETRY_ADOBE_YCC_601,
-	HDMI_EXTENDED_COLORIMETRY_ADOBE_RGB,
+	HDMI_EXTENDED_COLORIMETRY_OPYCC_601,
+	HDMI_EXTENDED_COLORIMETRY_OPRGB,
 
 	/* The following EC values are only defined in CEA-861-F. */
 	HDMI_EXTENDED_COLORIMETRY_BT2020_CONST_LUM,
-- 
cgit v1.2.3


From 808c43b7c7f70360ed7b9e43e2cf980f388e71fa Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Fri, 21 Sep 2018 17:03:27 -0700
Subject: cgroup: Simplify cgroup_ancestor

Simplify cgroup_ancestor function. This is follow-up for
commit 7723628101aa ("bpf: Introduce bpf_skb_ancestor_cgroup_id helper")

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
---
 include/linux/cgroup.h | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 32c553556bbd..e03a92430383 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -567,20 +567,11 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
 static inline struct cgroup *cgroup_ancestor(struct cgroup *cgrp,
 					     int ancestor_level)
 {
-	struct cgroup *ptr;
-
 	if (cgrp->level < ancestor_level)
 		return NULL;
-
-	for (ptr = cgrp;
-	     ptr && ptr->level > ancestor_level;
-	     ptr = cgroup_parent(ptr))
-		;
-
-	if (ptr && ptr->level == ancestor_level)
-		return ptr;
-
-	return NULL;
+	while (cgrp && cgrp->level > ancestor_level)
+		cgrp = cgroup_parent(cgrp);
+	return cgrp;
 }
 
 /**
-- 
cgit v1.2.3


From 43b729bfe9cf30ad11499a66e3b7bd300c716d44 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Sep 2018 09:43:47 +0200
Subject: block: move integrity_req_gap_{back,front}_merge to blk.h

No need to expose these to drivers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk.h            | 35 +++++++++++++++++++++++++++++++++--
 include/linux/blkdev.h | 31 -------------------------------
 2 files changed, 33 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk.h b/block/blk.h
index 9db4e389582c..441c2de1d4b9 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -158,7 +158,38 @@ static inline bool bio_integrity_endio(struct bio *bio)
 		return __bio_integrity_endio(bio);
 	return true;
 }
-#else
+
+static inline bool integrity_req_gap_back_merge(struct request *req,
+		struct bio *next)
+{
+	struct bio_integrity_payload *bip = bio_integrity(req->bio);
+	struct bio_integrity_payload *bip_next = bio_integrity(next);
+
+	return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
+				bip_next->bip_vec[0].bv_offset);
+}
+
+static inline bool integrity_req_gap_front_merge(struct request *req,
+		struct bio *bio)
+{
+	struct bio_integrity_payload *bip = bio_integrity(bio);
+	struct bio_integrity_payload *bip_next = bio_integrity(req->bio);
+
+	return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
+				bip_next->bip_vec[0].bv_offset);
+}
+#else /* CONFIG_BLK_DEV_INTEGRITY */
+static inline bool integrity_req_gap_back_merge(struct request *req,
+		struct bio *next)
+{
+	return false;
+}
+static inline bool integrity_req_gap_front_merge(struct request *req,
+		struct bio *bio)
+{
+	return false;
+}
+
 static inline void blk_flush_integrity(void)
 {
 }
@@ -166,7 +197,7 @@ static inline bool bio_integrity_endio(struct bio *bio)
 {
 	return true;
 }
-#endif
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
 
 void blk_timeout_work(struct work_struct *work);
 unsigned long blk_rq_timeout(unsigned long timeout);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d6869e0e2b64..bc534c857344 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1843,26 +1843,6 @@ queue_max_integrity_segments(struct request_queue *q)
 	return q->limits.max_integrity_segments;
 }
 
-static inline bool integrity_req_gap_back_merge(struct request *req,
-						struct bio *next)
-{
-	struct bio_integrity_payload *bip = bio_integrity(req->bio);
-	struct bio_integrity_payload *bip_next = bio_integrity(next);
-
-	return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
-				bip_next->bip_vec[0].bv_offset);
-}
-
-static inline bool integrity_req_gap_front_merge(struct request *req,
-						 struct bio *bio)
-{
-	struct bio_integrity_payload *bip = bio_integrity(bio);
-	struct bio_integrity_payload *bip_next = bio_integrity(req->bio);
-
-	return bvec_gap_to_prev(req->q, &bip->bip_vec[bip->bip_vcnt - 1],
-				bip_next->bip_vec[0].bv_offset);
-}
-
 /**
  * bio_integrity_intervals - Return number of integrity intervals for a bio
  * @bi:		blk_integrity profile for device
@@ -1947,17 +1927,6 @@ static inline bool blk_integrity_merge_bio(struct request_queue *rq,
 	return true;
 }
 
-static inline bool integrity_req_gap_back_merge(struct request *req,
-						struct bio *next)
-{
-	return false;
-}
-static inline bool integrity_req_gap_front_merge(struct request *req,
-						 struct bio *bio)
-{
-	return false;
-}
-
 static inline unsigned int bio_integrity_intervals(struct blk_integrity *bi,
 						   unsigned int sectors)
 {
-- 
cgit v1.2.3


From e9907009cbfc0c93d987d5a8fdf3d6c3c7b89717 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Sep 2018 09:43:48 +0200
Subject: block: move req_gap_{back,front}_merge to blk-merge.c

Keep it close to the actual users instead of exposing the function to all
drivers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-merge.c      | 65 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/blkdev.h | 69 --------------------------------------------------
 2 files changed, 65 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-merge.c b/block/blk-merge.c
index aaec38cc37b8..ad8a226347a6 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -12,6 +12,71 @@
 
 #include "blk.h"
 
+/*
+ * Check if the two bvecs from two bios can be merged to one segment.  If yes,
+ * no need to check gap between the two bios since the 1st bio and the 1st bvec
+ * in the 2nd bio can be handled in one segment.
+ */
+static inline bool bios_segs_mergeable(struct request_queue *q,
+		struct bio *prev, struct bio_vec *prev_last_bv,
+		struct bio_vec *next_first_bv)
+{
+	if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv))
+		return false;
+	if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv))
+		return false;
+	if (prev->bi_seg_back_size + next_first_bv->bv_len >
+			queue_max_segment_size(q))
+		return false;
+	return true;
+}
+
+static inline bool bio_will_gap(struct request_queue *q,
+		struct request *prev_rq, struct bio *prev, struct bio *next)
+{
+	struct bio_vec pb, nb;
+
+	if (!bio_has_data(prev) || !queue_virt_boundary(q))
+		return false;
+
+	/*
+	 * Don't merge if the 1st bio starts with non-zero offset, otherwise it
+	 * is quite difficult to respect the sg gap limit.  We work hard to
+	 * merge a huge number of small single bios in case of mkfs.
+	 */
+	if (prev_rq)
+		bio_get_first_bvec(prev_rq->bio, &pb);
+	else
+		bio_get_first_bvec(prev, &pb);
+	if (pb.bv_offset)
+		return true;
+
+	/*
+	 * We don't need to worry about the situation that the merged segment
+	 * ends in unaligned virt boundary:
+	 *
+	 * - if 'pb' ends aligned, the merged segment ends aligned
+	 * - if 'pb' ends unaligned, the next bio must include
+	 *   one single bvec of 'nb', otherwise the 'nb' can't
+	 *   merge with 'pb'
+	 */
+	bio_get_last_bvec(prev, &pb);
+	bio_get_first_bvec(next, &nb);
+	if (bios_segs_mergeable(q, prev, &pb, &nb))
+		return false;
+	return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
+}
+
+static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
+{
+	return bio_will_gap(req->q, req, req->biotail, bio);
+}
+
+static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
+{
+	return bio_will_gap(req->q, NULL, bio, req->bio);
+}
+
 static struct bio *blk_bio_discard_split(struct request_queue *q,
 					 struct bio *bio,
 					 struct bio_set *bs,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bc534c857344..b7e676bb01bc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1695,75 +1695,6 @@ static inline bool bvec_gap_to_prev(struct request_queue *q,
 	return __bvec_gap_to_prev(q, bprv, offset);
 }
 
-/*
- * Check if the two bvecs from two bios can be merged to one segment.
- * If yes, no need to check gap between the two bios since the 1st bio
- * and the 1st bvec in the 2nd bio can be handled in one segment.
- */
-static inline bool bios_segs_mergeable(struct request_queue *q,
-		struct bio *prev, struct bio_vec *prev_last_bv,
-		struct bio_vec *next_first_bv)
-{
-	if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv))
-		return false;
-	if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv))
-		return false;
-	if (prev->bi_seg_back_size + next_first_bv->bv_len >
-			queue_max_segment_size(q))
-		return false;
-	return true;
-}
-
-static inline bool bio_will_gap(struct request_queue *q,
-				struct request *prev_rq,
-				struct bio *prev,
-				struct bio *next)
-{
-	if (bio_has_data(prev) && queue_virt_boundary(q)) {
-		struct bio_vec pb, nb;
-
-		/*
-		 * don't merge if the 1st bio starts with non-zero
-		 * offset, otherwise it is quite difficult to respect
-		 * sg gap limit. We work hard to merge a huge number of small
-		 * single bios in case of mkfs.
-		 */
-		if (prev_rq)
-			bio_get_first_bvec(prev_rq->bio, &pb);
-		else
-			bio_get_first_bvec(prev, &pb);
-		if (pb.bv_offset)
-			return true;
-
-		/*
-		 * We don't need to worry about the situation that the
-		 * merged segment ends in unaligned virt boundary:
-		 *
-		 * - if 'pb' ends aligned, the merged segment ends aligned
-		 * - if 'pb' ends unaligned, the next bio must include
-		 *   one single bvec of 'nb', otherwise the 'nb' can't
-		 *   merge with 'pb'
-		 */
-		bio_get_last_bvec(prev, &pb);
-		bio_get_first_bvec(next, &nb);
-
-		if (!bios_segs_mergeable(q, prev, &pb, &nb))
-			return __bvec_gap_to_prev(q, &pb, nb.bv_offset);
-	}
-
-	return false;
-}
-
-static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
-{
-	return bio_will_gap(req->q, req, req->biotail, bio);
-}
-
-static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
-{
-	return bio_will_gap(req->q, NULL, bio, req->bio);
-}
-
 int kblockd_schedule_work(struct work_struct *work);
 int kblockd_schedule_work_on(int cpu, struct work_struct *work);
 int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
-- 
cgit v1.2.3


From 27ca1d4ed04ea29dc77b47190a3cc82697023e76 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Sep 2018 09:43:49 +0200
Subject: block: move req_gap_back_merge to blk.h

No need to expose these helpers outside the block layer.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk.h            | 19 +++++++++++++++++++
 include/linux/blkdev.h | 19 -------------------
 2 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk.h b/block/blk.h
index 441c2de1d4b9..63035c95689c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -149,6 +149,25 @@ static inline void blk_queue_enter_live(struct request_queue *q)
 	percpu_ref_get(&q->q_usage_counter);
 }
 
+static inline bool __bvec_gap_to_prev(struct request_queue *q,
+		struct bio_vec *bprv, unsigned int offset)
+{
+	return offset ||
+		((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
+}
+
+/*
+ * Check if adding a bio_vec after bprv with offset would create a gap in
+ * the SG list. Most drivers don't care about this, but some do.
+ */
+static inline bool bvec_gap_to_prev(struct request_queue *q,
+		struct bio_vec *bprv, unsigned int offset)
+{
+	if (!queue_virt_boundary(q))
+		return false;
+	return __bvec_gap_to_prev(q, bprv, offset);
+}
+
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 void blk_flush_integrity(void);
 bool __bio_integrity_endio(struct bio *);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b7e676bb01bc..1d5e14139795 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1676,25 +1676,6 @@ static inline void put_dev_sector(Sector p)
 	put_page(p.v);
 }
 
-static inline bool __bvec_gap_to_prev(struct request_queue *q,
-				struct bio_vec *bprv, unsigned int offset)
-{
-	return offset ||
-		((bprv->bv_offset + bprv->bv_len) & queue_virt_boundary(q));
-}
-
-/*
- * Check if adding a bio_vec after bprv with offset would create a gap in
- * the SG list. Most drivers don't care about this, but some do.
- */
-static inline bool bvec_gap_to_prev(struct request_queue *q,
-				struct bio_vec *bprv, unsigned int offset)
-{
-	if (!queue_virt_boundary(q))
-		return false;
-	return __bvec_gap_to_prev(q, bprv, offset);
-}
-
 int kblockd_schedule_work(struct work_struct *work);
 int kblockd_schedule_work_on(int cpu, struct work_struct *work);
 int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
-- 
cgit v1.2.3


From 6a9f5f240adfdced863a098d34f8f05ca6ab9d5f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Sep 2018 09:43:50 +0200
Subject: block: simplify BIOVEC_PHYS_MERGEABLE

Turn the macro into an inline, move it to blk.h and simplify the
arch hooks a bit.

Also rename the function to biovec_phys_mergeable as there is no need
to shout.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/arm/include/asm/io.h   |  5 ++---
 arch/arm64/include/asm/io.h |  5 ++---
 arch/x86/include/asm/io.h   |  5 ++---
 block/bio.c                 |  2 +-
 block/blk-integrity.c       |  4 ++--
 block/blk-merge.c           | 10 +++++-----
 block/blk.h                 | 14 ++++++++++++++
 include/linux/bio.h         | 13 -------------
 8 files changed, 28 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 2cfbc531f63b..3c835d6263fa 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -469,9 +469,8 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *addr);
 struct bio_vec;
 extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
 				      const struct bio_vec *vec2);
-#define BIOVEC_PHYS_MERGEABLE(vec1, vec2)				\
-	(__BIOVEC_PHYS_MERGEABLE(vec1, vec2) &&				\
-	 (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
+#define ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2)				\
+	 (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))
 
 #ifdef CONFIG_MMU
 #define ARCH_HAS_VALID_PHYS_ADDR_RANGE
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 35b2e50f17fb..774e03ea1bb0 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -208,9 +208,8 @@ extern int devmem_is_allowed(unsigned long pfn);
 struct bio_vec;
 extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
 				      const struct bio_vec *vec2);
-#define BIOVEC_PHYS_MERGEABLE(vec1, vec2)				\
-	(__BIOVEC_PHYS_MERGEABLE(vec1, vec2) &&				\
-	 (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
+#define ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2)				\
+	 (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))
 
 #endif	/* __KERNEL__ */
 #endif	/* __ASM_IO_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 6de64840dd22..7c6106216d9c 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -376,9 +376,8 @@ struct bio_vec;
 extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
 				      const struct bio_vec *vec2);
 
-#define BIOVEC_PHYS_MERGEABLE(vec1, vec2)				\
-	(__BIOVEC_PHYS_MERGEABLE(vec1, vec2) &&				\
-	 (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
+#define ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2)				\
+	 (!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2))
 #endif	/* CONFIG_XEN */
 
 #define IO_SPACE_LIMIT 0xffff
diff --git a/block/bio.c b/block/bio.c
index 1cd47f218200..81d90b839e05 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -731,7 +731,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
 	}
 
 	/* If we may be able to merge these biovecs, force a recount */
-	if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
+	if (bio->bi_vcnt > 1 && biovec_phys_mergeable(bvec-1, bvec))
 		bio_clear_flag(bio, BIO_SEG_VALID);
 
  done:
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 6121611e1316..0f7267916509 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -49,7 +49,7 @@ int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio)
 	bio_for_each_integrity_vec(iv, bio, iter) {
 
 		if (prev) {
-			if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv))
+			if (!biovec_phys_mergeable(&ivprv, &iv))
 				goto new_segment;
 
 			if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv))
@@ -95,7 +95,7 @@ int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio,
 	bio_for_each_integrity_vec(iv, bio, iter) {
 
 		if (prev) {
-			if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv))
+			if (!biovec_phys_mergeable(&ivprv, &iv))
 				goto new_segment;
 
 			if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv))
diff --git a/block/blk-merge.c b/block/blk-merge.c
index ad8a226347a6..5e63e8259f92 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -21,7 +21,7 @@ static inline bool bios_segs_mergeable(struct request_queue *q,
 		struct bio *prev, struct bio_vec *prev_last_bv,
 		struct bio_vec *next_first_bv)
 {
-	if (!BIOVEC_PHYS_MERGEABLE(prev_last_bv, next_first_bv))
+	if (!biovec_phys_mergeable(prev_last_bv, next_first_bv))
 		return false;
 	if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv))
 		return false;
@@ -199,7 +199,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 		if (bvprvp && blk_queue_cluster(q)) {
 			if (seg_size + bv.bv_len > queue_max_segment_size(q))
 				goto new_segment;
-			if (!BIOVEC_PHYS_MERGEABLE(bvprvp, &bv))
+			if (!biovec_phys_mergeable(bvprvp, &bv))
 				goto new_segment;
 			if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, &bv))
 				goto new_segment;
@@ -332,7 +332,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 				if (seg_size + bv.bv_len
 				    > queue_max_segment_size(q))
 					goto new_segment;
-				if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv))
+				if (!biovec_phys_mergeable(&bvprv, &bv))
 					goto new_segment;
 				if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv))
 					goto new_segment;
@@ -414,7 +414,7 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 	bio_get_last_bvec(bio, &end_bv);
 	bio_get_first_bvec(nxt, &nxt_bv);
 
-	if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
+	if (!biovec_phys_mergeable(&end_bv, &nxt_bv))
 		return 0;
 
 	/*
@@ -439,7 +439,7 @@ __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
 		if ((*sg)->length + nbytes > queue_max_segment_size(q))
 			goto new_segment;
 
-		if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
+		if (!biovec_phys_mergeable(bvprv, bvec))
 			goto new_segment;
 		if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
 			goto new_segment;
diff --git a/block/blk.h b/block/blk.h
index 63035c95689c..aed99cbc1bca 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -149,6 +149,20 @@ static inline void blk_queue_enter_live(struct request_queue *q)
 	percpu_ref_get(&q->q_usage_counter);
 }
 
+#ifndef ARCH_BIOVEC_PHYS_MERGEABLE
+#define ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2) true
+#endif
+
+static inline bool biovec_phys_mergeable(const struct bio_vec *vec1,
+		const struct bio_vec *vec2)
+{
+	if (bvec_to_phys(vec1) + vec1->bv_len != bvec_to_phys(vec2))
+		return false;
+	if (!ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2))
+		return false;
+	return true;
+}
+
 static inline bool __bvec_gap_to_prev(struct request_queue *q,
 		struct bio_vec *bprv, unsigned int offset)
 {
diff --git a/include/linux/bio.h b/include/linux/bio.h
index e973876625a8..e2adb96346f0 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -140,19 +140,6 @@ static inline bool bio_full(struct bio *bio)
 /*
  * merge helpers etc
  */
-
-/* Default implementation of BIOVEC_PHYS_MERGEABLE */
-#define __BIOVEC_PHYS_MERGEABLE(vec1, vec2)	\
-	((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2)))
-
-/*
- * allow arch override, for eg virtualized architectures (put in asm/io.h)
- */
-#ifndef BIOVEC_PHYS_MERGEABLE
-#define BIOVEC_PHYS_MERGEABLE(vec1, vec2)	\
-	__BIOVEC_PHYS_MERGEABLE(vec1, vec2)
-#endif
-
 #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
 	(((addr1) | (mask)) == (((addr2) - 1) | (mask)))
 #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
-- 
cgit v1.2.3


From 3dccdae54fe836a22cee9dc6df9fd1708ae075ce Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Sep 2018 09:43:52 +0200
Subject: block: merge BIOVEC_SEG_BOUNDARY into biovec_phys_mergeable

These two checks should always be performed together, so merge them into
a single helper.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c           |  4 +---
 block/blk-integrity.c | 12 ++----------
 block/blk-merge.c     | 29 +++++------------------------
 block/blk.h           | 12 +++++++++---
 include/linux/bio.h   |  8 --------
 5 files changed, 17 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index c254e5aa331f..e9f92b50724d 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -731,9 +731,7 @@ int bio_add_pc_page(struct request_queue *q, struct bio *bio, struct page
 	}
 
 	/* If we may be able to merge these biovecs, force a recount */
-	if (bio->bi_vcnt > 1 &&
-	    biovec_phys_mergeable(bvec - 1, bvec) &&
-	    BIOVEC_SEG_BOUNDARY(q, bvec - 1, bvec))
+	if (bio->bi_vcnt > 1 && biovec_phys_mergeable(q, bvec - 1, bvec))
 		bio_clear_flag(bio, BIO_SEG_VALID);
 
  done:
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 0f7267916509..d1ab089e0919 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -49,12 +49,8 @@ int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio)
 	bio_for_each_integrity_vec(iv, bio, iter) {
 
 		if (prev) {
-			if (!biovec_phys_mergeable(&ivprv, &iv))
+			if (!biovec_phys_mergeable(q, &ivprv, &iv))
 				goto new_segment;
-
-			if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv))
-				goto new_segment;
-
 			if (seg_size + iv.bv_len > queue_max_segment_size(q))
 				goto new_segment;
 
@@ -95,12 +91,8 @@ int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio,
 	bio_for_each_integrity_vec(iv, bio, iter) {
 
 		if (prev) {
-			if (!biovec_phys_mergeable(&ivprv, &iv))
+			if (!biovec_phys_mergeable(q, &ivprv, &iv))
 				goto new_segment;
-
-			if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv))
-				goto new_segment;
-
 			if (sg->length + iv.bv_len > queue_max_segment_size(q))
 				goto new_segment;
 
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 5e63e8259f92..42a46744c11b 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -21,9 +21,7 @@ static inline bool bios_segs_mergeable(struct request_queue *q,
 		struct bio *prev, struct bio_vec *prev_last_bv,
 		struct bio_vec *next_first_bv)
 {
-	if (!biovec_phys_mergeable(prev_last_bv, next_first_bv))
-		return false;
-	if (!BIOVEC_SEG_BOUNDARY(q, prev_last_bv, next_first_bv))
+	if (!biovec_phys_mergeable(q, prev_last_bv, next_first_bv))
 		return false;
 	if (prev->bi_seg_back_size + next_first_bv->bv_len >
 			queue_max_segment_size(q))
@@ -199,9 +197,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 		if (bvprvp && blk_queue_cluster(q)) {
 			if (seg_size + bv.bv_len > queue_max_segment_size(q))
 				goto new_segment;
-			if (!biovec_phys_mergeable(bvprvp, &bv))
-				goto new_segment;
-			if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, &bv))
+			if (!biovec_phys_mergeable(q, bvprvp, &bv))
 				goto new_segment;
 
 			seg_size += bv.bv_len;
@@ -332,9 +328,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 				if (seg_size + bv.bv_len
 				    > queue_max_segment_size(q))
 					goto new_segment;
-				if (!biovec_phys_mergeable(&bvprv, &bv))
-					goto new_segment;
-				if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv))
+				if (!biovec_phys_mergeable(q, &bvprv, &bv))
 					goto new_segment;
 
 				seg_size += bv.bv_len;
@@ -414,17 +408,7 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 	bio_get_last_bvec(bio, &end_bv);
 	bio_get_first_bvec(nxt, &nxt_bv);
 
-	if (!biovec_phys_mergeable(&end_bv, &nxt_bv))
-		return 0;
-
-	/*
-	 * bio and nxt are contiguous in memory; check if the queue allows
-	 * these two to be merged into one
-	 */
-	if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv))
-		return 1;
-
-	return 0;
+	return biovec_phys_mergeable(q, &end_bv, &nxt_bv);
 }
 
 static inline void
@@ -438,10 +422,7 @@ __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
 	if (*sg && *cluster) {
 		if ((*sg)->length + nbytes > queue_max_segment_size(q))
 			goto new_segment;
-
-		if (!biovec_phys_mergeable(bvprv, bvec))
-			goto new_segment;
-		if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
+		if (!biovec_phys_mergeable(q, bvprv, bvec))
 			goto new_segment;
 
 		(*sg)->length += nbytes;
diff --git a/block/blk.h b/block/blk.h
index aed99cbc1bca..8f7229b6f63e 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -153,13 +153,19 @@ static inline void blk_queue_enter_live(struct request_queue *q)
 #define ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2) true
 #endif
 
-static inline bool biovec_phys_mergeable(const struct bio_vec *vec1,
-		const struct bio_vec *vec2)
+static inline bool biovec_phys_mergeable(struct request_queue *q,
+		struct bio_vec *vec1, struct bio_vec *vec2)
 {
-	if (bvec_to_phys(vec1) + vec1->bv_len != bvec_to_phys(vec2))
+	unsigned long mask = queue_segment_boundary(q);
+	phys_addr_t addr1 = bvec_to_phys(vec1);
+	phys_addr_t addr2 = bvec_to_phys(vec2);
+
+	if (addr1 + vec1->bv_len != addr2)
 		return false;
 	if (!ARCH_BIOVEC_PHYS_MERGEABLE(vec1, vec2))
 		return false;
+	if ((addr1 | mask) != ((addr2 + vec2->bv_len - 1) | mask))
+		return false;
 	return true;
 }
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index e2adb96346f0..9b580e1cb2e8 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -137,14 +137,6 @@ static inline bool bio_full(struct bio *bio)
  */
 #define bvec_to_phys(bv)	(page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
 
-/*
- * merge helpers etc
- */
-#define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \
-	(((addr1) | (mask)) == (((addr2) - 1) | (mask)))
-#define BIOVEC_SEG_BOUNDARY(q, b1, b2) \
-	__BIO_SEG_BOUNDARY(bvec_to_phys((b1)), bvec_to_phys((b2)) + (b2)->bv_len, queue_segment_boundary((q)))
-
 /*
  * drivers should _never_ use the all version - the bio may have been split
  * before it got to the driver and the driver won't own all of it
-- 
cgit v1.2.3


From 6e768461c215eaf8912e6c23e40fdff1cd962aca Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Sep 2018 09:43:53 +0200
Subject: block: remove bvec_to_phys

We only use it in biovec_phys_mergeable and a m68k paravirt driver,
so just opencode it there.  Also remove the pointless unsigned long cast
for the offset in the opencoded instances.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/m68k/emu/nfblock.c | 2 +-
 block/blk.h             | 4 ++--
 include/linux/bio.h     | 5 -----
 3 files changed, 3 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index e9110b9b8bcd..38049357d6d3 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -73,7 +73,7 @@ static blk_qc_t nfhd_make_request(struct request_queue *queue, struct bio *bio)
 		len = bvec.bv_len;
 		len >>= 9;
 		nfhd_read_write(dev->id, 0, dir, sec >> shift, len >> shift,
-				bvec_to_phys(&bvec));
+				page_to_phys(bvec.bv_page) + bvec.bv_offset);
 		sec += len;
 	}
 	bio_endio(bio);
diff --git a/block/blk.h b/block/blk.h
index 8f7229b6f63e..50f74ce60453 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -157,8 +157,8 @@ static inline bool biovec_phys_mergeable(struct request_queue *q,
 		struct bio_vec *vec1, struct bio_vec *vec2)
 {
 	unsigned long mask = queue_segment_boundary(q);
-	phys_addr_t addr1 = bvec_to_phys(vec1);
-	phys_addr_t addr2 = bvec_to_phys(vec2);
+	phys_addr_t addr1 = page_to_phys(vec1->bv_page) + vec1->bv_offset;
+	phys_addr_t addr2 = page_to_phys(vec2->bv_page) + vec2->bv_offset;
 
 	if (addr1 + vec1->bv_len != addr2)
 		return false;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 9b580e1cb2e8..9ad4b0a487a4 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -132,11 +132,6 @@ static inline bool bio_full(struct bio *bio)
 	return bio->bi_vcnt >= bio->bi_max_vecs;
 }
 
-/*
- * will die
- */
-#define bvec_to_phys(bv)	(page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset)
-
 /*
  * drivers should _never_ use the all version - the bio may have been split
  * before it got to the driver and the driver won't own all of it
-- 
cgit v1.2.3


From bceacbfa48bf132d8d88058904cf5859eeb4f3e6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Sep 2018 09:43:54 +0200
Subject: block: don't include io.h from bio.h

Now that we don't need an override for BIOVEC_PHYS_MERGEABLE there is
no need to drag this header in.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index 9ad4b0a487a4..b3d47862b1b4 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -24,9 +24,6 @@
 #include <linux/bug.h>
 
 #ifdef CONFIG_BLOCK
-
-#include <asm/io.h>
-
 /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
 #include <linux/blk_types.h>
 
-- 
cgit v1.2.3


From 65969e5cb249b765cee261c8f0458ec778b0c22f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Sep 2018 09:43:55 +0200
Subject: block: don't include bug.h from bio.h

No need to pull in the BUG() defintion.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index b3d47862b1b4..f447b0ebb288 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -21,7 +21,6 @@
 #include <linux/highmem.h>
 #include <linux/mempool.h>
 #include <linux/ioprio.h>
-#include <linux/bug.h>
 
 #ifdef CONFIG_BLOCK
 /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */
-- 
cgit v1.2.3


From 40d9f9124822013331367fb4ab59936c3ac944d6 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Mon, 24 Sep 2018 12:16:54 -0700
Subject: bus: ti-sysc: Defer suspend as needed

We don't care when we suspend but some our children do. In order to
avoid tagging various modules with SYSC_QUIRK_RESOURCE_PROVIDER, let's
do it automatically by tagging modules that are busy on suspend for
noirq suspend. This way we can just do module detection on define DEBUG.

Note that we still need to keep SYSC_QUIRK_LEGACY_IDLE flag around so
the our legacy single-child devices that set pm_runtime_irq_safe() can
manage the interconnect target module themselves.

Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 drivers/bus/ti-sysc.c                 | 118 ++++++++++++++++++----------------
 include/linux/platform_data/ti-sysc.h |   1 -
 2 files changed, 61 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index c9bac9dc4637..087a67617eef 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -87,6 +87,7 @@ struct sysc {
 	u32 revision;
 	bool enabled;
 	bool needs_resume;
+	unsigned int noirq_suspend:1;
 	bool child_needs_resume;
 	struct delayed_work idle_work;
 };
@@ -712,19 +713,25 @@ static int sysc_suspend(struct device *dev)
 
 	ddata = dev_get_drvdata(dev);
 
-	if (ddata->cfg.quirks & (SYSC_QUIRK_RESOURCE_PROVIDER |
-				 SYSC_QUIRK_LEGACY_IDLE))
+	if (ddata->cfg.quirks & SYSC_QUIRK_LEGACY_IDLE)
 		return 0;
 
-	if (!ddata->enabled)
+	if (!ddata->enabled || ddata->noirq_suspend)
 		return 0;
 
 	dev_dbg(ddata->dev, "%s %s\n", __func__,
 		ddata->name ? ddata->name : "");
 
 	error = pm_runtime_put_sync_suspend(dev);
-	if (error < 0) {
-		dev_warn(ddata->dev, "%s not idle %i %s\n",
+	if (error == -EBUSY) {
+		dev_dbg(ddata->dev, "%s busy, tagging for noirq suspend %s\n",
+			__func__, ddata->name ? ddata->name : "");
+
+		ddata->noirq_suspend = true;
+
+		return 0;
+	} else if (error < 0) {
+		dev_warn(ddata->dev, "%s cannot suspend %i %s\n",
 			 __func__, error,
 			 ddata->name ? ddata->name : "");
 
@@ -743,73 +750,86 @@ static int sysc_resume(struct device *dev)
 
 	ddata = dev_get_drvdata(dev);
 
-	if (ddata->cfg.quirks & (SYSC_QUIRK_RESOURCE_PROVIDER |
-				 SYSC_QUIRK_LEGACY_IDLE))
+	if (ddata->cfg.quirks & SYSC_QUIRK_LEGACY_IDLE)
 		return 0;
 
-	if (ddata->needs_resume) {
-		dev_dbg(ddata->dev, "%s %s\n", __func__,
-			ddata->name ? ddata->name : "");
+	if (!ddata->needs_resume || ddata->noirq_suspend)
+		return 0;
 
-		error = pm_runtime_get_sync(dev);
-		if (error < 0) {
-			dev_err(ddata->dev, "%s  error %i %s\n",
-				__func__, error,
-				 ddata->name ? ddata->name : "");
+	dev_dbg(ddata->dev, "%s %s\n", __func__,
+		ddata->name ? ddata->name : "");
 
-			return error;
-		}
+	error = pm_runtime_get_sync(dev);
+	if (error < 0) {
+		dev_err(ddata->dev, "%s  error %i %s\n",
+			__func__, error,
+			ddata->name ? ddata->name : "");
 
-		ddata->needs_resume = false;
+		return error;
 	}
 
+	ddata->needs_resume = false;
+
 	return 0;
 }
 
 static int sysc_noirq_suspend(struct device *dev)
 {
 	struct sysc *ddata;
+	int error;
 
 	ddata = dev_get_drvdata(dev);
 
 	if (ddata->cfg.quirks & SYSC_QUIRK_LEGACY_IDLE)
 		return 0;
 
-	if (!(ddata->cfg.quirks & SYSC_QUIRK_RESOURCE_PROVIDER))
-		return 0;
-
-	if (!ddata->enabled)
+	if (!ddata->enabled || !ddata->noirq_suspend)
 		return 0;
 
 	dev_dbg(ddata->dev, "%s %s\n", __func__,
 		ddata->name ? ddata->name : "");
 
+	error = sysc_runtime_suspend(dev);
+	if (error) {
+		dev_warn(ddata->dev, "%s busy %i %s\n",
+			 __func__, error, ddata->name ? ddata->name : "");
+
+		return 0;
+	}
+
 	ddata->needs_resume = true;
 
-	return sysc_runtime_suspend(dev);
+	return 0;
 }
 
 static int sysc_noirq_resume(struct device *dev)
 {
 	struct sysc *ddata;
+	int error;
 
 	ddata = dev_get_drvdata(dev);
 
 	if (ddata->cfg.quirks & SYSC_QUIRK_LEGACY_IDLE)
 		return 0;
 
-	if (!(ddata->cfg.quirks & SYSC_QUIRK_RESOURCE_PROVIDER))
+	if (!ddata->needs_resume || !ddata->noirq_suspend)
 		return 0;
 
-	if (ddata->needs_resume) {
-		dev_dbg(ddata->dev, "%s %s\n", __func__,
-			ddata->name ? ddata->name : "");
+	dev_dbg(ddata->dev, "%s %s\n", __func__,
+		ddata->name ? ddata->name : "");
 
-		ddata->needs_resume = false;
+	error = sysc_runtime_resume(dev);
+	if (error) {
+		dev_warn(ddata->dev, "%s cannot resume %i %s\n",
+			 __func__, error,
+			 ddata->name ? ddata->name : "");
 
-		return sysc_runtime_resume(dev);
+		return error;
 	}
 
+	/* Maybe also reconsider clearing noirq_suspend at some point */
+	ddata->needs_resume = false;
+
 	return 0;
 }
 #endif
@@ -848,26 +868,6 @@ struct sysc_revision_quirk {
 	}
 
 static const struct sysc_revision_quirk sysc_revision_quirks[] = {
-	/* These need to use noirq_suspend */
-	SYSC_QUIRK("control", 0, 0, 0x10, -1, 0x40000900, 0xffffffff,
-		   SYSC_QUIRK_RESOURCE_PROVIDER),
-	SYSC_QUIRK("i2c", 0, 0, 0x10, 0x90, 0x5040000a, 0xffffffff,
-		   SYSC_QUIRK_RESOURCE_PROVIDER),
-	SYSC_QUIRK("mcspi", 0, 0, 0x10, -1, 0x40300a0b, 0xffffffff,
-		   SYSC_QUIRK_RESOURCE_PROVIDER),
-	SYSC_QUIRK("prcm", 0, 0, -1, -1, 0x40000100, 0xffffffff,
-		   SYSC_QUIRK_RESOURCE_PROVIDER),
-	SYSC_QUIRK("ocp2scp", 0, 0, 0x10, 0x14, 0x50060005, 0xffffffff,
-		   SYSC_QUIRK_RESOURCE_PROVIDER),
-	SYSC_QUIRK("padconf", 0, 0, 0x10, -1, 0x4fff0800, 0xffffffff,
-		   SYSC_QUIRK_RESOURCE_PROVIDER),
-	SYSC_QUIRK("scm", 0, 0, 0x10, -1, 0x40000900, 0xffffffff,
-		   SYSC_QUIRK_RESOURCE_PROVIDER),
-	SYSC_QUIRK("scrm", 0, 0, -1, -1, 0x00000010, 0xffffffff,
-		   SYSC_QUIRK_RESOURCE_PROVIDER),
-	SYSC_QUIRK("sdma", 0, 0, 0x2c, 0x28, 0x00010900, 0xffffffff,
-		   SYSC_QUIRK_RESOURCE_PROVIDER),
-
 	/* These drivers need to be fixed to not use pm_runtime_irq_safe() */
 	SYSC_QUIRK("gpio", 0, 0, 0x10, 0x114, 0x50600801, 0xffffffff,
 		   SYSC_QUIRK_LEGACY_IDLE | SYSC_QUIRK_OPT_CLKS_IN_RESET),
@@ -892,23 +892,25 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
 	SYSC_QUIRK("uart", 0, 0x50, 0x54, 0x58, 0x50411e03, 0xffffffff,
 		   SYSC_QUIRK_LEGACY_IDLE),
 
-	/* These devices don't yet suspend properly without legacy setting */
-	SYSC_QUIRK("sdio", 0, 0, 0x10, -1, 0x40202301, 0xffffffff,
-		   SYSC_QUIRK_LEGACY_IDLE),
-	SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0500, 0xffffffff,
-		   SYSC_QUIRK_LEGACY_IDLE),
-	SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0d00, 0xffffffff,
-		   SYSC_QUIRK_LEGACY_IDLE),
-
 #ifdef DEBUG
 	SYSC_QUIRK("aess", 0, 0, 0x10, -1, 0x40000000, 0xffffffff, 0),
+	SYSC_QUIRK("control", 0, 0, 0x10, -1, 0x40000900, 0xffffffff, 0),
 	SYSC_QUIRK("gpu", 0, 0x1fc00, 0x1fc10, -1, 0, 0, 0),
 	SYSC_QUIRK("hdq1w", 0, 0, 0x14, 0x18, 0x00000006, 0xffffffff, 0),
 	SYSC_QUIRK("hsi", 0, 0, 0x10, 0x14, 0x50043101, 0xffffffff, 0),
 	SYSC_QUIRK("iss", 0, 0, 0x10, -1, 0x40000101, 0xffffffff, 0),
+	SYSC_QUIRK("i2c", 0, 0, 0x10, 0x90, 0x5040000a, 0xffffffff, 0),
 	SYSC_QUIRK("mcasp", 0, 0, 0x4, -1, 0x44306302, 0xffffffff, 0),
 	SYSC_QUIRK("mcbsp", 0, -1, 0x8c, -1, 0, 0, 0),
+	SYSC_QUIRK("mcspi", 0, 0, 0x10, -1, 0x40300a0b, 0xffffffff, 0),
 	SYSC_QUIRK("mailbox", 0, 0, 0x10, -1, 0x00000400, 0xffffffff, 0),
+	SYSC_QUIRK("ocp2scp", 0, 0, 0x10, 0x14, 0x50060005, 0xffffffff, 0),
+	SYSC_QUIRK("padconf", 0, 0, 0x10, -1, 0x4fff0800, 0xffffffff, 0),
+	SYSC_QUIRK("prcm", 0, 0, -1, -1, 0x40000100, 0xffffffff, 0),
+	SYSC_QUIRK("scm", 0, 0, 0x10, -1, 0x40000900, 0xffffffff, 0),
+	SYSC_QUIRK("scrm", 0, 0, -1, -1, 0x00000010, 0xffffffff, 0),
+	SYSC_QUIRK("sdio", 0, 0, 0x10, -1, 0x40202301, 0xffffffff, 0),
+	SYSC_QUIRK("sdma", 0, 0, 0x2c, 0x28, 0x00010900, 0xffffffff, 0),
 	SYSC_QUIRK("slimbus", 0, 0, 0x10, -1, 0x40000902, 0xffffffff, 0),
 	SYSC_QUIRK("slimbus", 0, 0, 0x10, -1, 0x40002903, 0xffffffff, 0),
 	SYSC_QUIRK("spinlock", 0, 0, 0x10, -1, 0x50020000, 0xffffffff, 0),
@@ -916,6 +918,8 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = {
 	SYSC_QUIRK("usb_host_hs", 0, 0, 0x10, 0x14, 0x50700100, 0xffffffff, 0),
 	SYSC_QUIRK("usb_otg_hs", 0, 0x400, 0x404, 0x408, 0x00000050,
 		   0xffffffff, 0),
+	SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0500, 0xffffffff, 0),
+	SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0d00, 0xffffffff, 0),
 #endif
 };
 
diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h
index 2efa3470a451..1ea3aab972b4 100644
--- a/include/linux/platform_data/ti-sysc.h
+++ b/include/linux/platform_data/ti-sysc.h
@@ -46,7 +46,6 @@ struct sysc_regbits {
 	s8 emufree_shift;
 };
 
-#define SYSC_QUIRK_RESOURCE_PROVIDER	BIT(9)
 #define SYSC_QUIRK_LEGACY_IDLE		BIT(8)
 #define SYSC_QUIRK_RESET_STATUS		BIT(7)
 #define SYSC_QUIRK_NO_IDLE_ON_INIT	BIT(6)
-- 
cgit v1.2.3


From 9ba481e2eb3b932ae5b6278342b256e4f92d2793 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 20 Sep 2018 21:35:20 +0300
Subject: net/mlx5: Set uid as part of CQ commands

Set uid as part of CQ commands so that the firmware can manage the CQ
object in a secured way.

The firmware should mark this CQ with the given uid so that it can
be used later on only by objects with the same uid.

Upon DEVX flows that use this CQ (e.g. create QP command), the
pointed CQ must have the same uid as of the issuer uid command.

When a command is issued with uid=0 it means that the issuer of the
command is trusted (i.e. kernel), in that case any pointed object
can be used regardless of its uid.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cq.c | 4 ++++
 include/linux/mlx5/cq.h                      | 1 +
 include/linux/mlx5/mlx5_ifc.h                | 6 +++---
 3 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index a4179122a279..4b85abb5c9f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -109,6 +109,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 	cq->cons_index = 0;
 	cq->arm_sn     = 0;
 	cq->eq         = eq;
+	cq->uid = MLX5_GET(create_cq_in, in, uid);
 	refcount_set(&cq->refcount, 1);
 	init_completion(&cq->free);
 	if (!cq->comp)
@@ -144,6 +145,7 @@ err_cmd:
 	memset(dout, 0, sizeof(dout));
 	MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
 	MLX5_SET(destroy_cq_in, din, cqn, cq->cqn);
+	MLX5_SET(destroy_cq_in, din, uid, cq->uid);
 	mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
 	return err;
 }
@@ -165,6 +167,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
 
 	MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
 	MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
+	MLX5_SET(destroy_cq_in, in, uid, cq->uid);
 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (err)
 		return err;
@@ -196,6 +199,7 @@ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 	u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0};
 
 	MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ);
+	MLX5_SET(modify_cq_in, in, uid, cq->uid);
 	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 EXPORT_SYMBOL(mlx5_core_modify_cq);
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 0ef6138eca49..31a750570c38 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -61,6 +61,7 @@ struct mlx5_core_cq {
 	int			reset_notify_added;
 	struct list_head	reset_notify;
 	struct mlx5_eq		*eq;
+	u16 uid;
 };
 
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 4c7a1d25d73b..1a412b355054 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5629,7 +5629,7 @@ enum {
 
 struct mlx5_ifc_modify_cq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6404,7 +6404,7 @@ struct mlx5_ifc_destroy_cq_out_bits {
 
 struct mlx5_ifc_destroy_cq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -7164,7 +7164,7 @@ struct mlx5_ifc_create_cq_out_bits {
 
 struct mlx5_ifc_create_cq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
-- 
cgit v1.2.3


From 4ac63ec72587f7426aae15ddfe78e8ab785724dc Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 20 Sep 2018 21:35:21 +0300
Subject: net/mlx5: Set uid as part of QP commands

Set uid as part of QP commands so that the firmware can manage the
QP object in a secured way.

That will enable using a QP that was created by verbs application to
be used by the DEVX flow in case the uid is equal.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/qp.c | 45 +++++++++++++++++-----------
 include/linux/mlx5/mlx5_ifc.h                | 22 +++++++-------
 include/linux/mlx5/qp.h                      |  1 +
 3 files changed, 39 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 4ca07bfb6b14..4e2ab3c916bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -240,6 +240,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 	if (err)
 		return err;
 
+	qp->uid = MLX5_GET(create_qp_in, in, uid);
 	qp->qpn = MLX5_GET(create_qp_out, out, qpn);
 	mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
 
@@ -261,6 +262,7 @@ err_cmd:
 	memset(dout, 0, sizeof(dout));
 	MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
 	MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
+	MLX5_SET(destroy_qp_in, din, uid, qp->uid);
 	mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
 	return err;
 }
@@ -320,6 +322,7 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
 
 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
 	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+	MLX5_SET(destroy_qp_in, in, uid, qp->uid);
 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 	if (err)
 		return err;
@@ -373,7 +376,7 @@ static void mbox_free(struct mbox_info *mbox)
 
 static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
 				u32 opt_param_mask, void *qpc,
-				struct mbox_info *mbox)
+				struct mbox_info *mbox, u16 uid)
 {
 	mbox->out = NULL;
 	mbox->in = NULL;
@@ -381,26 +384,32 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
 #define MBOX_ALLOC(mbox, typ)  \
 	mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out))
 
-#define MOD_QP_IN_SET(typ, in, _opcode, _qpn) \
-	MLX5_SET(typ##_in, in, opcode, _opcode); \
-	MLX5_SET(typ##_in, in, qpn, _qpn)
-
-#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc) \
-	MOD_QP_IN_SET(typ, in, _opcode, _qpn); \
-	MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \
-	memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, MLX5_ST_SZ_BYTES(qpc))
+#define MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid)                            \
+	do {                                                                   \
+		MLX5_SET(typ##_in, in, opcode, _opcode);                       \
+		MLX5_SET(typ##_in, in, qpn, _qpn);                             \
+		MLX5_SET(typ##_in, in, uid, _uid);                             \
+	} while (0)
+
+#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc, _uid)          \
+	do {                                                                   \
+		MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid);                   \
+		MLX5_SET(typ##_in, in, opt_param_mask, _opt_p);                \
+		memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc,                  \
+		       MLX5_ST_SZ_BYTES(qpc));                                 \
+	} while (0)
 
 	switch (opcode) {
 	/* 2RST & 2ERR */
 	case MLX5_CMD_OP_2RST_QP:
 		if (MBOX_ALLOC(mbox, qp_2rst))
 			return -ENOMEM;
-		MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn);
+		MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn, uid);
 		break;
 	case MLX5_CMD_OP_2ERR_QP:
 		if (MBOX_ALLOC(mbox, qp_2err))
 			return -ENOMEM;
-		MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn);
+		MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn, uid);
 		break;
 
 	/* MODIFY with QPC */
@@ -408,37 +417,37 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
 		if (MBOX_ALLOC(mbox, rst2init_qp))
 			return -ENOMEM;
 		MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc);
+				  opt_param_mask, qpc, uid);
 		break;
 	case MLX5_CMD_OP_INIT2RTR_QP:
 		if (MBOX_ALLOC(mbox, init2rtr_qp))
 			return -ENOMEM;
 		MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc);
+				  opt_param_mask, qpc, uid);
 		break;
 	case MLX5_CMD_OP_RTR2RTS_QP:
 		if (MBOX_ALLOC(mbox, rtr2rts_qp))
 			return -ENOMEM;
 		MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc);
+				  opt_param_mask, qpc, uid);
 		break;
 	case MLX5_CMD_OP_RTS2RTS_QP:
 		if (MBOX_ALLOC(mbox, rts2rts_qp))
 			return -ENOMEM;
 		MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc);
+				  opt_param_mask, qpc, uid);
 		break;
 	case MLX5_CMD_OP_SQERR2RTS_QP:
 		if (MBOX_ALLOC(mbox, sqerr2rts_qp))
 			return -ENOMEM;
 		MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc);
+				  opt_param_mask, qpc, uid);
 		break;
 	case MLX5_CMD_OP_INIT2INIT_QP:
 		if (MBOX_ALLOC(mbox, init2init_qp))
 			return -ENOMEM;
 		MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc);
+				  opt_param_mask, qpc, uid);
 		break;
 	default:
 		mlx5_core_err(dev, "Unknown transition for modify QP: OP(0x%x) QPN(0x%x)\n",
@@ -456,7 +465,7 @@ int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode,
 	int err;
 
 	err = modify_qp_mbox_alloc(dev, opcode, qp->qpn,
-				   opt_param_mask, qpc, &mbox);
+				   opt_param_mask, qpc, &mbox, qp->uid);
 	if (err)
 		return err;
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 1a412b355054..3b6a612787ac 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -3394,7 +3394,7 @@ struct mlx5_ifc_sqerr2rts_qp_out_bits {
 
 struct mlx5_ifc_sqerr2rts_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -3424,7 +3424,7 @@ struct mlx5_ifc_sqd2rts_qp_out_bits {
 
 struct mlx5_ifc_sqd2rts_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -3629,7 +3629,7 @@ struct mlx5_ifc_rts2rts_qp_out_bits {
 
 struct mlx5_ifc_rts2rts_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -3659,7 +3659,7 @@ struct mlx5_ifc_rtr2rts_qp_out_bits {
 
 struct mlx5_ifc_rtr2rts_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -3689,7 +3689,7 @@ struct mlx5_ifc_rst2init_qp_out_bits {
 
 struct mlx5_ifc_rst2init_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -5192,7 +5192,7 @@ struct mlx5_ifc_qp_2rst_out_bits {
 
 struct mlx5_ifc_qp_2rst_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -5214,7 +5214,7 @@ struct mlx5_ifc_qp_2err_out_bits {
 
 struct mlx5_ifc_qp_2err_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -5789,7 +5789,7 @@ struct mlx5_ifc_init2rtr_qp_out_bits {
 
 struct mlx5_ifc_init2rtr_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -5819,7 +5819,7 @@ struct mlx5_ifc_init2init_qp_out_bits {
 
 struct mlx5_ifc_init2init_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6230,7 +6230,7 @@ struct mlx5_ifc_destroy_qp_out_bits {
 
 struct mlx5_ifc_destroy_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6895,7 +6895,7 @@ struct mlx5_ifc_create_qp_out_bits {
 
 struct mlx5_ifc_create_qp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 4778d41085d4..fbe322c966bc 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -471,6 +471,7 @@ struct mlx5_core_qp {
 	int			qpn;
 	struct mlx5_rsc_debug	*dbg;
 	int			pid;
+	u16			uid;
 };
 
 struct mlx5_core_dct {
-- 
cgit v1.2.3


From d269b3afffcb107375d9cf73127fc2e0181bc90b Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 20 Sep 2018 21:35:22 +0300
Subject: net/mlx5: Set uid as part of RQ commands

Set uid as part of RQ commands so that the firmware can manage the
RQ object in a secured way.

That will enable using an RQ that was created by verbs application
to be used by the DEVX flow in case the uid is equal.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/qp.c | 16 ++++++++++++++--
 include/linux/mlx5/mlx5_ifc.h                |  6 +++---
 2 files changed, 17 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 4e2ab3c916bf..f57e08d4f970 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -540,6 +540,17 @@ int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
 }
 EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
 
+static void destroy_rq_tracked(struct mlx5_core_dev *dev, u32 rqn, u16 uid)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_rq_in)]   = {};
+	u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {};
+
+	MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ);
+	MLX5_SET(destroy_rq_in, in, rqn, rqn);
+	MLX5_SET(destroy_rq_in, in, uid, uid);
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
 int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
 				struct mlx5_core_qp *rq)
 {
@@ -550,6 +561,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
 	if (err)
 		return err;
 
+	rq->uid = MLX5_GET(create_rq_in, in, uid);
 	rq->qpn = rqn;
 	err = create_resource_common(dev, rq, MLX5_RES_RQ);
 	if (err)
@@ -558,7 +570,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
 	return 0;
 
 err_destroy_rq:
-	mlx5_core_destroy_rq(dev, rq->qpn);
+	destroy_rq_tracked(dev, rq->qpn, rq->uid);
 
 	return err;
 }
@@ -568,7 +580,7 @@ void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
 				  struct mlx5_core_qp *rq)
 {
 	destroy_resource_common(dev, rq);
-	mlx5_core_destroy_rq(dev, rq->qpn);
+	destroy_rq_tracked(dev, rq->qpn, rq->uid);
 }
 EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 3b6a612787ac..689631ca27b8 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5488,7 +5488,7 @@ enum {
 
 struct mlx5_ifc_modify_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6164,7 +6164,7 @@ struct mlx5_ifc_destroy_rq_out_bits {
 
 struct mlx5_ifc_destroy_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6847,7 +6847,7 @@ struct mlx5_ifc_create_rq_out_bits {
 
 struct mlx5_ifc_create_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
-- 
cgit v1.2.3


From 430ae0d5a3ce1350375690cb6ab29ab6fae4a9ac Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 20 Sep 2018 21:35:23 +0300
Subject: net/mlx5: Set uid as part of SQ commands

Set uid as part of SQ commands so that the firmware can manage the
SQ object in a secured way.

That will enable using an SQ that was created by verbs application
to be used by the DEVX flow in case the uid is equal.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/qp.c | 16 ++++++++++++++--
 include/linux/mlx5/mlx5_ifc.h                |  6 +++---
 2 files changed, 17 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index f57e08d4f970..d9b12136cbfd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -584,6 +584,17 @@ void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
 
+static void destroy_sq_tracked(struct mlx5_core_dev *dev, u32 sqn, u16 uid)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_sq_in)]   = {};
+	u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {};
+
+	MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ);
+	MLX5_SET(destroy_sq_in, in, sqn, sqn);
+	MLX5_SET(destroy_sq_in, in, uid, uid);
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
 int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
 				struct mlx5_core_qp *sq)
 {
@@ -594,6 +605,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
 	if (err)
 		return err;
 
+	sq->uid = MLX5_GET(create_sq_in, in, uid);
 	sq->qpn = sqn;
 	err = create_resource_common(dev, sq, MLX5_RES_SQ);
 	if (err)
@@ -602,7 +614,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
 	return 0;
 
 err_destroy_sq:
-	mlx5_core_destroy_sq(dev, sq->qpn);
+	destroy_sq_tracked(dev, sq->qpn, sq->uid);
 
 	return err;
 }
@@ -612,7 +624,7 @@ void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
 				  struct mlx5_core_qp *sq)
 {
 	destroy_resource_common(dev, sq);
-	mlx5_core_destroy_sq(dev, sq->qpn);
+	destroy_sq_tracked(dev, sq->qpn, sq->uid);
 }
 EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 689631ca27b8..72dd1e49a799 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5381,7 +5381,7 @@ struct mlx5_ifc_modify_sq_out_bits {
 
 struct mlx5_ifc_modify_sq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6096,7 +6096,7 @@ struct mlx5_ifc_destroy_sq_out_bits {
 
 struct mlx5_ifc_destroy_sq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6769,7 +6769,7 @@ struct mlx5_ifc_create_sq_out_bits {
 
 struct mlx5_ifc_create_sq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
-- 
cgit v1.2.3


From a0d8c054318976927493ffd26055d9d183c9beec Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 20 Sep 2018 21:35:24 +0300
Subject: net/mlx5: Set uid as part of SRQ commands

Set uid as part of SRQ commands so that the firmware can manage the
SRQ object in a secured way.

That will enable using an SRQ that was created by verbs application
to be used by the DEVX flow in case the uid is equal.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/srq.c | 30 ++++++++++++++++++++++++---
 include/linux/mlx5/driver.h                   |  1 +
 include/linux/mlx5/mlx5_ifc.h                 | 22 ++++++++++----------
 include/linux/mlx5/srq.h                      |  1 +
 4 files changed, 40 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
index 23cc337a96c9..5c519615fb1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
@@ -166,6 +166,7 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 	if (!create_in)
 		return -ENOMEM;
 
+	MLX5_SET(create_srq_in, create_in, uid, in->uid);
 	srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry);
 	pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
 
@@ -178,8 +179,10 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 	err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
 			    sizeof(create_out));
 	kvfree(create_in);
-	if (!err)
+	if (!err) {
 		srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
+		srq->uid = in->uid;
+	}
 
 	return err;
 }
@@ -193,6 +196,7 @@ static int destroy_srq_cmd(struct mlx5_core_dev *dev,
 	MLX5_SET(destroy_srq_in, srq_in, opcode,
 		 MLX5_CMD_OP_DESTROY_SRQ);
 	MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
+	MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid);
 
 	return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
 			     srq_out, sizeof(srq_out));
@@ -208,6 +212,7 @@ static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 	MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ);
 	MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn);
 	MLX5_SET(arm_rq_in, srq_in, lwm,      lwm);
+	MLX5_SET(arm_rq_in, srq_in, uid, srq->uid);
 
 	return  mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
 			      srq_out, sizeof(srq_out));
@@ -260,6 +265,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
 	if (!create_in)
 		return -ENOMEM;
 
+	MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid);
 	xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in,
 				xrc_srq_context_entry);
 	pas	 = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas);
@@ -277,6 +283,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
 		goto out;
 
 	srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn);
+	srq->uid = in->uid;
 out:
 	kvfree(create_in);
 	return err;
@@ -291,6 +298,7 @@ static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
 	MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode,
 		 MLX5_CMD_OP_DESTROY_XRC_SRQ);
 	MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
+	MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid);
 
 	return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
 			     xrcsrq_out, sizeof(xrcsrq_out));
@@ -306,6 +314,7 @@ static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
 	MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod,   MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
 	MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
 	MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm,      lwm);
+	MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid);
 
 	return  mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
 			      xrcsrq_out, sizeof(xrcsrq_out));
@@ -365,10 +374,13 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 	wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
 
 	MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+	MLX5_SET(create_rmp_in, create_in, uid, in->uid);
 	set_wq(wq, in);
 	memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
 
 	err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn);
+	if (!err)
+		srq->uid = in->uid;
 
 	kvfree(create_in);
 	return err;
@@ -377,7 +389,13 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 static int destroy_rmp_cmd(struct mlx5_core_dev *dev,
 			   struct mlx5_core_srq *srq)
 {
-	return mlx5_core_destroy_rmp(dev, srq->srqn);
+	u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)]   = {};
+	u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {};
+
+	MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
+	MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn);
+	MLX5_SET(destroy_rmp_in, in, uid, srq->uid);
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
 static int arm_rmp_cmd(struct mlx5_core_dev *dev,
@@ -400,6 +418,7 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev,
 
 	MLX5_SET(modify_rmp_in, in,	 rmp_state, MLX5_RMPC_STATE_RDY);
 	MLX5_SET(modify_rmp_in, in,	 rmpn,      srq->srqn);
+	MLX5_SET(modify_rmp_in, in, uid, srq->uid);
 	MLX5_SET(wq,		wq,	 lwm,	    lwm);
 	MLX5_SET(rmp_bitmask,	bitmask, lwm,	    1);
 	MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
@@ -469,11 +488,14 @@ static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
 	MLX5_SET(xrqc, xrqc, user_index, in->user_index);
 	MLX5_SET(xrqc, xrqc, cqn, in->cqn);
 	MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ);
+	MLX5_SET(create_xrq_in, create_in, uid, in->uid);
 	err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
 			    sizeof(create_out));
 	kvfree(create_in);
-	if (!err)
+	if (!err) {
 		srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn);
+		srq->uid = in->uid;
+	}
 
 	return err;
 }
@@ -485,6 +507,7 @@ static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
 
 	MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ);
 	MLX5_SET(destroy_xrq_in, in, xrqn,   srq->srqn);
+	MLX5_SET(destroy_xrq_in, in, uid, srq->uid);
 
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
@@ -500,6 +523,7 @@ static int arm_xrq_cmd(struct mlx5_core_dev *dev,
 	MLX5_SET(arm_rq_in, in, op_mod,     MLX5_ARM_RQ_IN_OP_MOD_XRQ);
 	MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
 	MLX5_SET(arm_rq_in, in, lwm,	    lwm);
+	MLX5_SET(arm_rq_in, in, uid, srq->uid);
 
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index d885e9f0e054..8fb072aa8671 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -474,6 +474,7 @@ struct mlx5_core_srq {
 
 	atomic_t		refcount;
 	struct completion	free;
+	u16		uid;
 };
 
 struct mlx5_eq_table {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 72dd1e49a799..85f1237d80db 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5524,7 +5524,7 @@ struct mlx5_ifc_rmp_bitmask_bits {
 
 struct mlx5_ifc_modify_rmp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -5986,7 +5986,7 @@ struct mlx5_ifc_destroy_xrq_out_bits {
 
 struct mlx5_ifc_destroy_xrq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6008,7 +6008,7 @@ struct mlx5_ifc_destroy_xrc_srq_out_bits {
 
 struct mlx5_ifc_destroy_xrc_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6074,7 +6074,7 @@ struct mlx5_ifc_destroy_srq_out_bits {
 
 struct mlx5_ifc_destroy_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6208,7 +6208,7 @@ struct mlx5_ifc_destroy_rmp_out_bits {
 
 struct mlx5_ifc_destroy_rmp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6641,7 +6641,7 @@ struct mlx5_ifc_create_xrq_out_bits {
 
 struct mlx5_ifc_create_xrq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6665,7 +6665,7 @@ struct mlx5_ifc_create_xrc_srq_out_bits {
 
 struct mlx5_ifc_create_xrc_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6741,7 +6741,7 @@ struct mlx5_ifc_create_srq_out_bits {
 
 struct mlx5_ifc_create_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6871,7 +6871,7 @@ struct mlx5_ifc_create_rmp_out_bits {
 
 struct mlx5_ifc_create_rmp_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -7272,7 +7272,7 @@ enum {
 
 struct mlx5_ifc_arm_xrc_srq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -7300,7 +7300,7 @@ enum {
 
 struct mlx5_ifc_arm_rq_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
diff --git a/include/linux/mlx5/srq.h b/include/linux/mlx5/srq.h
index 24ff23e27c8a..1b1f3c20c6a3 100644
--- a/include/linux/mlx5/srq.h
+++ b/include/linux/mlx5/srq.h
@@ -61,6 +61,7 @@ struct mlx5_srq_attr {
 	u32 tm_next_tag;
 	u32 tm_hw_phase_cnt;
 	u32 tm_sw_phase_cnt;
+	u16 uid;
 };
 
 struct mlx5_core_dev;
-- 
cgit v1.2.3


From 774ea6eea29025218f75bc94c764df9a641db471 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@mellanox.com>
Date: Thu, 20 Sep 2018 21:35:25 +0300
Subject: net/mlx5: Set uid as part of DCT commands

Set uid as part of DCT commands so that the firmware can manage the
DCT object in a secured way.

That will enable using a DCT that was created by verbs application
to be used by the DEVX flow in case the uid is equal.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/qp.c | 4 ++++
 include/linux/mlx5/mlx5_ifc.h                | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index d9b12136cbfd..91b8139a388d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -211,6 +211,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev,
 	}
 
 	qp->qpn = MLX5_GET(create_dct_out, out, dctn);
+	qp->uid = MLX5_GET(create_dct_in, in, uid);
 	err = create_resource_common(dev, qp, MLX5_RES_DCT);
 	if (err)
 		goto err_cmd;
@@ -219,6 +220,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev,
 err_cmd:
 	MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
 	MLX5_SET(destroy_dct_in, din, dctn, qp->qpn);
+	MLX5_SET(destroy_dct_in, din, uid, qp->uid);
 	mlx5_cmd_exec(dev, (void *)&in, sizeof(din),
 		      (void *)&out, sizeof(dout));
 	return err;
@@ -277,6 +279,7 @@ static int mlx5_core_drain_dct(struct mlx5_core_dev *dev,
 
 	MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT);
 	MLX5_SET(drain_dct_in, in, dctn, qp->qpn);
+	MLX5_SET(drain_dct_in, in, uid, qp->uid);
 	return mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
 			     (void *)&out, sizeof(out));
 }
@@ -303,6 +306,7 @@ destroy:
 	destroy_resource_common(dev, &dct->mqp);
 	MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
 	MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
+	MLX5_SET(destroy_dct_in, in, uid, qp->uid);
 	err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
 			    (void *)&out, sizeof(out));
 	return err;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 85f1237d80db..62c0592a9fdb 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5918,7 +5918,7 @@ struct mlx5_ifc_drain_dct_out_bits {
 
 struct mlx5_ifc_drain_dct_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6382,7 +6382,7 @@ struct mlx5_ifc_destroy_dct_out_bits {
 
 struct mlx5_ifc_destroy_dct_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -7138,7 +7138,7 @@ struct mlx5_ifc_create_dct_out_bits {
 
 struct mlx5_ifc_create_dct_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
-- 
cgit v1.2.3


From bd37197554eb28a7fc38e44e005e303c77f788ed Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 20 Sep 2018 21:35:26 +0300
Subject: net/mlx5: Update mlx5_ifc with DEVX UID bits

Add DEVX information to WQ, SRQ, CQ, TIR, TIS, QP,
RQ, XRCD, PD, MKEY and MCG.

Each object that is created/destroyed/modified via verbs will
be stamped with a UID based on its user context. This is already
done for DEVX objects commands.

This will enable the firmware to enforce the usage of kernel objects
from the DEVX flow by validating that the same UID is used and the
resources are really related to the same user.

The addition of *_valid fields are needed to distinguish
how various addresses are passed.

For non-DEVX callers, all those fields will be zero.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 67 +++++++++++++++++++++++++++----------------
 1 file changed, 43 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 62c0592a9fdb..68f4d5f9d929 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1290,7 +1290,9 @@ struct mlx5_ifc_wq_bits {
 	u8         reserved_at_118[0x3];
 	u8         log_wq_sz[0x5];
 
-	u8         reserved_at_120[0x3];
+	u8         dbr_umem_valid[0x1];
+	u8         wq_umem_valid[0x1];
+	u8         reserved_at_122[0x1];
 	u8         log_hairpin_num_packets[0x5];
 	u8         reserved_at_128[0x3];
 	u8         log_hairpin_data_sz[0x5];
@@ -2364,7 +2366,10 @@ struct mlx5_ifc_qpc_bits {
 
 	u8         dc_access_key[0x40];
 
-	u8         reserved_at_680[0xc0];
+	u8         reserved_at_680[0x3];
+	u8         dbr_umem_valid[0x1];
+
+	u8         reserved_at_684[0xbc];
 };
 
 struct mlx5_ifc_roce_addr_layout_bits {
@@ -2464,7 +2469,7 @@ struct mlx5_ifc_xrc_srqc_bits {
 
 	u8         wq_signature[0x1];
 	u8         cont_srq[0x1];
-	u8         reserved_at_22[0x1];
+	u8         dbr_umem_valid[0x1];
 	u8         rlky[0x1];
 	u8         basic_cyclic_rcv_wqe[0x1];
 	u8         log_rq_stride[0x3];
@@ -3128,7 +3133,9 @@ enum {
 
 struct mlx5_ifc_cqc_bits {
 	u8         status[0x4];
-	u8         reserved_at_4[0x4];
+	u8         reserved_at_4[0x2];
+	u8         dbr_umem_valid[0x1];
+	u8         reserved_at_7[0x1];
 	u8         cqe_sz[0x3];
 	u8         cc[0x1];
 	u8         reserved_at_c[0x1];
@@ -5314,7 +5321,7 @@ struct mlx5_ifc_modify_tis_bitmask_bits {
 
 struct mlx5_ifc_modify_tis_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -5353,7 +5360,7 @@ struct mlx5_ifc_modify_tir_out_bits {
 
 struct mlx5_ifc_modify_tir_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -5454,7 +5461,7 @@ struct mlx5_ifc_rqt_bitmask_bits {
 
 struct mlx5_ifc_modify_rqt_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -5641,7 +5648,10 @@ struct mlx5_ifc_modify_cq_in_bits {
 
 	struct mlx5_ifc_cqc_bits cq_context;
 
-	u8         reserved_at_280[0x600];
+	u8         reserved_at_280[0x40];
+
+	u8         cq_umem_valid[0x1];
+	u8         reserved_at_2c1[0x5bf];
 
 	u8         pas[0][0x40];
 };
@@ -5962,7 +5972,7 @@ struct mlx5_ifc_detach_from_mcg_out_bits {
 
 struct mlx5_ifc_detach_from_mcg_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6030,7 +6040,7 @@ struct mlx5_ifc_destroy_tis_out_bits {
 
 struct mlx5_ifc_destroy_tis_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6052,7 +6062,7 @@ struct mlx5_ifc_destroy_tir_out_bits {
 
 struct mlx5_ifc_destroy_tir_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6142,7 +6152,7 @@ struct mlx5_ifc_destroy_rqt_out_bits {
 
 struct mlx5_ifc_destroy_rqt_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6507,7 +6517,7 @@ struct mlx5_ifc_dealloc_xrcd_out_bits {
 
 struct mlx5_ifc_dealloc_xrcd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6595,7 +6605,7 @@ struct mlx5_ifc_dealloc_pd_out_bits {
 
 struct mlx5_ifc_dealloc_pd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6674,7 +6684,9 @@ struct mlx5_ifc_create_xrc_srq_in_bits {
 
 	struct mlx5_ifc_xrc_srqc_bits xrc_srq_context_entry;
 
-	u8         reserved_at_280[0x600];
+	u8         reserved_at_280[0x40];
+	u8         xrc_srq_umem_valid[0x1];
+	u8         reserved_at_2c1[0x5bf];
 
 	u8         pas[0][0x40];
 };
@@ -6693,7 +6705,7 @@ struct mlx5_ifc_create_tis_out_bits {
 
 struct mlx5_ifc_create_tis_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6717,7 +6729,7 @@ struct mlx5_ifc_create_tir_out_bits {
 
 struct mlx5_ifc_create_tir_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6823,7 +6835,7 @@ struct mlx5_ifc_create_rqt_out_bits {
 
 struct mlx5_ifc_create_rqt_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -6908,7 +6920,10 @@ struct mlx5_ifc_create_qp_in_bits {
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-	u8         reserved_at_800[0x80];
+	u8         reserved_at_800[0x60];
+
+	u8         wq_umem_valid[0x1];
+	u8         reserved_at_861[0x1f];
 
 	u8         pas[0][0x40];
 };
@@ -6970,7 +6985,8 @@ struct mlx5_ifc_create_mkey_in_bits {
 	u8         reserved_at_40[0x20];
 
 	u8         pg_access[0x1];
-	u8         reserved_at_61[0x1f];
+	u8         mkey_umem_valid[0x1];
+	u8         reserved_at_62[0x1e];
 
 	struct mlx5_ifc_mkc_bits memory_key_mkey_entry;
 
@@ -7173,7 +7189,10 @@ struct mlx5_ifc_create_cq_in_bits {
 
 	struct mlx5_ifc_cqc_bits cq_context;
 
-	u8         reserved_at_280[0x600];
+	u8         reserved_at_280[0x60];
+
+	u8         cq_umem_valid[0x1];
+	u8         reserved_at_2e1[0x59f];
 
 	u8         pas[0][0x40];
 };
@@ -7221,7 +7240,7 @@ struct mlx5_ifc_attach_to_mcg_out_bits {
 
 struct mlx5_ifc_attach_to_mcg_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -7348,7 +7367,7 @@ struct mlx5_ifc_alloc_xrcd_out_bits {
 
 struct mlx5_ifc_alloc_xrcd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
@@ -7436,7 +7455,7 @@ struct mlx5_ifc_alloc_pd_out_bits {
 
 struct mlx5_ifc_alloc_pd_in_bits {
 	u8         opcode[0x10];
-	u8         reserved_at_10[0x10];
+	u8         uid[0x10];
 
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
-- 
cgit v1.2.3


From 2a6db719c92dbfe43c9eea7e4358ea2e51b5004e Mon Sep 17 00:00:00 2001
From: Nipun Gupta <nipun.gupta@nxp.com>
Date: Mon, 10 Sep 2018 19:19:16 +0530
Subject: iommu/of: make of_pci_map_rid() available for other devices too

iommu-map property is also used by devices with fsl-mc. This
patch moves the of_pci_map_rid to generic location, so that it
can be used by other busses too.

'of_pci_map_rid' is renamed here to 'of_map_rid' and there is no
functional change done in the API.

Signed-off-by: Nipun Gupta <nipun.gupta@nxp.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/of_iommu.c |   5 +--
 drivers/of/base.c        | 102 +++++++++++++++++++++++++++++++++++++++++++++++
 drivers/of/irq.c         |   5 +--
 drivers/pci/of.c         | 101 ----------------------------------------------
 include/linux/of.h       |  11 +++++
 include/linux/of_pci.h   |  10 -----
 6 files changed, 117 insertions(+), 117 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c
index f7787e757244..c13eecac0e8a 100644
--- a/drivers/iommu/of_iommu.c
+++ b/drivers/iommu/of_iommu.c
@@ -132,9 +132,8 @@ static int of_pci_iommu_init(struct pci_dev *pdev, u16 alias, void *data)
 	struct of_phandle_args iommu_spec = { .args_count = 1 };
 	int err;
 
-	err = of_pci_map_rid(info->np, alias, "iommu-map",
-			     "iommu-map-mask", &iommu_spec.np,
-			     iommu_spec.args);
+	err = of_map_rid(info->np, alias, "iommu-map", "iommu-map-mask",
+			 &iommu_spec.np, iommu_spec.args);
 	if (err)
 		return err == -ENODEV ? NO_IOMMU : err;
 
diff --git a/drivers/of/base.c b/drivers/of/base.c
index 74eaedd5b860..90bf7d9fa17b 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -2045,3 +2045,105 @@ int of_find_last_cache_level(unsigned int cpu)
 
 	return cache_level;
 }
+
+/**
+ * of_map_rid - Translate a requester ID through a downstream mapping.
+ * @np: root complex device node.
+ * @rid: device requester ID to map.
+ * @map_name: property name of the map to use.
+ * @map_mask_name: optional property name of the mask to use.
+ * @target: optional pointer to a target device node.
+ * @id_out: optional pointer to receive the translated ID.
+ *
+ * Given a device requester ID, look up the appropriate implementation-defined
+ * platform ID and/or the target device which receives transactions on that
+ * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or
+ * @id_out may be NULL if only the other is required. If @target points to
+ * a non-NULL device node pointer, only entries targeting that node will be
+ * matched; if it points to a NULL value, it will receive the device node of
+ * the first matching target phandle, with a reference held.
+ *
+ * Return: 0 on success or a standard error code on failure.
+ */
+int of_map_rid(struct device_node *np, u32 rid,
+	       const char *map_name, const char *map_mask_name,
+	       struct device_node **target, u32 *id_out)
+{
+	u32 map_mask, masked_rid;
+	int map_len;
+	const __be32 *map = NULL;
+
+	if (!np || !map_name || (!target && !id_out))
+		return -EINVAL;
+
+	map = of_get_property(np, map_name, &map_len);
+	if (!map) {
+		if (target)
+			return -ENODEV;
+		/* Otherwise, no map implies no translation */
+		*id_out = rid;
+		return 0;
+	}
+
+	if (!map_len || map_len % (4 * sizeof(*map))) {
+		pr_err("%pOF: Error: Bad %s length: %d\n", np,
+			map_name, map_len);
+		return -EINVAL;
+	}
+
+	/* The default is to select all bits. */
+	map_mask = 0xffffffff;
+
+	/*
+	 * Can be overridden by "{iommu,msi}-map-mask" property.
+	 * If of_property_read_u32() fails, the default is used.
+	 */
+	if (map_mask_name)
+		of_property_read_u32(np, map_mask_name, &map_mask);
+
+	masked_rid = map_mask & rid;
+	for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) {
+		struct device_node *phandle_node;
+		u32 rid_base = be32_to_cpup(map + 0);
+		u32 phandle = be32_to_cpup(map + 1);
+		u32 out_base = be32_to_cpup(map + 2);
+		u32 rid_len = be32_to_cpup(map + 3);
+
+		if (rid_base & ~map_mask) {
+			pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores rid-base (0x%x)\n",
+				np, map_name, map_name,
+				map_mask, rid_base);
+			return -EFAULT;
+		}
+
+		if (masked_rid < rid_base || masked_rid >= rid_base + rid_len)
+			continue;
+
+		phandle_node = of_find_node_by_phandle(phandle);
+		if (!phandle_node)
+			return -ENODEV;
+
+		if (target) {
+			if (*target)
+				of_node_put(phandle_node);
+			else
+				*target = phandle_node;
+
+			if (*target != phandle_node)
+				continue;
+		}
+
+		if (id_out)
+			*id_out = masked_rid - rid_base + out_base;
+
+		pr_debug("%pOF: %s, using mask %08x, rid-base: %08x, out-base: %08x, length: %08x, rid: %08x -> %08x\n",
+			np, map_name, map_mask, rid_base, out_base,
+			rid_len, rid, masked_rid - rid_base + out_base);
+		return 0;
+	}
+
+	pr_err("%pOF: Invalid %s translation - no match for rid 0x%x on %pOF\n",
+		np, map_name, rid, target && *target ? *target : NULL);
+	return -EFAULT;
+}
+EXPORT_SYMBOL_GPL(of_map_rid);
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 02ad93a304a4..e1f6f392a4c0 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -22,7 +22,6 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
-#include <linux/of_pci.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 
@@ -588,8 +587,8 @@ static u32 __of_msi_map_rid(struct device *dev, struct device_node **np,
 	 * "msi-map" property.
 	 */
 	for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent)
-		if (!of_pci_map_rid(parent_dev->of_node, rid_in, "msi-map",
-				    "msi-map-mask", np, &rid_out))
+		if (!of_map_rid(parent_dev->of_node, rid_in, "msi-map",
+				"msi-map-mask", np, &rid_out))
 			break;
 	return rid_out;
 }
diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index 1836b8ddf292..4c4217d0c3f1 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -355,107 +355,6 @@ failed:
 EXPORT_SYMBOL_GPL(devm_of_pci_get_host_bridge_resources);
 #endif /* CONFIG_OF_ADDRESS */
 
-/**
- * of_pci_map_rid - Translate a requester ID through a downstream mapping.
- * @np: root complex device node.
- * @rid: PCI requester ID to map.
- * @map_name: property name of the map to use.
- * @map_mask_name: optional property name of the mask to use.
- * @target: optional pointer to a target device node.
- * @id_out: optional pointer to receive the translated ID.
- *
- * Given a PCI requester ID, look up the appropriate implementation-defined
- * platform ID and/or the target device which receives transactions on that
- * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or
- * @id_out may be NULL if only the other is required. If @target points to
- * a non-NULL device node pointer, only entries targeting that node will be
- * matched; if it points to a NULL value, it will receive the device node of
- * the first matching target phandle, with a reference held.
- *
- * Return: 0 on success or a standard error code on failure.
- */
-int of_pci_map_rid(struct device_node *np, u32 rid,
-		   const char *map_name, const char *map_mask_name,
-		   struct device_node **target, u32 *id_out)
-{
-	u32 map_mask, masked_rid;
-	int map_len;
-	const __be32 *map = NULL;
-
-	if (!np || !map_name || (!target && !id_out))
-		return -EINVAL;
-
-	map = of_get_property(np, map_name, &map_len);
-	if (!map) {
-		if (target)
-			return -ENODEV;
-		/* Otherwise, no map implies no translation */
-		*id_out = rid;
-		return 0;
-	}
-
-	if (!map_len || map_len % (4 * sizeof(*map))) {
-		pr_err("%pOF: Error: Bad %s length: %d\n", np,
-			map_name, map_len);
-		return -EINVAL;
-	}
-
-	/* The default is to select all bits. */
-	map_mask = 0xffffffff;
-
-	/*
-	 * Can be overridden by "{iommu,msi}-map-mask" property.
-	 * If of_property_read_u32() fails, the default is used.
-	 */
-	if (map_mask_name)
-		of_property_read_u32(np, map_mask_name, &map_mask);
-
-	masked_rid = map_mask & rid;
-	for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) {
-		struct device_node *phandle_node;
-		u32 rid_base = be32_to_cpup(map + 0);
-		u32 phandle = be32_to_cpup(map + 1);
-		u32 out_base = be32_to_cpup(map + 2);
-		u32 rid_len = be32_to_cpup(map + 3);
-
-		if (rid_base & ~map_mask) {
-			pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores rid-base (0x%x)\n",
-				np, map_name, map_name,
-				map_mask, rid_base);
-			return -EFAULT;
-		}
-
-		if (masked_rid < rid_base || masked_rid >= rid_base + rid_len)
-			continue;
-
-		phandle_node = of_find_node_by_phandle(phandle);
-		if (!phandle_node)
-			return -ENODEV;
-
-		if (target) {
-			if (*target)
-				of_node_put(phandle_node);
-			else
-				*target = phandle_node;
-
-			if (*target != phandle_node)
-				continue;
-		}
-
-		if (id_out)
-			*id_out = masked_rid - rid_base + out_base;
-
-		pr_debug("%pOF: %s, using mask %08x, rid-base: %08x, out-base: %08x, length: %08x, rid: %08x -> %08x\n",
-			np, map_name, map_mask, rid_base, out_base,
-			rid_len, rid, masked_rid - rid_base + out_base);
-		return 0;
-	}
-
-	pr_err("%pOF: Invalid %s translation - no match for rid 0x%x on %pOF\n",
-		np, map_name, rid, target && *target ? *target : NULL);
-	return -EFAULT;
-}
-
 #if IS_ENABLED(CONFIG_OF_IRQ)
 /**
  * of_irq_parse_pci - Resolve the interrupt for a PCI device
diff --git a/include/linux/of.h b/include/linux/of.h
index 99b0ebf49632..bf577ca3c8e7 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -550,6 +550,10 @@ bool of_console_check(struct device_node *dn, char *name, int index);
 
 extern int of_cpu_node_to_id(struct device_node *np);
 
+int of_map_rid(struct device_node *np, u32 rid,
+	       const char *map_name, const char *map_mask_name,
+	       struct device_node **target, u32 *id_out);
+
 #else /* CONFIG_OF */
 
 static inline void of_core_init(void)
@@ -952,6 +956,13 @@ static inline int of_cpu_node_to_id(struct device_node *np)
 	return -ENODEV;
 }
 
+static inline int of_map_rid(struct device_node *np, u32 rid,
+			     const char *map_name, const char *map_mask_name,
+			     struct device_node **target, u32 *id_out)
+{
+	return -EINVAL;
+}
+
 #define of_match_ptr(_ptr)	NULL
 #define of_match_node(_matches, _node)	NULL
 #endif /* CONFIG_OF */
diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h
index e83d87fc5673..21a89c4880fa 100644
--- a/include/linux/of_pci.h
+++ b/include/linux/of_pci.h
@@ -14,9 +14,6 @@ struct device_node *of_pci_find_child_device(struct device_node *parent,
 					     unsigned int devfn);
 int of_pci_get_devfn(struct device_node *np);
 void of_pci_check_probe_only(void);
-int of_pci_map_rid(struct device_node *np, u32 rid,
-		   const char *map_name, const char *map_mask_name,
-		   struct device_node **target, u32 *id_out);
 #else
 static inline struct device_node *of_pci_find_child_device(struct device_node *parent,
 					     unsigned int devfn)
@@ -29,13 +26,6 @@ static inline int of_pci_get_devfn(struct device_node *np)
 	return -EINVAL;
 }
 
-static inline int of_pci_map_rid(struct device_node *np, u32 rid,
-			const char *map_name, const char *map_mask_name,
-			struct device_node **target, u32 *id_out)
-{
-	return -EINVAL;
-}
-
 static inline void of_pci_check_probe_only(void) { }
 #endif
 
-- 
cgit v1.2.3


From eab03e2a1a3d9d354943aff5ae5e4254ee1ec967 Mon Sep 17 00:00:00 2001
From: Nipun Gupta <nipun.gupta@nxp.com>
Date: Mon, 10 Sep 2018 19:19:18 +0530
Subject: iommu/arm-smmu: Add support for the fsl-mc bus

Implement bus specific support for the fsl-mc bus including
registering arm_smmu_ops and bus specific device add operations.

Signed-off-by: Nipun Gupta <nipun.gupta@nxp.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/arm-smmu.c |  7 +++++++
 drivers/iommu/iommu.c    | 13 +++++++++++++
 include/linux/fsl/mc.h   |  8 ++++++++
 include/linux/iommu.h    |  2 ++
 4 files changed, 30 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index fd1b80ef9490..7baf4b03e8ef 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -52,6 +52,7 @@
 #include <linux/spinlock.h>
 
 #include <linux/amba/bus.h>
+#include <linux/fsl/mc.h>
 
 #include "io-pgtable.h"
 #include "arm-smmu-regs.h"
@@ -1459,6 +1460,8 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev)
 
 	if (dev_is_pci(dev))
 		group = pci_device_group(dev);
+	else if (dev_is_fsl_mc(dev))
+		group = fsl_mc_device_group(dev);
 	else
 		group = generic_device_group(dev);
 
@@ -2036,6 +2039,10 @@ static void arm_smmu_bus_init(void)
 		bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
 	}
 #endif
+#ifdef CONFIG_FSL_MC_BUS
+	if (!iommu_present(&fsl_mc_bus_type))
+		bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
+#endif
 }
 
 static int arm_smmu_device_probe(struct platform_device *pdev)
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 8c15c5980299..7e5cb7cf2bbe 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -32,6 +32,7 @@
 #include <linux/pci.h>
 #include <linux/bitops.h>
 #include <linux/property.h>
+#include <linux/fsl/mc.h>
 #include <trace/events/iommu.h>
 
 static struct kset *iommu_group_kset;
@@ -1024,6 +1025,18 @@ struct iommu_group *pci_device_group(struct device *dev)
 	return iommu_group_alloc();
 }
 
+/* Get the IOMMU group for device on fsl-mc bus */
+struct iommu_group *fsl_mc_device_group(struct device *dev)
+{
+	struct device *cont_dev = fsl_mc_cont_dev(dev);
+	struct iommu_group *group;
+
+	group = iommu_group_get(cont_dev);
+	if (!group)
+		group = iommu_group_alloc();
+	return group;
+}
+
 /**
  * iommu_group_get_for_dev - Find or create the IOMMU group for a device
  * @dev: target device
diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h
index f27cb14088a4..dddaca17d684 100644
--- a/include/linux/fsl/mc.h
+++ b/include/linux/fsl/mc.h
@@ -351,6 +351,14 @@ int mc_send_command(struct fsl_mc_io *mc_io, struct fsl_mc_command *cmd);
 #define dev_is_fsl_mc(_dev) (0)
 #endif
 
+/* Macro to check if a device is a container device */
+#define fsl_mc_is_cont_dev(_dev) (to_fsl_mc_device(_dev)->flags & \
+	FSL_MC_IS_DPRC)
+
+/* Macro to get the container device of a MC device */
+#define fsl_mc_cont_dev(_dev) (fsl_mc_is_cont_dev(_dev) ? \
+	(_dev) : (_dev)->parent)
+
 /*
  * module_fsl_mc_driver() - Helper macro for drivers that don't do
  * anything special in module init/exit.  This eliminates a lot of
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 87994c265bf5..70102df4b994 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -377,6 +377,8 @@ static inline void iommu_tlb_sync(struct iommu_domain *domain)
 extern struct iommu_group *pci_device_group(struct device *dev);
 /* Generic device grouping function */
 extern struct iommu_group *generic_device_group(struct device *dev);
+/* FSL-MC device grouping function */
+struct iommu_group *fsl_mc_device_group(struct device *dev);
 
 /**
  * struct iommu_fwspec - per-device IOMMU instance data
-- 
cgit v1.2.3


From e48d194d1204b19655c1a9d78a67f2f01d2fe432 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Tue, 25 Sep 2018 09:54:14 +0200
Subject: gpio: Add comments on single direction chips

A patch from Ricardo got me thinking about some gpio chip
semantics so let's drop in some comments to make things
more clear around that.

Cc: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Cc: Bartosz Golaszewski <brgl@bgdev.pl>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c      | 33 +++++++++++++++++++++++++++------
 include/linux/gpio/driver.h |  6 +++++-
 2 files changed, 32 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index cabe1b460458..4ce402f9fc69 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2548,20 +2548,31 @@ int gpiod_direction_input(struct gpio_desc *desc)
 	VALIDATE_DESC(desc);
 	chip = desc->gdev->chip;
 
+	/*
+	 * It is legal to have no .get() and .direction_input() specified if
+	 * the chip is output-only, but you can't specify .direction_input()
+	 * and not support the .get() operation, that doesn't make sense.
+	 */
 	if (!chip->get && chip->direction_input) {
 		gpiod_warn(desc,
-			"%s: missing get() and direction_input() operations\n",
-			__func__);
+			   "%s: missing get() but have direction_input()\n",
+			   __func__);
 		return -EIO;
 	}
 
+	/*
+	 * If we have a .direction_input() callback, things are simple,
+	 * just call it. Else we are some input-only chip so try to check the
+	 * direction (if .get_direction() is supported) else we silently
+	 * assume we are in input mode after this.
+	 */
 	if (chip->direction_input) {
 		status = chip->direction_input(chip, gpio_chip_hwgpio(desc));
 	} else if (chip->get_direction &&
 		  (chip->get_direction(chip, gpio_chip_hwgpio(desc)) != 1)) {
 		gpiod_warn(desc,
-			"%s: missing direction_input() operation\n",
-			__func__);
+			   "%s: missing direction_input() operation and line is output\n",
+			   __func__);
 		return -EIO;
 	}
 	if (status == 0)
@@ -2587,16 +2598,22 @@ static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value)
 	int val = !!value;
 	int ret = 0;
 
+	/*
+	 * It's OK not to specify .direction_output() if the gpiochip is
+	 * output-only, but if there is then not even a .set() operation it
+	 * is pretty tricky to drive the output line.
+	 */
 	if (!gc->set && !gc->direction_output) {
 		gpiod_warn(desc,
-		       "%s: missing set() and direction_output() operations\n",
-		       __func__);
+			   "%s: missing set() and direction_output() operations\n",
+			   __func__);
 		return -EIO;
 	}
 
 	if (gc->direction_output) {
 		ret = gc->direction_output(gc, gpio_chip_hwgpio(desc), val);
 	} else {
+		/* Check that we are in output mode if we can */
 		if (gc->get_direction &&
 		    gc->get_direction(gc, gpio_chip_hwgpio(desc))) {
 			gpiod_warn(desc,
@@ -2604,6 +2621,10 @@ static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value)
 				__func__);
 			return -EIO;
 		}
+		/*
+		 * If we can't actively set the direction, we are some
+		 * output-only chip, so just drive the output as desired.
+		 */
 		gc->set(gc, gpio_chip_hwgpio(desc), val);
 	}
 
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index d8dcd0e44cab..f6b95734073f 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -178,9 +178,13 @@ static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip)
  * @free: optional hook for chip-specific deactivation, such as
  *	disabling module power and clock; may sleep
  * @get_direction: returns direction for signal "offset", 0=out, 1=in,
- *	(same as GPIOF_DIR_XXX), or negative error
+ *	(same as GPIOF_DIR_XXX), or negative error.
+ *	It is recommended to always implement this function, even on
+ *	input-only or output-only gpio chips.
  * @direction_input: configures signal "offset" as input, or returns error
+ *	This can be omitted on input-only or output-only gpio chips.
  * @direction_output: configures signal "offset" as output, or returns error
+ *	This can be omitted on input-only or output-only gpio chips.
  * @get: returns value for signal "offset", 0=low, 1=high, or negative error
  * @get_multiple: reads values for multiple signals defined by "mask" and
  *	stores them in "bits", returns 0 on success or negative error
-- 
cgit v1.2.3


From bee60e94a1e20ec0b8ffdafae270731d8fda4551 Mon Sep 17 00:00:00 2001
From: Ganapatrao Kulkarni <ganapatrao.kulkarni@cavium.com>
Date: Wed, 5 Sep 2018 09:57:36 +0530
Subject: iommu/iova: Optimise attempts to allocate iova from 32bit address
 range

As an optimisation for PCI devices, there is always first attempt
been made to allocate iova from SAC address range. This will lead
to unnecessary attempts, when there are no free ranges
available. Adding fix to track recently failed iova address size and
allow further attempts, only if requested size is lesser than a failed
size. The size is updated when any replenish happens.

Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Ganapatrao Kulkarni <ganapatrao.kulkarni@cavium.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iova.c | 22 +++++++++++++++-------
 include/linux/iova.h |  1 +
 2 files changed, 16 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 83fe2621effe..f8d3ba247523 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -56,6 +56,7 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
 	iovad->granule = granule;
 	iovad->start_pfn = start_pfn;
 	iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
+	iovad->max32_alloc_size = iovad->dma_32bit_pfn;
 	iovad->flush_cb = NULL;
 	iovad->fq = NULL;
 	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
@@ -139,8 +140,10 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
 
 	cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
 	if (free->pfn_hi < iovad->dma_32bit_pfn &&
-	    free->pfn_lo >= cached_iova->pfn_lo)
+	    free->pfn_lo >= cached_iova->pfn_lo) {
 		iovad->cached32_node = rb_next(&free->node);
+		iovad->max32_alloc_size = iovad->dma_32bit_pfn;
+	}
 
 	cached_iova = rb_entry(iovad->cached_node, struct iova, node);
 	if (free->pfn_lo >= cached_iova->pfn_lo)
@@ -190,6 +193,10 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 
 	/* Walk the tree backwards */
 	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+	if (limit_pfn <= iovad->dma_32bit_pfn &&
+			size >= iovad->max32_alloc_size)
+		goto iova32_full;
+
 	curr = __get_cached_rbnode(iovad, limit_pfn);
 	curr_iova = rb_entry(curr, struct iova, node);
 	do {
@@ -200,10 +207,8 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 		curr_iova = rb_entry(curr, struct iova, node);
 	} while (curr && new_pfn <= curr_iova->pfn_hi);
 
-	if (limit_pfn < size || new_pfn < iovad->start_pfn) {
-		spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-		return -ENOMEM;
-	}
+	if (limit_pfn < size || new_pfn < iovad->start_pfn)
+		goto iova32_full;
 
 	/* pfn_lo will point to size aligned address if size_aligned is set */
 	new->pfn_lo = new_pfn;
@@ -214,9 +219,12 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
 	__cached_rbnode_insert_update(iovad, new);
 
 	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-
-
 	return 0;
+
+iova32_full:
+	iovad->max32_alloc_size = size;
+	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+	return -ENOMEM;
 }
 
 static struct kmem_cache *iova_cache;
diff --git a/include/linux/iova.h b/include/linux/iova.h
index 928442dda565..0b93bf96693e 100644
--- a/include/linux/iova.h
+++ b/include/linux/iova.h
@@ -75,6 +75,7 @@ struct iova_domain {
 	unsigned long	granule;	/* pfn granularity for this domain */
 	unsigned long	start_pfn;	/* Lower limit for this domain */
 	unsigned long	dma_32bit_pfn;
+	unsigned long	max32_alloc_size; /* Size of last failed allocation */
 	struct iova	anchor;		/* rbtree lookup anchor */
 	struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE];	/* IOVA range caches */
 
-- 
cgit v1.2.3


From 6af588fed39178c8e118fcf9cb6664e58a1fbe88 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Wed, 12 Sep 2018 16:24:12 +0100
Subject: iommu: Add fast hook for getting DMA domains

While iommu_get_domain_for_dev() is the robust way for arbitrary IOMMU
API callers to retrieve the domain pointer, for DMA ops domains it
doesn't scale well for large systems and multi-queue devices, since the
momentary refcount adjustment will lead to exclusive cacheline contention
when multiple CPUs are operating in parallel on different mappings for
the same device.

In the case of DMA ops domains, however, this refcounting is actually
unnecessary, since they already imply that the group exists and is
managed by platform code and IOMMU internals (by virtue of
iommu_group_get_for_dev()) such that a reference will already be held
for the lifetime of the device. Thus we can avoid the bottleneck by
providing a fast lookup specifically for the DMA code to retrieve the
default domain it already knows it has set up - a simple read-only
dereference plays much nicer with cache-coherency protocols.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu.c | 9 +++++++++
 include/linux/iommu.h | 1 +
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 8c15c5980299..9d70344204fe 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1415,6 +1415,15 @@ struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);
 
+/*
+ * For IOMMU_DOMAIN_DMA implementations which already provide their own
+ * guarantees that the group and its default domain are valid and correct.
+ */
+struct iommu_domain *iommu_get_dma_domain(struct device *dev)
+{
+	return dev->iommu_group->default_domain;
+}
+
 /*
  * IOMMU groups are really the natrual working unit of the IOMMU, but
  * the IOMMU API works on domains and devices.  Bridge that gap by
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 87994c265bf5..c783648d4060 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -293,6 +293,7 @@ extern int iommu_attach_device(struct iommu_domain *domain,
 extern void iommu_detach_device(struct iommu_domain *domain,
 				struct device *dev);
 extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
+extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
 extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
 		     phys_addr_t paddr, size_t size, int prot);
 extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
-- 
cgit v1.2.3


From 26b86092c4650311256fa2372ced7e1e17d97d7b Mon Sep 17 00:00:00 2001
From: Sohil Mehta <sohil.mehta@intel.com>
Date: Tue, 11 Sep 2018 17:11:36 -0700
Subject: iommu/vt-d: Relocate struct/function declarations to its header files

To reuse the static functions and the struct declarations, move them to
corresponding header files and export the needed functions.

Cc: Lu Baolu <baolu.lu@linux.intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Ashok Raj <ashok.raj@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Gayatri Kammela <gayatri.kammela@intel.com>
Signed-off-by: Sohil Mehta <sohil.mehta@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 arch/x86/include/asm/irq_remapping.h |  2 ++
 drivers/iommu/intel-iommu.c          | 31 +++----------------------------
 drivers/iommu/intel_irq_remapping.c  |  2 +-
 include/linux/intel-iommu.h          | 30 ++++++++++++++++++++++++++++++
 4 files changed, 36 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5f26962eff42..67ed72f31cc2 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -45,6 +45,8 @@ struct vcpu_data {
 
 #ifdef CONFIG_IRQ_REMAP
 
+extern raw_spinlock_t irq_2_ir_lock;
+
 extern bool irq_remapping_cap(enum irq_remap_cap cap);
 extern void set_irq_remapping_broken(void);
 extern int irq_remapping_prepare(void);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 5f3f10cf9d9d..b5868c757995 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -185,16 +185,6 @@ static int rwbf_quirk;
 static int force_on = 0;
 int intel_iommu_tboot_noforce;
 
-/*
- * 0: Present
- * 1-11: Reserved
- * 12-63: Context Ptr (12 - (haw-1))
- * 64-127: Reserved
- */
-struct root_entry {
-	u64	lo;
-	u64	hi;
-};
 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
 
 /*
@@ -220,21 +210,6 @@ static phys_addr_t root_entry_uctp(struct root_entry *re)
 
 	return re->hi & VTD_PAGE_MASK;
 }
-/*
- * low 64 bits:
- * 0: present
- * 1: fault processing disable
- * 2-3: translation type
- * 12-63: address space root
- * high 64 bits:
- * 0-2: address width
- * 3-6: aval
- * 8-23: domain id
- */
-struct context_entry {
-	u64 lo;
-	u64 hi;
-};
 
 static inline void context_clear_pasid_enable(struct context_entry *context)
 {
@@ -261,7 +236,7 @@ static inline bool __context_present(struct context_entry *context)
 	return (context->lo & 1);
 }
 
-static inline bool context_present(struct context_entry *context)
+bool context_present(struct context_entry *context)
 {
 	return context_pasid_enabled(context) ?
 	     __context_present(context) :
@@ -788,8 +763,8 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
 	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
 }
 
-static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
-						       u8 bus, u8 devfn, int alloc)
+struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
+					 u8 devfn, int alloc)
 {
 	struct root_entry *root = &iommu->root_entry[bus];
 	struct context_entry *context;
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 967450bd421a..c2d6c11431de 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -76,7 +76,7 @@ static struct hpet_scope ir_hpet[MAX_HPET_TBS];
  * in single-threaded environment with interrupt disabled, so no need to tabke
  * the dmar_global_lock.
  */
-static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
+DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
 static const struct irq_domain_ops intel_ir_domain_ops;
 
 static void iommu_disable_irq_remapping(struct intel_iommu *iommu);
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 28004d74ae04..b7cf32e8ae1f 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -389,6 +389,33 @@ struct pasid_entry;
 struct pasid_state_entry;
 struct page_req_dsc;
 
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+	u64     lo;
+	u64     hi;
+};
+
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: aval
+ * 8-23: domain id
+ */
+struct context_entry {
+	u64 lo;
+	u64 hi;
+};
+
 struct dmar_domain {
 	int	nid;			/* node id */
 
@@ -559,5 +586,8 @@ extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev);
 #endif
 
 extern const struct attribute_group *intel_iommu_groups[];
+bool context_present(struct context_entry *context);
+struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
+					 u8 devfn, int alloc);
 
 #endif
-- 
cgit v1.2.3


From 4a2d80dbadb72b998641af32d8dd4b7b39e72aa0 Mon Sep 17 00:00:00 2001
From: Sohil Mehta <sohil.mehta@intel.com>
Date: Tue, 11 Sep 2018 17:11:37 -0700
Subject: iommu/vt-d: Update register definitions to VT-d 3.0 specification

Add new register definitions added in the VT-d 3.0 specification. Also
include registers that were missing previously.

Cc: Lu Baolu <baolu.lu@linux.intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Gayatri Kammela <gayatri.kammela@intel.com>
Signed-off-by: Sohil Mehta <sohil.mehta@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/intel-iommu.h | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index b7cf32e8ae1f..3bdb9aa198af 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -72,6 +72,42 @@
 #define	DMAR_PEDATA_REG	0xe4	/* Page request event interrupt data register */
 #define	DMAR_PEADDR_REG	0xe8	/* Page request event interrupt addr register */
 #define	DMAR_PEUADDR_REG 0xec	/* Page request event Upper address register */
+#define DMAR_MTRRCAP_REG 0x100	/* MTRR capability register */
+#define DMAR_MTRRDEF_REG 0x108	/* MTRR default type register */
+#define DMAR_MTRR_FIX64K_00000_REG 0x120 /* MTRR Fixed range registers */
+#define DMAR_MTRR_FIX16K_80000_REG 0x128
+#define DMAR_MTRR_FIX16K_A0000_REG 0x130
+#define DMAR_MTRR_FIX4K_C0000_REG 0x138
+#define DMAR_MTRR_FIX4K_C8000_REG 0x140
+#define DMAR_MTRR_FIX4K_D0000_REG 0x148
+#define DMAR_MTRR_FIX4K_D8000_REG 0x150
+#define DMAR_MTRR_FIX4K_E0000_REG 0x158
+#define DMAR_MTRR_FIX4K_E8000_REG 0x160
+#define DMAR_MTRR_FIX4K_F0000_REG 0x168
+#define DMAR_MTRR_FIX4K_F8000_REG 0x170
+#define DMAR_MTRR_PHYSBASE0_REG 0x180 /* MTRR Variable range registers */
+#define DMAR_MTRR_PHYSMASK0_REG 0x188
+#define DMAR_MTRR_PHYSBASE1_REG 0x190
+#define DMAR_MTRR_PHYSMASK1_REG 0x198
+#define DMAR_MTRR_PHYSBASE2_REG 0x1a0
+#define DMAR_MTRR_PHYSMASK2_REG 0x1a8
+#define DMAR_MTRR_PHYSBASE3_REG 0x1b0
+#define DMAR_MTRR_PHYSMASK3_REG 0x1b8
+#define DMAR_MTRR_PHYSBASE4_REG 0x1c0
+#define DMAR_MTRR_PHYSMASK4_REG 0x1c8
+#define DMAR_MTRR_PHYSBASE5_REG 0x1d0
+#define DMAR_MTRR_PHYSMASK5_REG 0x1d8
+#define DMAR_MTRR_PHYSBASE6_REG 0x1e0
+#define DMAR_MTRR_PHYSMASK6_REG 0x1e8
+#define DMAR_MTRR_PHYSBASE7_REG 0x1f0
+#define DMAR_MTRR_PHYSMASK7_REG 0x1f8
+#define DMAR_MTRR_PHYSBASE8_REG 0x200
+#define DMAR_MTRR_PHYSMASK8_REG 0x208
+#define DMAR_MTRR_PHYSBASE9_REG 0x210
+#define DMAR_MTRR_PHYSMASK9_REG 0x218
+#define DMAR_VCCAP_REG		0xe00 /* Virtual command capability register */
+#define DMAR_VCMD_REG		0xe10 /* Virtual command register */
+#define DMAR_VCRSP_REG		0xe20 /* Virtual command response register */
 
 #define OFFSET_STRIDE		(9)
 
-- 
cgit v1.2.3


From ee2636b8670b1ab2a02a65923a9bef59e9199c37 Mon Sep 17 00:00:00 2001
From: Sohil Mehta <sohil.mehta@intel.com>
Date: Tue, 11 Sep 2018 17:11:38 -0700
Subject: iommu/vt-d: Enable base Intel IOMMU debugfs support

Add a new config option CONFIG_INTEL_IOMMU_DEBUGFS and do the base
enabling for Intel IOMMU debugfs.

Cc: Lu Baolu <baolu.lu@linux.intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Ashok Raj <ashok.raj@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Co-Developed-by: Gayatri Kammela <gayatri.kammela@intel.com>
Signed-off-by: Gayatri Kammela <gayatri.kammela@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Sohil Mehta <sohil.mehta@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/Kconfig               | 13 +++++++++++++
 drivers/iommu/Makefile              |  1 +
 drivers/iommu/intel-iommu-debugfs.c | 20 ++++++++++++++++++++
 drivers/iommu/intel-iommu.c         |  1 +
 include/linux/intel-iommu.h         |  6 ++++++
 5 files changed, 41 insertions(+)
 create mode 100644 drivers/iommu/intel-iommu-debugfs.c

(limited to 'include/linux')

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index c60395b7470f..51ba19c8847b 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -186,6 +186,19 @@ config INTEL_IOMMU
 	  and include PCI device scope covered by these DMA
 	  remapping devices.
 
+config INTEL_IOMMU_DEBUGFS
+	bool "Export Intel IOMMU internals in Debugfs"
+	depends on INTEL_IOMMU && IOMMU_DEBUGFS
+	help
+	  !!!WARNING!!!
+
+	  DO NOT ENABLE THIS OPTION UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!!!
+
+	  Expose Intel IOMMU internals in Debugfs.
+
+	  This option is -NOT- intended for production environments, and should
+	  only be enabled for debugging Intel IOMMU.
+
 config INTEL_IOMMU_SVM
 	bool "Support for Shared Virtual Memory with Intel IOMMU"
 	depends on INTEL_IOMMU && X86
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index ab5eba6edf82..a158a68c8ea8 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
 obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
 obj-$(CONFIG_DMAR_TABLE) += dmar.o
 obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o
+obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += intel-iommu-debugfs.o
 obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o
 obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
 obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o
diff --git a/drivers/iommu/intel-iommu-debugfs.c b/drivers/iommu/intel-iommu-debugfs.c
new file mode 100644
index 000000000000..33e0a6c12d92
--- /dev/null
+++ b/drivers/iommu/intel-iommu-debugfs.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2018 Intel Corporation.
+ *
+ * Authors: Gayatri Kammela <gayatri.kammela@intel.com>
+ *	    Sohil Mehta <sohil.mehta@intel.com>
+ *	    Jacob Pan <jacob.jun.pan@linux.intel.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+#include <linux/pci.h>
+
+#include <asm/irq_remapping.h>
+
+void __init intel_iommu_debugfs_init(void)
+{
+	debugfs_create_dir("intel", iommu_debugfs_dir);
+}
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index b5868c757995..b9cf7c8c1616 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -4837,6 +4837,7 @@ int __init intel_iommu_init(void)
 	cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
 			  intel_iommu_cpu_dead);
 	intel_iommu_enabled = 1;
+	intel_iommu_debugfs_init();
 
 	return 0;
 
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 3bdb9aa198af..b0ae25837361 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -621,6 +621,12 @@ extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_
 extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev);
 #endif
 
+#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
+void intel_iommu_debugfs_init(void);
+#else
+static inline void intel_iommu_debugfs_init(void) {}
+#endif /* CONFIG_INTEL_IOMMU_DEBUGFS */
+
 extern const struct attribute_group *intel_iommu_groups[];
 bool context_present(struct context_entry *context);
 struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
-- 
cgit v1.2.3


From 5131e08cd0f8a3faa30c1cdaf53940d6bce1715a Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Wed, 19 Sep 2018 11:12:58 +0100
Subject: iommu: Remove .domain_{get,set}_windows

Since these are trivially handled by the .domain_{get,set}_attr
callbacks when relevant, we can streamline struct iommu_ops for
everyone.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/fsl_pamu_domain.c | 125 ++++++++++++++++------------------------
 include/linux/iommu.h           |   6 --
 2 files changed, 51 insertions(+), 80 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index f83965ee3095..a906ce8cf83b 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -814,11 +814,59 @@ static int configure_domain_dma_state(struct fsl_dma_domain *dma_domain, bool en
 	return 0;
 }
 
+static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count)
+{
+	struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&dma_domain->domain_lock, flags);
+	/* Ensure domain is inactive i.e. DMA should be disabled for the domain */
+	if (dma_domain->enabled) {
+		pr_debug("Can't set geometry attributes as domain is active\n");
+		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+		return  -EBUSY;
+	}
+
+	/* Ensure that the geometry has been set for the domain */
+	if (!dma_domain->geom_size) {
+		pr_debug("Please configure geometry before setting the number of windows\n");
+		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+		return -EINVAL;
+	}
+
+	/*
+	 * Ensure we have valid window count i.e. it should be less than
+	 * maximum permissible limit and should be a power of two.
+	 */
+	if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) {
+		pr_debug("Invalid window count\n");
+		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+		return -EINVAL;
+	}
+
+	ret = pamu_set_domain_geometry(dma_domain, &domain->geometry,
+				       w_count > 1 ? w_count : 0);
+	if (!ret) {
+		kfree(dma_domain->win_arr);
+		dma_domain->win_arr = kcalloc(w_count,
+					      sizeof(*dma_domain->win_arr),
+					      GFP_ATOMIC);
+		if (!dma_domain->win_arr) {
+			spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+			return -ENOMEM;
+		}
+		dma_domain->win_cnt = w_count;
+	}
+	spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+	return ret;
+}
+
 static int fsl_pamu_set_domain_attr(struct iommu_domain *domain,
 				    enum iommu_attr attr_type, void *data)
 {
 	struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
-	u32 *count;
 	int ret = 0;
 
 	switch (attr_type) {
@@ -832,13 +880,7 @@ static int fsl_pamu_set_domain_attr(struct iommu_domain *domain,
 		ret = configure_domain_dma_state(dma_domain, *(int *)data);
 		break;
 	case DOMAIN_ATTR_WINDOWS:
-		count = data;
-
-		if (domain->ops->domain_set_windows != NULL)
-			ret = domain->ops->domain_set_windows(domain, *count);
-		else
-			ret = -ENODEV;
-
+		ret = fsl_pamu_set_windows(domain, *(u32 *)data);
 		break;
 	default:
 		pr_debug("Unsupported attribute type\n");
@@ -853,7 +895,6 @@ static int fsl_pamu_get_domain_attr(struct iommu_domain *domain,
 				    enum iommu_attr attr_type, void *data)
 {
 	struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
-	u32 *count;
 	int ret = 0;
 
 	switch (attr_type) {
@@ -868,13 +909,7 @@ static int fsl_pamu_get_domain_attr(struct iommu_domain *domain,
 		*(int *)data = DOMAIN_ATTR_FSL_PAMUV1;
 		break;
 	case DOMAIN_ATTR_WINDOWS:
-		count = data;
-
-		if (domain->ops->domain_get_windows != NULL)
-			*count = domain->ops->domain_get_windows(domain);
-		else
-			ret = -ENODEV;
-
+		*(u32 *)data = dma_domain->win_cnt;
 		break;
 	default:
 		pr_debug("Unsupported attribute type\n");
@@ -1014,62 +1049,6 @@ static void fsl_pamu_remove_device(struct device *dev)
 	iommu_group_remove_device(dev);
 }
 
-static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count)
-{
-	struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&dma_domain->domain_lock, flags);
-	/* Ensure domain is inactive i.e. DMA should be disabled for the domain */
-	if (dma_domain->enabled) {
-		pr_debug("Can't set geometry attributes as domain is active\n");
-		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
-		return  -EBUSY;
-	}
-
-	/* Ensure that the geometry has been set for the domain */
-	if (!dma_domain->geom_size) {
-		pr_debug("Please configure geometry before setting the number of windows\n");
-		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
-		return -EINVAL;
-	}
-
-	/*
-	 * Ensure we have valid window count i.e. it should be less than
-	 * maximum permissible limit and should be a power of two.
-	 */
-	if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) {
-		pr_debug("Invalid window count\n");
-		spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
-		return -EINVAL;
-	}
-
-	ret = pamu_set_domain_geometry(dma_domain, &domain->geometry,
-				       w_count > 1 ? w_count : 0);
-	if (!ret) {
-		kfree(dma_domain->win_arr);
-		dma_domain->win_arr = kcalloc(w_count,
-					      sizeof(*dma_domain->win_arr),
-					      GFP_ATOMIC);
-		if (!dma_domain->win_arr) {
-			spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
-			return -ENOMEM;
-		}
-		dma_domain->win_cnt = w_count;
-	}
-	spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
-
-	return ret;
-}
-
-static u32 fsl_pamu_get_windows(struct iommu_domain *domain)
-{
-	struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
-
-	return dma_domain->win_cnt;
-}
-
 static const struct iommu_ops fsl_pamu_ops = {
 	.capable	= fsl_pamu_capable,
 	.domain_alloc	= fsl_pamu_domain_alloc,
@@ -1078,8 +1057,6 @@ static const struct iommu_ops fsl_pamu_ops = {
 	.detach_dev	= fsl_pamu_detach_device,
 	.domain_window_enable = fsl_pamu_window_enable,
 	.domain_window_disable = fsl_pamu_window_disable,
-	.domain_get_windows = fsl_pamu_get_windows,
-	.domain_set_windows = fsl_pamu_set_windows,
 	.iova_to_phys	= fsl_pamu_iova_to_phys,
 	.domain_set_attr = fsl_pamu_set_domain_attr,
 	.domain_get_attr = fsl_pamu_get_domain_attr,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index c783648d4060..c08ba5d2d451 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -181,8 +181,6 @@ struct iommu_resv_region {
  * @apply_resv_region: Temporary helper call-back for iova reserved ranges
  * @domain_window_enable: Configure and enable a particular window for a domain
  * @domain_window_disable: Disable a particular window for a domain
- * @domain_set_windows: Set the number of windows for a domain
- * @domain_get_windows: Return the number of windows for a domain
  * @of_xlate: add OF master IDs to iommu grouping
  * @pgsize_bitmap: bitmap of all possible supported page sizes
  */
@@ -223,10 +221,6 @@ struct iommu_ops {
 	int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
 				    phys_addr_t paddr, u64 size, int prot);
 	void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr);
-	/* Set the number of windows per domain */
-	int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
-	/* Get the number of windows per domain */
-	u32 (*domain_get_windows)(struct iommu_domain *domain);
 
 	int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
 	bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
-- 
cgit v1.2.3


From c2c729415b2d21329104fecaa86878d295f1041f Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Thu, 20 Sep 2018 13:17:42 -0600
Subject: coresight: platform: Cleanup coresight connection handling

The platform code parses the component connections and populates
a platform-description of the output connections in arrays of fields
(which is never freed). This is later copied in the coresight_register
to a newly allocated area, represented by coresight_connection(s).

This patch cleans up the code dealing with connections by making
use of the "coresight_connection" structure right at the platform
code and lets the generic driver simply re-use information provided
by the platform.

Thus making it reader friendly as well as avoiding the wastage of
unused memory.

Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwtracing/coresight/coresight.c    | 21 +-----------
 drivers/hwtracing/coresight/of_coresight.c | 53 +++++++++++-------------------
 include/linux/coresight.h                  |  9 ++---
 3 files changed, 22 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 9fd0c387e678..5e8880ca8078 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -995,13 +995,11 @@ postcore_initcall(coresight_init);
 
 struct coresight_device *coresight_register(struct coresight_desc *desc)
 {
-	int i;
 	int ret;
 	int link_subtype;
 	int nr_refcnts = 1;
 	atomic_t *refcnts = NULL;
 	struct coresight_device *csdev;
-	struct coresight_connection *conns = NULL;
 
 	csdev = kzalloc(sizeof(*csdev), GFP_KERNEL);
 	if (!csdev) {
@@ -1030,22 +1028,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
 	csdev->nr_inport = desc->pdata->nr_inport;
 	csdev->nr_outport = desc->pdata->nr_outport;
 
-	/* Initialise connections if there is at least one outport */
-	if (csdev->nr_outport) {
-		conns = kcalloc(csdev->nr_outport, sizeof(*conns), GFP_KERNEL);
-		if (!conns) {
-			ret = -ENOMEM;
-			goto err_free_refcnts;
-		}
-
-		for (i = 0; i < csdev->nr_outport; i++) {
-			conns[i].outport = desc->pdata->outports[i];
-			conns[i].child_name = desc->pdata->child_names[i];
-			conns[i].child_port = desc->pdata->child_ports[i];
-		}
-	}
-
-	csdev->conns = conns;
+	csdev->conns = desc->pdata->conns;
 
 	csdev->type = desc->type;
 	csdev->subtype = desc->subtype;
@@ -1078,8 +1061,6 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
 
 	return csdev;
 
-err_free_refcnts:
-	kfree(refcnts);
 err_free_csdev:
 	kfree(csdev);
 err_out:
diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c
index 44903d35009f..e8fb4e124744 100644
--- a/drivers/hwtracing/coresight/of_coresight.c
+++ b/drivers/hwtracing/coresight/of_coresight.c
@@ -75,29 +75,13 @@ static void of_coresight_get_ports(const struct device_node *node,
 static int of_coresight_alloc_memory(struct device *dev,
 			struct coresight_platform_data *pdata)
 {
-	/* List of output port on this component */
-	pdata->outports = devm_kcalloc(dev,
-				       pdata->nr_outport,
-				       sizeof(*pdata->outports),
-				       GFP_KERNEL);
-	if (!pdata->outports)
-		return -ENOMEM;
-
-	/* Children connected to this component via @outports */
-	pdata->child_names = devm_kcalloc(dev,
-					  pdata->nr_outport,
-					  sizeof(*pdata->child_names),
-					  GFP_KERNEL);
-	if (!pdata->child_names)
-		return -ENOMEM;
-
-	/* Port number on the child this component is connected to */
-	pdata->child_ports = devm_kcalloc(dev,
-					  pdata->nr_outport,
-					  sizeof(*pdata->child_ports),
-					  GFP_KERNEL);
-	if (!pdata->child_ports)
-		return -ENOMEM;
+	if (pdata->nr_outport) {
+		pdata->conns = devm_kzalloc(dev, pdata->nr_outport *
+					    sizeof(*pdata->conns),
+					    GFP_KERNEL);
+		if (!pdata->conns)
+			return -ENOMEM;
+	}
 
 	return 0;
 }
@@ -121,7 +105,7 @@ EXPORT_SYMBOL_GPL(of_coresight_get_cpu);
 
 /*
  * of_coresight_parse_endpoint : Parse the given output endpoint @ep
- * and fill the connection information in @pdata[@i].
+ * and fill the connection information in @conn
  *
  * Parses the local port, remote device name and the remote port.
  *
@@ -133,8 +117,7 @@ EXPORT_SYMBOL_GPL(of_coresight_get_cpu);
  */
 static int of_coresight_parse_endpoint(struct device *dev,
 				       struct device_node *ep,
-				       struct coresight_platform_data *pdata,
-				       int i)
+				       struct coresight_connection *conn)
 {
 	int ret = 0;
 	struct of_endpoint endpoint, rendpoint;
@@ -166,11 +149,11 @@ static int of_coresight_parse_endpoint(struct device *dev,
 			break;
 		}
 
-		pdata->outports[i] = endpoint.port;
-		pdata->child_names[i] = devm_kstrdup(dev,
-						     dev_name(rdev),
-						     GFP_KERNEL);
-		pdata->child_ports[i] = rendpoint.port;
+		conn->outport = endpoint.port;
+		conn->child_name = devm_kstrdup(dev,
+						dev_name(rdev),
+						GFP_KERNEL);
+		conn->child_port = rendpoint.port;
 		/* Connection record updated */
 		ret = 1;
 	} while (0);
@@ -189,8 +172,9 @@ struct coresight_platform_data *
 of_get_coresight_platform_data(struct device *dev,
 			       const struct device_node *node)
 {
-	int i = 0, ret = 0;
+	int ret = 0;
 	struct coresight_platform_data *pdata;
+	struct coresight_connection *conn;
 	struct device_node *ep = NULL;
 
 	pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
@@ -212,6 +196,7 @@ of_get_coresight_platform_data(struct device *dev,
 	if (ret)
 		return ERR_PTR(ret);
 
+	conn = pdata->conns;
 	/* Iterate through each port to discover topology */
 	while ((ep = of_graph_get_next_endpoint(node, ep))) {
 		/*
@@ -221,10 +206,10 @@ of_get_coresight_platform_data(struct device *dev,
 		if (of_coresight_ep_is_input(ep))
 			continue;
 
-		ret = of_coresight_parse_endpoint(dev, ep, pdata, i);
+		ret = of_coresight_parse_endpoint(dev, ep, conn);
 		switch (ret) {
 		case 1:
-			i++;		/* Fall through */
+			conn++;		/* Fall through */
 		case 0:
 			break;
 		default:
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index d828a6efe0b1..41e1f4333bf2 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -94,20 +94,15 @@ union coresight_dev_subtype {
  * @cpu:	the CPU a source belongs to. Only applicable for ETM/PTMs.
  * @name:	name of the component as shown under sysfs.
  * @nr_inport:	number of input ports for this component.
- * @outports:	list of remote endpoint port number.
- * @child_names:name of all child components connected to this device.
- * @child_ports:child component port number the current component is
-		connected  to.
  * @nr_outport:	number of output ports for this component.
+ * @conns:	Array of nr_outport connections from this component
  */
 struct coresight_platform_data {
 	int cpu;
 	const char *name;
 	int nr_inport;
-	int *outports;
-	const char **child_names;
-	int *child_ports;
 	int nr_outport;
+	struct coresight_connection *conns;
 };
 
 /**
-- 
cgit v1.2.3


From 7ec786ad193beb5579223174e119805569a7af3b Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Thu, 20 Sep 2018 13:17:54 -0600
Subject: coresight: perf: Remove reset_buffer call back for sinks

Right now we issue an update_buffer() and reset_buffer() call backs
in succession when we stop tracing an event. The update_buffer is
supposed to check the status of the buffer and make sure the ring buffer
is updated with the trace data. And we store information about the
size of the data collected only to be consumed by the reset_buffer
callback which always follows the update_buffer. This was originally
designed for handling future IPs which could trigger a buffer overflow
interrupt. This patch gets rid of the reset_buffer callback altogether
and performs the actions in update_buffer, making it return the size
collected. We can always add the support for handling the overflow
interrupt case later.

This removes some not-so pretty hack (storing the new head in the
size field for snapshot mode) and cleans it up a little bit.

Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwtracing/coresight/coresight-etb10.c    | 56 +++++------------------
 drivers/hwtracing/coresight/coresight-etm-perf.c |  9 +---
 drivers/hwtracing/coresight/coresight-tmc-etf.c  | 58 +++++-------------------
 include/linux/coresight.h                        |  6 +--
 4 files changed, 26 insertions(+), 103 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
index 3d4b6df32a06..dba75c905e57 100644
--- a/drivers/hwtracing/coresight/coresight-etb10.c
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -319,37 +319,7 @@ static int etb_set_buffer(struct coresight_device *csdev,
 	return ret;
 }
 
-static unsigned long etb_reset_buffer(struct coresight_device *csdev,
-				      struct perf_output_handle *handle,
-				      void *sink_config)
-{
-	unsigned long size = 0;
-	struct cs_buffers *buf = sink_config;
-
-	if (buf) {
-		/*
-		 * In snapshot mode ->data_size holds the new address of the
-		 * ring buffer's head.  The size itself is the whole address
-		 * range since we want the latest information.
-		 */
-		if (buf->snapshot)
-			handle->head = local_xchg(&buf->data_size,
-						  buf->nr_pages << PAGE_SHIFT);
-
-		/*
-		 * Tell the tracer PMU how much we got in this run and if
-		 * something went wrong along the way.  Nobody else can use
-		 * this cs_buffers instance until we are done.  As such
-		 * resetting parameters here and squaring off with the ring
-		 * buffer API in the tracer PMU is fine.
-		 */
-		size = local_xchg(&buf->data_size, 0);
-	}
-
-	return size;
-}
-
-static void etb_update_buffer(struct coresight_device *csdev,
+static unsigned long etb_update_buffer(struct coresight_device *csdev,
 			      struct perf_output_handle *handle,
 			      void *sink_config)
 {
@@ -358,13 +328,13 @@ static void etb_update_buffer(struct coresight_device *csdev,
 	u8 *buf_ptr;
 	const u32 *barrier;
 	u32 read_ptr, write_ptr, capacity;
-	u32 status, read_data, to_read;
-	unsigned long offset;
+	u32 status, read_data;
+	unsigned long offset, to_read;
 	struct cs_buffers *buf = sink_config;
 	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
 	if (!buf)
-		return;
+		return 0;
 
 	capacity = drvdata->buffer_depth * ETB_FRAME_SIZE_WORDS;
 
@@ -469,18 +439,17 @@ static void etb_update_buffer(struct coresight_device *csdev,
 	writel_relaxed(0x0, drvdata->base + ETB_RAM_WRITE_POINTER);
 
 	/*
-	 * In snapshot mode all we have to do is communicate to
-	 * perf_aux_output_end() the address of the current head.  In full
-	 * trace mode the same function expects a size to move rb->aux_head
-	 * forward.
+	 * In snapshot mode we have to update the handle->head to point
+	 * to the new location.
 	 */
-	if (buf->snapshot)
-		local_set(&buf->data_size, (cur * PAGE_SIZE) + offset);
-	else
-		local_add(to_read, &buf->data_size);
-
+	if (buf->snapshot) {
+		handle->head = (cur * PAGE_SIZE) + offset;
+		to_read = buf->nr_pages << PAGE_SHIFT;
+	}
 	etb_enable_hw(drvdata);
 	CS_LOCK(drvdata->base);
+
+	return to_read;
 }
 
 static const struct coresight_ops_sink etb_sink_ops = {
@@ -489,7 +458,6 @@ static const struct coresight_ops_sink etb_sink_ops = {
 	.alloc_buffer	= etb_alloc_buffer,
 	.free_buffer	= etb_free_buffer,
 	.set_buffer	= etb_set_buffer,
-	.reset_buffer	= etb_reset_buffer,
 	.update_buffer	= etb_update_buffer,
 };
 
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 6db76ce6ba5f..ad87441f65d7 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -369,15 +369,8 @@ static void etm_event_stop(struct perf_event *event, int mode)
 		if (!sink_ops(sink)->update_buffer)
 			return;
 
-		sink_ops(sink)->update_buffer(sink, handle,
+		size = sink_ops(sink)->update_buffer(sink, handle,
 					      event_data->snk_config);
-
-		if (!sink_ops(sink)->reset_buffer)
-			return;
-
-		size = sink_ops(sink)->reset_buffer(sink, handle,
-						    event_data->snk_config);
-
 		perf_aux_output_end(handle, size);
 	}
 
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
index 434003a43346..31a98f915641 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
@@ -349,36 +349,7 @@ static int tmc_set_etf_buffer(struct coresight_device *csdev,
 	return ret;
 }
 
-static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev,
-					  struct perf_output_handle *handle,
-					  void *sink_config)
-{
-	long size = 0;
-	struct cs_buffers *buf = sink_config;
-
-	if (buf) {
-		/*
-		 * In snapshot mode ->data_size holds the new address of the
-		 * ring buffer's head.  The size itself is the whole address
-		 * range since we want the latest information.
-		 */
-		if (buf->snapshot)
-			handle->head = local_xchg(&buf->data_size,
-						  buf->nr_pages << PAGE_SHIFT);
-		/*
-		 * Tell the tracer PMU how much we got in this run and if
-		 * something went wrong along the way.  Nobody else can use
-		 * this cs_buffers instance until we are done.  As such
-		 * resetting parameters here and squaring off with the ring
-		 * buffer API in the tracer PMU is fine.
-		 */
-		size = local_xchg(&buf->data_size, 0);
-	}
-
-	return size;
-}
-
-static void tmc_update_etf_buffer(struct coresight_device *csdev,
+static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev,
 				  struct perf_output_handle *handle,
 				  void *sink_config)
 {
@@ -387,17 +358,17 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev,
 	const u32 *barrier;
 	u32 *buf_ptr;
 	u64 read_ptr, write_ptr;
-	u32 status, to_read;
-	unsigned long offset;
+	u32 status;
+	unsigned long offset, to_read;
 	struct cs_buffers *buf = sink_config;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
 	if (!buf)
-		return;
+		return 0;
 
 	/* This shouldn't happen */
 	if (WARN_ON_ONCE(drvdata->mode != CS_MODE_PERF))
-		return;
+		return 0;
 
 	CS_UNLOCK(drvdata->base);
 
@@ -486,18 +457,14 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev,
 		}
 	}
 
-	/*
-	 * In snapshot mode all we have to do is communicate to
-	 * perf_aux_output_end() the address of the current head.  In full
-	 * trace mode the same function expects a size to move rb->aux_head
-	 * forward.
-	 */
-	if (buf->snapshot)
-		local_set(&buf->data_size, (cur * PAGE_SIZE) + offset);
-	else
-		local_add(to_read, &buf->data_size);
-
+	/* In snapshot mode we have to update the head */
+	if (buf->snapshot) {
+		handle->head = (cur * PAGE_SIZE) + offset;
+		to_read = buf->nr_pages << PAGE_SHIFT;
+	}
 	CS_LOCK(drvdata->base);
+
+	return to_read;
 }
 
 static const struct coresight_ops_sink tmc_etf_sink_ops = {
@@ -506,7 +473,6 @@ static const struct coresight_ops_sink tmc_etf_sink_ops = {
 	.alloc_buffer	= tmc_alloc_etf_buffer,
 	.free_buffer	= tmc_free_etf_buffer,
 	.set_buffer	= tmc_set_etf_buffer,
-	.reset_buffer	= tmc_reset_etf_buffer,
 	.update_buffer	= tmc_update_etf_buffer,
 };
 
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 41e1f4333bf2..8e52682b1e90 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -186,7 +186,6 @@ struct coresight_device {
  * @alloc_buffer:	initialises perf's ring buffer for trace collection.
  * @free_buffer:	release memory allocated in @get_config.
  * @set_buffer:		initialises buffer mechanic before a trace session.
- * @reset_buffer:	finalises buffer mechanic after a trace session.
  * @update_buffer:	update buffer pointers after a trace session.
  */
 struct coresight_ops_sink {
@@ -198,10 +197,7 @@ struct coresight_ops_sink {
 	int (*set_buffer)(struct coresight_device *csdev,
 			  struct perf_output_handle *handle,
 			  void *sink_config);
-	unsigned long (*reset_buffer)(struct coresight_device *csdev,
-				      struct perf_output_handle *handle,
-				      void *sink_config);
-	void (*update_buffer)(struct coresight_device *csdev,
+	unsigned long (*update_buffer)(struct coresight_device *csdev,
 			      struct perf_output_handle *handle,
 			      void *sink_config);
 };
-- 
cgit v1.2.3


From 3d6e8935758392179645e1b105789b3da329ad38 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Thu, 20 Sep 2018 13:17:56 -0600
Subject: coresight: perf: Remove set_buffer call back

In coresight perf mode, we need to prepare the sink before
starting a session, which is done via set_buffer call back.
We then proceed to enable the tracing. If we fail to start
the session successfully, we leave the sink configuration
unchanged.  In order to make the operation atomic and to
avoid yet another call back to clear the buffer, we get
rid of the "set_buffer" call back and pass the buffer details
via enable() call back to the sink.

Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwtracing/coresight/coresight-etb10.c    | 32 ++++++++++++++++++------
 drivers/hwtracing/coresight/coresight-etm-perf.c |  9 ++-----
 drivers/hwtracing/coresight/coresight-priv.h     |  2 +-
 drivers/hwtracing/coresight/coresight-tmc-etf.c  | 28 ++++++++++++++-------
 drivers/hwtracing/coresight/coresight-tmc-etr.c  |  7 +++---
 drivers/hwtracing/coresight/coresight-tpiu.c     |  2 +-
 drivers/hwtracing/coresight/coresight.c          | 11 ++++----
 include/linux/coresight.h                        |  6 +----
 8 files changed, 59 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c
index dba75c905e57..9fd77fdc1244 100644
--- a/drivers/hwtracing/coresight/coresight-etb10.c
+++ b/drivers/hwtracing/coresight/coresight-etb10.c
@@ -28,6 +28,7 @@
 
 
 #include "coresight-priv.h"
+#include "coresight-etm-perf.h"
 
 #define ETB_RAM_DEPTH_REG	0x004
 #define ETB_STATUS_REG		0x00c
@@ -90,6 +91,9 @@ struct etb_drvdata {
 	u32			trigger_cntr;
 };
 
+static int etb_set_buffer(struct coresight_device *csdev,
+			  struct perf_output_handle *handle);
+
 static unsigned int etb_get_buffer_depth(struct etb_drvdata *drvdata)
 {
 	u32 depth = 0;
@@ -131,12 +135,24 @@ static void etb_enable_hw(struct etb_drvdata *drvdata)
 	CS_LOCK(drvdata->base);
 }
 
-static int etb_enable(struct coresight_device *csdev, u32 mode)
+static int etb_enable(struct coresight_device *csdev, u32 mode, void *data)
 {
+	int ret = 0;
 	u32 val;
 	unsigned long flags;
 	struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
+	/*
+	 * We don't have an internal state to clean up if we fail to setup
+	 * the perf buffer. So we can perform the step before we turn the
+	 * ETB on and leave without cleaning up.
+	 */
+	if (mode == CS_MODE_PERF) {
+		ret = etb_set_buffer(csdev, (struct perf_output_handle *)data);
+		if (ret)
+			goto out;
+	}
+
 	val = local_cmpxchg(&drvdata->mode,
 			    CS_MODE_DISABLED, mode);
 	/*
@@ -160,8 +176,9 @@ static int etb_enable(struct coresight_device *csdev, u32 mode)
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
 out:
-	dev_dbg(drvdata->dev, "ETB enabled\n");
-	return 0;
+	if (!ret)
+		dev_dbg(drvdata->dev, "ETB enabled\n");
+	return ret;
 }
 
 static void etb_disable_hw(struct etb_drvdata *drvdata)
@@ -298,12 +315,14 @@ static void etb_free_buffer(void *config)
 }
 
 static int etb_set_buffer(struct coresight_device *csdev,
-			  struct perf_output_handle *handle,
-			  void *sink_config)
+			  struct perf_output_handle *handle)
 {
 	int ret = 0;
 	unsigned long head;
-	struct cs_buffers *buf = sink_config;
+	struct cs_buffers *buf = etm_perf_sink_config(handle);
+
+	if (!buf)
+		return -EINVAL;
 
 	/* wrap head around to the amount of space we have */
 	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
@@ -457,7 +476,6 @@ static const struct coresight_ops_sink etb_sink_ops = {
 	.disable	= etb_disable,
 	.alloc_buffer	= etb_alloc_buffer,
 	.free_buffer	= etb_free_buffer,
-	.set_buffer	= etb_set_buffer,
 	.update_buffer	= etb_update_buffer,
 };
 
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 16b83d8b2ac2..abe8249b893b 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -286,16 +286,11 @@ static void etm_event_start(struct perf_event *event, int flags)
 	path = etm_event_cpu_path(event_data, cpu);
 	/* We need a sink, no need to continue without one */
 	sink = coresight_get_sink(path);
-	if (WARN_ON_ONCE(!sink || !sink_ops(sink)->set_buffer))
-		goto fail_end_stop;
-
-	/* Configure the sink */
-	if (sink_ops(sink)->set_buffer(sink, handle,
-				       event_data->snk_config))
+	if (WARN_ON_ONCE(!sink))
 		goto fail_end_stop;
 
 	/* Nothing will happen without a path */
-	if (coresight_enable_path(path, CS_MODE_PERF))
+	if (coresight_enable_path(path, CS_MODE_PERF, handle))
 		goto fail_end_stop;
 
 	/* Tell the perf core the event is alive */
diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
index 1a6cf3589866..c11da5564a67 100644
--- a/drivers/hwtracing/coresight/coresight-priv.h
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -137,7 +137,7 @@ static inline void coresight_write_reg_pair(void __iomem *addr, u64 val,
 }
 
 void coresight_disable_path(struct list_head *path);
-int coresight_enable_path(struct list_head *path, u32 mode);
+int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data);
 struct coresight_device *coresight_get_sink(struct list_head *path);
 struct coresight_device *coresight_get_enabled_sink(bool reset);
 struct list_head *coresight_build_path(struct coresight_device *csdev,
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c
index 31a98f915641..4156c95ce1bb 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etf.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c
@@ -10,6 +10,10 @@
 #include <linux/slab.h>
 #include "coresight-priv.h"
 #include "coresight-tmc.h"
+#include "coresight-etm-perf.h"
+
+static int tmc_set_etf_buffer(struct coresight_device *csdev,
+			      struct perf_output_handle *handle);
 
 static void tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
 {
@@ -182,11 +186,12 @@ out:
 	return ret;
 }
 
-static int tmc_enable_etf_sink_perf(struct coresight_device *csdev)
+static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, void *data)
 {
 	int ret = 0;
 	unsigned long flags;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+	struct perf_output_handle *handle = data;
 
 	spin_lock_irqsave(&drvdata->spinlock, flags);
 	if (drvdata->reading) {
@@ -204,15 +209,19 @@ static int tmc_enable_etf_sink_perf(struct coresight_device *csdev)
 		goto out;
 	}
 
-	drvdata->mode = CS_MODE_PERF;
-	tmc_etb_enable_hw(drvdata);
+	ret = tmc_set_etf_buffer(csdev, handle);
+	if (!ret) {
+		drvdata->mode = CS_MODE_PERF;
+		tmc_etb_enable_hw(drvdata);
+	}
 out:
 	spin_unlock_irqrestore(&drvdata->spinlock, flags);
 
 	return ret;
 }
 
-static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode)
+static int tmc_enable_etf_sink(struct coresight_device *csdev,
+			       u32 mode, void *data)
 {
 	int ret;
 	struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
@@ -222,7 +231,7 @@ static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode)
 		ret = tmc_enable_etf_sink_sysfs(csdev);
 		break;
 	case CS_MODE_PERF:
-		ret = tmc_enable_etf_sink_perf(csdev);
+		ret = tmc_enable_etf_sink_perf(csdev, data);
 		break;
 	/* We shouldn't be here */
 	default:
@@ -328,12 +337,14 @@ static void tmc_free_etf_buffer(void *config)
 }
 
 static int tmc_set_etf_buffer(struct coresight_device *csdev,
-			      struct perf_output_handle *handle,
-			      void *sink_config)
+			      struct perf_output_handle *handle)
 {
 	int ret = 0;
 	unsigned long head;
-	struct cs_buffers *buf = sink_config;
+	struct cs_buffers *buf = etm_perf_sink_config(handle);
+
+	if (!buf)
+		return -EINVAL;
 
 	/* wrap head around to the amount of space we have */
 	head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
@@ -472,7 +483,6 @@ static const struct coresight_ops_sink tmc_etf_sink_ops = {
 	.disable	= tmc_disable_etf_sink,
 	.alloc_buffer	= tmc_alloc_etf_buffer,
 	.free_buffer	= tmc_free_etf_buffer,
-	.set_buffer	= tmc_set_etf_buffer,
 	.update_buffer	= tmc_update_etf_buffer,
 };
 
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 5e9bb2f0e9c0..1aedfc3629c0 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -1103,19 +1103,20 @@ out:
 	return ret;
 }
 
-static int tmc_enable_etr_sink_perf(struct coresight_device *csdev)
+static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data)
 {
 	/* We don't support perf mode yet ! */
 	return -EINVAL;
 }
 
-static int tmc_enable_etr_sink(struct coresight_device *csdev, u32 mode)
+static int tmc_enable_etr_sink(struct coresight_device *csdev,
+			       u32 mode, void *data)
 {
 	switch (mode) {
 	case CS_MODE_SYSFS:
 		return tmc_enable_etr_sink_sysfs(csdev);
 	case CS_MODE_PERF:
-		return tmc_enable_etr_sink_perf(csdev);
+		return tmc_enable_etr_sink_perf(csdev, data);
 	}
 
 	/* We shouldn't be here */
diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c
index ce0b84583861..b2f72a1fa402 100644
--- a/drivers/hwtracing/coresight/coresight-tpiu.c
+++ b/drivers/hwtracing/coresight/coresight-tpiu.c
@@ -68,7 +68,7 @@ static void tpiu_enable_hw(struct tpiu_drvdata *drvdata)
 	CS_LOCK(drvdata->base);
 }
 
-static int tpiu_enable(struct coresight_device *csdev, u32 mode)
+static int tpiu_enable(struct coresight_device *csdev, u32 mode, void *__unused)
 {
 	struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
 
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 07382c55b31d..e73ca6af4765 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -128,7 +128,8 @@ static int coresight_find_link_outport(struct coresight_device *csdev,
 	return -ENODEV;
 }
 
-static int coresight_enable_sink(struct coresight_device *csdev, u32 mode)
+static int coresight_enable_sink(struct coresight_device *csdev,
+				 u32 mode, void *data)
 {
 	int ret;
 
@@ -137,7 +138,7 @@ static int coresight_enable_sink(struct coresight_device *csdev, u32 mode)
 	 * existing "mode" of operation.
 	 */
 	if (sink_ops(csdev)->enable) {
-		ret = sink_ops(csdev)->enable(csdev, mode);
+		ret = sink_ops(csdev)->enable(csdev, mode, data);
 		if (ret)
 			return ret;
 		csdev->enable = true;
@@ -315,7 +316,7 @@ void coresight_disable_path(struct list_head *path)
 	}
 }
 
-int coresight_enable_path(struct list_head *path, u32 mode)
+int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data)
 {
 
 	int ret = 0;
@@ -340,7 +341,7 @@ int coresight_enable_path(struct list_head *path, u32 mode)
 
 		switch (type) {
 		case CORESIGHT_DEV_TYPE_SINK:
-			ret = coresight_enable_sink(csdev, mode);
+			ret = coresight_enable_sink(csdev, mode, sink_data);
 			/*
 			 * Sink is the first component turned on. If we
 			 * failed to enable the sink, there are no components
@@ -643,7 +644,7 @@ int coresight_enable(struct coresight_device *csdev)
 		goto out;
 	}
 
-	ret = coresight_enable_path(path, CS_MODE_SYSFS);
+	ret = coresight_enable_path(path, CS_MODE_SYSFS, NULL);
 	if (ret)
 		goto err_path;
 
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 8e52682b1e90..53535821dc25 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -185,18 +185,14 @@ struct coresight_device {
  * @disable:		disables the sink.
  * @alloc_buffer:	initialises perf's ring buffer for trace collection.
  * @free_buffer:	release memory allocated in @get_config.
- * @set_buffer:		initialises buffer mechanic before a trace session.
  * @update_buffer:	update buffer pointers after a trace session.
  */
 struct coresight_ops_sink {
-	int (*enable)(struct coresight_device *csdev, u32 mode);
+	int (*enable)(struct coresight_device *csdev, u32 mode, void *data);
 	void (*disable)(struct coresight_device *csdev);
 	void *(*alloc_buffer)(struct coresight_device *csdev, int cpu,
 			      void **pages, int nr_pages, bool overwrite);
 	void (*free_buffer)(void *config);
-	int (*set_buffer)(struct coresight_device *csdev,
-			  struct perf_output_handle *handle,
-			  void *sink_config);
 	unsigned long (*update_buffer)(struct coresight_device *csdev,
 			      struct perf_output_handle *handle,
 			      void *sink_config);
-- 
cgit v1.2.3


From 2478a6ae4a6a4c8e3f7e9f6f849dffe92e5238e1 Mon Sep 17 00:00:00 2001
From: Suzuki K Poulose <suzuki.poulose@arm.com>
Date: Thu, 20 Sep 2018 13:18:11 -0600
Subject: coresight: Add support for CLAIM tag protocol

Coresight architecture defines CLAIM tags for a device to negotiate
control of the components (external agent vs self-hosted). Each device
has a pair of registers (CLAIMSET & CLAIMCLR) for managing the CLAIM
tags. However, the protocol for the CLAIM tags is IMPLEMENTATION DEFINED.
PSCI has recommendations for the use of the CLAIM tags to negotiate
controls for external agent vs self-hosted use. This patch implements
the recommended protocol by PSCI.

The claim/disclaim operations are performed from the device specific
drivers. The disadvantage is that the calls are sprinkled in each driver,
but this makes the operation much simpler.

Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hwtracing/coresight/coresight-priv.h |  7 +++
 drivers/hwtracing/coresight/coresight.c      | 86 ++++++++++++++++++++++++++++
 include/linux/coresight.h                    | 20 +++++++
 3 files changed, 113 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
index c11da5564a67..579f34943bf1 100644
--- a/drivers/hwtracing/coresight/coresight-priv.h
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -25,6 +25,13 @@
 #define CORESIGHT_DEVID		0xfc8
 #define CORESIGHT_DEVTYPE	0xfcc
 
+
+/*
+ * Coresight device CLAIM protocol.
+ * See PSCI - ARM DEN 0022D, Section: 6.8.1 Debug and Trace save and restore.
+ */
+#define CORESIGHT_CLAIM_SELF_HOSTED	BIT(1)
+
 #define TIMEOUT_US		100
 #define BMVAL(val, lsb, msb)	((val & GENMASK(msb, lsb)) >> lsb)
 
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index f4f50753cf75..2b0df1a0a8df 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -128,6 +128,92 @@ static int coresight_find_link_outport(struct coresight_device *csdev,
 	return -ENODEV;
 }
 
+static inline u32 coresight_read_claim_tags(void __iomem *base)
+{
+	return readl_relaxed(base + CORESIGHT_CLAIMCLR);
+}
+
+static inline bool coresight_is_claimed_self_hosted(void __iomem *base)
+{
+	return coresight_read_claim_tags(base) == CORESIGHT_CLAIM_SELF_HOSTED;
+}
+
+static inline bool coresight_is_claimed_any(void __iomem *base)
+{
+	return coresight_read_claim_tags(base) != 0;
+}
+
+static inline void coresight_set_claim_tags(void __iomem *base)
+{
+	writel_relaxed(CORESIGHT_CLAIM_SELF_HOSTED, base + CORESIGHT_CLAIMSET);
+	isb();
+}
+
+static inline void coresight_clear_claim_tags(void __iomem *base)
+{
+	writel_relaxed(CORESIGHT_CLAIM_SELF_HOSTED, base + CORESIGHT_CLAIMCLR);
+	isb();
+}
+
+/*
+ * coresight_claim_device_unlocked : Claim the device for self-hosted usage
+ * to prevent an external tool from touching this device. As per PSCI
+ * standards, section "Preserving the execution context" => "Debug and Trace
+ * save and Restore", DBGCLAIM[1] is reserved for Self-hosted debug/trace and
+ * DBGCLAIM[0] is reserved for external tools.
+ *
+ * Called with CS_UNLOCKed for the component.
+ * Returns : 0 on success
+ */
+int coresight_claim_device_unlocked(void __iomem *base)
+{
+	if (coresight_is_claimed_any(base))
+		return -EBUSY;
+
+	coresight_set_claim_tags(base);
+	if (coresight_is_claimed_self_hosted(base))
+		return 0;
+	/* There was a race setting the tags, clean up and fail */
+	coresight_clear_claim_tags(base);
+	return -EBUSY;
+}
+
+int coresight_claim_device(void __iomem *base)
+{
+	int rc;
+
+	CS_UNLOCK(base);
+	rc = coresight_claim_device_unlocked(base);
+	CS_LOCK(base);
+
+	return rc;
+}
+
+/*
+ * coresight_disclaim_device_unlocked : Clear the claim tags for the device.
+ * Called with CS_UNLOCKed for the component.
+ */
+void coresight_disclaim_device_unlocked(void __iomem *base)
+{
+
+	if (coresight_is_claimed_self_hosted(base))
+		coresight_clear_claim_tags(base);
+	else
+		/*
+		 * The external agent may have not honoured our claim
+		 * and has manipulated it. Or something else has seriously
+		 * gone wrong in our driver.
+		 */
+		WARN_ON_ONCE(1);
+}
+
+void coresight_disclaim_device(void __iomem *base)
+{
+	CS_UNLOCK(base);
+	coresight_disclaim_device_unlocked(base);
+	CS_LOCK(base);
+}
+
 static int coresight_enable_sink(struct coresight_device *csdev,
 				 u32 mode, void *data)
 {
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 53535821dc25..46c67a764877 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -257,6 +257,13 @@ extern int coresight_enable(struct coresight_device *csdev);
 extern void coresight_disable(struct coresight_device *csdev);
 extern int coresight_timeout(void __iomem *addr, u32 offset,
 			     int position, int value);
+
+extern int coresight_claim_device(void __iomem *base);
+extern int coresight_claim_device_unlocked(void __iomem *base);
+
+extern void coresight_disclaim_device(void __iomem *base);
+extern void coresight_disclaim_device_unlocked(void __iomem *base);
+
 #else
 static inline struct coresight_device *
 coresight_register(struct coresight_desc *desc) { return NULL; }
@@ -266,6 +273,19 @@ coresight_enable(struct coresight_device *csdev) { return -ENOSYS; }
 static inline void coresight_disable(struct coresight_device *csdev) {}
 static inline int coresight_timeout(void __iomem *addr, u32 offset,
 				     int position, int value) { return 1; }
+static inline int coresight_claim_device_unlocked(void __iomem *base)
+{
+	return -EINVAL;
+}
+
+static inline int coresight_claim_device(void __iomem *base)
+{
+	return -EINVAL;
+}
+
+static inline void coresight_disclaim_device(void __iomem *base) {}
+static inline void coresight_disclaim_device_unlocked(void __iomem *base) {}
+
 #endif
 
 #ifdef CONFIG_OF
-- 
cgit v1.2.3


From 800b932969c53c4044ff9f9fd1ee793a87fa8ef0 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Fri, 14 Sep 2018 09:10:15 -0700
Subject: vmbus: pass channel to hv_process_channel_removal

Rather than passing relid and then looking up the channel.
Pass the channel directly, since caller already knows it.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c      |  3 +--
 drivers/hv/channel_mgmt.c | 17 +++++------------
 drivers/hv/vmbus_drv.c    |  3 +--
 include/linux/hyperv.h    |  2 +-
 4 files changed, 8 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 741857d80da1..33e6db02dbab 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -690,8 +690,7 @@ void vmbus_close(struct vmbus_channel *channel)
 			wait_for_completion(&cur_channel->rescind_event);
 			mutex_lock(&vmbus_connection.channel_mutex);
 			vmbus_close_internal(cur_channel);
-			hv_process_channel_removal(
-					   cur_channel->offermsg.child_relid);
+			hv_process_channel_removal(cur_channel);
 		} else {
 			mutex_lock(&vmbus_connection.channel_mutex);
 			vmbus_close_internal(cur_channel);
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 0f0e091c117c..b7c48ebdf6a1 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -385,21 +385,14 @@ static void vmbus_release_relid(u32 relid)
 	trace_vmbus_release_relid(&msg, ret);
 }
 
-void hv_process_channel_removal(u32 relid)
+void hv_process_channel_removal(struct vmbus_channel *channel)
 {
+	struct vmbus_channel *primary_channel;
 	unsigned long flags;
-	struct vmbus_channel *primary_channel, *channel;
 
 	BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
-
-	/*
-	 * Make sure channel is valid as we may have raced.
-	 */
-	channel = relid2channel(relid);
-	if (!channel)
-		return;
-
 	BUG_ON(!channel->rescind);
+
 	if (channel->target_cpu != get_cpu()) {
 		put_cpu();
 		smp_call_function_single(channel->target_cpu,
@@ -429,7 +422,7 @@ void hv_process_channel_removal(u32 relid)
 		cpumask_clear_cpu(channel->target_cpu,
 				  &primary_channel->alloced_cpus_in_node);
 
-	vmbus_release_relid(relid);
+	vmbus_release_relid(channel->offermsg.child_relid);
 
 	free_channel(channel);
 }
@@ -943,7 +936,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 			 * The channel is currently not open;
 			 * it is safe for us to cleanup the channel.
 			 */
-			hv_process_channel_removal(rescind->child_relid);
+			hv_process_channel_removal(channel);
 		} else {
 			complete(&channel->rescind_event);
 		}
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 4bbc420d1213..283d184280af 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -864,10 +864,9 @@ static void vmbus_device_release(struct device *device)
 	struct vmbus_channel *channel = hv_dev->channel;
 
 	mutex_lock(&vmbus_connection.channel_mutex);
-	hv_process_channel_removal(channel->offermsg.child_relid);
+	hv_process_channel_removal(channel);
 	mutex_unlock(&vmbus_connection.channel_mutex);
 	kfree(hv_dev);
-
 }
 
 /* The one and only one */
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 2c3798bcb01c..6c4575c7f46b 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1443,7 +1443,7 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
 				const int *srv_version, int srv_vercnt,
 				int *nego_fw_version, int *nego_srv_version);
 
-void hv_process_channel_removal(u32 relid);
+void hv_process_channel_removal(struct vmbus_channel *channel);
 
 void vmbus_setevent(struct vmbus_channel *channel);
 /*
-- 
cgit v1.2.3


From 52a42c2a90226dc61c99bbd0cb096deeb52c334b Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Fri, 14 Sep 2018 09:10:16 -0700
Subject: vmbus: keep pointer to ring buffer page

Avoid going from struct page to virt address (and back) by just
keeping pointer to the allocated pages instead of virt address.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c         | 20 +++++++++-----------
 drivers/uio/uio_hv_generic.c |  5 +++--
 include/linux/hyperv.h       |  2 +-
 3 files changed, 13 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 33e6db02dbab..56ec0d96d876 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -91,11 +91,14 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 	unsigned long flags;
 	int ret, err = 0;
 	struct page *page;
+	unsigned int order;
 
 	if (send_ringbuffer_size % PAGE_SIZE ||
 	    recv_ringbuffer_size % PAGE_SIZE)
 		return -EINVAL;
 
+	order = get_order(send_ringbuffer_size + recv_ringbuffer_size);
+
 	spin_lock_irqsave(&newchannel->lock, flags);
 	if (newchannel->state == CHANNEL_OPEN_STATE) {
 		newchannel->state = CHANNEL_OPENING_STATE;
@@ -110,21 +113,17 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 
 	/* Allocate the ring buffer */
 	page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
-				GFP_KERNEL|__GFP_ZERO,
-				get_order(send_ringbuffer_size +
-				recv_ringbuffer_size));
+				GFP_KERNEL|__GFP_ZERO, order);
 
 	if (!page)
-		page = alloc_pages(GFP_KERNEL|__GFP_ZERO,
-				   get_order(send_ringbuffer_size +
-					     recv_ringbuffer_size));
+		page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order);
 
 	if (!page) {
 		err = -ENOMEM;
 		goto error_set_chnstate;
 	}
 
-	newchannel->ringbuffer_pages = page_address(page);
+	newchannel->ringbuffer_page = page;
 	newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
 					   recv_ringbuffer_size) >> PAGE_SHIFT;
 
@@ -239,8 +238,7 @@ error_free_gpadl:
 error_free_pages:
 	hv_ringbuffer_cleanup(&newchannel->outbound);
 	hv_ringbuffer_cleanup(&newchannel->inbound);
-	__free_pages(page,
-		     get_order(send_ringbuffer_size + recv_ringbuffer_size));
+	__free_pages(page, order);
 error_set_chnstate:
 	newchannel->state = CHANNEL_OPEN_STATE;
 	return err;
@@ -658,8 +656,8 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 	hv_ringbuffer_cleanup(&channel->outbound);
 	hv_ringbuffer_cleanup(&channel->inbound);
 
-	free_pages((unsigned long)channel->ringbuffer_pages,
-		get_order(channel->ringbuffer_pagecount * PAGE_SIZE));
+	__free_pages(channel->ringbuffer_page,
+		     get_order(channel->ringbuffer_pagecount << PAGE_SHIFT));
 
 out:
 	return ret;
diff --git a/drivers/uio/uio_hv_generic.c b/drivers/uio/uio_hv_generic.c
index a08860260f55..ba67a5267557 100644
--- a/drivers/uio/uio_hv_generic.c
+++ b/drivers/uio/uio_hv_generic.c
@@ -130,11 +130,12 @@ static int hv_uio_ring_mmap(struct file *filp, struct kobject *kobj,
 		= container_of(kobj, struct vmbus_channel, kobj);
 	struct hv_device *dev = channel->primary_channel->device_obj;
 	u16 q_idx = channel->offermsg.offer.sub_channel_index;
+	void *ring_buffer = page_address(channel->ringbuffer_page);
 
 	dev_dbg(&dev->device, "mmap channel %u pages %#lx at %#lx\n",
 		q_idx, vma_pages(vma), vma->vm_pgoff);
 
-	return vm_iomap_memory(vma, virt_to_phys(channel->ringbuffer_pages),
+	return vm_iomap_memory(vma, virt_to_phys(ring_buffer),
 			       channel->ringbuffer_pagecount << PAGE_SHIFT);
 }
 
@@ -223,7 +224,7 @@ hv_uio_probe(struct hv_device *dev,
 	/* mem resources */
 	pdata->info.mem[TXRX_RING_MAP].name = "txrx_rings";
 	pdata->info.mem[TXRX_RING_MAP].addr
-		= (uintptr_t)dev->channel->ringbuffer_pages;
+		= (uintptr_t)page_address(dev->channel->ringbuffer_page);
 	pdata->info.mem[TXRX_RING_MAP].size
 		= dev->channel->ringbuffer_pagecount << PAGE_SHIFT;
 	pdata->info.mem[TXRX_RING_MAP].memtype = UIO_MEM_LOGICAL;
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 6c4575c7f46b..a6c32d2d090b 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -739,7 +739,7 @@ struct vmbus_channel {
 	u32 ringbuffer_gpadlhandle;
 
 	/* Allocated memory for ring buffer */
-	void *ringbuffer_pages;
+	struct page *ringbuffer_page;
 	u32 ringbuffer_pagecount;
 	struct hv_ring_buffer_info outbound;	/* send to parent */
 	struct hv_ring_buffer_info inbound;	/* receive from parent */
-- 
cgit v1.2.3


From ae6935ed7d424ffa74d634da00767e7b03c98fd3 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Fri, 14 Sep 2018 09:10:17 -0700
Subject: vmbus: split ring buffer allocation from open

The UIO driver needs the ring buffer to be persistent(reused)
across open/close. Split the allocation and setup of ring buffer
out of vmbus_open. For normal usage vmbus_open/vmbus_close there
are no changes; only impacts uio_hv_generic which needs to keep
ring buffer memory and reuse when application restarts.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/channel.c     | 267 +++++++++++++++++++++++++++--------------------
 drivers/hv/ring_buffer.c |   1 +
 include/linux/hyperv.h   |   9 ++
 3 files changed, 162 insertions(+), 115 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 56ec0d96d876..ddadb7efd1cc 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -79,84 +79,96 @@ void vmbus_setevent(struct vmbus_channel *channel)
 }
 EXPORT_SYMBOL_GPL(vmbus_setevent);
 
-/*
- * vmbus_open - Open the specified channel.
- */
-int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
-		     u32 recv_ringbuffer_size, void *userdata, u32 userdatalen,
-		     void (*onchannelcallback)(void *context), void *context)
+/* vmbus_free_ring - drop mapping of ring buffer */
+void vmbus_free_ring(struct vmbus_channel *channel)
 {
-	struct vmbus_channel_open_channel *open_msg;
-	struct vmbus_channel_msginfo *open_info = NULL;
-	unsigned long flags;
-	int ret, err = 0;
-	struct page *page;
-	unsigned int order;
+	hv_ringbuffer_cleanup(&channel->outbound);
+	hv_ringbuffer_cleanup(&channel->inbound);
 
-	if (send_ringbuffer_size % PAGE_SIZE ||
-	    recv_ringbuffer_size % PAGE_SIZE)
-		return -EINVAL;
+	if (channel->ringbuffer_page) {
+		__free_pages(channel->ringbuffer_page,
+			     get_order(channel->ringbuffer_pagecount
+				       << PAGE_SHIFT));
+		channel->ringbuffer_page = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(vmbus_free_ring);
 
-	order = get_order(send_ringbuffer_size + recv_ringbuffer_size);
+/* vmbus_alloc_ring - allocate and map pages for ring buffer */
+int vmbus_alloc_ring(struct vmbus_channel *newchannel,
+		     u32 send_size, u32 recv_size)
+{
+	struct page *page;
+	int order;
 
-	spin_lock_irqsave(&newchannel->lock, flags);
-	if (newchannel->state == CHANNEL_OPEN_STATE) {
-		newchannel->state = CHANNEL_OPENING_STATE;
-	} else {
-		spin_unlock_irqrestore(&newchannel->lock, flags);
+	if (send_size % PAGE_SIZE || recv_size % PAGE_SIZE)
 		return -EINVAL;
-	}
-	spin_unlock_irqrestore(&newchannel->lock, flags);
-
-	newchannel->onchannel_callback = onchannelcallback;
-	newchannel->channel_callback_context = context;
 
 	/* Allocate the ring buffer */
+	order = get_order(send_size + recv_size);
 	page = alloc_pages_node(cpu_to_node(newchannel->target_cpu),
 				GFP_KERNEL|__GFP_ZERO, order);
 
 	if (!page)
 		page = alloc_pages(GFP_KERNEL|__GFP_ZERO, order);
 
-	if (!page) {
-		err = -ENOMEM;
-		goto error_set_chnstate;
-	}
+	if (!page)
+		return -ENOMEM;
 
 	newchannel->ringbuffer_page = page;
-	newchannel->ringbuffer_pagecount = (send_ringbuffer_size +
-					   recv_ringbuffer_size) >> PAGE_SHIFT;
+	newchannel->ringbuffer_pagecount = (send_size + recv_size) >> PAGE_SHIFT;
+	newchannel->ringbuffer_send_offset = send_size >> PAGE_SHIFT;
 
-	ret = hv_ringbuffer_init(&newchannel->outbound, page,
-				 send_ringbuffer_size >> PAGE_SHIFT);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vmbus_alloc_ring);
 
-	if (ret != 0) {
-		err = ret;
-		goto error_free_pages;
-	}
+static int __vmbus_open(struct vmbus_channel *newchannel,
+		       void *userdata, u32 userdatalen,
+		       void (*onchannelcallback)(void *context), void *context)
+{
+	struct vmbus_channel_open_channel *open_msg;
+	struct vmbus_channel_msginfo *open_info = NULL;
+	struct page *page = newchannel->ringbuffer_page;
+	u32 send_pages, recv_pages;
+	unsigned long flags;
+	int err;
 
-	ret = hv_ringbuffer_init(&newchannel->inbound,
-				 &page[send_ringbuffer_size >> PAGE_SHIFT],
-				 recv_ringbuffer_size >> PAGE_SHIFT);
-	if (ret != 0) {
-		err = ret;
-		goto error_free_pages;
+	if (userdatalen > MAX_USER_DEFINED_BYTES)
+		return -EINVAL;
+
+	send_pages = newchannel->ringbuffer_send_offset;
+	recv_pages = newchannel->ringbuffer_pagecount - send_pages;
+
+	spin_lock_irqsave(&newchannel->lock, flags);
+	if (newchannel->state != CHANNEL_OPEN_STATE) {
+		spin_unlock_irqrestore(&newchannel->lock, flags);
+		return -EINVAL;
 	}
+	spin_unlock_irqrestore(&newchannel->lock, flags);
 
+	newchannel->state = CHANNEL_OPENING_STATE;
+	newchannel->onchannel_callback = onchannelcallback;
+	newchannel->channel_callback_context = context;
+
+	err = hv_ringbuffer_init(&newchannel->outbound, page, send_pages);
+	if (err)
+		goto error_clean_ring;
+
+	err = hv_ringbuffer_init(&newchannel->inbound,
+				 &page[send_pages], recv_pages);
+	if (err)
+		goto error_clean_ring;
 
 	/* Establish the gpadl for the ring buffer */
 	newchannel->ringbuffer_gpadlhandle = 0;
 
-	ret = vmbus_establish_gpadl(newchannel,
-				    page_address(page),
-				    send_ringbuffer_size +
-				    recv_ringbuffer_size,
+	err = vmbus_establish_gpadl(newchannel,
+				    page_address(newchannel->ringbuffer_page),
+				    (send_pages + recv_pages) << PAGE_SHIFT,
 				    &newchannel->ringbuffer_gpadlhandle);
-
-	if (ret != 0) {
-		err = ret;
-		goto error_free_pages;
-	}
+	if (err)
+		goto error_clean_ring;
 
 	/* Create and init the channel open message */
 	open_info = kmalloc(sizeof(*open_info) +
@@ -175,15 +187,9 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 	open_msg->openid = newchannel->offermsg.child_relid;
 	open_msg->child_relid = newchannel->offermsg.child_relid;
 	open_msg->ringbuffer_gpadlhandle = newchannel->ringbuffer_gpadlhandle;
-	open_msg->downstream_ringbuffer_pageoffset = send_ringbuffer_size >>
-						  PAGE_SHIFT;
+	open_msg->downstream_ringbuffer_pageoffset = newchannel->ringbuffer_send_offset;
 	open_msg->target_vp = newchannel->target_vp;
 
-	if (userdatalen > MAX_USER_DEFINED_BYTES) {
-		err = -EINVAL;
-		goto error_free_gpadl;
-	}
-
 	if (userdatalen)
 		memcpy(open_msg->userdata, userdata, userdatalen);
 
@@ -194,18 +200,16 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 
 	if (newchannel->rescind) {
 		err = -ENODEV;
-		goto error_free_gpadl;
+		goto error_free_info;
 	}
 
-	ret = vmbus_post_msg(open_msg,
+	err = vmbus_post_msg(open_msg,
 			     sizeof(struct vmbus_channel_open_channel), true);
 
-	trace_vmbus_open(open_msg, ret);
+	trace_vmbus_open(open_msg, err);
 
-	if (ret != 0) {
-		err = ret;
+	if (err != 0)
 		goto error_clean_msglist;
-	}
 
 	wait_for_completion(&open_info->waitevent);
 
@@ -215,12 +219,12 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size,
 
 	if (newchannel->rescind) {
 		err = -ENODEV;
-		goto error_free_gpadl;
+		goto error_free_info;
 	}
 
 	if (open_info->response.open_result.status) {
 		err = -EAGAIN;
-		goto error_free_gpadl;
+		goto error_free_info;
 	}
 
 	newchannel->state = CHANNEL_OPENED_STATE;
@@ -231,18 +235,50 @@ error_clean_msglist:
 	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
 	list_del(&open_info->msglistentry);
 	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
-
+error_free_info:
+	kfree(open_info);
 error_free_gpadl:
 	vmbus_teardown_gpadl(newchannel, newchannel->ringbuffer_gpadlhandle);
-	kfree(open_info);
-error_free_pages:
+	newchannel->ringbuffer_gpadlhandle = 0;
+error_clean_ring:
 	hv_ringbuffer_cleanup(&newchannel->outbound);
 	hv_ringbuffer_cleanup(&newchannel->inbound);
-	__free_pages(page, order);
-error_set_chnstate:
 	newchannel->state = CHANNEL_OPEN_STATE;
 	return err;
 }
+
+/*
+ * vmbus_connect_ring - Open the channel but reuse ring buffer
+ */
+int vmbus_connect_ring(struct vmbus_channel *newchannel,
+		       void (*onchannelcallback)(void *context), void *context)
+{
+	return  __vmbus_open(newchannel, NULL, 0, onchannelcallback, context);
+}
+EXPORT_SYMBOL_GPL(vmbus_connect_ring);
+
+/*
+ * vmbus_open - Open the specified channel.
+ */
+int vmbus_open(struct vmbus_channel *newchannel,
+	       u32 send_ringbuffer_size, u32 recv_ringbuffer_size,
+	       void *userdata, u32 userdatalen,
+	       void (*onchannelcallback)(void *context), void *context)
+{
+	int err;
+
+	err = vmbus_alloc_ring(newchannel, send_ringbuffer_size,
+			       recv_ringbuffer_size);
+	if (err)
+		return err;
+
+	err = __vmbus_open(newchannel, userdata, userdatalen,
+			   onchannelcallback, context);
+	if (err)
+		vmbus_free_ring(newchannel);
+
+	return err;
+}
 EXPORT_SYMBOL_GPL(vmbus_open);
 
 /* Used for Hyper-V Socket: a guest client's connect() to the host */
@@ -610,10 +646,8 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 	 * in Hyper-V Manager), the driver's remove() invokes vmbus_close():
 	 * here we should skip most of the below cleanup work.
 	 */
-	if (channel->state != CHANNEL_OPENED_STATE) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (channel->state != CHANNEL_OPENED_STATE)
+		return -EINVAL;
 
 	channel->state = CHANNEL_OPEN_STATE;
 
@@ -635,11 +669,10 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 		 * If we failed to post the close msg,
 		 * it is perhaps better to leak memory.
 		 */
-		goto out;
 	}
 
 	/* Tear down the gpadl for the channel's ring buffer */
-	if (channel->ringbuffer_gpadlhandle) {
+	else if (channel->ringbuffer_gpadlhandle) {
 		ret = vmbus_teardown_gpadl(channel,
 					   channel->ringbuffer_gpadlhandle);
 		if (ret) {
@@ -648,59 +681,63 @@ static int vmbus_close_internal(struct vmbus_channel *channel)
 			 * If we failed to teardown gpadl,
 			 * it is perhaps better to leak memory.
 			 */
-			goto out;
 		}
-	}
-
-	/* Cleanup the ring buffers for this channel */
-	hv_ringbuffer_cleanup(&channel->outbound);
-	hv_ringbuffer_cleanup(&channel->inbound);
 
-	__free_pages(channel->ringbuffer_page,
-		     get_order(channel->ringbuffer_pagecount << PAGE_SHIFT));
+		channel->ringbuffer_gpadlhandle = 0;
+	}
 
-out:
 	return ret;
 }
 
-/*
- * vmbus_close - Close the specified channel
- */
-void vmbus_close(struct vmbus_channel *channel)
+/* disconnect ring - close all channels */
+int vmbus_disconnect_ring(struct vmbus_channel *channel)
 {
-	struct list_head *cur, *tmp;
-	struct vmbus_channel *cur_channel;
+	struct vmbus_channel *cur_channel, *tmp;
+	unsigned long flags;
+	LIST_HEAD(list);
+	int ret;
 
-	if (channel->primary_channel != NULL) {
-		/*
-		 * We will only close sub-channels when
-		 * the primary is closed.
-		 */
-		return;
-	}
-	/*
-	 * Close all the sub-channels first and then close the
-	 * primary channel.
-	 */
-	list_for_each_safe(cur, tmp, &channel->sc_list) {
-		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
-		if (cur_channel->rescind) {
+	if (channel->primary_channel != NULL)
+		return -EINVAL;
+
+	/* Snapshot the list of subchannels */
+	spin_lock_irqsave(&channel->lock, flags);
+	list_splice_init(&channel->sc_list, &list);
+	channel->num_sc = 0;
+	spin_unlock_irqrestore(&channel->lock, flags);
+
+	list_for_each_entry_safe(cur_channel, tmp, &list, sc_list) {
+		if (cur_channel->rescind)
 			wait_for_completion(&cur_channel->rescind_event);
-			mutex_lock(&vmbus_connection.channel_mutex);
-			vmbus_close_internal(cur_channel);
-			hv_process_channel_removal(cur_channel);
-		} else {
-			mutex_lock(&vmbus_connection.channel_mutex);
-			vmbus_close_internal(cur_channel);
+
+		mutex_lock(&vmbus_connection.channel_mutex);
+		if (vmbus_close_internal(cur_channel) == 0) {
+			vmbus_free_ring(cur_channel);
+
+			if (cur_channel->rescind)
+				hv_process_channel_removal(cur_channel);
 		}
 		mutex_unlock(&vmbus_connection.channel_mutex);
 	}
+
 	/*
 	 * Now close the primary.
 	 */
 	mutex_lock(&vmbus_connection.channel_mutex);
-	vmbus_close_internal(channel);
+	ret = vmbus_close_internal(channel);
 	mutex_unlock(&vmbus_connection.channel_mutex);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vmbus_disconnect_ring);
+
+/*
+ * vmbus_close - Close the specified channel
+ */
+void vmbus_close(struct vmbus_channel *channel)
+{
+	if (vmbus_disconnect_ring(channel) == 0)
+		vmbus_free_ring(channel);
 }
 EXPORT_SYMBOL_GPL(vmbus_close);
 
diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c
index 3e90eb91db45..64d0c85d5161 100644
--- a/drivers/hv/ring_buffer.c
+++ b/drivers/hv/ring_buffer.c
@@ -241,6 +241,7 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
 void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
 {
 	vunmap(ring_info->ring_buffer);
+	ring_info->ring_buffer = NULL;
 }
 
 /* Write to the ring buffer. */
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index a6c32d2d090b..b3e24368930a 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -741,6 +741,7 @@ struct vmbus_channel {
 	/* Allocated memory for ring buffer */
 	struct page *ringbuffer_page;
 	u32 ringbuffer_pagecount;
+	u32 ringbuffer_send_offset;
 	struct hv_ring_buffer_info outbound;	/* send to parent */
 	struct hv_ring_buffer_info inbound;	/* receive from parent */
 
@@ -1021,6 +1022,14 @@ struct vmbus_packet_mpb_array {
 	struct hv_mpb_array range;
 } __packed;
 
+int vmbus_alloc_ring(struct vmbus_channel *channel,
+		     u32 send_size, u32 recv_size);
+void vmbus_free_ring(struct vmbus_channel *channel);
+
+int vmbus_connect_ring(struct vmbus_channel *channel,
+		       void (*onchannel_callback)(void *context),
+		       void *context);
+int vmbus_disconnect_ring(struct vmbus_channel *channel);
 
 extern int vmbus_open(struct vmbus_channel *channel,
 			    u32 send_ringbuffersize,
-- 
cgit v1.2.3


From bfddabfa230452cea32aae82f9cd85ab22601acf Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Fri, 14 Sep 2018 09:10:18 -0700
Subject: uio: introduce UIO_MEM_IOVA

Introduce the concept of mapping physical memory locations that
are normal memory. The new type UIO_MEM_IOVA are similar to
existing UIO_MEM_PHYS but the backing memory is not marked as uncached.

Also, indent related switch to the currently used style.

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio.c          | 24 +++++++++++++-----------
 include/linux/uio_driver.h |  1 +
 2 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 0ffb324aa038..e601bd3fbae1 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -738,7 +738,8 @@ static int uio_mmap_physical(struct vm_area_struct *vma)
 		return -EINVAL;
 
 	vma->vm_ops = &uio_physical_vm_ops;
-	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	if (idev->info->mem[mi].memtype == UIO_MEM_PHYS)
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
 	/*
 	 * We cannot use the vm_iomap_memory() helper here,
@@ -795,18 +796,19 @@ static int uio_mmap(struct file *filep, struct vm_area_struct *vma)
 	}
 
 	switch (idev->info->mem[mi].memtype) {
-		case UIO_MEM_PHYS:
-			ret = uio_mmap_physical(vma);
-			break;
-		case UIO_MEM_LOGICAL:
-		case UIO_MEM_VIRTUAL:
-			ret = uio_mmap_logical(vma);
-			break;
-		default:
-			ret = -EINVAL;
+	case UIO_MEM_IOVA:
+	case UIO_MEM_PHYS:
+		ret = uio_mmap_physical(vma);
+		break;
+	case UIO_MEM_LOGICAL:
+	case UIO_MEM_VIRTUAL:
+		ret = uio_mmap_logical(vma);
+		break;
+	default:
+		ret = -EINVAL;
 	}
 
-out:
+ out:
 	mutex_unlock(&idev->info_lock);
 	return ret;
 }
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 6f8b68cd460f..a3cd7cb67a69 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -133,6 +133,7 @@ extern void uio_event_notify(struct uio_info *info);
 #define UIO_MEM_PHYS	1
 #define UIO_MEM_LOGICAL	2
 #define UIO_MEM_VIRTUAL 3
+#define UIO_MEM_IOVA	4
 
 /* defines for uio_port->porttype */
 #define UIO_PORT_NONE	0
-- 
cgit v1.2.3


From 1a0afc14b5da329765d6ecd4a79f546b9363ad8c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 25 Sep 2018 13:16:55 -0700
Subject: Revert "dma-mapping: clear dev->dma_ops in arch_teardown_dma_ops"

This reverts commit 46053c73685411915d3de50c5a0045beef32806b.

This change breaks architectures setting up dma_ops in their own magic
way and not using arch_setup_dma_ops, so revert it.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 arch/arm64/include/asm/dma-mapping.h | 5 +++++
 arch/arm64/mm/dma-mapping.c          | 5 +++++
 include/linux/dma-mapping.h          | 5 +----
 3 files changed, 11 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index 0a2d13332545..b7847eb8a7bb 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h
@@ -39,6 +39,11 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
 			const struct iommu_ops *iommu, bool coherent);
 #define arch_setup_dma_ops	arch_setup_dma_ops
 
+#ifdef CONFIG_IOMMU_DMA
+void arch_teardown_dma_ops(struct device *dev);
+#define arch_teardown_dma_ops	arch_teardown_dma_ops
+#endif
+
 /* do not use this function in a driver */
 static inline bool is_device_dma_coherent(struct device *dev)
 {
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index cdcb73db9ea2..072c51fb07d7 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -862,6 +862,11 @@ out_err:
 		 dev_name(dev));
 }
 
+void arch_teardown_dma_ops(struct device *dev)
+{
+	dev->dma_ops = NULL;
+}
+
 #else
 
 static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index bd81e74cca7b..d23fc45c8208 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -665,10 +665,7 @@ static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base,
 #endif
 
 #ifndef arch_teardown_dma_ops
-static inline void arch_teardown_dma_ops(struct device *dev)
-{
-	dev->dma_ops = NULL;
-}
+static inline void arch_teardown_dma_ops(struct device *dev) { }
 #endif
 
 static inline unsigned int dma_get_max_seg_size(struct device *dev)
-- 
cgit v1.2.3


From 6f99528e9797794b91b43321fbbc93fe772b0803 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 24 Sep 2018 19:22:49 +0300
Subject: net: core: netlink: add helper refcount dec and lock function

Rtnl lock is encapsulated in netlink and cannot be accessed by other
modules directly. This means that reference counted objects that rely on
rtnl lock cannot use it with refcounter helper function that atomically
releases decrements reference and obtains mutex.

This patch implements simple wrapper function around refcount_dec_and_lock
that obtains rtnl lock if reference counter value reached 0.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 2 ++
 net/core/rtnetlink.c      | 6 ++++++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 5225832bd6ff..9cdd76348d9a 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -6,6 +6,7 @@
 #include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/wait.h>
+#include <linux/refcount.h>
 #include <uapi/linux/rtnetlink.h>
 
 extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo);
@@ -34,6 +35,7 @@ extern void rtnl_unlock(void);
 extern int rtnl_trylock(void);
 extern int rtnl_is_locked(void);
 extern int rtnl_lock_killable(void);
+extern bool refcount_dec_and_rtnl_lock(refcount_t *r);
 
 extern wait_queue_head_t netdev_unregistering_wq;
 extern struct rw_semaphore pernet_ops_rwsem;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 80a7e18c65fb..35162e1b06ad 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -130,6 +130,12 @@ int rtnl_is_locked(void)
 }
 EXPORT_SYMBOL(rtnl_is_locked);
 
+bool refcount_dec_and_rtnl_lock(refcount_t *r)
+{
+	return refcount_dec_and_mutex_lock(r, &rtnl_mutex);
+}
+EXPORT_SYMBOL(refcount_dec_and_rtnl_lock);
+
 #ifdef CONFIG_PROVE_LOCKING
 bool lockdep_rtnl_is_held(void)
 {
-- 
cgit v1.2.3


From 3a7d0d07a386716b459b00783b11a8211cefcc0f Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 24 Sep 2018 19:22:51 +0300
Subject: net: sched: extend Qdisc with rcu

Currently, Qdisc API functions assume that users have rtnl lock taken. To
implement rtnl unlocked classifiers update interface, Qdisc API must be
extended with functions that do not require rtnl lock.

Extend Qdisc structure with rcu. Implement special version of put function
qdisc_put_unlocked() that is called without rtnl lock taken. This function
only takes rtnl lock if Qdisc reference counter reached zero and is
intended to be used as optimization.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h |  5 +++++
 include/net/pkt_sched.h   |  1 +
 include/net/sch_generic.h |  2 ++
 net/sched/sch_api.c       | 18 ++++++++++++++++++
 net/sched/sch_generic.c   | 25 ++++++++++++++++++++++++-
 5 files changed, 50 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 9cdd76348d9a..bb9cb84114c1 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -85,6 +85,11 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
 	return rtnl_dereference(dev->ingress_queue);
 }
 
+static inline struct netdev_queue *dev_ingress_queue_rcu(struct net_device *dev)
+{
+	return rcu_dereference(dev->ingress_queue);
+}
+
 struct netdev_queue *dev_ingress_queue_create(struct net_device *dev);
 
 #ifdef CONFIG_NET_INGRESS
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 7dc769e5452b..a16fbe9a2a67 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -102,6 +102,7 @@ int qdisc_set_default(const char *id);
 void qdisc_hash_add(struct Qdisc *q, bool invisible);
 void qdisc_hash_del(struct Qdisc *q);
 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
+struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle);
 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
 					struct nlattr *tab,
 					struct netlink_ext_ack *extack);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index fadb1a4d4ee8..091b40c198ff 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -105,6 +105,7 @@ struct Qdisc {
 
 	spinlock_t		busylock ____cacheline_aligned_in_smp;
 	spinlock_t		seqlock;
+	struct rcu_head		rcu;
 };
 
 static inline void qdisc_refcount_inc(struct Qdisc *qdisc)
@@ -555,6 +556,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
 			      struct Qdisc *qdisc);
 void qdisc_reset(struct Qdisc *qdisc);
 void qdisc_put(struct Qdisc *qdisc);
+void qdisc_put_unlocked(struct Qdisc *qdisc);
 void qdisc_tree_reduce_backlog(struct Qdisc *qdisc, unsigned int n,
 			       unsigned int len);
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 2096138c4bf6..22e9799e5b69 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -314,6 +314,24 @@ out:
 	return q;
 }
 
+struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
+{
+	struct netdev_queue *nq;
+	struct Qdisc *q;
+
+	if (!handle)
+		return NULL;
+	q = qdisc_match_from_root(dev->qdisc, handle);
+	if (q)
+		goto out;
+
+	nq = dev_ingress_queue_rcu(dev);
+	if (nq)
+		q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
+out:
+	return q;
+}
+
 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
 {
 	unsigned long cl;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 3e7696f3e053..531fac1d2875 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -941,6 +941,13 @@ void qdisc_free(struct Qdisc *qdisc)
 	kfree((char *) qdisc - qdisc->padded);
 }
 
+void qdisc_free_cb(struct rcu_head *head)
+{
+	struct Qdisc *q = container_of(head, struct Qdisc, rcu);
+
+	qdisc_free(q);
+}
+
 static void qdisc_destroy(struct Qdisc *qdisc)
 {
 	const struct Qdisc_ops  *ops = qdisc->ops;
@@ -970,7 +977,7 @@ static void qdisc_destroy(struct Qdisc *qdisc)
 		kfree_skb_list(skb);
 	}
 
-	qdisc_free(qdisc);
+	call_rcu(&qdisc->rcu, qdisc_free_cb);
 }
 
 void qdisc_put(struct Qdisc *qdisc)
@@ -983,6 +990,22 @@ void qdisc_put(struct Qdisc *qdisc)
 }
 EXPORT_SYMBOL(qdisc_put);
 
+/* Version of qdisc_put() that is called with rtnl mutex unlocked.
+ * Intended to be used as optimization, this function only takes rtnl lock if
+ * qdisc reference counter reached zero.
+ */
+
+void qdisc_put_unlocked(struct Qdisc *qdisc)
+{
+	if (qdisc->flags & TCQ_F_BUILTIN ||
+	    !refcount_dec_and_rtnl_lock(&qdisc->refcnt))
+		return;
+
+	qdisc_destroy(qdisc);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL(qdisc_put_unlocked);
+
 /* Attach toplevel qdisc to device queue. */
 struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
 			      struct Qdisc *qdisc)
-- 
cgit v1.2.3


From 76582671eb5d006a78420776cc5f73195b867e81 Mon Sep 17 00:00:00 2001
From: Rajan Vaja <rajanv@xilinx.com>
Date: Wed, 12 Sep 2018 12:38:36 -0700
Subject: firmware: xilinx: Add Zynqmp firmware driver

This patch is adding communication layer with firmware.
Firmware driver provides an interface to firmware APIs.
Interface APIs can be used by any driver to communicate to
PMUFW(Platform Management Unit). All requests go through ATF.

Signed-off-by: Rajan Vaja <rajanv@xilinx.com>
Signed-off-by: Jolly Shah <jollys@xilinx.com>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 arch/arm64/Kconfig.platforms         |   1 +
 drivers/firmware/Kconfig             |   1 +
 drivers/firmware/Makefile            |   1 +
 drivers/firmware/xilinx/Kconfig      |  16 ++
 drivers/firmware/xilinx/Makefile     |   4 +
 drivers/firmware/xilinx/zynqmp.c     | 322 +++++++++++++++++++++++++++++++++++
 include/linux/firmware/xlnx-zynqmp.h |  63 +++++++
 7 files changed, 408 insertions(+)
 create mode 100644 drivers/firmware/xilinx/Kconfig
 create mode 100644 drivers/firmware/xilinx/Makefile
 create mode 100644 drivers/firmware/xilinx/zynqmp.c
 create mode 100644 include/linux/firmware/xlnx-zynqmp.h

(limited to 'include/linux')

diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms
index 393d2b524284..23f3928743c9 100644
--- a/arch/arm64/Kconfig.platforms
+++ b/arch/arm64/Kconfig.platforms
@@ -301,6 +301,7 @@ config ARCH_ZX
 
 config ARCH_ZYNQMP
 	bool "Xilinx ZynqMP Family"
+	select ZYNQMP_FIRMWARE
 	help
 	  This enables support for Xilinx ZynqMP Family
 
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 6e83880046d7..36344cba764e 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -291,5 +291,6 @@ source "drivers/firmware/google/Kconfig"
 source "drivers/firmware/efi/Kconfig"
 source "drivers/firmware/meson/Kconfig"
 source "drivers/firmware/tegra/Kconfig"
+source "drivers/firmware/xilinx/Kconfig"
 
 endmenu
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index e18a041cfc53..99583d3df52f 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -32,3 +32,4 @@ obj-$(CONFIG_GOOGLE_FIRMWARE)	+= google/
 obj-$(CONFIG_EFI)		+= efi/
 obj-$(CONFIG_UEFI_CPER)		+= efi/
 obj-y				+= tegra/
+obj-y				+= xilinx/
diff --git a/drivers/firmware/xilinx/Kconfig b/drivers/firmware/xilinx/Kconfig
new file mode 100644
index 000000000000..64d976e31010
--- /dev/null
+++ b/drivers/firmware/xilinx/Kconfig
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+# Kconfig for Xilinx firmwares
+
+menu "Zynq MPSoC Firmware Drivers"
+	depends on ARCH_ZYNQMP
+
+config ZYNQMP_FIRMWARE
+	bool "Enable Xilinx Zynq MPSoC firmware interface"
+	help
+	  Firmware interface driver is used by different
+	  drivers to communicate with the firmware for
+	  various platform management services.
+	  Say yes to enable ZynqMP firmware interface driver.
+	  If in doubt, say N.
+
+endmenu
diff --git a/drivers/firmware/xilinx/Makefile b/drivers/firmware/xilinx/Makefile
new file mode 100644
index 000000000000..29f7bf2fd094
--- /dev/null
+++ b/drivers/firmware/xilinx/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for Xilinx firmwares
+
+obj-$(CONFIG_ZYNQMP_FIRMWARE) += zynqmp.o
diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
new file mode 100644
index 000000000000..5bf64acc7b44
--- /dev/null
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Xilinx Zynq MPSoC Firmware layer
+ *
+ *  Copyright (C) 2014-2018 Xilinx, Inc.
+ *
+ *  Michal Simek <michal.simek@xilinx.com>
+ *  Davorin Mista <davorin.mista@aggios.com>
+ *  Jolly Shah <jollys@xilinx.com>
+ *  Rajan Vaja <rajanv@xilinx.com>
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/compiler.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <linux/firmware/xlnx-zynqmp.h>
+
+/**
+ * zynqmp_pm_ret_code() - Convert PMU-FW error codes to Linux error codes
+ * @ret_status:		PMUFW return code
+ *
+ * Return: corresponding Linux error code
+ */
+static int zynqmp_pm_ret_code(u32 ret_status)
+{
+	switch (ret_status) {
+	case XST_PM_SUCCESS:
+	case XST_PM_DOUBLE_REQ:
+		return 0;
+	case XST_PM_NO_ACCESS:
+		return -EACCES;
+	case XST_PM_ABORT_SUSPEND:
+		return -ECANCELED;
+	case XST_PM_INTERNAL:
+	case XST_PM_CONFLICT:
+	case XST_PM_INVALID_NODE:
+	default:
+		return -EINVAL;
+	}
+}
+
+static noinline int do_fw_call_fail(u64 arg0, u64 arg1, u64 arg2,
+				    u32 *ret_payload)
+{
+	return -ENODEV;
+}
+
+/*
+ * PM function call wrapper
+ * Invoke do_fw_call_smc or do_fw_call_hvc, depending on the configuration
+ */
+static int (*do_fw_call)(u64, u64, u64, u32 *ret_payload) = do_fw_call_fail;
+
+/**
+ * do_fw_call_smc() - Call system-level platform management layer (SMC)
+ * @arg0:		Argument 0 to SMC call
+ * @arg1:		Argument 1 to SMC call
+ * @arg2:		Argument 2 to SMC call
+ * @ret_payload:	Returned value array
+ *
+ * Invoke platform management function via SMC call (no hypervisor present).
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static noinline int do_fw_call_smc(u64 arg0, u64 arg1, u64 arg2,
+				   u32 *ret_payload)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_smc(arg0, arg1, arg2, 0, 0, 0, 0, 0, &res);
+
+	if (ret_payload) {
+		ret_payload[0] = lower_32_bits(res.a0);
+		ret_payload[1] = upper_32_bits(res.a0);
+		ret_payload[2] = lower_32_bits(res.a1);
+		ret_payload[3] = upper_32_bits(res.a1);
+	}
+
+	return zynqmp_pm_ret_code((enum pm_ret_status)res.a0);
+}
+
+/**
+ * do_fw_call_hvc() - Call system-level platform management layer (HVC)
+ * @arg0:		Argument 0 to HVC call
+ * @arg1:		Argument 1 to HVC call
+ * @arg2:		Argument 2 to HVC call
+ * @ret_payload:	Returned value array
+ *
+ * Invoke platform management function via HVC
+ * HVC-based for communication through hypervisor
+ * (no direct communication with ATF).
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static noinline int do_fw_call_hvc(u64 arg0, u64 arg1, u64 arg2,
+				   u32 *ret_payload)
+{
+	struct arm_smccc_res res;
+
+	arm_smccc_hvc(arg0, arg1, arg2, 0, 0, 0, 0, 0, &res);
+
+	if (ret_payload) {
+		ret_payload[0] = lower_32_bits(res.a0);
+		ret_payload[1] = upper_32_bits(res.a0);
+		ret_payload[2] = lower_32_bits(res.a1);
+		ret_payload[3] = upper_32_bits(res.a1);
+	}
+
+	return zynqmp_pm_ret_code((enum pm_ret_status)res.a0);
+}
+
+/**
+ * zynqmp_pm_invoke_fn() - Invoke the system-level platform management layer
+ *			   caller function depending on the configuration
+ * @pm_api_id:		Requested PM-API call
+ * @arg0:		Argument 0 to requested PM-API call
+ * @arg1:		Argument 1 to requested PM-API call
+ * @arg2:		Argument 2 to requested PM-API call
+ * @arg3:		Argument 3 to requested PM-API call
+ * @ret_payload:	Returned value array
+ *
+ * Invoke platform management function for SMC or HVC call, depending on
+ * configuration.
+ * Following SMC Calling Convention (SMCCC) for SMC64:
+ * Pm Function Identifier,
+ * PM_SIP_SVC + PM_API_ID =
+ *	((SMC_TYPE_FAST << FUNCID_TYPE_SHIFT)
+ *	((SMC_64) << FUNCID_CC_SHIFT)
+ *	((SIP_START) << FUNCID_OEN_SHIFT)
+ *	((PM_API_ID) & FUNCID_NUM_MASK))
+ *
+ * PM_SIP_SVC	- Registered ZynqMP SIP Service Call.
+ * PM_API_ID	- Platform Management API ID.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+int zynqmp_pm_invoke_fn(u32 pm_api_id, u32 arg0, u32 arg1,
+			u32 arg2, u32 arg3, u32 *ret_payload)
+{
+	/*
+	 * Added SIP service call Function Identifier
+	 * Make sure to stay in x0 register
+	 */
+	u64 smc_arg[4];
+
+	smc_arg[0] = PM_SIP_SVC | pm_api_id;
+	smc_arg[1] = ((u64)arg1 << 32) | arg0;
+	smc_arg[2] = ((u64)arg3 << 32) | arg2;
+
+	return do_fw_call(smc_arg[0], smc_arg[1], smc_arg[2], ret_payload);
+}
+
+static u32 pm_api_version;
+static u32 pm_tz_version;
+
+/**
+ * zynqmp_pm_get_api_version() - Get version number of PMU PM firmware
+ * @version:	Returned version value
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_get_api_version(u32 *version)
+{
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	if (!version)
+		return -EINVAL;
+
+	/* Check is PM API version already verified */
+	if (pm_api_version > 0) {
+		*version = pm_api_version;
+		return 0;
+	}
+	ret = zynqmp_pm_invoke_fn(PM_GET_API_VERSION, 0, 0, 0, 0, ret_payload);
+	*version = ret_payload[1];
+
+	return ret;
+}
+
+/**
+ * zynqmp_pm_get_trustzone_version() - Get secure trustzone firmware version
+ * @version:	Returned version value
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_get_trustzone_version(u32 *version)
+{
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	if (!version)
+		return -EINVAL;
+
+	/* Check is PM trustzone version already verified */
+	if (pm_tz_version > 0) {
+		*version = pm_tz_version;
+		return 0;
+	}
+	ret = zynqmp_pm_invoke_fn(PM_GET_TRUSTZONE_VERSION, 0, 0,
+				  0, 0, ret_payload);
+	*version = ret_payload[1];
+
+	return ret;
+}
+
+/**
+ * get_set_conduit_method() - Choose SMC or HVC based communication
+ * @np:		Pointer to the device_node structure
+ *
+ * Use SMC or HVC-based functions to communicate with EL2/EL3.
+ *
+ * Return: Returns 0 on success or error code
+ */
+static int get_set_conduit_method(struct device_node *np)
+{
+	const char *method;
+
+	if (of_property_read_string(np, "method", &method)) {
+		pr_warn("%s missing \"method\" property\n", __func__);
+		return -ENXIO;
+	}
+
+	if (!strcmp("hvc", method)) {
+		do_fw_call = do_fw_call_hvc;
+	} else if (!strcmp("smc", method)) {
+		do_fw_call = do_fw_call_smc;
+	} else {
+		pr_warn("%s Invalid \"method\" property: %s\n",
+			__func__, method);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct zynqmp_eemi_ops eemi_ops = {
+	.get_api_version = zynqmp_pm_get_api_version,
+};
+
+/**
+ * zynqmp_pm_get_eemi_ops - Get eemi ops functions
+ *
+ * Return: Pointer of eemi_ops structure
+ */
+const struct zynqmp_eemi_ops *zynqmp_pm_get_eemi_ops(void)
+{
+	return &eemi_ops;
+}
+EXPORT_SYMBOL_GPL(zynqmp_pm_get_eemi_ops);
+
+static int zynqmp_firmware_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np;
+	int ret;
+
+	np = of_find_compatible_node(NULL, NULL, "xlnx,zynqmp");
+	if (!np)
+		return 0;
+	of_node_put(np);
+
+	ret = get_set_conduit_method(dev->of_node);
+	if (ret)
+		return ret;
+
+	/* Check PM API version number */
+	zynqmp_pm_get_api_version(&pm_api_version);
+	if (pm_api_version < ZYNQMP_PM_VERSION) {
+		panic("%s Platform Management API version error. Expected: v%d.%d - Found: v%d.%d\n",
+		      __func__,
+		      ZYNQMP_PM_VERSION_MAJOR, ZYNQMP_PM_VERSION_MINOR,
+		      pm_api_version >> 16, pm_api_version & 0xFFFF);
+	}
+
+	pr_info("%s Platform Management API v%d.%d\n", __func__,
+		pm_api_version >> 16, pm_api_version & 0xFFFF);
+
+	/* Check trustzone version number */
+	ret = zynqmp_pm_get_trustzone_version(&pm_tz_version);
+	if (ret)
+		panic("Legacy trustzone found without version support\n");
+
+	if (pm_tz_version < ZYNQMP_TZ_VERSION)
+		panic("%s Trustzone version error. Expected: v%d.%d - Found: v%d.%d\n",
+		      __func__,
+		      ZYNQMP_TZ_VERSION_MAJOR, ZYNQMP_TZ_VERSION_MINOR,
+		      pm_tz_version >> 16, pm_tz_version & 0xFFFF);
+
+	pr_info("%s Trustzone version v%d.%d\n", __func__,
+		pm_tz_version >> 16, pm_tz_version & 0xFFFF);
+
+	return of_platform_populate(dev->of_node, NULL, NULL, dev);
+}
+
+static int zynqmp_firmware_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static const struct of_device_id zynqmp_firmware_of_match[] = {
+	{.compatible = "xlnx,zynqmp-firmware"},
+	{},
+};
+MODULE_DEVICE_TABLE(of, zynqmp_firmware_of_match);
+
+static struct platform_driver zynqmp_firmware_driver = {
+	.driver = {
+		.name = "zynqmp_firmware",
+		.of_match_table = zynqmp_firmware_of_match,
+	},
+	.probe = zynqmp_firmware_probe,
+	.remove = zynqmp_firmware_remove,
+};
+module_platform_driver(zynqmp_firmware_driver);
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
new file mode 100644
index 000000000000..cb63bedf3dcf
--- /dev/null
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Xilinx Zynq MPSoC Firmware layer
+ *
+ *  Copyright (C) 2014-2018 Xilinx
+ *
+ *  Michal Simek <michal.simek@xilinx.com>
+ *  Davorin Mista <davorin.mista@aggios.com>
+ *  Jolly Shah <jollys@xilinx.com>
+ *  Rajan Vaja <rajanv@xilinx.com>
+ */
+
+#ifndef __FIRMWARE_ZYNQMP_H__
+#define __FIRMWARE_ZYNQMP_H__
+
+#define ZYNQMP_PM_VERSION_MAJOR	1
+#define ZYNQMP_PM_VERSION_MINOR	0
+
+#define ZYNQMP_PM_VERSION	((ZYNQMP_PM_VERSION_MAJOR << 16) | \
+					ZYNQMP_PM_VERSION_MINOR)
+
+#define ZYNQMP_TZ_VERSION_MAJOR	1
+#define ZYNQMP_TZ_VERSION_MINOR	0
+
+#define ZYNQMP_TZ_VERSION	((ZYNQMP_TZ_VERSION_MAJOR << 16) | \
+					ZYNQMP_TZ_VERSION_MINOR)
+
+/* SMC SIP service Call Function Identifier Prefix */
+#define PM_SIP_SVC			0xC2000000
+#define PM_GET_TRUSTZONE_VERSION	0xa03
+
+/* Number of 32bits values in payload */
+#define PAYLOAD_ARG_CNT	4U
+
+enum pm_api_id {
+	PM_GET_API_VERSION = 1,
+};
+
+/* PMU-FW return status codes */
+enum pm_ret_status {
+	XST_PM_SUCCESS = 0,
+	XST_PM_INTERNAL = 2000,
+	XST_PM_CONFLICT,
+	XST_PM_NO_ACCESS,
+	XST_PM_INVALID_NODE,
+	XST_PM_DOUBLE_REQ,
+	XST_PM_ABORT_SUSPEND,
+};
+
+struct zynqmp_eemi_ops {
+	int (*get_api_version)(u32 *version);
+};
+
+#if IS_REACHABLE(CONFIG_ARCH_ZYNQMP)
+const struct zynqmp_eemi_ops *zynqmp_pm_get_eemi_ops(void);
+#else
+static inline struct zynqmp_eemi_ops *zynqmp_pm_get_eemi_ops(void)
+{
+	return NULL;
+}
+#endif
+
+#endif /* __FIRMWARE_ZYNQMP_H__ */
-- 
cgit v1.2.3


From 59ecdd778879f171072b663f91de6c3a595e2ed4 Mon Sep 17 00:00:00 2001
From: Rajan Vaja <rajanv@xilinx.com>
Date: Wed, 12 Sep 2018 12:38:37 -0700
Subject: firmware: xilinx: Add query data API

Add ZynqMP firmware query data API to query platform
specific information(clocks, pins) from firmware.

Signed-off-by: Rajan Vaja <rajanv@xilinx.com>
Signed-off-by: Jolly Shah <jollys@xilinx.com>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 drivers/firmware/xilinx/zynqmp.c     | 14 ++++++++++++++
 include/linux/firmware/xlnx-zynqmp.h | 20 ++++++++++++++++++++
 2 files changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index 5bf64acc7b44..2a333c04955b 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -241,8 +241,22 @@ static int get_set_conduit_method(struct device_node *np)
 	return 0;
 }
 
+/**
+ * zynqmp_pm_query_data() - Get query data from firmware
+ * @qdata:	Variable to the zynqmp_pm_query_data structure
+ * @out:	Returned output value
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_query_data(struct zynqmp_pm_query_data qdata, u32 *out)
+{
+	return zynqmp_pm_invoke_fn(PM_QUERY_DATA, qdata.qid, qdata.arg1,
+				   qdata.arg2, qdata.arg3, out);
+}
+
 static const struct zynqmp_eemi_ops eemi_ops = {
 	.get_api_version = zynqmp_pm_get_api_version,
+	.query_data = zynqmp_pm_query_data,
 };
 
 /**
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index cb63bedf3dcf..287f42caa623 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -34,6 +34,7 @@
 
 enum pm_api_id {
 	PM_GET_API_VERSION = 1,
+	PM_QUERY_DATA = 35,
 };
 
 /* PMU-FW return status codes */
@@ -47,8 +48,27 @@ enum pm_ret_status {
 	XST_PM_ABORT_SUSPEND,
 };
 
+enum pm_query_id {
+	PM_QID_INVALID,
+};
+
+/**
+ * struct zynqmp_pm_query_data - PM query data
+ * @qid:	query ID
+ * @arg1:	Argument 1 of query data
+ * @arg2:	Argument 2 of query data
+ * @arg3:	Argument 3 of query data
+ */
+struct zynqmp_pm_query_data {
+	u32 qid;
+	u32 arg1;
+	u32 arg2;
+	u32 arg3;
+};
+
 struct zynqmp_eemi_ops {
 	int (*get_api_version)(u32 *version);
+	int (*query_data)(struct zynqmp_pm_query_data qdata, u32 *out);
 };
 
 #if IS_REACHABLE(CONFIG_ARCH_ZYNQMP)
-- 
cgit v1.2.3


From f9627312e20721681ea326bd2b7935bf8034b288 Mon Sep 17 00:00:00 2001
From: Rajan Vaja <rajanv@xilinx.com>
Date: Wed, 12 Sep 2018 12:38:38 -0700
Subject: firmware: xilinx: Add clock APIs

Add clock APIs to control clocks through firmware
interface.

Signed-off-by: Rajan Vaja <rajanv@xilinx.com>
Signed-off-by: Jolly Shah <jollys@xilinx.com>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 drivers/firmware/xilinx/zynqmp.c     | 186 ++++++++++++++++++++++++++++++++++-
 include/linux/firmware/xlnx-zynqmp.h |  30 ++++++
 2 files changed, 214 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index 2a333c04955b..697f4fa23a45 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -250,13 +250,195 @@ static int get_set_conduit_method(struct device_node *np)
  */
 static int zynqmp_pm_query_data(struct zynqmp_pm_query_data qdata, u32 *out)
 {
-	return zynqmp_pm_invoke_fn(PM_QUERY_DATA, qdata.qid, qdata.arg1,
-				   qdata.arg2, qdata.arg3, out);
+	int ret;
+
+	ret = zynqmp_pm_invoke_fn(PM_QUERY_DATA, qdata.qid, qdata.arg1,
+				  qdata.arg2, qdata.arg3, out);
+
+	/*
+	 * For clock name query, all bytes in SMC response are clock name
+	 * characters and return code is always success. For invalid clocks,
+	 * clock name bytes would be zeros.
+	 */
+	return qdata.qid == PM_QID_CLOCK_GET_NAME ? 0 : ret;
+}
+
+/**
+ * zynqmp_pm_clock_enable() - Enable the clock for given id
+ * @clock_id:	ID of the clock to be enabled
+ *
+ * This function is used by master to enable the clock
+ * including peripherals and PLL clocks.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_clock_enable(u32 clock_id)
+{
+	return zynqmp_pm_invoke_fn(PM_CLOCK_ENABLE, clock_id, 0, 0, 0, NULL);
+}
+
+/**
+ * zynqmp_pm_clock_disable() - Disable the clock for given id
+ * @clock_id:	ID of the clock to be disable
+ *
+ * This function is used by master to disable the clock
+ * including peripherals and PLL clocks.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_clock_disable(u32 clock_id)
+{
+	return zynqmp_pm_invoke_fn(PM_CLOCK_DISABLE, clock_id, 0, 0, 0, NULL);
+}
+
+/**
+ * zynqmp_pm_clock_getstate() - Get the clock state for given id
+ * @clock_id:	ID of the clock to be queried
+ * @state:	1/0 (Enabled/Disabled)
+ *
+ * This function is used by master to get the state of clock
+ * including peripherals and PLL clocks.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_clock_getstate(u32 clock_id, u32 *state)
+{
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	ret = zynqmp_pm_invoke_fn(PM_CLOCK_GETSTATE, clock_id, 0,
+				  0, 0, ret_payload);
+	*state = ret_payload[1];
+
+	return ret;
+}
+
+/**
+ * zynqmp_pm_clock_setdivider() - Set the clock divider for given id
+ * @clock_id:	ID of the clock
+ * @divider:	divider value
+ *
+ * This function is used by master to set divider for any clock
+ * to achieve desired rate.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_clock_setdivider(u32 clock_id, u32 divider)
+{
+	return zynqmp_pm_invoke_fn(PM_CLOCK_SETDIVIDER, clock_id, divider,
+				   0, 0, NULL);
+}
+
+/**
+ * zynqmp_pm_clock_getdivider() - Get the clock divider for given id
+ * @clock_id:	ID of the clock
+ * @divider:	divider value
+ *
+ * This function is used by master to get divider values
+ * for any clock.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_clock_getdivider(u32 clock_id, u32 *divider)
+{
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	ret = zynqmp_pm_invoke_fn(PM_CLOCK_GETDIVIDER, clock_id, 0,
+				  0, 0, ret_payload);
+	*divider = ret_payload[1];
+
+	return ret;
+}
+
+/**
+ * zynqmp_pm_clock_setrate() - Set the clock rate for given id
+ * @clock_id:	ID of the clock
+ * @rate:	rate value in hz
+ *
+ * This function is used by master to set rate for any clock.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_clock_setrate(u32 clock_id, u64 rate)
+{
+	return zynqmp_pm_invoke_fn(PM_CLOCK_SETRATE, clock_id,
+				   lower_32_bits(rate),
+				   upper_32_bits(rate),
+				   0, NULL);
+}
+
+/**
+ * zynqmp_pm_clock_getrate() - Get the clock rate for given id
+ * @clock_id:	ID of the clock
+ * @rate:	rate value in hz
+ *
+ * This function is used by master to get rate
+ * for any clock.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_clock_getrate(u32 clock_id, u64 *rate)
+{
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	ret = zynqmp_pm_invoke_fn(PM_CLOCK_GETRATE, clock_id, 0,
+				  0, 0, ret_payload);
+	*rate = ((u64)ret_payload[2] << 32) | ret_payload[1];
+
+	return ret;
+}
+
+/**
+ * zynqmp_pm_clock_setparent() - Set the clock parent for given id
+ * @clock_id:	ID of the clock
+ * @parent_id:	parent id
+ *
+ * This function is used by master to set parent for any clock.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_clock_setparent(u32 clock_id, u32 parent_id)
+{
+	return zynqmp_pm_invoke_fn(PM_CLOCK_SETPARENT, clock_id,
+				   parent_id, 0, 0, NULL);
+}
+
+/**
+ * zynqmp_pm_clock_getparent() - Get the clock parent for given id
+ * @clock_id:	ID of the clock
+ * @parent_id:	parent id
+ *
+ * This function is used by master to get parent index
+ * for any clock.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_clock_getparent(u32 clock_id, u32 *parent_id)
+{
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	ret = zynqmp_pm_invoke_fn(PM_CLOCK_GETPARENT, clock_id, 0,
+				  0, 0, ret_payload);
+	*parent_id = ret_payload[1];
+
+	return ret;
 }
 
 static const struct zynqmp_eemi_ops eemi_ops = {
 	.get_api_version = zynqmp_pm_get_api_version,
 	.query_data = zynqmp_pm_query_data,
+	.clock_enable = zynqmp_pm_clock_enable,
+	.clock_disable = zynqmp_pm_clock_disable,
+	.clock_getstate = zynqmp_pm_clock_getstate,
+	.clock_setdivider = zynqmp_pm_clock_setdivider,
+	.clock_getdivider = zynqmp_pm_clock_getdivider,
+	.clock_setrate = zynqmp_pm_clock_setrate,
+	.clock_getrate = zynqmp_pm_clock_getrate,
+	.clock_setparent = zynqmp_pm_clock_setparent,
+	.clock_getparent = zynqmp_pm_clock_getparent,
 };
 
 /**
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 287f42caa623..015e130431e6 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -35,6 +35,15 @@
 enum pm_api_id {
 	PM_GET_API_VERSION = 1,
 	PM_QUERY_DATA = 35,
+	PM_CLOCK_ENABLE,
+	PM_CLOCK_DISABLE,
+	PM_CLOCK_GETSTATE,
+	PM_CLOCK_SETDIVIDER,
+	PM_CLOCK_GETDIVIDER,
+	PM_CLOCK_SETRATE,
+	PM_CLOCK_GETRATE,
+	PM_CLOCK_SETPARENT,
+	PM_CLOCK_GETPARENT,
 };
 
 /* PMU-FW return status codes */
@@ -48,8 +57,20 @@ enum pm_ret_status {
 	XST_PM_ABORT_SUSPEND,
 };
 
+enum pm_ioctl_id {
+	IOCTL_SET_PLL_FRAC_MODE = 8,
+	IOCTL_GET_PLL_FRAC_MODE,
+	IOCTL_SET_PLL_FRAC_DATA,
+	IOCTL_GET_PLL_FRAC_DATA,
+};
+
 enum pm_query_id {
 	PM_QID_INVALID,
+	PM_QID_CLOCK_GET_NAME,
+	PM_QID_CLOCK_GET_TOPOLOGY,
+	PM_QID_CLOCK_GET_FIXEDFACTOR_PARAMS,
+	PM_QID_CLOCK_GET_PARENTS,
+	PM_QID_CLOCK_GET_ATTRIBUTES,
 };
 
 /**
@@ -69,6 +90,15 @@ struct zynqmp_pm_query_data {
 struct zynqmp_eemi_ops {
 	int (*get_api_version)(u32 *version);
 	int (*query_data)(struct zynqmp_pm_query_data qdata, u32 *out);
+	int (*clock_enable)(u32 clock_id);
+	int (*clock_disable)(u32 clock_id);
+	int (*clock_getstate)(u32 clock_id, u32 *state);
+	int (*clock_setdivider)(u32 clock_id, u32 divider);
+	int (*clock_getdivider)(u32 clock_id, u32 *divider);
+	int (*clock_setrate)(u32 clock_id, u64 rate);
+	int (*clock_getrate)(u32 clock_id, u64 *rate);
+	int (*clock_setparent)(u32 clock_id, u32 parent_id);
+	int (*clock_getparent)(u32 clock_id, u32 *parent_id);
 };
 
 #if IS_REACHABLE(CONFIG_ARCH_ZYNQMP)
-- 
cgit v1.2.3


From 2ba3c43f09c50eb1c0472decdfba71010d8694dc Mon Sep 17 00:00:00 2001
From: Vivek Gautam <vivek.gautam@codeaurora.org>
Date: Tue, 4 Sep 2018 15:47:17 +0530
Subject: phy: qcom-ufs: Remove stale methods that handle ref clk

Remove ufs_qcom_phy_enable/(disable)_dev_ref_clk() that
are not being used by any code.

Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 drivers/phy/qualcomm/phy-qcom-ufs.c | 50 -------------------------------------
 include/linux/phy/phy-qcom-ufs.h    | 14 -----------
 2 files changed, 64 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/phy/qualcomm/phy-qcom-ufs.c b/drivers/phy/qualcomm/phy-qcom-ufs.c
index c5493ea51282..f2979ccad00a 100644
--- a/drivers/phy/qualcomm/phy-qcom-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-ufs.c
@@ -431,56 +431,6 @@ static void ufs_qcom_phy_disable_ref_clk(struct ufs_qcom_phy *phy)
 	}
 }
 
-#define UFS_REF_CLK_EN	(1 << 5)
-
-static void ufs_qcom_phy_dev_ref_clk_ctrl(struct phy *generic_phy, bool enable)
-{
-	struct ufs_qcom_phy *phy = get_ufs_qcom_phy(generic_phy);
-
-	if (phy->dev_ref_clk_ctrl_mmio &&
-	    (enable ^ phy->is_dev_ref_clk_enabled)) {
-		u32 temp = readl_relaxed(phy->dev_ref_clk_ctrl_mmio);
-
-		if (enable)
-			temp |= UFS_REF_CLK_EN;
-		else
-			temp &= ~UFS_REF_CLK_EN;
-
-		/*
-		 * If we are here to disable this clock immediately after
-		 * entering into hibern8, we need to make sure that device
-		 * ref_clk is active atleast 1us after the hibern8 enter.
-		 */
-		if (!enable)
-			udelay(1);
-
-		writel_relaxed(temp, phy->dev_ref_clk_ctrl_mmio);
-		/* ensure that ref_clk is enabled/disabled before we return */
-		wmb();
-		/*
-		 * If we call hibern8 exit after this, we need to make sure that
-		 * device ref_clk is stable for atleast 1us before the hibern8
-		 * exit command.
-		 */
-		if (enable)
-			udelay(1);
-
-		phy->is_dev_ref_clk_enabled = enable;
-	}
-}
-
-void ufs_qcom_phy_enable_dev_ref_clk(struct phy *generic_phy)
-{
-	ufs_qcom_phy_dev_ref_clk_ctrl(generic_phy, true);
-}
-EXPORT_SYMBOL_GPL(ufs_qcom_phy_enable_dev_ref_clk);
-
-void ufs_qcom_phy_disable_dev_ref_clk(struct phy *generic_phy)
-{
-	ufs_qcom_phy_dev_ref_clk_ctrl(generic_phy, false);
-}
-EXPORT_SYMBOL_GPL(ufs_qcom_phy_disable_dev_ref_clk);
-
 /* Turn ON M-PHY RMMI interface clocks */
 static int ufs_qcom_phy_enable_iface_clk(struct ufs_qcom_phy *phy)
 {
diff --git a/include/linux/phy/phy-qcom-ufs.h b/include/linux/phy/phy-qcom-ufs.h
index 0a2c18a9771d..9dd85071bcce 100644
--- a/include/linux/phy/phy-qcom-ufs.h
+++ b/include/linux/phy/phy-qcom-ufs.h
@@ -17,20 +17,6 @@
 
 #include "phy.h"
 
-/**
- * ufs_qcom_phy_enable_dev_ref_clk() - Enable the device
- * ref clock.
- * @phy: reference to a generic phy.
- */
-void ufs_qcom_phy_enable_dev_ref_clk(struct phy *phy);
-
-/**
- * ufs_qcom_phy_disable_dev_ref_clk() - Disable the device
- * ref clock.
- * @phy: reference to a generic phy.
- */
-void ufs_qcom_phy_disable_dev_ref_clk(struct phy *phy);
-
 int ufs_qcom_phy_set_tx_lane_enable(struct phy *phy, u32 tx_lanes);
 void ufs_qcom_phy_save_controller_version(struct phy *phy,
 			u8 major, u16 minor, u16 step);
-- 
cgit v1.2.3


From 1e1e465c6d23aa7d1858eb2894408f15770af16c Mon Sep 17 00:00:00 2001
From: Vivek Gautam <vivek.gautam@codeaurora.org>
Date: Tue, 4 Sep 2018 15:47:18 +0530
Subject: scsi/ufs: qcom: Remove ufs_qcom_phy_*() calls from host

The host makes direct calls into phy using ufs_qcom_phy_*()
APIs. These APIs are only defined for 20nm qcom-ufs-qmp phy
which is not being used by any architecture as yet. Future
architectures too are not going to use 20nm ufs phy.
So remove these ufs_qcom_phy_*() calls from host to let further
change declare the 20nm phy as broken.
Also remove couple of stale enum defines for ufs phy.

Signed-off-by: Vivek Gautam <vivek.gautam@codeaurora.org>
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 drivers/phy/qualcomm/phy-qcom-ufs-i.h |  2 +-
 drivers/scsi/ufs/ufs-qcom.c           | 28 +---------------------------
 drivers/scsi/ufs/ufs-qcom.h           |  5 -----
 include/linux/phy/phy-qcom-ufs.h      | 24 ------------------------
 4 files changed, 2 insertions(+), 57 deletions(-)
 delete mode 100644 include/linux/phy/phy-qcom-ufs.h

(limited to 'include/linux')

diff --git a/drivers/phy/qualcomm/phy-qcom-ufs-i.h b/drivers/phy/qualcomm/phy-qcom-ufs-i.h
index 822c83b8efcd..681644e43248 100644
--- a/drivers/phy/qualcomm/phy-qcom-ufs-i.h
+++ b/drivers/phy/qualcomm/phy-qcom-ufs-i.h
@@ -17,9 +17,9 @@
 
 #include <linux/module.h>
 #include <linux/clk.h>
+#include <linux/phy/phy.h>
 #include <linux/regulator/consumer.h>
 #include <linux/slab.h>
-#include <linux/phy/phy-qcom-ufs.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/delay.h>
diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c
index 75ee5906b966..3dc4501c6945 100644
--- a/drivers/scsi/ufs/ufs-qcom.c
+++ b/drivers/scsi/ufs/ufs-qcom.c
@@ -16,7 +16,6 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/phy/phy.h>
-#include <linux/phy/phy-qcom-ufs.h>
 
 #include "ufshcd.h"
 #include "ufshcd-pltfrm.h"
@@ -189,22 +188,9 @@ out:
 
 static int ufs_qcom_link_startup_post_change(struct ufs_hba *hba)
 {
-	struct ufs_qcom_host *host = ufshcd_get_variant(hba);
-	struct phy *phy = host->generic_phy;
 	u32 tx_lanes;
-	int err = 0;
-
-	err = ufs_qcom_get_connected_tx_lanes(hba, &tx_lanes);
-	if (err)
-		goto out;
 
-	err = ufs_qcom_phy_set_tx_lane_enable(phy, tx_lanes);
-	if (err)
-		dev_err(hba->dev, "%s: ufs_qcom_phy_set_tx_lane_enable failed\n",
-			__func__);
-
-out:
-	return err;
+	return ufs_qcom_get_connected_tx_lanes(hba, &tx_lanes);
 }
 
 static int ufs_qcom_check_hibern8(struct ufs_hba *hba)
@@ -932,10 +918,8 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba,
 {
 	u32 val;
 	struct ufs_qcom_host *host = ufshcd_get_variant(hba);
-	struct phy *phy = host->generic_phy;
 	struct ufs_qcom_dev_params ufs_qcom_cap;
 	int ret = 0;
-	int res = 0;
 
 	if (!dev_req_params) {
 		pr_err("%s: incoming dev_req_params is NULL\n", __func__);
@@ -1002,12 +986,6 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba,
 		}
 
 		val = ~(MAX_U32 << dev_req_params->lane_tx);
-		res = ufs_qcom_phy_set_tx_lane_enable(phy, val);
-		if (res) {
-			dev_err(hba->dev, "%s: ufs_qcom_phy_set_tx_lane_enable() failed res = %d\n",
-				__func__, res);
-			ret = res;
-		}
 
 		/* cache the power mode parameters to use internally */
 		memcpy(&host->dev_req_params,
@@ -1264,10 +1242,6 @@ static int ufs_qcom_init(struct ufs_hba *hba)
 		}
 	}
 
-	/* update phy revision information before calling phy_init() */
-	ufs_qcom_phy_save_controller_version(host->generic_phy,
-		host->hw_ver.major, host->hw_ver.minor, host->hw_ver.step);
-
 	err = ufs_qcom_init_lane_clks(host);
 	if (err)
 		goto out_variant_clear;
diff --git a/drivers/scsi/ufs/ufs-qcom.h b/drivers/scsi/ufs/ufs-qcom.h
index 295f4bef6a0e..c114826316eb 100644
--- a/drivers/scsi/ufs/ufs-qcom.h
+++ b/drivers/scsi/ufs/ufs-qcom.h
@@ -129,11 +129,6 @@ enum {
 	MASK_CLK_NS_REG                     = 0xFFFC00,
 };
 
-enum ufs_qcom_phy_init_type {
-	UFS_PHY_INIT_FULL,
-	UFS_PHY_INIT_CFG_RESTORE,
-};
-
 /* QCOM UFS debug print bit mask */
 #define UFS_QCOM_DBG_PRINT_REGS_EN	BIT(0)
 #define UFS_QCOM_DBG_PRINT_ICE_REGS_EN	BIT(1)
diff --git a/include/linux/phy/phy-qcom-ufs.h b/include/linux/phy/phy-qcom-ufs.h
deleted file mode 100644
index 9dd85071bcce..000000000000
--- a/include/linux/phy/phy-qcom-ufs.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- */
-
-#ifndef PHY_QCOM_UFS_H_
-#define PHY_QCOM_UFS_H_
-
-#include "phy.h"
-
-int ufs_qcom_phy_set_tx_lane_enable(struct phy *phy, u32 tx_lanes);
-void ufs_qcom_phy_save_controller_version(struct phy *phy,
-			u8 major, u16 minor, u16 step);
-
-#endif /* PHY_QCOM_UFS_H_ */
-- 
cgit v1.2.3


From 71e0940d52e107748b270213a01d3b1546657d74 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 21 Sep 2018 09:32:44 -0700
Subject: efi: honour memory reservations passed via a linux specific config
 table

In order to allow the OS to reserve memory persistently across a
kexec, introduce a Linux-specific UEFI configuration table that
points to the head of a linked list in memory, allowing each kernel
to add list items describing memory regions that the next kernel
should treat as reserved.

This is useful, e.g., for GICv3 based ARM systems that cannot disable
DMA access to the LPI tables, forcing them to reuse the same memory
region again after a kexec reboot.

Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 drivers/firmware/efi/efi.c | 27 ++++++++++++++++++++++++++-
 include/linux/efi.h        |  8 ++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 2a29dd9c986d..688132ac8a0a 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -52,7 +52,8 @@ struct efi __read_mostly efi = {
 	.properties_table	= EFI_INVALID_TABLE_ADDR,
 	.mem_attr_table		= EFI_INVALID_TABLE_ADDR,
 	.rng_seed		= EFI_INVALID_TABLE_ADDR,
-	.tpm_log		= EFI_INVALID_TABLE_ADDR
+	.tpm_log		= EFI_INVALID_TABLE_ADDR,
+	.mem_reserve		= EFI_INVALID_TABLE_ADDR,
 };
 EXPORT_SYMBOL(efi);
 
@@ -484,6 +485,7 @@ static __initdata efi_config_table_type_t common_tables[] = {
 	{EFI_MEMORY_ATTRIBUTES_TABLE_GUID, "MEMATTR", &efi.mem_attr_table},
 	{LINUX_EFI_RANDOM_SEED_TABLE_GUID, "RNG", &efi.rng_seed},
 	{LINUX_EFI_TPM_EVENT_LOG_GUID, "TPMEventLog", &efi.tpm_log},
+	{LINUX_EFI_MEMRESERVE_TABLE_GUID, "MEMRESERVE", &efi.mem_reserve},
 	{NULL_GUID, NULL, NULL},
 };
 
@@ -591,6 +593,29 @@ int __init efi_config_parse_tables(void *config_tables, int count, int sz,
 		early_memunmap(tbl, sizeof(*tbl));
 	}
 
+	if (efi.mem_reserve != EFI_INVALID_TABLE_ADDR) {
+		unsigned long prsv = efi.mem_reserve;
+
+		while (prsv) {
+			struct linux_efi_memreserve *rsv;
+
+			/* reserve the entry itself */
+			memblock_reserve(prsv, sizeof(*rsv));
+
+			rsv = early_memremap(prsv, sizeof(*rsv));
+			if (rsv == NULL) {
+				pr_err("Could not map UEFI memreserve entry!\n");
+				return -ENOMEM;
+			}
+
+			if (rsv->size)
+				memblock_reserve(rsv->base, rsv->size);
+
+			prsv = rsv->next;
+			early_memunmap(rsv, sizeof(*rsv));
+		}
+	}
+
 	return 0;
 }
 
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 401e4b254e30..a5cb580472c5 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -672,6 +672,7 @@ void efi_native_runtime_setup(void);
 #define LINUX_EFI_LOADER_ENTRY_GUID		EFI_GUID(0x4a67b082, 0x0a4c, 0x41cf,  0xb6, 0xc7, 0x44, 0x0b, 0x29, 0xbb, 0x8c, 0x4f)
 #define LINUX_EFI_RANDOM_SEED_TABLE_GUID	EFI_GUID(0x1ce1e5bc, 0x7ceb, 0x42f2,  0x81, 0xe5, 0x8a, 0xad, 0xf1, 0x80, 0xf5, 0x7b)
 #define LINUX_EFI_TPM_EVENT_LOG_GUID		EFI_GUID(0xb7799cb0, 0xeca2, 0x4943,  0x96, 0x67, 0x1f, 0xae, 0x07, 0xb7, 0x47, 0xfa)
+#define LINUX_EFI_MEMRESERVE_TABLE_GUID		EFI_GUID(0x888eb0c6, 0x8ede, 0x4ff5,  0xa8, 0xf0, 0x9a, 0xee, 0x5c, 0xb9, 0x77, 0xc2)
 
 typedef struct {
 	efi_guid_t guid;
@@ -957,6 +958,7 @@ extern struct efi {
 	unsigned long mem_attr_table;	/* memory attributes table */
 	unsigned long rng_seed;		/* UEFI firmware random seed */
 	unsigned long tpm_log;		/* TPM2 Event Log table */
+	unsigned long mem_reserve;	/* Linux EFI memreserve table */
 	efi_get_time_t *get_time;
 	efi_set_time_t *set_time;
 	efi_get_wakeup_time_t *get_wakeup_time;
@@ -1662,4 +1664,10 @@ extern int efi_tpm_eventlog_init(void);
 /* Workqueue to queue EFI Runtime Services */
 extern struct workqueue_struct *efi_rts_wq;
 
+struct linux_efi_memreserve {
+	phys_addr_t	next;
+	phys_addr_t	base;
+	phys_addr_t	size;
+};
+
 #endif /* _LINUX_EFI_H */
-- 
cgit v1.2.3


From a23d3bb05ccbd815c79293d2207fedede0b3515d Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Fri, 21 Sep 2018 09:32:46 -0700
Subject: efi: add API to reserve memory persistently across kexec reboot

Add kernel plumbing to reserve memory regions persistently on a EFI
system by adding entries to the MEMRESERVE linked list.

Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 drivers/firmware/efi/efi.c | 32 ++++++++++++++++++++++++++++++++
 include/linux/efi.h        |  1 +
 2 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 688132ac8a0a..249eb70691b0 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -962,6 +962,38 @@ bool efi_is_table_address(unsigned long phys_addr)
 	return false;
 }
 
+static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock);
+
+int efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
+{
+	struct linux_efi_memreserve *rsv, *parent;
+
+	if (efi.mem_reserve == EFI_INVALID_TABLE_ADDR)
+		return -ENODEV;
+
+	rsv = kmalloc(sizeof(*rsv), GFP_KERNEL);
+	if (!rsv)
+		return -ENOMEM;
+
+	parent = memremap(efi.mem_reserve, sizeof(*rsv), MEMREMAP_WB);
+	if (!parent) {
+		kfree(rsv);
+		return -ENOMEM;
+	}
+
+	rsv->base = addr;
+	rsv->size = size;
+
+	spin_lock(&efi_mem_reserve_persistent_lock);
+	rsv->next = parent->next;
+	parent->next = __pa(rsv);
+	spin_unlock(&efi_mem_reserve_persistent_lock);
+
+	memunmap(parent);
+
+	return 0;
+}
+
 #ifdef CONFIG_KEXEC
 static int update_efi_random_seed(struct notifier_block *nb,
 				  unsigned long code, void *unused)
diff --git a/include/linux/efi.h b/include/linux/efi.h
index a5cb580472c5..22e4de9d3700 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1043,6 +1043,7 @@ extern int __init efi_uart_console_only (void);
 extern u64 efi_mem_desc_end(efi_memory_desc_t *md);
 extern int efi_mem_desc_lookup(u64 phys_addr, efi_memory_desc_t *out_md);
 extern void efi_mem_reserve(phys_addr_t addr, u64 size);
+extern int efi_mem_reserve_persistent(phys_addr_t addr, u64 size);
 extern void efi_initialize_iomem_resources(struct resource *code_resource,
 		struct resource *data_resource, struct resource *bss_resource);
 extern void efi_reserve_boot_services(void);
-- 
cgit v1.2.3


From 9dbbedaa6171247c4c7c40b83f05b200a117c2e0 Mon Sep 17 00:00:00 2001
From: Sai Praneeth <sai.praneeth.prakhya@intel.com>
Date: Tue, 11 Sep 2018 12:15:21 -0700
Subject: efi: Make efi_rts_work accessible to efi page fault handler

After the kernel has booted, if any accesses by firmware causes a page
fault, the efi page fault handler would freeze efi_rts_wq and schedules
a new process. To do this, the efi page fault handler needs
efi_rts_work. Hence, make it accessible.

There will be no race conditions in accessing this structure, because
all the calls to efi runtime services are already serialized.

Tested-by: Bhupesh Sharma <bhsharma@redhat.com>
Suggested-by: Matt Fleming <matt@codeblueprint.co.uk>
Based-on-code-from: Ricardo Neri <ricardo.neri@intel.com>
Signed-off-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 drivers/firmware/efi/runtime-wrappers.c | 53 ++++++---------------------------
 include/linux/efi.h                     | 36 ++++++++++++++++++++++
 2 files changed, 45 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index aa66cbf23512..b18b2d864c2c 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -45,39 +45,7 @@
 #define __efi_call_virt(f, args...) \
 	__efi_call_virt_pointer(efi.systab->runtime, f, args)
 
-/* efi_runtime_service() function identifiers */
-enum efi_rts_ids {
-	GET_TIME,
-	SET_TIME,
-	GET_WAKEUP_TIME,
-	SET_WAKEUP_TIME,
-	GET_VARIABLE,
-	GET_NEXT_VARIABLE,
-	SET_VARIABLE,
-	QUERY_VARIABLE_INFO,
-	GET_NEXT_HIGH_MONO_COUNT,
-	UPDATE_CAPSULE,
-	QUERY_CAPSULE_CAPS,
-};
-
-/*
- * efi_runtime_work:	Details of EFI Runtime Service work
- * @arg<1-5>:		EFI Runtime Service function arguments
- * @status:		Status of executing EFI Runtime Service
- * @efi_rts_id:		EFI Runtime Service function identifier
- * @efi_rts_comp:	Struct used for handling completions
- */
-struct efi_runtime_work {
-	void *arg1;
-	void *arg2;
-	void *arg3;
-	void *arg4;
-	void *arg5;
-	efi_status_t status;
-	struct work_struct work;
-	enum efi_rts_ids efi_rts_id;
-	struct completion efi_rts_comp;
-};
+struct efi_runtime_work efi_rts_work;
 
 /*
  * efi_queue_work:	Queue efi_runtime_service() and wait until it's done
@@ -91,7 +59,6 @@ struct efi_runtime_work {
  */
 #define efi_queue_work(_rts, _arg1, _arg2, _arg3, _arg4, _arg5)		\
 ({									\
-	struct efi_runtime_work efi_rts_work;				\
 	efi_rts_work.status = EFI_ABORTED;				\
 									\
 	init_completion(&efi_rts_work.efi_rts_comp);			\
@@ -184,18 +151,16 @@ static DEFINE_SEMAPHORE(efi_runtime_lock);
  */
 static void efi_call_rts(struct work_struct *work)
 {
-	struct efi_runtime_work *efi_rts_work;
 	void *arg1, *arg2, *arg3, *arg4, *arg5;
 	efi_status_t status = EFI_NOT_FOUND;
 
-	efi_rts_work = container_of(work, struct efi_runtime_work, work);
-	arg1 = efi_rts_work->arg1;
-	arg2 = efi_rts_work->arg2;
-	arg3 = efi_rts_work->arg3;
-	arg4 = efi_rts_work->arg4;
-	arg5 = efi_rts_work->arg5;
+	arg1 = efi_rts_work.arg1;
+	arg2 = efi_rts_work.arg2;
+	arg3 = efi_rts_work.arg3;
+	arg4 = efi_rts_work.arg4;
+	arg5 = efi_rts_work.arg5;
 
-	switch (efi_rts_work->efi_rts_id) {
+	switch (efi_rts_work.efi_rts_id) {
 	case GET_TIME:
 		status = efi_call_virt(get_time, (efi_time_t *)arg1,
 				       (efi_time_cap_t *)arg2);
@@ -253,8 +218,8 @@ static void efi_call_rts(struct work_struct *work)
 		 */
 		pr_err("Requested executing invalid EFI Runtime Service.\n");
 	}
-	efi_rts_work->status = status;
-	complete(&efi_rts_work->efi_rts_comp);
+	efi_rts_work.status = status;
+	complete(&efi_rts_work.efi_rts_comp);
 }
 
 static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 22e4de9d3700..a929d2bf41fa 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1662,6 +1662,42 @@ struct linux_efi_tpm_eventlog {
 
 extern int efi_tpm_eventlog_init(void);
 
+/* efi_runtime_service() function identifiers */
+enum efi_rts_ids {
+	GET_TIME,
+	SET_TIME,
+	GET_WAKEUP_TIME,
+	SET_WAKEUP_TIME,
+	GET_VARIABLE,
+	GET_NEXT_VARIABLE,
+	SET_VARIABLE,
+	QUERY_VARIABLE_INFO,
+	GET_NEXT_HIGH_MONO_COUNT,
+	UPDATE_CAPSULE,
+	QUERY_CAPSULE_CAPS,
+};
+
+/*
+ * efi_runtime_work:	Details of EFI Runtime Service work
+ * @arg<1-5>:		EFI Runtime Service function arguments
+ * @status:		Status of executing EFI Runtime Service
+ * @efi_rts_id:		EFI Runtime Service function identifier
+ * @efi_rts_comp:	Struct used for handling completions
+ */
+struct efi_runtime_work {
+	void *arg1;
+	void *arg2;
+	void *arg3;
+	void *arg4;
+	void *arg5;
+	efi_status_t status;
+	struct work_struct work;
+	enum efi_rts_ids efi_rts_id;
+	struct completion efi_rts_comp;
+};
+
+extern struct efi_runtime_work efi_rts_work;
+
 /* Workqueue to queue EFI Runtime Services */
 extern struct workqueue_struct *efi_rts_wq;
 
-- 
cgit v1.2.3


From 3425d934fc0312f62024163736a7afe4de20c10f Mon Sep 17 00:00:00 2001
From: Sai Praneeth <sai.praneeth.prakhya@intel.com>
Date: Tue, 11 Sep 2018 12:15:22 -0700
Subject: efi/x86: Handle page faults occurring while running EFI runtime
 services

Memory accesses performed by UEFI runtime services should be limited to:
- reading/executing from EFI_RUNTIME_SERVICES_CODE memory regions
- reading/writing from/to EFI_RUNTIME_SERVICES_DATA memory regions
- reading/writing by-ref arguments
- reading/writing from/to the stack.

Accesses outside these regions may cause the kernel to hang because the
memory region requested by the firmware isn't mapped in efi_pgd, which
causes a page fault in ring 0 and the kernel fails to handle it, leading
to die(). To save kernel from hanging, add an EFI specific page fault
handler which recovers from such faults by
1. If the efi runtime service is efi_reset_system(), reboot the machine
   through BIOS.
2. If the efi runtime service is _not_ efi_reset_system(), then freeze
   efi_rts_wq and schedule a new process.

The EFI page fault handler offers us two advantages:
1. Avoid potential hangs caused by buggy firmware.
2. Shout loud that the firmware is buggy and hence is not a kernel bug.

Tested-by: Bhupesh Sharma <bhsharma@redhat.com>
Suggested-by: Matt Fleming <matt@codeblueprint.co.uk>
Based-on-code-from: Ricardo Neri <ricardo.neri@intel.com>
Signed-off-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
[ardb: clarify commit log]
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/x86/include/asm/efi.h              |  1 +
 arch/x86/mm/fault.c                     |  9 ++++
 arch/x86/platform/efi/quirks.c          | 78 +++++++++++++++++++++++++++++++++
 drivers/firmware/efi/runtime-wrappers.c |  8 ++++
 include/linux/efi.h                     |  8 +++-
 5 files changed, 103 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index cec5fae23eb3..eea40d52ca78 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -140,6 +140,7 @@ extern void __init efi_apply_memmap_quirks(void);
 extern int __init efi_reuse_config(u64 tables, int nr_tables);
 extern void efi_delete_dummy_variable(void);
 extern void efi_switch_mm(struct mm_struct *mm);
+extern void efi_recover_from_page_fault(unsigned long phys_addr);
 
 struct efi_setup_data {
 	u64 fw_vendor;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 47bebfe6efa7..a5b9ddb0f1fe 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -16,6 +16,7 @@
 #include <linux/prefetch.h>		/* prefetchw			*/
 #include <linux/context_tracking.h>	/* exception_enter(), ...	*/
 #include <linux/uaccess.h>		/* faulthandler_disabled()	*/
+#include <linux/efi.h>			/* efi_recover_from_page_fault()*/
 #include <linux/mm_types.h>
 
 #include <asm/cpufeature.h>		/* boot_cpu_has, ...		*/
@@ -25,6 +26,7 @@
 #include <asm/vsyscall.h>		/* emulate_vsyscall		*/
 #include <asm/vm86.h>			/* struct vm86			*/
 #include <asm/mmu_context.h>		/* vma_pkey()			*/
+#include <asm/efi.h>			/* efi_recover_from_page_fault()*/
 
 #define CREATE_TRACE_POINTS
 #include <asm/trace/exceptions.h>
@@ -788,6 +790,13 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 	if (is_errata93(regs, address))
 		return;
 
+	/*
+	 * Buggy firmware could access regions which might page fault, try to
+	 * recover from such faults.
+	 */
+	if (IS_ENABLED(CONFIG_EFI))
+		efi_recover_from_page_fault(address);
+
 	/*
 	 * Oops. The kernel tried to access some bad page. We'll have to
 	 * terminate things with extreme prejudice:
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 844d31cb8a0c..669babcaf245 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -16,6 +16,7 @@
 #include <asm/efi.h>
 #include <asm/uv/uv.h>
 #include <asm/cpu_device_id.h>
+#include <asm/reboot.h>
 
 #define EFI_MIN_RESERVE 5120
 
@@ -654,3 +655,80 @@ int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
 }
 
 #endif
+
+/*
+ * If any access by any efi runtime service causes a page fault, then,
+ * 1. If it's efi_reset_system(), reboot through BIOS.
+ * 2. If any other efi runtime service, then
+ *    a. Return error status to the efi caller process.
+ *    b. Disable EFI Runtime Services forever and
+ *    c. Freeze efi_rts_wq and schedule new process.
+ *
+ * @return: Returns, if the page fault is not handled. This function
+ * will never return if the page fault is handled successfully.
+ */
+void efi_recover_from_page_fault(unsigned long phys_addr)
+{
+	if (!IS_ENABLED(CONFIG_X86_64))
+		return;
+
+	/*
+	 * Make sure that an efi runtime service caused the page fault.
+	 * "efi_mm" cannot be used to check if the page fault had occurred
+	 * in the firmware context because efi=old_map doesn't use efi_pgd.
+	 */
+	if (efi_rts_work.efi_rts_id == NONE)
+		return;
+
+	/*
+	 * Address range 0x0000 - 0x0fff is always mapped in the efi_pgd, so
+	 * page faulting on these addresses isn't expected.
+	 */
+	if (phys_addr >= 0x0000 && phys_addr <= 0x0fff)
+		return;
+
+	/*
+	 * Print stack trace as it might be useful to know which EFI Runtime
+	 * Service is buggy.
+	 */
+	WARN(1, FW_BUG "Page fault caused by firmware at PA: 0x%lx\n",
+	     phys_addr);
+
+	/*
+	 * Buggy efi_reset_system() is handled differently from other EFI
+	 * Runtime Services as it doesn't use efi_rts_wq. Although,
+	 * native_machine_emergency_restart() says that machine_real_restart()
+	 * could fail, it's better not to compilcate this fault handler
+	 * because this case occurs *very* rarely and hence could be improved
+	 * on a need by basis.
+	 */
+	if (efi_rts_work.efi_rts_id == RESET_SYSTEM) {
+		pr_info("efi_reset_system() buggy! Reboot through BIOS\n");
+		machine_real_restart(MRR_BIOS);
+		return;
+	}
+
+	/*
+	 * Before calling EFI Runtime Service, the kernel has switched the
+	 * calling process to efi_mm. Hence, switch back to task_mm.
+	 */
+	arch_efi_call_virt_teardown();
+
+	/* Signal error status to the efi caller process */
+	efi_rts_work.status = EFI_ABORTED;
+	complete(&efi_rts_work.efi_rts_comp);
+
+	clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
+	pr_info("Froze efi_rts_wq and disabled EFI Runtime Services\n");
+
+	/*
+	 * Call schedule() in an infinite loop, so that any spurious wake ups
+	 * will never run efi_rts_wq again.
+	 */
+	for (;;) {
+		set_current_state(TASK_IDLE);
+		schedule();
+	}
+
+	return;
+}
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
index b18b2d864c2c..a19d845bdb06 100644
--- a/drivers/firmware/efi/runtime-wrappers.c
+++ b/drivers/firmware/efi/runtime-wrappers.c
@@ -61,6 +61,11 @@ struct efi_runtime_work efi_rts_work;
 ({									\
 	efi_rts_work.status = EFI_ABORTED;				\
 									\
+	if (!efi_enabled(EFI_RUNTIME_SERVICES)) {			\
+		pr_warn_once("EFI Runtime Services are disabled!\n");	\
+		goto exit;						\
+	}								\
+									\
 	init_completion(&efi_rts_work.efi_rts_comp);			\
 	INIT_WORK_ONSTACK(&efi_rts_work.work, efi_call_rts);		\
 	efi_rts_work.arg1 = _arg1;					\
@@ -79,6 +84,8 @@ struct efi_runtime_work efi_rts_work;
 	else								\
 		pr_err("Failed to queue work to efi_rts_wq.\n");	\
 									\
+exit:									\
+	efi_rts_work.efi_rts_id = NONE;					\
 	efi_rts_work.status;						\
 })
 
@@ -393,6 +400,7 @@ static void virt_efi_reset_system(int reset_type,
 			"could not get exclusive access to the firmware\n");
 		return;
 	}
+	efi_rts_work.efi_rts_id = RESET_SYSTEM;
 	__efi_call_virt(reset_system, reset_type, status, data_size, data);
 	up(&efi_runtime_lock);
 }
diff --git a/include/linux/efi.h b/include/linux/efi.h
index a929d2bf41fa..845174e113ce 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1662,8 +1662,13 @@ struct linux_efi_tpm_eventlog {
 
 extern int efi_tpm_eventlog_init(void);
 
-/* efi_runtime_service() function identifiers */
+/*
+ * efi_runtime_service() function identifiers.
+ * "NONE" is used by efi_recover_from_page_fault() to check if the page
+ * fault happened while executing an efi runtime service.
+ */
 enum efi_rts_ids {
+	NONE,
 	GET_TIME,
 	SET_TIME,
 	GET_WAKEUP_TIME,
@@ -1673,6 +1678,7 @@ enum efi_rts_ids {
 	SET_VARIABLE,
 	QUERY_VARIABLE_INFO,
 	GET_NEXT_HIGH_MONO_COUNT,
+	RESET_SYSTEM,
 	UPDATE_CAPSULE,
 	QUERY_CAPSULE_CAPS,
 };
-- 
cgit v1.2.3


From dbfe2953f63c640463c630746cd5d9de8b2f63ae Mon Sep 17 00:00:00 2001
From: Jiri Kosina <jkosina@suse.cz>
Date: Tue, 25 Sep 2018 14:38:18 +0200
Subject: x86/speculation: Apply IBPB more strictly to avoid cross-process data
 leak

Currently, IBPB is only issued in cases when switching into a non-dumpable
process, the rationale being to protect such 'important and security
sensitive' processess (such as GPG) from data leaking into a different
userspace process via spectre v2.

This is however completely insufficient to provide proper userspace-to-userpace
spectrev2 protection, as any process can poison branch buffers before being
scheduled out, and the newly scheduled process immediately becomes spectrev2
victim.

In order to minimize the performance impact (for usecases that do require
spectrev2 protection), issue the barrier only in cases when switching between
processess where the victim can't be ptraced by the potential attacker (as in
such cases, the attacker doesn't have to bother with branch buffers at all).

[ tglx: Split up PTRACE_MODE_NOACCESS_CHK into PTRACE_MODE_SCHED and
  PTRACE_MODE_IBPB to be able to do ptrace() context tracking reasonably
  fine-grained ]

Fixes: 18bf3c3ea8 ("x86/speculation: Use Indirect Branch Prediction Barrier in context switch")
Originally-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc:  "WoodhouseDavid" <dwmw@amazon.co.uk>
Cc: Andi Kleen <ak@linux.intel.com>
Cc:  "SchauflerCasey" <casey.schaufler@intel.com>
Link: https://lkml.kernel.org/r/nycvar.YFH.7.76.1809251437340.15880@cbobk.fhfr.pm
---
 arch/x86/mm/tlb.c      | 31 ++++++++++++++++++++-----------
 include/linux/ptrace.h | 21 +++++++++++++++++++--
 kernel/ptrace.c        | 10 ++++++++++
 3 files changed, 49 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index e96b99eb800c..073b8df349a0 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -7,6 +7,7 @@
 #include <linux/export.h>
 #include <linux/cpu.h>
 #include <linux/debugfs.h>
+#include <linux/ptrace.h>
 
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
@@ -180,6 +181,19 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
 	}
 }
 
+static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id)
+{
+	/*
+	 * Check if the current (previous) task has access to the memory
+	 * of the @tsk (next) task. If access is denied, make sure to
+	 * issue a IBPB to stop user->user Spectre-v2 attacks.
+	 *
+	 * Note: __ptrace_may_access() returns 0 or -ERRNO.
+	 */
+	return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id &&
+		ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB));
+}
+
 void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 			struct task_struct *tsk)
 {
@@ -262,18 +276,13 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		 * one process from doing Spectre-v2 attacks on another.
 		 *
 		 * As an optimization, flush indirect branches only when
-		 * switching into processes that disable dumping. This
-		 * protects high value processes like gpg, without having
-		 * too high performance overhead. IBPB is *expensive*!
-		 *
-		 * This will not flush branches when switching into kernel
-		 * threads. It will also not flush if we switch to idle
-		 * thread and back to the same process. It will flush if we
-		 * switch to a different non-dumpable process.
+		 * switching into a processes that can't be ptrace by the
+		 * current one (as in such case, attacker has much more
+		 * convenient way how to tamper with the next process than
+		 * branch buffer poisoning).
 		 */
-		if (tsk && tsk->mm &&
-		    tsk->mm->context.ctx_id != last_ctx_id &&
-		    get_dumpable(tsk->mm) != SUID_DUMP_USER)
+		if (static_cpu_has(X86_FEATURE_USE_IBPB) &&
+				ibpb_needed(tsk, last_ctx_id))
 			indirect_branch_prediction_barrier();
 
 		if (IS_ENABLED(CONFIG_VMAP_STACK)) {
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 4f36431c380b..e5e5ef513df3 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -62,14 +62,17 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
 #define PTRACE_MODE_READ	0x01
 #define PTRACE_MODE_ATTACH	0x02
 #define PTRACE_MODE_NOAUDIT	0x04
-#define PTRACE_MODE_FSCREDS 0x08
-#define PTRACE_MODE_REALCREDS 0x10
+#define PTRACE_MODE_FSCREDS	0x08
+#define PTRACE_MODE_REALCREDS	0x10
+#define PTRACE_MODE_SCHED	0x20
+#define PTRACE_MODE_IBPB	0x40
 
 /* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */
 #define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS)
 #define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS)
 #define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS)
 #define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS)
+#define PTRACE_MODE_SPEC_IBPB (PTRACE_MODE_ATTACH_REALCREDS | PTRACE_MODE_IBPB)
 
 /**
  * ptrace_may_access - check whether the caller is permitted to access
@@ -87,6 +90,20 @@ extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead);
  */
 extern bool ptrace_may_access(struct task_struct *task, unsigned int mode);
 
+/**
+ * ptrace_may_access - check whether the caller is permitted to access
+ * a target task.
+ * @task: target task
+ * @mode: selects type of access and caller credentials
+ *
+ * Returns true on success, false on denial.
+ *
+ * Similar to ptrace_may_access(). Only to be called from context switch
+ * code. Does not call into audit and the regular LSM hooks due to locking
+ * constraints.
+ */
+extern bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode);
+
 static inline int ptrace_reparented(struct task_struct *child)
 {
 	return !same_thread_group(child->real_parent, child->parent);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 21fec73d45d4..99cfddde6a55 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -261,6 +261,9 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state)
 
 static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode)
 {
+	if (mode & PTRACE_MODE_SCHED)
+		return false;
+
 	if (mode & PTRACE_MODE_NOAUDIT)
 		return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE);
 	else
@@ -328,9 +331,16 @@ ok:
 	     !ptrace_has_cap(mm->user_ns, mode)))
 	    return -EPERM;
 
+	if (mode & PTRACE_MODE_SCHED)
+		return 0;
 	return security_ptrace_access_check(task, mode);
 }
 
+bool ptrace_may_access_sched(struct task_struct *task, unsigned int mode)
+{
+	return __ptrace_may_access(task, mode | PTRACE_MODE_SCHED);
+}
+
 bool ptrace_may_access(struct task_struct *task, unsigned int mode)
 {
 	int err;
-- 
cgit v1.2.3


From bca6b067b0b269a7b8ba129e2a918309ca8b4a55 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 26 Sep 2018 14:01:03 -0700
Subject: block: Move power management code into a new source file

Move the code for runtime power management from blk-core.c into the
new source file blk-pm.c. Move the corresponding declarations from
<linux/blkdev.h> into <linux/blk-pm.h>. For CONFIG_PM=n, leave out
the declarations of the functions that are not used in that mode.
This patch not only reduces the number of #ifdefs in the block layer
core code but also reduces the size of header file <linux/blkdev.h>
and hence should help to reduce the build time of the Linux kernel
if CONFIG_PM is not defined.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Jianchao Wang <jianchao.w.wang@oracle.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Kconfig          |   3 +
 block/Makefile         |   1 +
 block/blk-core.c       | 196 +------------------------------------------------
 block/blk-pm.c         | 188 +++++++++++++++++++++++++++++++++++++++++++++++
 block/blk-pm.h         |  43 +++++++++++
 block/elevator.c       |  22 +-----
 drivers/scsi/scsi_pm.c |   1 +
 drivers/scsi/sd.c      |   1 +
 drivers/scsi/sr.c      |   1 +
 include/linux/blk-pm.h |  24 ++++++
 include/linux/blkdev.h |  23 ------
 11 files changed, 264 insertions(+), 239 deletions(-)
 create mode 100644 block/blk-pm.c
 create mode 100644 block/blk-pm.h
 create mode 100644 include/linux/blk-pm.h

(limited to 'include/linux')

diff --git a/block/Kconfig b/block/Kconfig
index 1f2469a0123c..85263e7bded6 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -228,4 +228,7 @@ config BLK_MQ_RDMA
 	depends on BLOCK && INFINIBAND
 	default y
 
+config BLK_PM
+	def_bool BLOCK && PM
+
 source block/Kconfig.iosched
diff --git a/block/Makefile b/block/Makefile
index 572b33f32c07..27eac600474f 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -37,3 +37,4 @@ obj-$(CONFIG_BLK_WBT)		+= blk-wbt.o
 obj-$(CONFIG_BLK_DEBUG_FS)	+= blk-mq-debugfs.o
 obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o
 obj-$(CONFIG_BLK_SED_OPAL)	+= sed-opal.o
+obj-$(CONFIG_BLK_PM)		+= blk-pm.o
diff --git a/block/blk-core.c b/block/blk-core.c
index 4dbc93f43b38..6d4dd176bd9d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -42,6 +42,7 @@
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-sched.h"
+#include "blk-pm.h"
 #include "blk-rq-qos.h"
 
 #ifdef CONFIG_DEBUG_FS
@@ -1726,16 +1727,6 @@ void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part)
 }
 EXPORT_SYMBOL_GPL(part_round_stats);
 
-#ifdef CONFIG_PM
-static void blk_pm_put_request(struct request *rq)
-{
-	if (rq->q->dev && !(rq->rq_flags & RQF_PM) && !--rq->q->nr_pending)
-		pm_runtime_mark_last_busy(rq->q->dev);
-}
-#else
-static inline void blk_pm_put_request(struct request *rq) {}
-#endif
-
 void __blk_put_request(struct request_queue *q, struct request *req)
 {
 	req_flags_t rq_flags = req->rq_flags;
@@ -3757,191 +3748,6 @@ void blk_finish_plug(struct blk_plug *plug)
 }
 EXPORT_SYMBOL(blk_finish_plug);
 
-#ifdef CONFIG_PM
-/**
- * blk_pm_runtime_init - Block layer runtime PM initialization routine
- * @q: the queue of the device
- * @dev: the device the queue belongs to
- *
- * Description:
- *    Initialize runtime-PM-related fields for @q and start auto suspend for
- *    @dev. Drivers that want to take advantage of request-based runtime PM
- *    should call this function after @dev has been initialized, and its
- *    request queue @q has been allocated, and runtime PM for it can not happen
- *    yet(either due to disabled/forbidden or its usage_count > 0). In most
- *    cases, driver should call this function before any I/O has taken place.
- *
- *    This function takes care of setting up using auto suspend for the device,
- *    the autosuspend delay is set to -1 to make runtime suspend impossible
- *    until an updated value is either set by user or by driver. Drivers do
- *    not need to touch other autosuspend settings.
- *
- *    The block layer runtime PM is request based, so only works for drivers
- *    that use request as their IO unit instead of those directly use bio's.
- */
-void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
-{
-	/* Don't enable runtime PM for blk-mq until it is ready */
-	if (q->mq_ops) {
-		pm_runtime_disable(dev);
-		return;
-	}
-
-	q->dev = dev;
-	q->rpm_status = RPM_ACTIVE;
-	pm_runtime_set_autosuspend_delay(q->dev, -1);
-	pm_runtime_use_autosuspend(q->dev);
-}
-EXPORT_SYMBOL(blk_pm_runtime_init);
-
-/**
- * blk_pre_runtime_suspend - Pre runtime suspend check
- * @q: the queue of the device
- *
- * Description:
- *    This function will check if runtime suspend is allowed for the device
- *    by examining if there are any requests pending in the queue. If there
- *    are requests pending, the device can not be runtime suspended; otherwise,
- *    the queue's status will be updated to SUSPENDING and the driver can
- *    proceed to suspend the device.
- *
- *    For the not allowed case, we mark last busy for the device so that
- *    runtime PM core will try to autosuspend it some time later.
- *
- *    This function should be called near the start of the device's
- *    runtime_suspend callback.
- *
- * Return:
- *    0		- OK to runtime suspend the device
- *    -EBUSY	- Device should not be runtime suspended
- */
-int blk_pre_runtime_suspend(struct request_queue *q)
-{
-	int ret = 0;
-
-	if (!q->dev)
-		return ret;
-
-	spin_lock_irq(q->queue_lock);
-	if (q->nr_pending) {
-		ret = -EBUSY;
-		pm_runtime_mark_last_busy(q->dev);
-	} else {
-		q->rpm_status = RPM_SUSPENDING;
-	}
-	spin_unlock_irq(q->queue_lock);
-	return ret;
-}
-EXPORT_SYMBOL(blk_pre_runtime_suspend);
-
-/**
- * blk_post_runtime_suspend - Post runtime suspend processing
- * @q: the queue of the device
- * @err: return value of the device's runtime_suspend function
- *
- * Description:
- *    Update the queue's runtime status according to the return value of the
- *    device's runtime suspend function and mark last busy for the device so
- *    that PM core will try to auto suspend the device at a later time.
- *
- *    This function should be called near the end of the device's
- *    runtime_suspend callback.
- */
-void blk_post_runtime_suspend(struct request_queue *q, int err)
-{
-	if (!q->dev)
-		return;
-
-	spin_lock_irq(q->queue_lock);
-	if (!err) {
-		q->rpm_status = RPM_SUSPENDED;
-	} else {
-		q->rpm_status = RPM_ACTIVE;
-		pm_runtime_mark_last_busy(q->dev);
-	}
-	spin_unlock_irq(q->queue_lock);
-}
-EXPORT_SYMBOL(blk_post_runtime_suspend);
-
-/**
- * blk_pre_runtime_resume - Pre runtime resume processing
- * @q: the queue of the device
- *
- * Description:
- *    Update the queue's runtime status to RESUMING in preparation for the
- *    runtime resume of the device.
- *
- *    This function should be called near the start of the device's
- *    runtime_resume callback.
- */
-void blk_pre_runtime_resume(struct request_queue *q)
-{
-	if (!q->dev)
-		return;
-
-	spin_lock_irq(q->queue_lock);
-	q->rpm_status = RPM_RESUMING;
-	spin_unlock_irq(q->queue_lock);
-}
-EXPORT_SYMBOL(blk_pre_runtime_resume);
-
-/**
- * blk_post_runtime_resume - Post runtime resume processing
- * @q: the queue of the device
- * @err: return value of the device's runtime_resume function
- *
- * Description:
- *    Update the queue's runtime status according to the return value of the
- *    device's runtime_resume function. If it is successfully resumed, process
- *    the requests that are queued into the device's queue when it is resuming
- *    and then mark last busy and initiate autosuspend for it.
- *
- *    This function should be called near the end of the device's
- *    runtime_resume callback.
- */
-void blk_post_runtime_resume(struct request_queue *q, int err)
-{
-	if (!q->dev)
-		return;
-
-	spin_lock_irq(q->queue_lock);
-	if (!err) {
-		q->rpm_status = RPM_ACTIVE;
-		__blk_run_queue(q);
-		pm_runtime_mark_last_busy(q->dev);
-		pm_request_autosuspend(q->dev);
-	} else {
-		q->rpm_status = RPM_SUSPENDED;
-	}
-	spin_unlock_irq(q->queue_lock);
-}
-EXPORT_SYMBOL(blk_post_runtime_resume);
-
-/**
- * blk_set_runtime_active - Force runtime status of the queue to be active
- * @q: the queue of the device
- *
- * If the device is left runtime suspended during system suspend the resume
- * hook typically resumes the device and corrects runtime status
- * accordingly. However, that does not affect the queue runtime PM status
- * which is still "suspended". This prevents processing requests from the
- * queue.
- *
- * This function can be used in driver's resume hook to correct queue
- * runtime PM status and re-enable peeking requests from the queue. It
- * should be called before first request is added to the queue.
- */
-void blk_set_runtime_active(struct request_queue *q)
-{
-	spin_lock_irq(q->queue_lock);
-	q->rpm_status = RPM_ACTIVE;
-	pm_runtime_mark_last_busy(q->dev);
-	pm_request_autosuspend(q->dev);
-	spin_unlock_irq(q->queue_lock);
-}
-EXPORT_SYMBOL(blk_set_runtime_active);
-#endif
-
 int __init blk_dev_init(void)
 {
 	BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
diff --git a/block/blk-pm.c b/block/blk-pm.c
new file mode 100644
index 000000000000..9b636960d285
--- /dev/null
+++ b/block/blk-pm.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/blk-pm.h>
+#include <linux/blkdev.h>
+#include <linux/pm_runtime.h>
+
+/**
+ * blk_pm_runtime_init - Block layer runtime PM initialization routine
+ * @q: the queue of the device
+ * @dev: the device the queue belongs to
+ *
+ * Description:
+ *    Initialize runtime-PM-related fields for @q and start auto suspend for
+ *    @dev. Drivers that want to take advantage of request-based runtime PM
+ *    should call this function after @dev has been initialized, and its
+ *    request queue @q has been allocated, and runtime PM for it can not happen
+ *    yet(either due to disabled/forbidden or its usage_count > 0). In most
+ *    cases, driver should call this function before any I/O has taken place.
+ *
+ *    This function takes care of setting up using auto suspend for the device,
+ *    the autosuspend delay is set to -1 to make runtime suspend impossible
+ *    until an updated value is either set by user or by driver. Drivers do
+ *    not need to touch other autosuspend settings.
+ *
+ *    The block layer runtime PM is request based, so only works for drivers
+ *    that use request as their IO unit instead of those directly use bio's.
+ */
+void blk_pm_runtime_init(struct request_queue *q, struct device *dev)
+{
+	/* Don't enable runtime PM for blk-mq until it is ready */
+	if (q->mq_ops) {
+		pm_runtime_disable(dev);
+		return;
+	}
+
+	q->dev = dev;
+	q->rpm_status = RPM_ACTIVE;
+	pm_runtime_set_autosuspend_delay(q->dev, -1);
+	pm_runtime_use_autosuspend(q->dev);
+}
+EXPORT_SYMBOL(blk_pm_runtime_init);
+
+/**
+ * blk_pre_runtime_suspend - Pre runtime suspend check
+ * @q: the queue of the device
+ *
+ * Description:
+ *    This function will check if runtime suspend is allowed for the device
+ *    by examining if there are any requests pending in the queue. If there
+ *    are requests pending, the device can not be runtime suspended; otherwise,
+ *    the queue's status will be updated to SUSPENDING and the driver can
+ *    proceed to suspend the device.
+ *
+ *    For the not allowed case, we mark last busy for the device so that
+ *    runtime PM core will try to autosuspend it some time later.
+ *
+ *    This function should be called near the start of the device's
+ *    runtime_suspend callback.
+ *
+ * Return:
+ *    0		- OK to runtime suspend the device
+ *    -EBUSY	- Device should not be runtime suspended
+ */
+int blk_pre_runtime_suspend(struct request_queue *q)
+{
+	int ret = 0;
+
+	if (!q->dev)
+		return ret;
+
+	spin_lock_irq(q->queue_lock);
+	if (q->nr_pending) {
+		ret = -EBUSY;
+		pm_runtime_mark_last_busy(q->dev);
+	} else {
+		q->rpm_status = RPM_SUSPENDING;
+	}
+	spin_unlock_irq(q->queue_lock);
+	return ret;
+}
+EXPORT_SYMBOL(blk_pre_runtime_suspend);
+
+/**
+ * blk_post_runtime_suspend - Post runtime suspend processing
+ * @q: the queue of the device
+ * @err: return value of the device's runtime_suspend function
+ *
+ * Description:
+ *    Update the queue's runtime status according to the return value of the
+ *    device's runtime suspend function and mark last busy for the device so
+ *    that PM core will try to auto suspend the device at a later time.
+ *
+ *    This function should be called near the end of the device's
+ *    runtime_suspend callback.
+ */
+void blk_post_runtime_suspend(struct request_queue *q, int err)
+{
+	if (!q->dev)
+		return;
+
+	spin_lock_irq(q->queue_lock);
+	if (!err) {
+		q->rpm_status = RPM_SUSPENDED;
+	} else {
+		q->rpm_status = RPM_ACTIVE;
+		pm_runtime_mark_last_busy(q->dev);
+	}
+	spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL(blk_post_runtime_suspend);
+
+/**
+ * blk_pre_runtime_resume - Pre runtime resume processing
+ * @q: the queue of the device
+ *
+ * Description:
+ *    Update the queue's runtime status to RESUMING in preparation for the
+ *    runtime resume of the device.
+ *
+ *    This function should be called near the start of the device's
+ *    runtime_resume callback.
+ */
+void blk_pre_runtime_resume(struct request_queue *q)
+{
+	if (!q->dev)
+		return;
+
+	spin_lock_irq(q->queue_lock);
+	q->rpm_status = RPM_RESUMING;
+	spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL(blk_pre_runtime_resume);
+
+/**
+ * blk_post_runtime_resume - Post runtime resume processing
+ * @q: the queue of the device
+ * @err: return value of the device's runtime_resume function
+ *
+ * Description:
+ *    Update the queue's runtime status according to the return value of the
+ *    device's runtime_resume function. If it is successfully resumed, process
+ *    the requests that are queued into the device's queue when it is resuming
+ *    and then mark last busy and initiate autosuspend for it.
+ *
+ *    This function should be called near the end of the device's
+ *    runtime_resume callback.
+ */
+void blk_post_runtime_resume(struct request_queue *q, int err)
+{
+	if (!q->dev)
+		return;
+
+	spin_lock_irq(q->queue_lock);
+	if (!err) {
+		q->rpm_status = RPM_ACTIVE;
+		__blk_run_queue(q);
+		pm_runtime_mark_last_busy(q->dev);
+		pm_request_autosuspend(q->dev);
+	} else {
+		q->rpm_status = RPM_SUSPENDED;
+	}
+	spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL(blk_post_runtime_resume);
+
+/**
+ * blk_set_runtime_active - Force runtime status of the queue to be active
+ * @q: the queue of the device
+ *
+ * If the device is left runtime suspended during system suspend the resume
+ * hook typically resumes the device and corrects runtime status
+ * accordingly. However, that does not affect the queue runtime PM status
+ * which is still "suspended". This prevents processing requests from the
+ * queue.
+ *
+ * This function can be used in driver's resume hook to correct queue
+ * runtime PM status and re-enable peeking requests from the queue. It
+ * should be called before first request is added to the queue.
+ */
+void blk_set_runtime_active(struct request_queue *q)
+{
+	spin_lock_irq(q->queue_lock);
+	q->rpm_status = RPM_ACTIVE;
+	pm_runtime_mark_last_busy(q->dev);
+	pm_request_autosuspend(q->dev);
+	spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL(blk_set_runtime_active);
diff --git a/block/blk-pm.h b/block/blk-pm.h
new file mode 100644
index 000000000000..1ffc8ef203ec
--- /dev/null
+++ b/block/blk-pm.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BLOCK_BLK_PM_H_
+#define _BLOCK_BLK_PM_H_
+
+#include <linux/pm_runtime.h>
+
+#ifdef CONFIG_PM
+static inline void blk_pm_requeue_request(struct request *rq)
+{
+	if (rq->q->dev && !(rq->rq_flags & RQF_PM))
+		rq->q->nr_pending--;
+}
+
+static inline void blk_pm_add_request(struct request_queue *q,
+				      struct request *rq)
+{
+	if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 &&
+	    (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
+		pm_request_resume(q->dev);
+}
+
+static inline void blk_pm_put_request(struct request *rq)
+{
+	if (rq->q->dev && !(rq->rq_flags & RQF_PM) && !--rq->q->nr_pending)
+		pm_runtime_mark_last_busy(rq->q->dev);
+}
+#else
+static inline void blk_pm_requeue_request(struct request *rq)
+{
+}
+
+static inline void blk_pm_add_request(struct request_queue *q,
+				      struct request *rq)
+{
+}
+
+static inline void blk_pm_put_request(struct request *rq)
+{
+}
+#endif
+
+#endif /* _BLOCK_BLK_PM_H_ */
diff --git a/block/elevator.c b/block/elevator.c
index 6a06b5d040e5..e18ac68626e3 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -41,6 +41,7 @@
 
 #include "blk.h"
 #include "blk-mq-sched.h"
+#include "blk-pm.h"
 #include "blk-wbt.h"
 
 static DEFINE_SPINLOCK(elv_list_lock);
@@ -557,27 +558,6 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
 		e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio);
 }
 
-#ifdef CONFIG_PM
-static void blk_pm_requeue_request(struct request *rq)
-{
-	if (rq->q->dev && !(rq->rq_flags & RQF_PM))
-		rq->q->nr_pending--;
-}
-
-static void blk_pm_add_request(struct request_queue *q, struct request *rq)
-{
-	if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 &&
-	    (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
-		pm_request_resume(q->dev);
-}
-#else
-static inline void blk_pm_requeue_request(struct request *rq) {}
-static inline void blk_pm_add_request(struct request_queue *q,
-				      struct request *rq)
-{
-}
-#endif
-
 void elv_requeue_request(struct request_queue *q, struct request *rq)
 {
 	/*
diff --git a/drivers/scsi/scsi_pm.c b/drivers/scsi/scsi_pm.c
index b44c1bb687a2..a2b4179bfdf7 100644
--- a/drivers/scsi/scsi_pm.c
+++ b/drivers/scsi/scsi_pm.c
@@ -8,6 +8,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/export.h>
 #include <linux/async.h>
+#include <linux/blk-pm.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_device.h>
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index b79b366a94f7..64514e8359e4 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -45,6 +45,7 @@
 #include <linux/init.h>
 #include <linux/blkdev.h>
 #include <linux/blkpg.h>
+#include <linux/blk-pm.h>
 #include <linux/delay.h>
 #include <linux/mutex.h>
 #include <linux/string_helpers.h>
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index d0389b20574d..4f07b3410595 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -43,6 +43,7 @@
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/blkdev.h>
+#include <linux/blk-pm.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
diff --git a/include/linux/blk-pm.h b/include/linux/blk-pm.h
new file mode 100644
index 000000000000..b80c65aba249
--- /dev/null
+++ b/include/linux/blk-pm.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BLK_PM_H_
+#define _BLK_PM_H_
+
+struct device;
+struct request_queue;
+
+/*
+ * block layer runtime pm functions
+ */
+#ifdef CONFIG_PM
+extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev);
+extern int blk_pre_runtime_suspend(struct request_queue *q);
+extern void blk_post_runtime_suspend(struct request_queue *q, int err);
+extern void blk_pre_runtime_resume(struct request_queue *q);
+extern void blk_post_runtime_resume(struct request_queue *q, int err);
+extern void blk_set_runtime_active(struct request_queue *q);
+#else
+static inline void blk_pm_runtime_init(struct request_queue *q,
+				       struct device *dev) {}
+#endif
+
+#endif /* _BLK_PM_H_ */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1d5e14139795..cd863511dedb 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1280,29 +1280,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
 extern void blk_put_queue(struct request_queue *);
 extern void blk_set_queue_dying(struct request_queue *);
 
-/*
- * block layer runtime pm functions
- */
-#ifdef CONFIG_PM
-extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev);
-extern int blk_pre_runtime_suspend(struct request_queue *q);
-extern void blk_post_runtime_suspend(struct request_queue *q, int err);
-extern void blk_pre_runtime_resume(struct request_queue *q);
-extern void blk_post_runtime_resume(struct request_queue *q, int err);
-extern void blk_set_runtime_active(struct request_queue *q);
-#else
-static inline void blk_pm_runtime_init(struct request_queue *q,
-	struct device *dev) {}
-static inline int blk_pre_runtime_suspend(struct request_queue *q)
-{
-	return -ENOSYS;
-}
-static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {}
-static inline void blk_pre_runtime_resume(struct request_queue *q) {}
-static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
-static inline void blk_set_runtime_active(struct request_queue *q) {}
-#endif
-
 /*
  * blk_plug permits building a queue of related requests by holding the I/O
  * fragments for a short period. This allows merging of sequential requests
-- 
cgit v1.2.3


From cd84a62e0078dce09f4ed349bec84f86c9d54b30 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 26 Sep 2018 14:01:04 -0700
Subject: block, scsi: Change the preempt-only flag into a counter

The RQF_PREEMPT flag is used for three purposes:
- In the SCSI core, for making sure that power management requests
  are executed even if a device is in the "quiesced" state.
- For domain validation by SCSI drivers that use the parallel port.
- In the IDE driver, for IDE preempt requests.
Rename "preempt-only" into "pm-only" because the primary purpose of
this mode is power management. Since the power management core may
but does not have to resume a runtime suspended device before
performing system-wide suspend and since a later patch will set
"pm-only" mode as long as a block device is runtime suspended, make
it possible to set "pm-only" mode from more than one context. Since
with this change scsi_device_quiesce() is no longer idempotent, make
that function return early if it is called for a quiesced queue.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Cc: Jianchao Wang <jianchao.w.wang@oracle.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c        | 35 ++++++++++++++++++-----------------
 block/blk-mq-debugfs.c  | 10 +++++++++-
 drivers/scsi/scsi_lib.c | 11 +++++++----
 include/linux/blkdev.h  | 14 +++++++++-----
 4 files changed, 43 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 6d4dd176bd9d..1a691f5269bb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -422,24 +422,25 @@ void blk_sync_queue(struct request_queue *q)
 EXPORT_SYMBOL(blk_sync_queue);
 
 /**
- * blk_set_preempt_only - set QUEUE_FLAG_PREEMPT_ONLY
+ * blk_set_pm_only - increment pm_only counter
  * @q: request queue pointer
- *
- * Returns the previous value of the PREEMPT_ONLY flag - 0 if the flag was not
- * set and 1 if the flag was already set.
  */
-int blk_set_preempt_only(struct request_queue *q)
+void blk_set_pm_only(struct request_queue *q)
 {
-	return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
+	atomic_inc(&q->pm_only);
 }
-EXPORT_SYMBOL_GPL(blk_set_preempt_only);
+EXPORT_SYMBOL_GPL(blk_set_pm_only);
 
-void blk_clear_preempt_only(struct request_queue *q)
+void blk_clear_pm_only(struct request_queue *q)
 {
-	blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
-	wake_up_all(&q->mq_freeze_wq);
+	int pm_only;
+
+	pm_only = atomic_dec_return(&q->pm_only);
+	WARN_ON_ONCE(pm_only < 0);
+	if (pm_only == 0)
+		wake_up_all(&q->mq_freeze_wq);
 }
-EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
+EXPORT_SYMBOL_GPL(blk_clear_pm_only);
 
 /**
  * __blk_run_queue_uncond - run a queue whether or not it has been stopped
@@ -918,7 +919,7 @@ EXPORT_SYMBOL(blk_alloc_queue);
  */
 int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 {
-	const bool preempt = flags & BLK_MQ_REQ_PREEMPT;
+	const bool pm = flags & BLK_MQ_REQ_PREEMPT;
 
 	while (true) {
 		bool success = false;
@@ -926,11 +927,11 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 		rcu_read_lock();
 		if (percpu_ref_tryget_live(&q->q_usage_counter)) {
 			/*
-			 * The code that sets the PREEMPT_ONLY flag is
-			 * responsible for ensuring that that flag is globally
-			 * visible before the queue is unfrozen.
+			 * The code that increments the pm_only counter is
+			 * responsible for ensuring that that counter is
+			 * globally visible before the queue is unfrozen.
 			 */
-			if (preempt || !blk_queue_preempt_only(q)) {
+			if (pm || !blk_queue_pm_only(q)) {
 				success = true;
 			} else {
 				percpu_ref_put(&q->q_usage_counter);
@@ -955,7 +956,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 
 		wait_event(q->mq_freeze_wq,
 			   (atomic_read(&q->mq_freeze_depth) == 0 &&
-			    (preempt || !blk_queue_preempt_only(q))) ||
+			    (pm || !blk_queue_pm_only(q))) ||
 			   blk_queue_dying(q));
 		if (blk_queue_dying(q))
 			return -ENODEV;
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index cb1e6cf7ac48..a5ea86835fcb 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -102,6 +102,14 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags,
 	return 0;
 }
 
+static int queue_pm_only_show(void *data, struct seq_file *m)
+{
+	struct request_queue *q = data;
+
+	seq_printf(m, "%d\n", atomic_read(&q->pm_only));
+	return 0;
+}
+
 #define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name
 static const char *const blk_queue_flag_name[] = {
 	QUEUE_FLAG_NAME(QUEUED),
@@ -132,7 +140,6 @@ static const char *const blk_queue_flag_name[] = {
 	QUEUE_FLAG_NAME(REGISTERED),
 	QUEUE_FLAG_NAME(SCSI_PASSTHROUGH),
 	QUEUE_FLAG_NAME(QUIESCED),
-	QUEUE_FLAG_NAME(PREEMPT_ONLY),
 };
 #undef QUEUE_FLAG_NAME
 
@@ -209,6 +216,7 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf,
 static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
 	{ "poll_stat", 0400, queue_poll_stat_show },
 	{ "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops },
+	{ "pm_only", 0600, queue_pm_only_show, NULL },
 	{ "state", 0600, queue_state_show, queue_state_write },
 	{ "write_hints", 0600, queue_write_hint_show, queue_write_hint_store },
 	{ "zone_wlock", 0400, queue_zone_wlock_show, NULL },
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index eb97d2dd3651..62348412ed1b 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -3046,11 +3046,14 @@ scsi_device_quiesce(struct scsi_device *sdev)
 	 */
 	WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current);
 
-	blk_set_preempt_only(q);
+	if (sdev->quiesced_by == current)
+		return 0;
+
+	blk_set_pm_only(q);
 
 	blk_mq_freeze_queue(q);
 	/*
-	 * Ensure that the effect of blk_set_preempt_only() will be visible
+	 * Ensure that the effect of blk_set_pm_only() will be visible
 	 * for percpu_ref_tryget() callers that occur after the queue
 	 * unfreeze even if the queue was already frozen before this function
 	 * was called. See also https://lwn.net/Articles/573497/.
@@ -3063,7 +3066,7 @@ scsi_device_quiesce(struct scsi_device *sdev)
 	if (err == 0)
 		sdev->quiesced_by = current;
 	else
-		blk_clear_preempt_only(q);
+		blk_clear_pm_only(q);
 	mutex_unlock(&sdev->state_mutex);
 
 	return err;
@@ -3088,7 +3091,7 @@ void scsi_device_resume(struct scsi_device *sdev)
 	mutex_lock(&sdev->state_mutex);
 	WARN_ON_ONCE(!sdev->quiesced_by);
 	sdev->quiesced_by = NULL;
-	blk_clear_preempt_only(sdev->request_queue);
+	blk_clear_pm_only(sdev->request_queue);
 	if (sdev->sdev_state == SDEV_QUIESCE)
 		scsi_device_set_state(sdev, SDEV_RUNNING);
 	mutex_unlock(&sdev->state_mutex);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index cd863511dedb..13bb54f26736 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -504,6 +504,12 @@ struct request_queue {
 	 * various queue flags, see QUEUE_* below
 	 */
 	unsigned long		queue_flags;
+	/*
+	 * Number of contexts that have called blk_set_pm_only(). If this
+	 * counter is above zero then only RQF_PM and RQF_PREEMPT requests are
+	 * processed.
+	 */
+	atomic_t		pm_only;
 
 	/*
 	 * ida allocated id for this queue.  Used to index queues from
@@ -698,7 +704,6 @@ struct request_queue {
 #define QUEUE_FLAG_REGISTERED  26	/* queue has been registered to a disk */
 #define QUEUE_FLAG_SCSI_PASSTHROUGH 27	/* queue supports SCSI commands */
 #define QUEUE_FLAG_QUIESCED    28	/* queue has been quiesced */
-#define QUEUE_FLAG_PREEMPT_ONLY	29	/* only process REQ_PREEMPT requests */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
@@ -736,12 +741,11 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
 	((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
 			     REQ_FAILFAST_DRIVER))
 #define blk_queue_quiesced(q)	test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
-#define blk_queue_preempt_only(q)				\
-	test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags)
+#define blk_queue_pm_only(q)	atomic_read(&(q)->pm_only)
 #define blk_queue_fua(q)	test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
 
-extern int blk_set_preempt_only(struct request_queue *q);
-extern void blk_clear_preempt_only(struct request_queue *q);
+extern void blk_set_pm_only(struct request_queue *q);
+extern void blk_clear_pm_only(struct request_queue *q);
 
 static inline int queue_in_flight(struct request_queue *q)
 {
-- 
cgit v1.2.3


From 18c9a6bbe0645a05172a900740b9d2d379d54320 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Wed, 26 Sep 2018 14:01:07 -0700
Subject: percpu-refcount: Introduce percpu_ref_resurrect()

This function will be used in a later patch to switch the struct
request_queue q_usage_counter from killed back to live. In contrast
to percpu_ref_reinit(), this new function does not require that the
refcount is zero.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jianchao Wang <jianchao.w.wang@oracle.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/percpu-refcount.h |  1 +
 lib/percpu-refcount.c           | 28 ++++++++++++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 009cdf3d65b6..b297cd1cd4f1 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -108,6 +108,7 @@ void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref);
 void percpu_ref_switch_to_percpu(struct percpu_ref *ref);
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_kill);
+void percpu_ref_resurrect(struct percpu_ref *ref);
 void percpu_ref_reinit(struct percpu_ref *ref);
 
 /**
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 9f96fa7bc000..de10b8c0bff6 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -356,11 +356,35 @@ EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
  */
 void percpu_ref_reinit(struct percpu_ref *ref)
 {
+	WARN_ON_ONCE(!percpu_ref_is_zero(ref));
+
+	percpu_ref_resurrect(ref);
+}
+EXPORT_SYMBOL_GPL(percpu_ref_reinit);
+
+/**
+ * percpu_ref_resurrect - modify a percpu refcount from dead to live
+ * @ref: perpcu_ref to resurrect
+ *
+ * Modify @ref so that it's in the same state as before percpu_ref_kill() was
+ * called. @ref must be dead but must not yet have exited.
+ *
+ * If @ref->release() frees @ref then the caller is responsible for
+ * guaranteeing that @ref->release() does not get called while this
+ * function is in progress.
+ *
+ * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
+ * this function is in progress.
+ */
+void percpu_ref_resurrect(struct percpu_ref *ref)
+{
+	unsigned long __percpu *percpu_count;
 	unsigned long flags;
 
 	spin_lock_irqsave(&percpu_ref_switch_lock, flags);
 
-	WARN_ON_ONCE(!percpu_ref_is_zero(ref));
+	WARN_ON_ONCE(!(ref->percpu_count_ptr & __PERCPU_REF_DEAD));
+	WARN_ON_ONCE(__ref_is_percpu(ref, &percpu_count));
 
 	ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
 	percpu_ref_get(ref);
@@ -368,4 +392,4 @@ void percpu_ref_reinit(struct percpu_ref *ref)
 
 	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
 }
-EXPORT_SYMBOL_GPL(percpu_ref_reinit);
+EXPORT_SYMBOL_GPL(percpu_ref_resurrect);
-- 
cgit v1.2.3


From 6194114324139dc16f3251c67ed853bd6d4ae056 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 24 Sep 2018 21:58:59 +0200
Subject: net: core: add member wol_enabled to struct net_device

Add flag wol_enabled to struct net_device indicating whether
Wake-on-LAN is enabled. As first user phy_suspend() will use it to
decide whether PHY can be suspended or not.

Fixes: f1e911d5d0df ("r8169: add basic phylib support")
Fixes: e8cfd9d6c772 ("net: phy: call state machine synchronously in phy_stop")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 3 +++
 net/core/ethtool.c        | 9 ++++++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ca5ab98053c8..c7861e4b402c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1730,6 +1730,8 @@ enum netdev_priv_flags {
  *			switch driver and used to set the phys state of the
  *			switch port.
  *
+ *	@wol_enabled:	Wake-on-LAN is enabled
+ *
  *	FIXME: cleanup struct net_device such that network protocol info
  *	moves out.
  */
@@ -2014,6 +2016,7 @@ struct net_device {
 	struct lock_class_key	*qdisc_tx_busylock;
 	struct lock_class_key	*qdisc_running_key;
 	bool			proto_down;
+	unsigned		wol_enabled:1;
 };
 #define to_net_dev(d) container_of(d, struct net_device, dev)
 
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 234a0ec2e932..0762aaf8e964 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1483,6 +1483,7 @@ static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
 static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
 {
 	struct ethtool_wolinfo wol;
+	int ret;
 
 	if (!dev->ethtool_ops->set_wol)
 		return -EOPNOTSUPP;
@@ -1490,7 +1491,13 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
 	if (copy_from_user(&wol, useraddr, sizeof(wol)))
 		return -EFAULT;
 
-	return dev->ethtool_ops->set_wol(dev, &wol);
+	ret = dev->ethtool_ops->set_wol(dev, &wol);
+	if (ret)
+		return ret;
+
+	dev->wol_enabled = !!wol.wolopts;
+
+	return 0;
 }
 
 static int ethtool_get_eee(struct net_device *dev, char __user *useraddr)
-- 
cgit v1.2.3


From 6b58859419554fb824e09cfdd73151a195473cbc Mon Sep 17 00:00:00 2001
From: Justin Ernst <justin.ernst@hpe.com>
Date: Tue, 25 Sep 2018 09:34:49 -0500
Subject: EDAC: Raise the maximum number of memory controllers

We observe an oops in the skx_edac module during boot:

  EDAC MC0: Giving out device to module skx_edac controller Skylake Socket#0 IMC#0
  EDAC MC1: Giving out device to module skx_edac controller Skylake Socket#0 IMC#1
  EDAC MC2: Giving out device to module skx_edac controller Skylake Socket#1 IMC#0
  ...
  EDAC MC13: Giving out device to module skx_edac controller Skylake Socket#0 IMC#1
  EDAC MC14: Giving out device to module skx_edac controller Skylake Socket#1 IMC#0
  EDAC MC15: Giving out device to module skx_edac controller Skylake Socket#1 IMC#1
  Too many memory controllers: 16
  EDAC MC: Removed device 0 for skx_edac Skylake Socket#0 IMC#0

We observe there are two memory controllers per socket, with a limit
of 16. Raise the maximum number of memory controllers from 16 to 2 *
MAX_NUMNODES (1024).

[ bp: This is just a band-aid fix until we've sorted out the whole issue
  with the bus_type association and handling in EDAC and can get rid of
  this arbitrary limit. ]

Signed-off-by: Justin Ernst <justin.ernst@hpe.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Russ Anderson <russ.anderson@hpe.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: linux-edac@vger.kernel.org
Link: https://lkml.kernel.org/r/20180925143449.284634-1-justin.ernst@hpe.com
---
 include/linux/edac.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/edac.h b/include/linux/edac.h
index a45ce1f84bfc..1d0c9ea8825d 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -17,6 +17,7 @@
 #include <linux/completion.h>
 #include <linux/workqueue.h>
 #include <linux/debugfs.h>
+#include <linux/numa.h>
 
 #define EDAC_DEVICE_NAME_LEN	31
 
@@ -672,6 +673,6 @@ struct mem_ctl_info {
 /*
  * Maximum number of memory controllers in the coherent fabric.
  */
-#define EDAC_MAX_MCS	16
+#define EDAC_MAX_MCS	2 * MAX_NUMNODES
 
 #endif
-- 
cgit v1.2.3


From 96a71f21ef1fcc32bea07c612a332a89a213f054 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 21 Sep 2018 21:20:30 +0300
Subject: fanotify: store fanotify_init() flags in group's fanotify_data

This averts the need to re-generate flags in fanotify_show_fdinfo()
and sets the scene for addition of more upcoming flags without growing
new members to the fanotify_data struct.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify_user.c |  8 ++++----
 fs/notify/fdinfo.c                 | 24 +-----------------------
 include/linux/fanotify.h           |  4 ++++
 include/linux/fsnotify_backend.h   |  4 ++--
 4 files changed, 11 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 1347c588f778..15719d4aa4b5 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -191,7 +191,7 @@ static int process_access_response(struct fsnotify_group *group,
 	if (fd < 0)
 		return -EINVAL;
 
-	if ((response & FAN_AUDIT) && !group->fanotify_data.audit)
+	if ((response & FAN_AUDIT) && !FAN_GROUP_FLAG(group, FAN_ENABLE_AUDIT))
 		return -EINVAL;
 
 	event = dequeue_event(group, fd);
@@ -701,8 +701,8 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	struct user_struct *user;
 	struct fanotify_event_info *oevent;
 
-	pr_debug("%s: flags=%d event_f_flags=%d\n",
-		__func__, flags, event_f_flags);
+	pr_debug("%s: flags=%x event_f_flags=%x\n",
+		 __func__, flags, event_f_flags);
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -746,6 +746,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	}
 
 	group->fanotify_data.user = user;
+	group->fanotify_data.flags = flags;
 	atomic_inc(&user->fanotify_listeners);
 	group->memcg = get_mem_cgroup_from_mm(current->mm);
 
@@ -798,7 +799,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 		fd = -EPERM;
 		if (!capable(CAP_AUDIT_WRITE))
 			goto out_destroy_group;
-		group->fanotify_data.audit = true;
 	}
 
 	fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 25385e336ac7..348a184bcdda 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -142,31 +142,9 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark)
 void fanotify_show_fdinfo(struct seq_file *m, struct file *f)
 {
 	struct fsnotify_group *group = f->private_data;
-	unsigned int flags = 0;
-
-	switch (group->priority) {
-	case FS_PRIO_0:
-		flags |= FAN_CLASS_NOTIF;
-		break;
-	case FS_PRIO_1:
-		flags |= FAN_CLASS_CONTENT;
-		break;
-	case FS_PRIO_2:
-		flags |= FAN_CLASS_PRE_CONTENT;
-		break;
-	}
-
-	if (group->max_events == UINT_MAX)
-		flags |= FAN_UNLIMITED_QUEUE;
-
-	if (group->fanotify_data.max_marks == UINT_MAX)
-		flags |= FAN_UNLIMITED_MARKS;
-
-	if (group->fanotify_data.audit)
-		flags |= FAN_ENABLE_AUDIT;
 
 	seq_printf(m, "fanotify flags:%x event-flags:%x\n",
-		   flags, group->fanotify_data.f_flags);
+		   group->fanotify_data.flags, group->fanotify_data.f_flags);
 
 	show_fdinfo(m, f, fanotify_fdinfo);
 }
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 096c96f4f16a..9c5ea3bdfaa0 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -6,4 +6,8 @@
 
 /* not valid from userspace, only kernel internal */
 #define FAN_MARK_ONDIR		0x00000100
+
+#define FAN_GROUP_FLAG(group, flag) \
+	((group)->fanotify_data.flags & (flag))
+
 #endif /* _LINUX_FANOTIFY_H */
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 81b88fc9df31..8e91341cbd8a 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -189,10 +189,10 @@ struct fsnotify_group {
 			/* allows a group to block waiting for a userspace response */
 			struct list_head access_list;
 			wait_queue_head_t access_waitq;
-			int f_flags;
+			int flags;           /* flags from fanotify_init() */
+			int f_flags; /* event_f_flags from fanotify_init() */
 			unsigned int max_marks;
 			struct user_struct *user;
-			bool audit;
 		} fanotify_data;
 #endif /* CONFIG_FANOTIFY */
 	};
-- 
cgit v1.2.3


From 9ae033aca8d600e36034d4d0743aad624cec92ed Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 18 Sep 2018 23:51:36 -0700
Subject: jump_label: Abstract jump_entry member accessors

In preparation of allowing architectures to use relative references
in jump_label entries [which can dramatically reduce the memory
footprint], introduce abstractions for references to the 'code' and
'key' members of struct jump_entry.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-s390@vger.kernel.org
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Jessica Yu <jeyu@kernel.org>
Link: https://lkml.kernel.org/r/20180919065144.25010-2-ard.biesheuvel@linaro.org
---
 include/linux/jump_label.h | 34 ++++++++++++++++++++++++++++++++++
 kernel/jump_label.c        | 40 +++++++++++++++-------------------------
 2 files changed, 49 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 1a0b6f17a5d6..2eadff9b3b90 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -119,6 +119,40 @@ struct static_key {
 
 #ifdef HAVE_JUMP_LABEL
 #include <asm/jump_label.h>
+
+#ifndef __ASSEMBLY__
+
+static inline unsigned long jump_entry_code(const struct jump_entry *entry)
+{
+	return entry->code;
+}
+
+static inline unsigned long jump_entry_target(const struct jump_entry *entry)
+{
+	return entry->target;
+}
+
+static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
+{
+	return (struct static_key *)((unsigned long)entry->key & ~1UL);
+}
+
+static inline bool jump_entry_is_branch(const struct jump_entry *entry)
+{
+	return (unsigned long)entry->key & 1UL;
+}
+
+static inline bool jump_entry_is_init(const struct jump_entry *entry)
+{
+	return entry->code == 0;
+}
+
+static inline void jump_entry_set_init(struct jump_entry *entry)
+{
+	entry->code = 0;
+}
+
+#endif
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 2e62503bea0d..834e43de0daf 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -38,10 +38,10 @@ static int jump_label_cmp(const void *a, const void *b)
 	const struct jump_entry *jea = a;
 	const struct jump_entry *jeb = b;
 
-	if (jea->key < jeb->key)
+	if (jump_entry_key(jea) < jump_entry_key(jeb))
 		return -1;
 
-	if (jea->key > jeb->key)
+	if (jump_entry_key(jea) > jump_entry_key(jeb))
 		return 1;
 
 	return 0;
@@ -261,8 +261,8 @@ EXPORT_SYMBOL_GPL(jump_label_rate_limit);
 
 static int addr_conflict(struct jump_entry *entry, void *start, void *end)
 {
-	if (entry->code <= (unsigned long)end &&
-		entry->code + JUMP_LABEL_NOP_SIZE > (unsigned long)start)
+	if (jump_entry_code(entry) <= (unsigned long)end &&
+	    jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE > (unsigned long)start)
 		return 1;
 
 	return 0;
@@ -321,16 +321,6 @@ static inline void static_key_set_linked(struct static_key *key)
 	key->type |= JUMP_TYPE_LINKED;
 }
 
-static inline struct static_key *jump_entry_key(struct jump_entry *entry)
-{
-	return (struct static_key *)((unsigned long)entry->key & ~1UL);
-}
-
-static bool jump_entry_branch(struct jump_entry *entry)
-{
-	return (unsigned long)entry->key & 1UL;
-}
-
 /***
  * A 'struct static_key' uses a union such that it either points directly
  * to a table of 'struct jump_entry' or to a linked list of modules which in
@@ -355,7 +345,7 @@ static enum jump_label_type jump_label_type(struct jump_entry *entry)
 {
 	struct static_key *key = jump_entry_key(entry);
 	bool enabled = static_key_enabled(key);
-	bool branch = jump_entry_branch(entry);
+	bool branch = jump_entry_is_branch(entry);
 
 	/* See the comment in linux/jump_label.h */
 	return enabled ^ branch;
@@ -370,12 +360,12 @@ static void __jump_label_update(struct static_key *key,
 		 * An entry->code of 0 indicates an entry which has been
 		 * disabled because it was in an init text area.
 		 */
-		if (entry->code) {
-			if (kernel_text_address(entry->code))
+		if (!jump_entry_is_init(entry)) {
+			if (kernel_text_address(jump_entry_code(entry)))
 				arch_jump_label_transform(entry, jump_label_type(entry));
 			else
 				WARN_ONCE(1, "can't patch jump_label at %pS",
-					  (void *)(unsigned long)entry->code);
+					  (void *)jump_entry_code(entry));
 		}
 	}
 }
@@ -430,8 +420,8 @@ void __init jump_label_invalidate_initmem(void)
 	struct jump_entry *iter;
 
 	for (iter = iter_start; iter < iter_stop; iter++) {
-		if (init_section_contains((void *)(unsigned long)iter->code, 1))
-			iter->code = 0;
+		if (init_section_contains((void *)jump_entry_code(iter), 1))
+			jump_entry_set_init(iter);
 	}
 }
 
@@ -441,7 +431,7 @@ static enum jump_label_type jump_label_init_type(struct jump_entry *entry)
 {
 	struct static_key *key = jump_entry_key(entry);
 	bool type = static_key_type(key);
-	bool branch = jump_entry_branch(entry);
+	bool branch = jump_entry_is_branch(entry);
 
 	/* See the comment in linux/jump_label.h */
 	return type ^ branch;
@@ -565,7 +555,7 @@ static int jump_label_add_module(struct module *mod)
 			continue;
 
 		key = iterk;
-		if (within_module(iter->key, mod)) {
+		if (within_module((unsigned long)key, mod)) {
 			static_key_set_entries(key, iter);
 			continue;
 		}
@@ -615,7 +605,7 @@ static void jump_label_del_module(struct module *mod)
 
 		key = jump_entry_key(iter);
 
-		if (within_module(iter->key, mod))
+		if (within_module((unsigned long)key, mod))
 			continue;
 
 		/* No memory during module load */
@@ -659,8 +649,8 @@ static void jump_label_invalidate_module_init(struct module *mod)
 	struct jump_entry *iter;
 
 	for (iter = iter_start; iter < iter_stop; iter++) {
-		if (within_module_init(iter->code, mod))
-			iter->code = 0;
+		if (within_module_init(jump_entry_code(iter), mod))
+			jump_entry_set_init(iter);
 	}
 }
 
-- 
cgit v1.2.3


From 50ff18ab497aa22f6a59444625df7508c8918237 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 18 Sep 2018 23:51:37 -0700
Subject: jump_label: Implement generic support for relative references

To reduce the size taken up by absolute references in jump label
entries themselves and the associated relocation records in the
.init segment, add support for emitting them as relative references
instead.

Note that this requires some extra care in the sorting routine, given
that the offsets change when entries are moved around in the jump_entry
table.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-s390@vger.kernel.org
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Jessica Yu <jeyu@kernel.org>
Link: https://lkml.kernel.org/r/20180919065144.25010-3-ard.biesheuvel@linaro.org
---
 arch/Kconfig               |  3 +++
 include/linux/jump_label.h | 28 ++++++++++++++++++++++++++++
 kernel/jump_label.c        | 22 +++++++++++++++++++++-
 3 files changed, 52 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index 6801123932a5..9d329608913e 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -359,6 +359,9 @@ config HAVE_PERF_USER_STACK_DUMP
 config HAVE_ARCH_JUMP_LABEL
 	bool
 
+config HAVE_ARCH_JUMP_LABEL_RELATIVE
+	bool
+
 config HAVE_RCU_TABLE_FREE
 	bool
 
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 2eadff9b3b90..2768a925bafa 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -121,6 +121,32 @@ struct static_key {
 #include <asm/jump_label.h>
 
 #ifndef __ASSEMBLY__
+#ifdef CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE
+
+struct jump_entry {
+	s32 code;
+	s32 target;
+	long key;	// key may be far away from the core kernel under KASLR
+};
+
+static inline unsigned long jump_entry_code(const struct jump_entry *entry)
+{
+	return (unsigned long)&entry->code + entry->code;
+}
+
+static inline unsigned long jump_entry_target(const struct jump_entry *entry)
+{
+	return (unsigned long)&entry->target + entry->target;
+}
+
+static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
+{
+	long offset = entry->key & ~1L;
+
+	return (struct static_key *)((unsigned long)&entry->key + offset);
+}
+
+#else
 
 static inline unsigned long jump_entry_code(const struct jump_entry *entry)
 {
@@ -137,6 +163,8 @@ static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
 	return (struct static_key *)((unsigned long)entry->key & ~1UL);
 }
 
+#endif
+
 static inline bool jump_entry_is_branch(const struct jump_entry *entry)
 {
 	return (unsigned long)entry->key & 1UL;
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 834e43de0daf..898a1d0c38dc 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -47,14 +47,34 @@ static int jump_label_cmp(const void *a, const void *b)
 	return 0;
 }
 
+static void jump_label_swap(void *a, void *b, int size)
+{
+	long delta = (unsigned long)a - (unsigned long)b;
+	struct jump_entry *jea = a;
+	struct jump_entry *jeb = b;
+	struct jump_entry tmp = *jea;
+
+	jea->code	= jeb->code - delta;
+	jea->target	= jeb->target - delta;
+	jea->key	= jeb->key - delta;
+
+	jeb->code	= tmp.code + delta;
+	jeb->target	= tmp.target + delta;
+	jeb->key	= tmp.key + delta;
+}
+
 static void
 jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop)
 {
 	unsigned long size;
+	void *swapfn = NULL;
+
+	if (IS_ENABLED(CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE))
+		swapfn = jump_label_swap;
 
 	size = (((unsigned long)stop - (unsigned long)start)
 					/ sizeof(struct jump_entry));
-	sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL);
+	sort(start, size, sizeof(struct jump_entry), jump_label_cmp, swapfn);
 }
 
 static void jump_label_update(struct static_key *key);
-- 
cgit v1.2.3


From 19483677684b6ca01606f58503cb79cdfbbc7c72 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Tue, 18 Sep 2018 23:51:42 -0700
Subject: jump_label: Annotate entries that operate on __init code earlier

Jump table entries are mostly read-only, with the exception of the
init and module loader code that defuses entries that point into init
code when the code being referred to is freed.

For robustness, it would be better to move these entries into the
ro_after_init section, but clearing the 'code' member of each jump
table entry referring to init code at module load time races with the
module_enable_ro() call that remaps the ro_after_init section read
only, so we'd like to do it earlier.

So given that whether such an entry refers to init code can be decided
much earlier, we can pull this check forward. Since we may still need
the code entry at this point, let's switch to setting a low bit in the
'key' member just like we do to annotate the default state of a jump
table entry.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-s390@vger.kernel.org
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Jessica Yu <jeyu@kernel.org>
Link: https://lkml.kernel.org/r/20180919065144.25010-8-ard.biesheuvel@linaro.org
---
 include/linux/jump_label.h | 11 ++++-------
 init/main.c                |  1 -
 kernel/jump_label.c        | 48 ++++++++++++++--------------------------------
 3 files changed, 18 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 2768a925bafa..5df6a621e464 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -141,7 +141,7 @@ static inline unsigned long jump_entry_target(const struct jump_entry *entry)
 
 static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
 {
-	long offset = entry->key & ~1L;
+	long offset = entry->key & ~3L;
 
 	return (struct static_key *)((unsigned long)&entry->key + offset);
 }
@@ -160,7 +160,7 @@ static inline unsigned long jump_entry_target(const struct jump_entry *entry)
 
 static inline struct static_key *jump_entry_key(const struct jump_entry *entry)
 {
-	return (struct static_key *)((unsigned long)entry->key & ~1UL);
+	return (struct static_key *)((unsigned long)entry->key & ~3UL);
 }
 
 #endif
@@ -172,12 +172,12 @@ static inline bool jump_entry_is_branch(const struct jump_entry *entry)
 
 static inline bool jump_entry_is_init(const struct jump_entry *entry)
 {
-	return entry->code == 0;
+	return (unsigned long)entry->key & 2UL;
 }
 
 static inline void jump_entry_set_init(struct jump_entry *entry)
 {
-	entry->code = 0;
+	entry->key |= 2;
 }
 
 #endif
@@ -213,7 +213,6 @@ extern struct jump_entry __start___jump_table[];
 extern struct jump_entry __stop___jump_table[];
 
 extern void jump_label_init(void);
-extern void jump_label_invalidate_initmem(void);
 extern void jump_label_lock(void);
 extern void jump_label_unlock(void);
 extern void arch_jump_label_transform(struct jump_entry *entry,
@@ -261,8 +260,6 @@ static __always_inline void jump_label_init(void)
 	static_key_initialized = true;
 }
 
-static inline void jump_label_invalidate_initmem(void) {}
-
 static __always_inline bool static_key_false(struct static_key *key)
 {
 	if (unlikely(static_key_count(key) > 0))
diff --git a/init/main.c b/init/main.c
index 18f8f0140fa0..a664246450d1 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1064,7 +1064,6 @@ static int __ref kernel_init(void *unused)
 	/* need to finish all async __init code before freeing the memory */
 	async_synchronize_full();
 	ftrace_free_init_mem();
-	jump_label_invalidate_initmem();
 	free_initmem();
 	mark_readonly();
 
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index 898a1d0c38dc..e8cf3ff3149c 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -373,14 +373,15 @@ static enum jump_label_type jump_label_type(struct jump_entry *entry)
 
 static void __jump_label_update(struct static_key *key,
 				struct jump_entry *entry,
-				struct jump_entry *stop)
+				struct jump_entry *stop,
+				bool init)
 {
 	for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) {
 		/*
 		 * An entry->code of 0 indicates an entry which has been
 		 * disabled because it was in an init text area.
 		 */
-		if (!jump_entry_is_init(entry)) {
+		if (init || !jump_entry_is_init(entry)) {
 			if (kernel_text_address(jump_entry_code(entry)))
 				arch_jump_label_transform(entry, jump_label_type(entry));
 			else
@@ -420,6 +421,9 @@ void __init jump_label_init(void)
 		if (jump_label_type(iter) == JUMP_LABEL_NOP)
 			arch_jump_label_transform_static(iter, JUMP_LABEL_NOP);
 
+		if (init_section_contains((void *)jump_entry_code(iter), 1))
+			jump_entry_set_init(iter);
+
 		iterk = jump_entry_key(iter);
 		if (iterk == key)
 			continue;
@@ -432,19 +436,6 @@ void __init jump_label_init(void)
 	cpus_read_unlock();
 }
 
-/* Disable any jump label entries in __init/__exit code */
-void __init jump_label_invalidate_initmem(void)
-{
-	struct jump_entry *iter_start = __start___jump_table;
-	struct jump_entry *iter_stop = __stop___jump_table;
-	struct jump_entry *iter;
-
-	for (iter = iter_start; iter < iter_stop; iter++) {
-		if (init_section_contains((void *)jump_entry_code(iter), 1))
-			jump_entry_set_init(iter);
-	}
-}
-
 #ifdef CONFIG_MODULES
 
 static enum jump_label_type jump_label_init_type(struct jump_entry *entry)
@@ -524,7 +515,8 @@ static void __jump_label_mod_update(struct static_key *key)
 			stop = __stop___jump_table;
 		else
 			stop = m->jump_entries + m->num_jump_entries;
-		__jump_label_update(key, mod->entries, stop);
+		__jump_label_update(key, mod->entries, stop,
+				    m->state == MODULE_STATE_COMING);
 	}
 }
 
@@ -570,6 +562,9 @@ static int jump_label_add_module(struct module *mod)
 	for (iter = iter_start; iter < iter_stop; iter++) {
 		struct static_key *iterk;
 
+		if (within_module_init(jump_entry_code(iter), mod))
+			jump_entry_set_init(iter);
+
 		iterk = jump_entry_key(iter);
 		if (iterk == key)
 			continue;
@@ -605,7 +600,7 @@ static int jump_label_add_module(struct module *mod)
 
 		/* Only update if we've changed from our initial state */
 		if (jump_label_type(iter) != jump_label_init_type(iter))
-			__jump_label_update(key, iter, iter_stop);
+			__jump_label_update(key, iter, iter_stop, true);
 	}
 
 	return 0;
@@ -661,19 +656,6 @@ static void jump_label_del_module(struct module *mod)
 	}
 }
 
-/* Disable any jump label entries in module init code */
-static void jump_label_invalidate_module_init(struct module *mod)
-{
-	struct jump_entry *iter_start = mod->jump_entries;
-	struct jump_entry *iter_stop = iter_start + mod->num_jump_entries;
-	struct jump_entry *iter;
-
-	for (iter = iter_start; iter < iter_stop; iter++) {
-		if (within_module_init(jump_entry_code(iter), mod))
-			jump_entry_set_init(iter);
-	}
-}
-
 static int
 jump_label_module_notify(struct notifier_block *self, unsigned long val,
 			 void *data)
@@ -695,9 +677,6 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val,
 	case MODULE_STATE_GOING:
 		jump_label_del_module(mod);
 		break;
-	case MODULE_STATE_LIVE:
-		jump_label_invalidate_module_init(mod);
-		break;
 	}
 
 	jump_label_unlock();
@@ -767,7 +746,8 @@ static void jump_label_update(struct static_key *key)
 	entry = static_key_entries(key);
 	/* if there are no users, entry can be NULL */
 	if (entry)
-		__jump_label_update(key, entry, stop);
+		__jump_label_update(key, entry, stop,
+				    system_state < SYSTEM_RUNNING);
 }
 
 #ifdef CONFIG_STATIC_KEYS_SELFTEST
-- 
cgit v1.2.3


From c6babb5806b77c6ca7078c3487bb0a29704a4e38 Mon Sep 17 00:00:00 2001
From: Pu Wen <puwen@hygon.cn>
Date: Tue, 25 Sep 2018 22:46:11 +0800
Subject: x86/pci, x86/amd_nb: Add Hygon Dhyana support to PCI and northbridge

Hygon's PCI vendor ID is 0x1d94, and there are PCI devices
0x1450/0x1463/0x1464 for the host bridge on the Hygon Dhyana platform.
Add Hygon Dhyana support to the PCI and northbridge subsystems by using
the code path of AMD family 17h.

 [ bp: Massage commit message, sort local vars into reverse xmas tree
   order and move the amd_northbridges.num check up. ]

Signed-off-by: Pu Wen <puwen@hygon.cn>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>	# pci_ids.h
Cc: tglx@linutronix.de
Cc: mingo@redhat.com
Cc: hpa@zytor.com
Cc: x86@kernel.org
Cc: thomas.lendacky@amd.com
Cc: helgaas@kernel.org
Cc: linux-pci@vger.kernel.org
Link: https://lkml.kernel.org/r/5f8877bd413f2ea0833378dd5454df0720e1c0df.1537885177.git.puwen@hygon.cn
---
 arch/x86/kernel/amd_nb.c | 45 +++++++++++++++++++++++++++++++++++++--------
 arch/x86/pci/amd_bus.c   |  6 ++++--
 include/linux/pci_ids.h  |  2 ++
 3 files changed, 43 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index b51c6b183a35..a6eca647bc76 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -61,6 +61,21 @@ static const struct pci_device_id amd_nb_link_ids[] = {
 	{}
 };
 
+static const struct pci_device_id hygon_root_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_HYGON, PCI_DEVICE_ID_AMD_17H_ROOT) },
+	{}
+};
+
+const struct pci_device_id hygon_nb_misc_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_HYGON, PCI_DEVICE_ID_AMD_17H_DF_F3) },
+	{}
+};
+
+static const struct pci_device_id hygon_nb_link_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_HYGON, PCI_DEVICE_ID_AMD_17H_DF_F4) },
+	{}
+};
+
 const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = {
 	{ 0x00, 0x18, 0x20 },
 	{ 0xff, 0x00, 0x20 },
@@ -194,15 +209,24 @@ EXPORT_SYMBOL_GPL(amd_df_indirect_read);
 
 int amd_cache_northbridges(void)
 {
-	u16 i = 0;
-	struct amd_northbridge *nb;
+	const struct pci_device_id *misc_ids = amd_nb_misc_ids;
+	const struct pci_device_id *link_ids = amd_nb_link_ids;
+	const struct pci_device_id *root_ids = amd_root_ids;
 	struct pci_dev *root, *misc, *link;
+	struct amd_northbridge *nb;
+	u16 i = 0;
 
 	if (amd_northbridges.num)
 		return 0;
 
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
+		root_ids = hygon_root_ids;
+		misc_ids = hygon_nb_misc_ids;
+		link_ids = hygon_nb_link_ids;
+	}
+
 	misc = NULL;
-	while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
+	while ((misc = next_northbridge(misc, misc_ids)) != NULL)
 		i++;
 
 	if (!i)
@@ -218,11 +242,11 @@ int amd_cache_northbridges(void)
 	link = misc = root = NULL;
 	for (i = 0; i != amd_northbridges.num; i++) {
 		node_to_amd_nb(i)->root = root =
-			next_northbridge(root, amd_root_ids);
+			next_northbridge(root, root_ids);
 		node_to_amd_nb(i)->misc = misc =
-			next_northbridge(misc, amd_nb_misc_ids);
+			next_northbridge(misc, misc_ids);
 		node_to_amd_nb(i)->link = link =
-			next_northbridge(link, amd_nb_link_ids);
+			next_northbridge(link, link_ids);
 	}
 
 	if (amd_gart_present())
@@ -261,6 +285,7 @@ EXPORT_SYMBOL_GPL(amd_cache_northbridges);
  */
 bool __init early_is_amd_nb(u32 device)
 {
+	const struct pci_device_id *misc_ids = amd_nb_misc_ids;
 	const struct pci_device_id *id;
 	u32 vendor = device & 0xffff;
 
@@ -268,8 +293,11 @@ bool __init early_is_amd_nb(u32 device)
 	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
 		return false;
 
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
+		misc_ids = hygon_nb_misc_ids;
+
 	device >>= 16;
-	for (id = amd_nb_misc_ids; id->vendor; id++)
+	for (id = misc_ids; id->vendor; id++)
 		if (vendor == id->vendor && device == id->device)
 			return true;
 	return false;
@@ -281,7 +309,8 @@ struct resource *amd_get_mmconfig_range(struct resource *res)
 	u64 base, msr;
 	unsigned int segn_busn_bits;
 
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
 		return NULL;
 
 	/* assume all cpus from fam10h have mmconfig */
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 649bdde63e32..bfa50e65ef6c 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -93,7 +93,8 @@ static int __init early_root_info_init(void)
 		vendor = id & 0xffff;
 		device = (id>>16) & 0xffff;
 
-		if (vendor != PCI_VENDOR_ID_AMD)
+		if (vendor != PCI_VENDOR_ID_AMD &&
+		    vendor != PCI_VENDOR_ID_HYGON)
 			continue;
 
 		if (hb_probes[i].device == device) {
@@ -390,7 +391,8 @@ static int __init pci_io_ecs_init(void)
 
 static int __init amd_postcore_init(void)
 {
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
 		return 0;
 
 	early_root_info_init();
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index d157983b84cf..8a0841c73f81 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -2561,6 +2561,8 @@
 
 #define PCI_VENDOR_ID_AMAZON		0x1d0f
 
+#define PCI_VENDOR_ID_HYGON		0x1d94
+
 #define PCI_VENDOR_ID_TEKRAM		0x1de1
 #define PCI_DEVICE_ID_TEKRAM_DC290	0xdc29
 
-- 
cgit v1.2.3


From b445bfcb9081ae90fec90b828f3aacc565776901 Mon Sep 17 00:00:00 2001
From: Marco Felsch <m.felsch@pengutronix.de>
Date: Tue, 25 Sep 2018 11:42:28 +0200
Subject: spi: switch to SPDX license identifier

Use the appropriate SPDX license identifier and drop the previous
license text.

Signed-off-by: Marco Felsch <m.felsch@pengutronix.de>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 11 +----------
 include/linux/spi/spi.h | 13 ++-----------
 2 files changed, 3 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index a358acdd98d3..2cfc3df821f6 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * SPI init/core code
  *
  * Copyright (C) 2005 David Brownell
  * Copyright (C) 2008 Secret Lab Technologies Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/kernel.h>
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 7bb36145e2ba..f08824ea1968 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -1,15 +1,6 @@
-/*
- * Copyright (C) 2005 David Brownell
+/* SPDX-License-Identifier: GPL-2.0-or-later
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * Copyright (C) 2005 David Brownell
  */
 
 #ifndef __LINUX_SPI_H
-- 
cgit v1.2.3


From 5f143af7501e7c435c56e181a655493edaa92509 Mon Sep 17 00:00:00 2001
From: Marco Felsch <m.felsch@pengutronix.de>
Date: Tue, 25 Sep 2018 11:42:29 +0200
Subject: spi: make OF helper available for others

The of_find_spi_device_by_node() helper function is useful for other
modules too. Export the funciton as GPL like all other spi helper
functions and make it available if CONFIG_OF is enabled, because it isn't
related to the CONFIG_OF_DYNAMIC context. Finally add a stub if
CONFIG_OF isn't enabled, so others must not care about it.

Signed-off-by: Marco Felsch <m.felsch@pengutronix.de>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       |  7 +++++--
 include/linux/spi/spi.h | 17 ++++++++++++++++-
 2 files changed, 21 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index ec395a6baf9c..f939f585e917 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -3314,20 +3314,23 @@ EXPORT_SYMBOL_GPL(spi_write_then_read);
 
 /*-------------------------------------------------------------------------*/
 
-#if IS_ENABLED(CONFIG_OF_DYNAMIC)
+#if IS_ENABLED(CONFIG_OF)
 static int __spi_of_device_match(struct device *dev, void *data)
 {
 	return dev->of_node == data;
 }
 
 /* must call put_device() when done with returned spi_device device */
-static struct spi_device *of_find_spi_device_by_node(struct device_node *node)
+struct spi_device *of_find_spi_device_by_node(struct device_node *node)
 {
 	struct device *dev = bus_find_device(&spi_bus_type, NULL, node,
 						__spi_of_device_match);
 	return dev ? to_spi_device(dev) : NULL;
 }
+EXPORT_SYMBOL_GPL(of_find_spi_device_by_node);
+#endif /* IS_ENABLED(CONFIG_OF) */
 
+#if IS_ENABLED(CONFIG_OF_DYNAMIC)
 static int __spi_of_controller_match(struct device *dev, const void *data)
 {
 	return dev->of_node == data;
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index a64235e05321..e9ebfcd2e932 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -1277,7 +1277,6 @@ spi_register_board_info(struct spi_board_info const *info, unsigned n)
 	{ return 0; }
 #endif
 
-
 /* If you're hotplugging an adapter with devices (parport, usb, etc)
  * use spi_new_device() to describe each device.  You can also call
  * spi_unregister_device() to start making that device vanish, but
@@ -1309,6 +1308,22 @@ spi_transfer_is_last(struct spi_controller *ctlr, struct spi_transfer *xfer)
 	return list_is_last(&xfer->transfer_list, &ctlr->cur_msg->transfers);
 }
 
+/* OF support code */
+#if IS_ENABLED(CONFIG_OF)
+
+/* must call put_device() when done with returned spi_device device */
+extern struct spi_device *
+of_find_spi_device_by_node(struct device_node *node);
+
+#else
+
+static inline struct spi_device *
+of_find_spi_device_by_node(struct device_node *node)
+{
+	return NULL;
+}
+
+#endif /* IS_ENABLED(CONFIG_OF) */
 
 /* Compatibility layer */
 #define spi_master			spi_controller
-- 
cgit v1.2.3


From ed88660a5372faa67c168c3db5201e33e488c9fd Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Thu, 27 Sep 2018 15:55:51 -0700
Subject: block: move call of scheduler's ->completed_request() hook

Commit 4bc6339a583c ("block: move blk_stat_add() to
__blk_mq_end_request()") consolidated some calls using ktime_get() so
we'd only need to call it once. Kyber's ->completed_request() hook also
calls ktime_get(), so let's move it to the same place, too.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq-sched.h     | 4 ++--
 block/blk-mq.c           | 5 +++--
 block/kyber-iosched.c    | 5 ++---
 include/linux/elevator.h | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 4e028ee42430..8a9544203173 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -49,12 +49,12 @@ blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
 	return true;
 }
 
-static inline void blk_mq_sched_completed_request(struct request *rq)
+static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
 {
 	struct elevator_queue *e = rq->q->elevator;
 
 	if (e && e->type->ops.mq.completed_request)
-		e->type->ops.mq.completed_request(rq);
+		e->type->ops.mq.completed_request(rq, now);
 }
 
 static inline void blk_mq_sched_started_request(struct request *rq)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d384ab700afd..1e72d53e8f2d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -528,6 +528,9 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
 		blk_stat_add(rq, now);
 	}
 
+	if (rq->internal_tag != -1)
+		blk_mq_sched_completed_request(rq, now);
+
 	blk_account_io_done(rq, now);
 
 	if (rq->end_io) {
@@ -564,8 +567,6 @@ static void __blk_mq_complete_request(struct request *rq)
 
 	if (!blk_mq_mark_complete(rq))
 		return;
-	if (rq->internal_tag != -1)
-		blk_mq_sched_completed_request(rq);
 
 	if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) {
 		rq->q->softirq_done_fn(rq);
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index a1660bafc912..95d062c07c61 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -558,12 +558,12 @@ static void kyber_finish_request(struct request *rq)
 	rq_clear_domain_token(kqd, rq);
 }
 
-static void kyber_completed_request(struct request *rq)
+static void kyber_completed_request(struct request *rq, u64 now)
 {
 	struct request_queue *q = rq->q;
 	struct kyber_queue_data *kqd = q->elevator->elevator_data;
 	unsigned int sched_domain;
-	u64 now, latency, target;
+	u64 latency, target;
 
 	/*
 	 * Check if this request met our latency goal. If not, quickly gather
@@ -585,7 +585,6 @@ static void kyber_completed_request(struct request *rq)
 	if (blk_stat_is_active(kqd->cb))
 		return;
 
-	now = ktime_get_ns();
 	if (now < rq->io_start_time_ns)
 		return;
 
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index a02deea30185..015bb59c0331 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -111,7 +111,7 @@ struct elevator_mq_ops {
 	void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
 	struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
 	bool (*has_work)(struct blk_mq_hw_ctx *);
-	void (*completed_request)(struct request *);
+	void (*completed_request)(struct request *, u64);
 	void (*started_request)(struct request *);
 	void (*requeue_request)(struct request *);
 	struct request *(*former_request)(struct request_queue *, struct request *);
-- 
cgit v1.2.3


From e9e575b8f29445bcde67f421891efa4d6527d987 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 18 Sep 2018 19:10:39 -0700
Subject: gss_krb5: Remove VLA usage of skcipher

In the quest to remove all stack VLA usage from the kernel[1], this
replaces struct crypto_skcipher and SKCIPHER_REQUEST_ON_STACK() usage
with struct crypto_sync_skcipher and SYNC_SKCIPHER_REQUEST_ON_STACK(),
which uses a fixed stack size.

[1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com

Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: Anna Schumaker <anna.schumaker@netapp.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: YueHaibing <yuehaibing@huawei.com>
Cc: linux-nfs@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/sunrpc/gss_krb5.h       | 30 ++++++------
 net/sunrpc/auth_gss/gss_krb5_crypto.c | 87 ++++++++++++++++++-----------------
 net/sunrpc/auth_gss/gss_krb5_keys.c   |  9 ++--
 net/sunrpc/auth_gss/gss_krb5_mech.c   | 53 ++++++++++-----------
 net/sunrpc/auth_gss/gss_krb5_seqnum.c | 18 ++++----
 net/sunrpc/auth_gss/gss_krb5_wrap.c   | 20 ++++----
 6 files changed, 108 insertions(+), 109 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 7df625d41e35..f6e8ceafafd8 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -71,10 +71,10 @@ struct gss_krb5_enctype {
 	const u32		keyed_cksum;	/* is it a keyed cksum? */
 	const u32		keybytes;	/* raw key len, in bytes */
 	const u32		keylength;	/* final key len, in bytes */
-	u32 (*encrypt) (struct crypto_skcipher *tfm,
+	u32 (*encrypt) (struct crypto_sync_skcipher *tfm,
 			void *iv, void *in, void *out,
 			int length);		/* encryption function */
-	u32 (*decrypt) (struct crypto_skcipher *tfm,
+	u32 (*decrypt) (struct crypto_sync_skcipher *tfm,
 			void *iv, void *in, void *out,
 			int length);		/* decryption function */
 	u32 (*mk_key) (const struct gss_krb5_enctype *gk5e,
@@ -98,12 +98,12 @@ struct krb5_ctx {
 	u32			enctype;
 	u32			flags;
 	const struct gss_krb5_enctype *gk5e; /* enctype-specific info */
-	struct crypto_skcipher	*enc;
-	struct crypto_skcipher	*seq;
-	struct crypto_skcipher *acceptor_enc;
-	struct crypto_skcipher *initiator_enc;
-	struct crypto_skcipher *acceptor_enc_aux;
-	struct crypto_skcipher *initiator_enc_aux;
+	struct crypto_sync_skcipher *enc;
+	struct crypto_sync_skcipher *seq;
+	struct crypto_sync_skcipher *acceptor_enc;
+	struct crypto_sync_skcipher *initiator_enc;
+	struct crypto_sync_skcipher *acceptor_enc_aux;
+	struct crypto_sync_skcipher *initiator_enc_aux;
 	u8			Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */
 	u8			cksum[GSS_KRB5_MAX_KEYLEN];
 	s32			endtime;
@@ -262,24 +262,24 @@ gss_unwrap_kerberos(struct gss_ctx *ctx_id, int offset,
 
 
 u32
-krb5_encrypt(struct crypto_skcipher *key,
+krb5_encrypt(struct crypto_sync_skcipher *key,
 	     void *iv, void *in, void *out, int length);
 
 u32
-krb5_decrypt(struct crypto_skcipher *key,
+krb5_decrypt(struct crypto_sync_skcipher *key,
 	     void *iv, void *in, void *out, int length); 
 
 int
-gss_encrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *outbuf,
+gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *outbuf,
 		    int offset, struct page **pages);
 
 int
-gss_decrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *inbuf,
+gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *inbuf,
 		    int offset);
 
 s32
 krb5_make_seq_num(struct krb5_ctx *kctx,
-		struct crypto_skcipher *key,
+		struct crypto_sync_skcipher *key,
 		int direction,
 		u32 seqnum, unsigned char *cksum, unsigned char *buf);
 
@@ -320,12 +320,12 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset,
 
 int
 krb5_rc4_setup_seq_key(struct krb5_ctx *kctx,
-		       struct crypto_skcipher *cipher,
+		       struct crypto_sync_skcipher *cipher,
 		       unsigned char *cksum);
 
 int
 krb5_rc4_setup_enc_key(struct krb5_ctx *kctx,
-		       struct crypto_skcipher *cipher,
+		       struct crypto_sync_skcipher *cipher,
 		       s32 seqnum);
 void
 gss_krb5_make_confounder(char *p, u32 conflen);
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 0220e1ca5280..4f43383971ba 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -53,7 +53,7 @@
 
 u32
 krb5_encrypt(
-	struct crypto_skcipher *tfm,
+	struct crypto_sync_skcipher *tfm,
 	void * iv,
 	void * in,
 	void * out,
@@ -62,24 +62,24 @@ krb5_encrypt(
 	u32 ret = -EINVAL;
 	struct scatterlist sg[1];
 	u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
-	SKCIPHER_REQUEST_ON_STACK(req, tfm);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
 
-	if (length % crypto_skcipher_blocksize(tfm) != 0)
+	if (length % crypto_sync_skcipher_blocksize(tfm) != 0)
 		goto out;
 
-	if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
+	if (crypto_sync_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
 		dprintk("RPC:       gss_k5encrypt: tfm iv size too large %d\n",
-			crypto_skcipher_ivsize(tfm));
+			crypto_sync_skcipher_ivsize(tfm));
 		goto out;
 	}
 
 	if (iv)
-		memcpy(local_iv, iv, crypto_skcipher_ivsize(tfm));
+		memcpy(local_iv, iv, crypto_sync_skcipher_ivsize(tfm));
 
 	memcpy(out, in, length);
 	sg_init_one(sg, out, length);
 
-	skcipher_request_set_tfm(req, tfm);
+	skcipher_request_set_sync_tfm(req, tfm);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, length, local_iv);
 
@@ -92,7 +92,7 @@ out:
 
 u32
 krb5_decrypt(
-     struct crypto_skcipher *tfm,
+     struct crypto_sync_skcipher *tfm,
      void * iv,
      void * in,
      void * out,
@@ -101,23 +101,23 @@ krb5_decrypt(
 	u32 ret = -EINVAL;
 	struct scatterlist sg[1];
 	u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
-	SKCIPHER_REQUEST_ON_STACK(req, tfm);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
 
-	if (length % crypto_skcipher_blocksize(tfm) != 0)
+	if (length % crypto_sync_skcipher_blocksize(tfm) != 0)
 		goto out;
 
-	if (crypto_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
+	if (crypto_sync_skcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
 		dprintk("RPC:       gss_k5decrypt: tfm iv size too large %d\n",
-			crypto_skcipher_ivsize(tfm));
+			crypto_sync_skcipher_ivsize(tfm));
 		goto out;
 	}
 	if (iv)
-		memcpy(local_iv,iv, crypto_skcipher_ivsize(tfm));
+		memcpy(local_iv, iv, crypto_sync_skcipher_ivsize(tfm));
 
 	memcpy(out, in, length);
 	sg_init_one(sg, out, length);
 
-	skcipher_request_set_tfm(req, tfm);
+	skcipher_request_set_sync_tfm(req, tfm);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, length, local_iv);
 
@@ -466,7 +466,8 @@ encryptor(struct scatterlist *sg, void *data)
 {
 	struct encryptor_desc *desc = data;
 	struct xdr_buf *outbuf = desc->outbuf;
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req);
+	struct crypto_sync_skcipher *tfm =
+		crypto_sync_skcipher_reqtfm(desc->req);
 	struct page *in_page;
 	int thislen = desc->fraglen + sg->length;
 	int fraglen, ret;
@@ -492,7 +493,7 @@ encryptor(struct scatterlist *sg, void *data)
 	desc->fraglen += sg->length;
 	desc->pos += sg->length;
 
-	fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1);
+	fraglen = thislen & (crypto_sync_skcipher_blocksize(tfm) - 1);
 	thislen -= fraglen;
 
 	if (thislen == 0)
@@ -526,16 +527,16 @@ encryptor(struct scatterlist *sg, void *data)
 }
 
 int
-gss_encrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf,
+gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *buf,
 		    int offset, struct page **pages)
 {
 	int ret;
 	struct encryptor_desc desc;
-	SKCIPHER_REQUEST_ON_STACK(req, tfm);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
 
-	BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0);
+	BUG_ON((buf->len - offset) % crypto_sync_skcipher_blocksize(tfm) != 0);
 
-	skcipher_request_set_tfm(req, tfm);
+	skcipher_request_set_sync_tfm(req, tfm);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 
 	memset(desc.iv, 0, sizeof(desc.iv));
@@ -567,7 +568,8 @@ decryptor(struct scatterlist *sg, void *data)
 {
 	struct decryptor_desc *desc = data;
 	int thislen = desc->fraglen + sg->length;
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(desc->req);
+	struct crypto_sync_skcipher *tfm =
+		crypto_sync_skcipher_reqtfm(desc->req);
 	int fraglen, ret;
 
 	/* Worst case is 4 fragments: head, end of page 1, start
@@ -578,7 +580,7 @@ decryptor(struct scatterlist *sg, void *data)
 	desc->fragno++;
 	desc->fraglen += sg->length;
 
-	fraglen = thislen & (crypto_skcipher_blocksize(tfm) - 1);
+	fraglen = thislen & (crypto_sync_skcipher_blocksize(tfm) - 1);
 	thislen -= fraglen;
 
 	if (thislen == 0)
@@ -608,17 +610,17 @@ decryptor(struct scatterlist *sg, void *data)
 }
 
 int
-gss_decrypt_xdr_buf(struct crypto_skcipher *tfm, struct xdr_buf *buf,
+gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm, struct xdr_buf *buf,
 		    int offset)
 {
 	int ret;
 	struct decryptor_desc desc;
-	SKCIPHER_REQUEST_ON_STACK(req, tfm);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, tfm);
 
 	/* XXXJBF: */
-	BUG_ON((buf->len - offset) % crypto_skcipher_blocksize(tfm) != 0);
+	BUG_ON((buf->len - offset) % crypto_sync_skcipher_blocksize(tfm) != 0);
 
-	skcipher_request_set_tfm(req, tfm);
+	skcipher_request_set_sync_tfm(req, tfm);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 
 	memset(desc.iv, 0, sizeof(desc.iv));
@@ -672,12 +674,12 @@ xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen)
 }
 
 static u32
-gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf,
+gss_krb5_cts_crypt(struct crypto_sync_skcipher *cipher, struct xdr_buf *buf,
 		   u32 offset, u8 *iv, struct page **pages, int encrypt)
 {
 	u32 ret;
 	struct scatterlist sg[1];
-	SKCIPHER_REQUEST_ON_STACK(req, cipher);
+	SYNC_SKCIPHER_REQUEST_ON_STACK(req, cipher);
 	u8 *data;
 	struct page **save_pages;
 	u32 len = buf->len - offset;
@@ -706,7 +708,7 @@ gss_krb5_cts_crypt(struct crypto_skcipher *cipher, struct xdr_buf *buf,
 
 	sg_init_one(sg, data, len);
 
-	skcipher_request_set_tfm(req, cipher);
+	skcipher_request_set_sync_tfm(req, cipher);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
 	skcipher_request_set_crypt(req, sg, sg, len, iv);
 
@@ -735,7 +737,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
 	struct xdr_netobj hmac;
 	u8 *cksumkey;
 	u8 *ecptr;
-	struct crypto_skcipher *cipher, *aux_cipher;
+	struct crypto_sync_skcipher *cipher, *aux_cipher;
 	int blocksize;
 	struct page **save_pages;
 	int nblocks, nbytes;
@@ -754,7 +756,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
 		cksumkey = kctx->acceptor_integ;
 		usage = KG_USAGE_ACCEPTOR_SEAL;
 	}
-	blocksize = crypto_skcipher_blocksize(cipher);
+	blocksize = crypto_sync_skcipher_blocksize(cipher);
 
 	/* hide the gss token header and insert the confounder */
 	offset += GSS_KRB5_TOK_HDR_LEN;
@@ -807,7 +809,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
 	memset(desc.iv, 0, sizeof(desc.iv));
 
 	if (cbcbytes) {
-		SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
 
 		desc.pos = offset + GSS_KRB5_TOK_HDR_LEN;
 		desc.fragno = 0;
@@ -816,7 +818,7 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
 		desc.outbuf = buf;
 		desc.req = req;
 
-		skcipher_request_set_tfm(req, aux_cipher);
+		skcipher_request_set_sync_tfm(req, aux_cipher);
 		skcipher_request_set_callback(req, 0, NULL, NULL);
 
 		sg_init_table(desc.infrags, 4);
@@ -855,7 +857,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
 	struct xdr_buf subbuf;
 	u32 ret = 0;
 	u8 *cksum_key;
-	struct crypto_skcipher *cipher, *aux_cipher;
+	struct crypto_sync_skcipher *cipher, *aux_cipher;
 	struct xdr_netobj our_hmac_obj;
 	u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
 	u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
@@ -874,7 +876,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
 		cksum_key = kctx->initiator_integ;
 		usage = KG_USAGE_INITIATOR_SEAL;
 	}
-	blocksize = crypto_skcipher_blocksize(cipher);
+	blocksize = crypto_sync_skcipher_blocksize(cipher);
 
 
 	/* create a segment skipping the header and leaving out the checksum */
@@ -891,13 +893,13 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
 	memset(desc.iv, 0, sizeof(desc.iv));
 
 	if (cbcbytes) {
-		SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
+		SYNC_SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
 
 		desc.fragno = 0;
 		desc.fraglen = 0;
 		desc.req = req;
 
-		skcipher_request_set_tfm(req, aux_cipher);
+		skcipher_request_set_sync_tfm(req, aux_cipher);
 		skcipher_request_set_callback(req, 0, NULL, NULL);
 
 		sg_init_table(desc.frags, 4);
@@ -946,7 +948,8 @@ out_err:
  * Set the key of the given cipher.
  */
 int
-krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
+krb5_rc4_setup_seq_key(struct krb5_ctx *kctx,
+		       struct crypto_sync_skcipher *cipher,
 		       unsigned char *cksum)
 {
 	struct crypto_shash *hmac;
@@ -994,7 +997,7 @@ krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
 	if (err)
 		goto out_err;
 
-	err = crypto_skcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
+	err = crypto_sync_skcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
 	if (err)
 		goto out_err;
 
@@ -1012,7 +1015,8 @@ out_err:
  * Set the key of cipher kctx->enc.
  */
 int
-krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
+krb5_rc4_setup_enc_key(struct krb5_ctx *kctx,
+		       struct crypto_sync_skcipher *cipher,
 		       s32 seqnum)
 {
 	struct crypto_shash *hmac;
@@ -1069,7 +1073,8 @@ krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_skcipher *cipher,
 	if (err)
 		goto out_err;
 
-	err = crypto_skcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength);
+	err = crypto_sync_skcipher_setkey(cipher, Kcrypt,
+					  kctx->gk5e->keylength);
 	if (err)
 		goto out_err;
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index f7fe2d2b851f..550fdf18d3b3 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -147,7 +147,7 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
 	size_t blocksize, keybytes, keylength, n;
 	unsigned char *inblockdata, *outblockdata, *rawkey;
 	struct xdr_netobj inblock, outblock;
-	struct crypto_skcipher *cipher;
+	struct crypto_sync_skcipher *cipher;
 	u32 ret = EINVAL;
 
 	blocksize = gk5e->blocksize;
@@ -157,11 +157,10 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
 	if ((inkey->len != keylength) || (outkey->len != keylength))
 		goto err_return;
 
-	cipher = crypto_alloc_skcipher(gk5e->encrypt_name, 0,
-				       CRYPTO_ALG_ASYNC);
+	cipher = crypto_alloc_sync_skcipher(gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(cipher))
 		goto err_return;
-	if (crypto_skcipher_setkey(cipher, inkey->data, inkey->len))
+	if (crypto_sync_skcipher_setkey(cipher, inkey->data, inkey->len))
 		goto err_return;
 
 	/* allocate and set up buffers */
@@ -238,7 +237,7 @@ err_free_in:
 	memset(inblockdata, 0, blocksize);
 	kfree(inblockdata);
 err_free_cipher:
-	crypto_free_skcipher(cipher);
+	crypto_free_sync_skcipher(cipher);
 err_return:
 	return ret;
 }
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 7bb2514aadd9..7f0424dfa8f6 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -218,7 +218,7 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
 
 static inline const void *
 get_key(const void *p, const void *end,
-	struct krb5_ctx *ctx, struct crypto_skcipher **res)
+	struct krb5_ctx *ctx, struct crypto_sync_skcipher **res)
 {
 	struct xdr_netobj	key;
 	int			alg;
@@ -246,15 +246,14 @@ get_key(const void *p, const void *end,
 	if (IS_ERR(p))
 		goto out_err;
 
-	*res = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
-							CRYPTO_ALG_ASYNC);
+	*res = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(*res)) {
 		printk(KERN_WARNING "gss_kerberos_mech: unable to initialize "
 			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
 		*res = NULL;
 		goto out_err_free_key;
 	}
-	if (crypto_skcipher_setkey(*res, key.data, key.len)) {
+	if (crypto_sync_skcipher_setkey(*res, key.data, key.len)) {
 		printk(KERN_WARNING "gss_kerberos_mech: error setting key for "
 			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
 		goto out_err_free_tfm;
@@ -264,7 +263,7 @@ get_key(const void *p, const void *end,
 	return p;
 
 out_err_free_tfm:
-	crypto_free_skcipher(*res);
+	crypto_free_sync_skcipher(*res);
 out_err_free_key:
 	kfree(key.data);
 	p = ERR_PTR(-EINVAL);
@@ -336,30 +335,30 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
 	return 0;
 
 out_err_free_key2:
-	crypto_free_skcipher(ctx->seq);
+	crypto_free_sync_skcipher(ctx->seq);
 out_err_free_key1:
-	crypto_free_skcipher(ctx->enc);
+	crypto_free_sync_skcipher(ctx->enc);
 out_err_free_mech:
 	kfree(ctx->mech_used.data);
 out_err:
 	return PTR_ERR(p);
 }
 
-static struct crypto_skcipher *
+static struct crypto_sync_skcipher *
 context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key)
 {
-	struct crypto_skcipher *cp;
+	struct crypto_sync_skcipher *cp;
 
-	cp = crypto_alloc_skcipher(cname, 0, CRYPTO_ALG_ASYNC);
+	cp = crypto_alloc_sync_skcipher(cname, 0, 0);
 	if (IS_ERR(cp)) {
 		dprintk("gss_kerberos_mech: unable to initialize "
 			"crypto algorithm %s\n", cname);
 		return NULL;
 	}
-	if (crypto_skcipher_setkey(cp, key, ctx->gk5e->keylength)) {
+	if (crypto_sync_skcipher_setkey(cp, key, ctx->gk5e->keylength)) {
 		dprintk("gss_kerberos_mech: error setting key for "
 			"crypto algorithm %s\n", cname);
-		crypto_free_skcipher(cp);
+		crypto_free_sync_skcipher(cp);
 		return NULL;
 	}
 	return cp;
@@ -413,9 +412,9 @@ context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask)
 	return 0;
 
 out_free_enc:
-	crypto_free_skcipher(ctx->enc);
+	crypto_free_sync_skcipher(ctx->enc);
 out_free_seq:
-	crypto_free_skcipher(ctx->seq);
+	crypto_free_sync_skcipher(ctx->seq);
 out_err:
 	return -EINVAL;
 }
@@ -469,17 +468,15 @@ context_derive_keys_rc4(struct krb5_ctx *ctx)
 	/*
 	 * allocate hash, and skciphers for data and seqnum encryption
 	 */
-	ctx->enc = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
-					 CRYPTO_ALG_ASYNC);
+	ctx->enc = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(ctx->enc)) {
 		err = PTR_ERR(ctx->enc);
 		goto out_err_free_hmac;
 	}
 
-	ctx->seq = crypto_alloc_skcipher(ctx->gk5e->encrypt_name, 0,
-					 CRYPTO_ALG_ASYNC);
+	ctx->seq = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(ctx->seq)) {
-		crypto_free_skcipher(ctx->enc);
+		crypto_free_sync_skcipher(ctx->enc);
 		err = PTR_ERR(ctx->seq);
 		goto out_err_free_hmac;
 	}
@@ -591,7 +588,7 @@ context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
 			context_v2_alloc_cipher(ctx, "cbc(aes)",
 						ctx->acceptor_seal);
 		if (ctx->acceptor_enc_aux == NULL) {
-			crypto_free_skcipher(ctx->initiator_enc_aux);
+			crypto_free_sync_skcipher(ctx->initiator_enc_aux);
 			goto out_free_acceptor_enc;
 		}
 	}
@@ -599,9 +596,9 @@ context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
 	return 0;
 
 out_free_acceptor_enc:
-	crypto_free_skcipher(ctx->acceptor_enc);
+	crypto_free_sync_skcipher(ctx->acceptor_enc);
 out_free_initiator_enc:
-	crypto_free_skcipher(ctx->initiator_enc);
+	crypto_free_sync_skcipher(ctx->initiator_enc);
 out_err:
 	return -EINVAL;
 }
@@ -713,12 +710,12 @@ static void
 gss_delete_sec_context_kerberos(void *internal_ctx) {
 	struct krb5_ctx *kctx = internal_ctx;
 
-	crypto_free_skcipher(kctx->seq);
-	crypto_free_skcipher(kctx->enc);
-	crypto_free_skcipher(kctx->acceptor_enc);
-	crypto_free_skcipher(kctx->initiator_enc);
-	crypto_free_skcipher(kctx->acceptor_enc_aux);
-	crypto_free_skcipher(kctx->initiator_enc_aux);
+	crypto_free_sync_skcipher(kctx->seq);
+	crypto_free_sync_skcipher(kctx->enc);
+	crypto_free_sync_skcipher(kctx->acceptor_enc);
+	crypto_free_sync_skcipher(kctx->initiator_enc);
+	crypto_free_sync_skcipher(kctx->acceptor_enc_aux);
+	crypto_free_sync_skcipher(kctx->initiator_enc_aux);
 	kfree(kctx->mech_used.data);
 	kfree(kctx);
 }
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index c8b9082f4a9d..fb6656295204 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -43,13 +43,12 @@ static s32
 krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
 		      unsigned char *cksum, unsigned char *buf)
 {
-	struct crypto_skcipher *cipher;
+	struct crypto_sync_skcipher *cipher;
 	unsigned char plain[8];
 	s32 code;
 
 	dprintk("RPC:       %s:\n", __func__);
-	cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
-				       CRYPTO_ALG_ASYNC);
+	cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
@@ -68,12 +67,12 @@ krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
 
 	code = krb5_encrypt(cipher, cksum, plain, buf, 8);
 out:
-	crypto_free_skcipher(cipher);
+	crypto_free_sync_skcipher(cipher);
 	return code;
 }
 s32
 krb5_make_seq_num(struct krb5_ctx *kctx,
-		struct crypto_skcipher *key,
+		struct crypto_sync_skcipher *key,
 		int direction,
 		u32 seqnum,
 		unsigned char *cksum, unsigned char *buf)
@@ -101,13 +100,12 @@ static s32
 krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
 		     unsigned char *buf, int *direction, s32 *seqnum)
 {
-	struct crypto_skcipher *cipher;
+	struct crypto_sync_skcipher *cipher;
 	unsigned char plain[8];
 	s32 code;
 
 	dprintk("RPC:       %s:\n", __func__);
-	cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
-				       CRYPTO_ALG_ASYNC);
+	cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name, 0, 0);
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
@@ -130,7 +128,7 @@ krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
 	*seqnum = ((plain[0] << 24) | (plain[1] << 16) |
 					(plain[2] << 8) | (plain[3]));
 out:
-	crypto_free_skcipher(cipher);
+	crypto_free_sync_skcipher(cipher);
 	return code;
 }
 
@@ -142,7 +140,7 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
 {
 	s32 code;
 	unsigned char plain[8];
-	struct crypto_skcipher *key = kctx->seq;
+	struct crypto_sync_skcipher *key = kctx->seq;
 
 	dprintk("RPC:       krb5_get_seq_num:\n");
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 39a2e672900b..3d975a4013d2 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -174,7 +174,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 
 	now = get_seconds();
 
-	blocksize = crypto_skcipher_blocksize(kctx->enc);
+	blocksize = crypto_sync_skcipher_blocksize(kctx->enc);
 	gss_krb5_add_padding(buf, offset, blocksize);
 	BUG_ON((buf->len - offset) % blocksize);
 	plainlen = conflen + buf->len - offset;
@@ -239,10 +239,10 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 		return GSS_S_FAILURE;
 
 	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
-		struct crypto_skcipher *cipher;
+		struct crypto_sync_skcipher *cipher;
 		int err;
-		cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
-					       CRYPTO_ALG_ASYNC);
+		cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name,
+						    0, 0);
 		if (IS_ERR(cipher))
 			return GSS_S_FAILURE;
 
@@ -250,7 +250,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 
 		err = gss_encrypt_xdr_buf(cipher, buf,
 					  offset + headlen - conflen, pages);
-		crypto_free_skcipher(cipher);
+		crypto_free_sync_skcipher(cipher);
 		if (err)
 			return GSS_S_FAILURE;
 	} else {
@@ -327,18 +327,18 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 		return GSS_S_BAD_SIG;
 
 	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
-		struct crypto_skcipher *cipher;
+		struct crypto_sync_skcipher *cipher;
 		int err;
 
-		cipher = crypto_alloc_skcipher(kctx->gk5e->encrypt_name, 0,
-					       CRYPTO_ALG_ASYNC);
+		cipher = crypto_alloc_sync_skcipher(kctx->gk5e->encrypt_name,
+						    0, 0);
 		if (IS_ERR(cipher))
 			return GSS_S_FAILURE;
 
 		krb5_rc4_setup_enc_key(kctx, cipher, seqnum);
 
 		err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset);
-		crypto_free_skcipher(cipher);
+		crypto_free_sync_skcipher(cipher);
 		if (err)
 			return GSS_S_DEFECTIVE_TOKEN;
 	} else {
@@ -371,7 +371,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 	/* Copy the data back to the right position.  XXX: Would probably be
 	 * better to copy and encrypt at the same time. */
 
-	blocksize = crypto_skcipher_blocksize(kctx->enc);
+	blocksize = crypto_sync_skcipher_blocksize(kctx->enc);
 	data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
 					conflen;
 	orig_start = buf->head[0].iov_base + offset;
-- 
cgit v1.2.3


From cac5818c25d0423bda73e2b6997404ed0a7ed9e3 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Wed, 19 Sep 2018 10:10:54 +0000
Subject: crypto: user - Implement a generic crypto statistics

This patch implement a generic way to get statistics about all crypto
usages.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/Kconfig                       |  11 +
 crypto/Makefile                      |   1 +
 crypto/ahash.c                       |  21 +-
 crypto/algapi.c                      |   8 +
 crypto/crypto_user.c                 | 566 ----------------------------------
 crypto/crypto_user_base.c            | 571 +++++++++++++++++++++++++++++++++++
 crypto/crypto_user_stat.c            | 463 ++++++++++++++++++++++++++++
 crypto/rng.c                         |   1 +
 include/crypto/acompress.h           |  38 ++-
 include/crypto/aead.h                |  51 +++-
 include/crypto/akcipher.h            |  76 ++++-
 include/crypto/hash.h                |  32 +-
 include/crypto/internal/cryptouser.h |   8 +
 include/crypto/kpp.h                 |  51 +++-
 include/crypto/rng.h                 |  29 +-
 include/crypto/skcipher.h            |  44 ++-
 include/linux/crypto.h               | 110 ++++++-
 include/uapi/linux/cryptouser.h      |  52 ++++
 18 files changed, 1534 insertions(+), 599 deletions(-)
 delete mode 100644 crypto/crypto_user.c
 create mode 100644 crypto/crypto_user_base.c
 create mode 100644 crypto/crypto_user_stat.c
 create mode 100644 include/crypto/internal/cryptouser.h

(limited to 'include/linux')

diff --git a/crypto/Kconfig b/crypto/Kconfig
index 90f2811fac5f..4ef95b0b25a3 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1799,6 +1799,17 @@ config CRYPTO_USER_API_AEAD
 	  This option enables the user-spaces interface for AEAD
 	  cipher algorithms.
 
+config CRYPTO_STATS
+	bool "Crypto usage statistics for User-space"
+	help
+	  This option enables the gathering of crypto stats.
+	  This will collect:
+	  - encrypt/decrypt size and numbers of symmeric operations
+	  - compress/decompress size and numbers of compress operations
+	  - size and numbers of hash operations
+	  - encrypt/decrypt/sign/verify numbers for asymmetric operations
+	  - generate/seed numbers for rng operations
+
 config CRYPTO_HASH_INFO
 	bool
 
diff --git a/crypto/Makefile b/crypto/Makefile
index d719843f8b6e..ff5c2bbda04a 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -54,6 +54,7 @@ cryptomgr-y := algboss.o testmgr.o
 
 obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o
 obj-$(CONFIG_CRYPTO_USER) += crypto_user.o
+crypto_user-y := crypto_user_base.o crypto_user_stat.o
 obj-$(CONFIG_CRYPTO_CMAC) += cmac.o
 obj-$(CONFIG_CRYPTO_HMAC) += hmac.o
 obj-$(CONFIG_CRYPTO_VMAC) += vmac.o
diff --git a/crypto/ahash.c b/crypto/ahash.c
index 78aaf2158c43..e21667b4e10a 100644
--- a/crypto/ahash.c
+++ b/crypto/ahash.c
@@ -364,24 +364,35 @@ static int crypto_ahash_op(struct ahash_request *req,
 
 int crypto_ahash_final(struct ahash_request *req)
 {
-	return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final);
+	int ret;
+
+	ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->final);
+	crypto_stat_ahash_final(req, ret);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(crypto_ahash_final);
 
 int crypto_ahash_finup(struct ahash_request *req)
 {
-	return crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup);
+	int ret;
+
+	ret = crypto_ahash_op(req, crypto_ahash_reqtfm(req)->finup);
+	crypto_stat_ahash_final(req, ret);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(crypto_ahash_finup);
 
 int crypto_ahash_digest(struct ahash_request *req)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	int ret;
 
 	if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
-		return -ENOKEY;
-
-	return crypto_ahash_op(req, tfm->digest);
+		ret = -ENOKEY;
+	else
+		ret = crypto_ahash_op(req, tfm->digest);
+	crypto_stat_ahash_final(req, ret);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(crypto_ahash_digest);
 
diff --git a/crypto/algapi.c b/crypto/algapi.c
index 38daa8677da9..2545c5f89c4c 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -258,6 +258,14 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
 	list_add(&alg->cra_list, &crypto_alg_list);
 	list_add(&larval->alg.cra_list, &crypto_alg_list);
 
+	atomic_set(&alg->encrypt_cnt, 0);
+	atomic_set(&alg->decrypt_cnt, 0);
+	atomic64_set(&alg->encrypt_tlen, 0);
+	atomic64_set(&alg->decrypt_tlen, 0);
+	atomic_set(&alg->verify_cnt, 0);
+	atomic_set(&alg->cipher_err_cnt, 0);
+	atomic_set(&alg->sign_cnt, 0);
+
 out:
 	return larval;
 
diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
deleted file mode 100644
index 0e89b5457cab..000000000000
--- a/crypto/crypto_user.c
+++ /dev/null
@@ -1,566 +0,0 @@
-/*
- * Crypto user configuration API.
- *
- * Copyright (C) 2011 secunet Security Networks AG
- * Copyright (C) 2011 Steffen Klassert <steffen.klassert@secunet.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include <linux/module.h>
-#include <linux/crypto.h>
-#include <linux/cryptouser.h>
-#include <linux/sched.h>
-#include <net/netlink.h>
-#include <linux/security.h>
-#include <net/net_namespace.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/internal/rng.h>
-#include <crypto/akcipher.h>
-#include <crypto/kpp.h>
-
-#include "internal.h"
-
-#define null_terminated(x)	(strnlen(x, sizeof(x)) < sizeof(x))
-
-static DEFINE_MUTEX(crypto_cfg_mutex);
-
-/* The crypto netlink socket */
-static struct sock *crypto_nlsk;
-
-struct crypto_dump_info {
-	struct sk_buff *in_skb;
-	struct sk_buff *out_skb;
-	u32 nlmsg_seq;
-	u16 nlmsg_flags;
-};
-
-static struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact)
-{
-	struct crypto_alg *q, *alg = NULL;
-
-	down_read(&crypto_alg_sem);
-
-	list_for_each_entry(q, &crypto_alg_list, cra_list) {
-		int match = 0;
-
-		if ((q->cra_flags ^ p->cru_type) & p->cru_mask)
-			continue;
-
-		if (strlen(p->cru_driver_name))
-			match = !strcmp(q->cra_driver_name,
-					p->cru_driver_name);
-		else if (!exact)
-			match = !strcmp(q->cra_name, p->cru_name);
-
-		if (!match)
-			continue;
-
-		if (unlikely(!crypto_mod_get(q)))
-			continue;
-
-		alg = q;
-		break;
-	}
-
-	up_read(&crypto_alg_sem);
-
-	return alg;
-}
-
-static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg)
-{
-	struct crypto_report_cipher rcipher;
-
-	strlcpy(rcipher.type, "cipher", sizeof(rcipher.type));
-
-	rcipher.blocksize = alg->cra_blocksize;
-	rcipher.min_keysize = alg->cra_cipher.cia_min_keysize;
-	rcipher.max_keysize = alg->cra_cipher.cia_max_keysize;
-
-	if (nla_put(skb, CRYPTOCFGA_REPORT_CIPHER,
-		    sizeof(struct crypto_report_cipher), &rcipher))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return -EMSGSIZE;
-}
-
-static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg)
-{
-	struct crypto_report_comp rcomp;
-
-	strlcpy(rcomp.type, "compression", sizeof(rcomp.type));
-	if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS,
-		    sizeof(struct crypto_report_comp), &rcomp))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return -EMSGSIZE;
-}
-
-static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg)
-{
-	struct crypto_report_acomp racomp;
-
-	strlcpy(racomp.type, "acomp", sizeof(racomp.type));
-
-	if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP,
-		    sizeof(struct crypto_report_acomp), &racomp))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return -EMSGSIZE;
-}
-
-static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg)
-{
-	struct crypto_report_akcipher rakcipher;
-
-	strlcpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
-
-	if (nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER,
-		    sizeof(struct crypto_report_akcipher), &rakcipher))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return -EMSGSIZE;
-}
-
-static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg)
-{
-	struct crypto_report_kpp rkpp;
-
-	strlcpy(rkpp.type, "kpp", sizeof(rkpp.type));
-
-	if (nla_put(skb, CRYPTOCFGA_REPORT_KPP,
-		    sizeof(struct crypto_report_kpp), &rkpp))
-		goto nla_put_failure;
-	return 0;
-
-nla_put_failure:
-	return -EMSGSIZE;
-}
-
-static int crypto_report_one(struct crypto_alg *alg,
-			     struct crypto_user_alg *ualg, struct sk_buff *skb)
-{
-	strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name));
-	strlcpy(ualg->cru_driver_name, alg->cra_driver_name,
-		sizeof(ualg->cru_driver_name));
-	strlcpy(ualg->cru_module_name, module_name(alg->cra_module),
-		sizeof(ualg->cru_module_name));
-
-	ualg->cru_type = 0;
-	ualg->cru_mask = 0;
-	ualg->cru_flags = alg->cra_flags;
-	ualg->cru_refcnt = refcount_read(&alg->cra_refcnt);
-
-	if (nla_put_u32(skb, CRYPTOCFGA_PRIORITY_VAL, alg->cra_priority))
-		goto nla_put_failure;
-	if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
-		struct crypto_report_larval rl;
-
-		strlcpy(rl.type, "larval", sizeof(rl.type));
-		if (nla_put(skb, CRYPTOCFGA_REPORT_LARVAL,
-			    sizeof(struct crypto_report_larval), &rl))
-			goto nla_put_failure;
-		goto out;
-	}
-
-	if (alg->cra_type && alg->cra_type->report) {
-		if (alg->cra_type->report(skb, alg))
-			goto nla_put_failure;
-
-		goto out;
-	}
-
-	switch (alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL)) {
-	case CRYPTO_ALG_TYPE_CIPHER:
-		if (crypto_report_cipher(skb, alg))
-			goto nla_put_failure;
-
-		break;
-	case CRYPTO_ALG_TYPE_COMPRESS:
-		if (crypto_report_comp(skb, alg))
-			goto nla_put_failure;
-
-		break;
-	case CRYPTO_ALG_TYPE_ACOMPRESS:
-		if (crypto_report_acomp(skb, alg))
-			goto nla_put_failure;
-
-		break;
-	case CRYPTO_ALG_TYPE_AKCIPHER:
-		if (crypto_report_akcipher(skb, alg))
-			goto nla_put_failure;
-
-		break;
-	case CRYPTO_ALG_TYPE_KPP:
-		if (crypto_report_kpp(skb, alg))
-			goto nla_put_failure;
-		break;
-	}
-
-out:
-	return 0;
-
-nla_put_failure:
-	return -EMSGSIZE;
-}
-
-static int crypto_report_alg(struct crypto_alg *alg,
-			     struct crypto_dump_info *info)
-{
-	struct sk_buff *in_skb = info->in_skb;
-	struct sk_buff *skb = info->out_skb;
-	struct nlmsghdr *nlh;
-	struct crypto_user_alg *ualg;
-	int err = 0;
-
-	nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, info->nlmsg_seq,
-			CRYPTO_MSG_GETALG, sizeof(*ualg), info->nlmsg_flags);
-	if (!nlh) {
-		err = -EMSGSIZE;
-		goto out;
-	}
-
-	ualg = nlmsg_data(nlh);
-
-	err = crypto_report_one(alg, ualg, skb);
-	if (err) {
-		nlmsg_cancel(skb, nlh);
-		goto out;
-	}
-
-	nlmsg_end(skb, nlh);
-
-out:
-	return err;
-}
-
-static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
-			 struct nlattr **attrs)
-{
-	struct crypto_user_alg *p = nlmsg_data(in_nlh);
-	struct crypto_alg *alg;
-	struct sk_buff *skb;
-	struct crypto_dump_info info;
-	int err;
-
-	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
-		return -EINVAL;
-
-	alg = crypto_alg_match(p, 0);
-	if (!alg)
-		return -ENOENT;
-
-	err = -ENOMEM;
-	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!skb)
-		goto drop_alg;
-
-	info.in_skb = in_skb;
-	info.out_skb = skb;
-	info.nlmsg_seq = in_nlh->nlmsg_seq;
-	info.nlmsg_flags = 0;
-
-	err = crypto_report_alg(alg, &info);
-
-drop_alg:
-	crypto_mod_put(alg);
-
-	if (err)
-		return err;
-
-	return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
-}
-
-static int crypto_dump_report(struct sk_buff *skb, struct netlink_callback *cb)
-{
-	struct crypto_alg *alg;
-	struct crypto_dump_info info;
-	int err;
-
-	if (cb->args[0])
-		goto out;
-
-	cb->args[0] = 1;
-
-	info.in_skb = cb->skb;
-	info.out_skb = skb;
-	info.nlmsg_seq = cb->nlh->nlmsg_seq;
-	info.nlmsg_flags = NLM_F_MULTI;
-
-	list_for_each_entry(alg, &crypto_alg_list, cra_list) {
-		err = crypto_report_alg(alg, &info);
-		if (err)
-			goto out_err;
-	}
-
-out:
-	return skb->len;
-out_err:
-	return err;
-}
-
-static int crypto_dump_report_done(struct netlink_callback *cb)
-{
-	return 0;
-}
-
-static int crypto_update_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
-			     struct nlattr **attrs)
-{
-	struct crypto_alg *alg;
-	struct crypto_user_alg *p = nlmsg_data(nlh);
-	struct nlattr *priority = attrs[CRYPTOCFGA_PRIORITY_VAL];
-	LIST_HEAD(list);
-
-	if (!netlink_capable(skb, CAP_NET_ADMIN))
-		return -EPERM;
-
-	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
-		return -EINVAL;
-
-	if (priority && !strlen(p->cru_driver_name))
-		return -EINVAL;
-
-	alg = crypto_alg_match(p, 1);
-	if (!alg)
-		return -ENOENT;
-
-	down_write(&crypto_alg_sem);
-
-	crypto_remove_spawns(alg, &list, NULL);
-
-	if (priority)
-		alg->cra_priority = nla_get_u32(priority);
-
-	up_write(&crypto_alg_sem);
-
-	crypto_mod_put(alg);
-	crypto_remove_final(&list);
-
-	return 0;
-}
-
-static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
-			  struct nlattr **attrs)
-{
-	struct crypto_alg *alg;
-	struct crypto_user_alg *p = nlmsg_data(nlh);
-	int err;
-
-	if (!netlink_capable(skb, CAP_NET_ADMIN))
-		return -EPERM;
-
-	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
-		return -EINVAL;
-
-	alg = crypto_alg_match(p, 1);
-	if (!alg)
-		return -ENOENT;
-
-	/* We can not unregister core algorithms such as aes-generic.
-	 * We would loose the reference in the crypto_alg_list to this algorithm
-	 * if we try to unregister. Unregistering such an algorithm without
-	 * removing the module is not possible, so we restrict to crypto
-	 * instances that are build from templates. */
-	err = -EINVAL;
-	if (!(alg->cra_flags & CRYPTO_ALG_INSTANCE))
-		goto drop_alg;
-
-	err = -EBUSY;
-	if (refcount_read(&alg->cra_refcnt) > 2)
-		goto drop_alg;
-
-	err = crypto_unregister_instance((struct crypto_instance *)alg);
-
-drop_alg:
-	crypto_mod_put(alg);
-	return err;
-}
-
-static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
-			  struct nlattr **attrs)
-{
-	int exact = 0;
-	const char *name;
-	struct crypto_alg *alg;
-	struct crypto_user_alg *p = nlmsg_data(nlh);
-	struct nlattr *priority = attrs[CRYPTOCFGA_PRIORITY_VAL];
-
-	if (!netlink_capable(skb, CAP_NET_ADMIN))
-		return -EPERM;
-
-	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
-		return -EINVAL;
-
-	if (strlen(p->cru_driver_name))
-		exact = 1;
-
-	if (priority && !exact)
-		return -EINVAL;
-
-	alg = crypto_alg_match(p, exact);
-	if (alg) {
-		crypto_mod_put(alg);
-		return -EEXIST;
-	}
-
-	if (strlen(p->cru_driver_name))
-		name = p->cru_driver_name;
-	else
-		name = p->cru_name;
-
-	alg = crypto_alg_mod_lookup(name, p->cru_type, p->cru_mask);
-	if (IS_ERR(alg))
-		return PTR_ERR(alg);
-
-	down_write(&crypto_alg_sem);
-
-	if (priority)
-		alg->cra_priority = nla_get_u32(priority);
-
-	up_write(&crypto_alg_sem);
-
-	crypto_mod_put(alg);
-
-	return 0;
-}
-
-static int crypto_del_rng(struct sk_buff *skb, struct nlmsghdr *nlh,
-			  struct nlattr **attrs)
-{
-	if (!netlink_capable(skb, CAP_NET_ADMIN))
-		return -EPERM;
-	return crypto_del_default_rng();
-}
-
-#define MSGSIZE(type) sizeof(struct type)
-
-static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = {
-	[CRYPTO_MSG_NEWALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
-	[CRYPTO_MSG_DELALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
-	[CRYPTO_MSG_UPDATEALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
-	[CRYPTO_MSG_GETALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
-	[CRYPTO_MSG_DELRNG	- CRYPTO_MSG_BASE] = 0,
-};
-
-static const struct nla_policy crypto_policy[CRYPTOCFGA_MAX+1] = {
-	[CRYPTOCFGA_PRIORITY_VAL]   = { .type = NLA_U32},
-};
-
-#undef MSGSIZE
-
-static const struct crypto_link {
-	int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
-	int (*dump)(struct sk_buff *, struct netlink_callback *);
-	int (*done)(struct netlink_callback *);
-} crypto_dispatch[CRYPTO_NR_MSGTYPES] = {
-	[CRYPTO_MSG_NEWALG	- CRYPTO_MSG_BASE] = { .doit = crypto_add_alg},
-	[CRYPTO_MSG_DELALG	- CRYPTO_MSG_BASE] = { .doit = crypto_del_alg},
-	[CRYPTO_MSG_UPDATEALG	- CRYPTO_MSG_BASE] = { .doit = crypto_update_alg},
-	[CRYPTO_MSG_GETALG	- CRYPTO_MSG_BASE] = { .doit = crypto_report,
-						       .dump = crypto_dump_report,
-						       .done = crypto_dump_report_done},
-	[CRYPTO_MSG_DELRNG	- CRYPTO_MSG_BASE] = { .doit = crypto_del_rng },
-};
-
-static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
-			       struct netlink_ext_ack *extack)
-{
-	struct nlattr *attrs[CRYPTOCFGA_MAX+1];
-	const struct crypto_link *link;
-	int type, err;
-
-	type = nlh->nlmsg_type;
-	if (type > CRYPTO_MSG_MAX)
-		return -EINVAL;
-
-	type -= CRYPTO_MSG_BASE;
-	link = &crypto_dispatch[type];
-
-	if ((type == (CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE) &&
-	    (nlh->nlmsg_flags & NLM_F_DUMP))) {
-		struct crypto_alg *alg;
-		u16 dump_alloc = 0;
-
-		if (link->dump == NULL)
-			return -EINVAL;
-
-		down_read(&crypto_alg_sem);
-		list_for_each_entry(alg, &crypto_alg_list, cra_list)
-			dump_alloc += CRYPTO_REPORT_MAXSIZE;
-
-		{
-			struct netlink_dump_control c = {
-				.dump = link->dump,
-				.done = link->done,
-				.min_dump_alloc = dump_alloc,
-			};
-			err = netlink_dump_start(crypto_nlsk, skb, nlh, &c);
-		}
-		up_read(&crypto_alg_sem);
-
-		return err;
-	}
-
-	err = nlmsg_parse(nlh, crypto_msg_min[type], attrs, CRYPTOCFGA_MAX,
-			  crypto_policy, extack);
-	if (err < 0)
-		return err;
-
-	if (link->doit == NULL)
-		return -EINVAL;
-
-	return link->doit(skb, nlh, attrs);
-}
-
-static void crypto_netlink_rcv(struct sk_buff *skb)
-{
-	mutex_lock(&crypto_cfg_mutex);
-	netlink_rcv_skb(skb, &crypto_user_rcv_msg);
-	mutex_unlock(&crypto_cfg_mutex);
-}
-
-static int __init crypto_user_init(void)
-{
-	struct netlink_kernel_cfg cfg = {
-		.input	= crypto_netlink_rcv,
-	};
-
-	crypto_nlsk = netlink_kernel_create(&init_net, NETLINK_CRYPTO, &cfg);
-	if (!crypto_nlsk)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static void __exit crypto_user_exit(void)
-{
-	netlink_kernel_release(crypto_nlsk);
-}
-
-module_init(crypto_user_init);
-module_exit(crypto_user_exit);
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
-MODULE_DESCRIPTION("Crypto userspace configuration API");
-MODULE_ALIAS("net-pf-16-proto-21");
diff --git a/crypto/crypto_user_base.c b/crypto/crypto_user_base.c
new file mode 100644
index 000000000000..e41f6cc33fff
--- /dev/null
+++ b/crypto/crypto_user_base.c
@@ -0,0 +1,571 @@
+/*
+ * Crypto user configuration API.
+ *
+ * Copyright (C) 2011 secunet Security Networks AG
+ * Copyright (C) 2011 Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/cryptouser.h>
+#include <linux/sched.h>
+#include <net/netlink.h>
+#include <linux/security.h>
+#include <net/net_namespace.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/internal/rng.h>
+#include <crypto/akcipher.h>
+#include <crypto/kpp.h>
+#include <crypto/internal/cryptouser.h>
+
+#include "internal.h"
+
+#define null_terminated(x)	(strnlen(x, sizeof(x)) < sizeof(x))
+
+static DEFINE_MUTEX(crypto_cfg_mutex);
+
+/* The crypto netlink socket */
+struct sock *crypto_nlsk;
+
+struct crypto_dump_info {
+	struct sk_buff *in_skb;
+	struct sk_buff *out_skb;
+	u32 nlmsg_seq;
+	u16 nlmsg_flags;
+};
+
+struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact)
+{
+	struct crypto_alg *q, *alg = NULL;
+
+	down_read(&crypto_alg_sem);
+
+	list_for_each_entry(q, &crypto_alg_list, cra_list) {
+		int match = 0;
+
+		if ((q->cra_flags ^ p->cru_type) & p->cru_mask)
+			continue;
+
+		if (strlen(p->cru_driver_name))
+			match = !strcmp(q->cra_driver_name,
+					p->cru_driver_name);
+		else if (!exact)
+			match = !strcmp(q->cra_name, p->cru_name);
+
+		if (!match)
+			continue;
+
+		if (unlikely(!crypto_mod_get(q)))
+			continue;
+
+		alg = q;
+		break;
+	}
+
+	up_read(&crypto_alg_sem);
+
+	return alg;
+}
+
+static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_report_cipher rcipher;
+
+	strlcpy(rcipher.type, "cipher", sizeof(rcipher.type));
+
+	rcipher.blocksize = alg->cra_blocksize;
+	rcipher.min_keysize = alg->cra_cipher.cia_min_keysize;
+	rcipher.max_keysize = alg->cra_cipher.cia_max_keysize;
+
+	if (nla_put(skb, CRYPTOCFGA_REPORT_CIPHER,
+		    sizeof(struct crypto_report_cipher), &rcipher))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_report_comp rcomp;
+
+	strlcpy(rcomp.type, "compression", sizeof(rcomp.type));
+	if (nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS,
+		    sizeof(struct crypto_report_comp), &rcomp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_report_acomp racomp;
+
+	strlcpy(racomp.type, "acomp", sizeof(racomp.type));
+
+	if (nla_put(skb, CRYPTOCFGA_REPORT_ACOMP,
+		    sizeof(struct crypto_report_acomp), &racomp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_report_akcipher rakcipher;
+
+	strlcpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
+
+	if (nla_put(skb, CRYPTOCFGA_REPORT_AKCIPHER,
+		    sizeof(struct crypto_report_akcipher), &rakcipher))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_report_kpp rkpp;
+
+	strlcpy(rkpp.type, "kpp", sizeof(rkpp.type));
+
+	if (nla_put(skb, CRYPTOCFGA_REPORT_KPP,
+		    sizeof(struct crypto_report_kpp), &rkpp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_one(struct crypto_alg *alg,
+			     struct crypto_user_alg *ualg, struct sk_buff *skb)
+{
+	strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name));
+	strlcpy(ualg->cru_driver_name, alg->cra_driver_name,
+		sizeof(ualg->cru_driver_name));
+	strlcpy(ualg->cru_module_name, module_name(alg->cra_module),
+		sizeof(ualg->cru_module_name));
+
+	ualg->cru_type = 0;
+	ualg->cru_mask = 0;
+	ualg->cru_flags = alg->cra_flags;
+	ualg->cru_refcnt = refcount_read(&alg->cra_refcnt);
+
+	if (nla_put_u32(skb, CRYPTOCFGA_PRIORITY_VAL, alg->cra_priority))
+		goto nla_put_failure;
+	if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
+		struct crypto_report_larval rl;
+
+		strlcpy(rl.type, "larval", sizeof(rl.type));
+		if (nla_put(skb, CRYPTOCFGA_REPORT_LARVAL,
+			    sizeof(struct crypto_report_larval), &rl))
+			goto nla_put_failure;
+		goto out;
+	}
+
+	if (alg->cra_type && alg->cra_type->report) {
+		if (alg->cra_type->report(skb, alg))
+			goto nla_put_failure;
+
+		goto out;
+	}
+
+	switch (alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL)) {
+	case CRYPTO_ALG_TYPE_CIPHER:
+		if (crypto_report_cipher(skb, alg))
+			goto nla_put_failure;
+
+		break;
+	case CRYPTO_ALG_TYPE_COMPRESS:
+		if (crypto_report_comp(skb, alg))
+			goto nla_put_failure;
+
+		break;
+	case CRYPTO_ALG_TYPE_ACOMPRESS:
+		if (crypto_report_acomp(skb, alg))
+			goto nla_put_failure;
+
+		break;
+	case CRYPTO_ALG_TYPE_AKCIPHER:
+		if (crypto_report_akcipher(skb, alg))
+			goto nla_put_failure;
+
+		break;
+	case CRYPTO_ALG_TYPE_KPP:
+		if (crypto_report_kpp(skb, alg))
+			goto nla_put_failure;
+		break;
+	}
+
+out:
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_alg(struct crypto_alg *alg,
+			     struct crypto_dump_info *info)
+{
+	struct sk_buff *in_skb = info->in_skb;
+	struct sk_buff *skb = info->out_skb;
+	struct nlmsghdr *nlh;
+	struct crypto_user_alg *ualg;
+	int err = 0;
+
+	nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, info->nlmsg_seq,
+			CRYPTO_MSG_GETALG, sizeof(*ualg), info->nlmsg_flags);
+	if (!nlh) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	ualg = nlmsg_data(nlh);
+
+	err = crypto_report_one(alg, ualg, skb);
+	if (err) {
+		nlmsg_cancel(skb, nlh);
+		goto out;
+	}
+
+	nlmsg_end(skb, nlh);
+
+out:
+	return err;
+}
+
+static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
+			 struct nlattr **attrs)
+{
+	struct crypto_user_alg *p = nlmsg_data(in_nlh);
+	struct crypto_alg *alg;
+	struct sk_buff *skb;
+	struct crypto_dump_info info;
+	int err;
+
+	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+		return -EINVAL;
+
+	alg = crypto_alg_match(p, 0);
+	if (!alg)
+		return -ENOENT;
+
+	err = -ENOMEM;
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb)
+		goto drop_alg;
+
+	info.in_skb = in_skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = in_nlh->nlmsg_seq;
+	info.nlmsg_flags = 0;
+
+	err = crypto_report_alg(alg, &info);
+
+drop_alg:
+	crypto_mod_put(alg);
+
+	if (err)
+		return err;
+
+	return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
+}
+
+static int crypto_dump_report(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct crypto_alg *alg;
+	struct crypto_dump_info info;
+	int err;
+
+	if (cb->args[0])
+		goto out;
+
+	cb->args[0] = 1;
+
+	info.in_skb = cb->skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = cb->nlh->nlmsg_seq;
+	info.nlmsg_flags = NLM_F_MULTI;
+
+	list_for_each_entry(alg, &crypto_alg_list, cra_list) {
+		err = crypto_report_alg(alg, &info);
+		if (err)
+			goto out_err;
+	}
+
+out:
+	return skb->len;
+out_err:
+	return err;
+}
+
+static int crypto_dump_report_done(struct netlink_callback *cb)
+{
+	return 0;
+}
+
+static int crypto_update_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
+			     struct nlattr **attrs)
+{
+	struct crypto_alg *alg;
+	struct crypto_user_alg *p = nlmsg_data(nlh);
+	struct nlattr *priority = attrs[CRYPTOCFGA_PRIORITY_VAL];
+	LIST_HEAD(list);
+
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+		return -EINVAL;
+
+	if (priority && !strlen(p->cru_driver_name))
+		return -EINVAL;
+
+	alg = crypto_alg_match(p, 1);
+	if (!alg)
+		return -ENOENT;
+
+	down_write(&crypto_alg_sem);
+
+	crypto_remove_spawns(alg, &list, NULL);
+
+	if (priority)
+		alg->cra_priority = nla_get_u32(priority);
+
+	up_write(&crypto_alg_sem);
+
+	crypto_mod_put(alg);
+	crypto_remove_final(&list);
+
+	return 0;
+}
+
+static int crypto_del_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
+			  struct nlattr **attrs)
+{
+	struct crypto_alg *alg;
+	struct crypto_user_alg *p = nlmsg_data(nlh);
+	int err;
+
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+		return -EINVAL;
+
+	alg = crypto_alg_match(p, 1);
+	if (!alg)
+		return -ENOENT;
+
+	/* We can not unregister core algorithms such as aes-generic.
+	 * We would loose the reference in the crypto_alg_list to this algorithm
+	 * if we try to unregister. Unregistering such an algorithm without
+	 * removing the module is not possible, so we restrict to crypto
+	 * instances that are build from templates. */
+	err = -EINVAL;
+	if (!(alg->cra_flags & CRYPTO_ALG_INSTANCE))
+		goto drop_alg;
+
+	err = -EBUSY;
+	if (refcount_read(&alg->cra_refcnt) > 2)
+		goto drop_alg;
+
+	err = crypto_unregister_instance((struct crypto_instance *)alg);
+
+drop_alg:
+	crypto_mod_put(alg);
+	return err;
+}
+
+static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
+			  struct nlattr **attrs)
+{
+	int exact = 0;
+	const char *name;
+	struct crypto_alg *alg;
+	struct crypto_user_alg *p = nlmsg_data(nlh);
+	struct nlattr *priority = attrs[CRYPTOCFGA_PRIORITY_VAL];
+
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+		return -EINVAL;
+
+	if (strlen(p->cru_driver_name))
+		exact = 1;
+
+	if (priority && !exact)
+		return -EINVAL;
+
+	alg = crypto_alg_match(p, exact);
+	if (alg) {
+		crypto_mod_put(alg);
+		return -EEXIST;
+	}
+
+	if (strlen(p->cru_driver_name))
+		name = p->cru_driver_name;
+	else
+		name = p->cru_name;
+
+	alg = crypto_alg_mod_lookup(name, p->cru_type, p->cru_mask);
+	if (IS_ERR(alg))
+		return PTR_ERR(alg);
+
+	down_write(&crypto_alg_sem);
+
+	if (priority)
+		alg->cra_priority = nla_get_u32(priority);
+
+	up_write(&crypto_alg_sem);
+
+	crypto_mod_put(alg);
+
+	return 0;
+}
+
+static int crypto_del_rng(struct sk_buff *skb, struct nlmsghdr *nlh,
+			  struct nlattr **attrs)
+{
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
+		return -EPERM;
+	return crypto_del_default_rng();
+}
+
+#define MSGSIZE(type) sizeof(struct type)
+
+static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = {
+	[CRYPTO_MSG_NEWALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
+	[CRYPTO_MSG_DELALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
+	[CRYPTO_MSG_UPDATEALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
+	[CRYPTO_MSG_GETALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
+	[CRYPTO_MSG_DELRNG	- CRYPTO_MSG_BASE] = 0,
+	[CRYPTO_MSG_GETSTAT	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
+};
+
+static const struct nla_policy crypto_policy[CRYPTOCFGA_MAX+1] = {
+	[CRYPTOCFGA_PRIORITY_VAL]   = { .type = NLA_U32},
+};
+
+#undef MSGSIZE
+
+static const struct crypto_link {
+	int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **);
+	int (*dump)(struct sk_buff *, struct netlink_callback *);
+	int (*done)(struct netlink_callback *);
+} crypto_dispatch[CRYPTO_NR_MSGTYPES] = {
+	[CRYPTO_MSG_NEWALG	- CRYPTO_MSG_BASE] = { .doit = crypto_add_alg},
+	[CRYPTO_MSG_DELALG	- CRYPTO_MSG_BASE] = { .doit = crypto_del_alg},
+	[CRYPTO_MSG_UPDATEALG	- CRYPTO_MSG_BASE] = { .doit = crypto_update_alg},
+	[CRYPTO_MSG_GETALG	- CRYPTO_MSG_BASE] = { .doit = crypto_report,
+						       .dump = crypto_dump_report,
+						       .done = crypto_dump_report_done},
+	[CRYPTO_MSG_DELRNG	- CRYPTO_MSG_BASE] = { .doit = crypto_del_rng },
+	[CRYPTO_MSG_GETSTAT	- CRYPTO_MSG_BASE] = { .doit = crypto_reportstat,
+						       .dump = crypto_dump_reportstat,
+						       .done = crypto_dump_reportstat_done},
+};
+
+static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
+			       struct netlink_ext_ack *extack)
+{
+	struct nlattr *attrs[CRYPTOCFGA_MAX+1];
+	const struct crypto_link *link;
+	int type, err;
+
+	type = nlh->nlmsg_type;
+	if (type > CRYPTO_MSG_MAX)
+		return -EINVAL;
+
+	type -= CRYPTO_MSG_BASE;
+	link = &crypto_dispatch[type];
+
+	if ((type == (CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE) &&
+	    (nlh->nlmsg_flags & NLM_F_DUMP))) {
+		struct crypto_alg *alg;
+		u16 dump_alloc = 0;
+
+		if (link->dump == NULL)
+			return -EINVAL;
+
+		down_read(&crypto_alg_sem);
+		list_for_each_entry(alg, &crypto_alg_list, cra_list)
+			dump_alloc += CRYPTO_REPORT_MAXSIZE;
+
+		{
+			struct netlink_dump_control c = {
+				.dump = link->dump,
+				.done = link->done,
+				.min_dump_alloc = dump_alloc,
+			};
+			err = netlink_dump_start(crypto_nlsk, skb, nlh, &c);
+		}
+		up_read(&crypto_alg_sem);
+
+		return err;
+	}
+
+	err = nlmsg_parse(nlh, crypto_msg_min[type], attrs, CRYPTOCFGA_MAX,
+			  crypto_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (link->doit == NULL)
+		return -EINVAL;
+
+	return link->doit(skb, nlh, attrs);
+}
+
+static void crypto_netlink_rcv(struct sk_buff *skb)
+{
+	mutex_lock(&crypto_cfg_mutex);
+	netlink_rcv_skb(skb, &crypto_user_rcv_msg);
+	mutex_unlock(&crypto_cfg_mutex);
+}
+
+static int __init crypto_user_init(void)
+{
+	struct netlink_kernel_cfg cfg = {
+		.input	= crypto_netlink_rcv,
+	};
+
+	crypto_nlsk = netlink_kernel_create(&init_net, NETLINK_CRYPTO, &cfg);
+	if (!crypto_nlsk)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void __exit crypto_user_exit(void)
+{
+	netlink_kernel_release(crypto_nlsk);
+}
+
+module_init(crypto_user_init);
+module_exit(crypto_user_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>");
+MODULE_DESCRIPTION("Crypto userspace configuration API");
+MODULE_ALIAS("net-pf-16-proto-21");
diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c
new file mode 100644
index 000000000000..021ad06bbb62
--- /dev/null
+++ b/crypto/crypto_user_stat.c
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Crypto user configuration API.
+ *
+ * Copyright (C) 2017-2018 Corentin Labbe <clabbe@baylibre.com>
+ *
+ */
+
+#include <linux/crypto.h>
+#include <linux/cryptouser.h>
+#include <linux/sched.h>
+#include <net/netlink.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/internal/rng.h>
+#include <crypto/akcipher.h>
+#include <crypto/kpp.h>
+#include <crypto/internal/cryptouser.h>
+
+#include "internal.h"
+
+#define null_terminated(x)	(strnlen(x, sizeof(x)) < sizeof(x))
+
+static DEFINE_MUTEX(crypto_cfg_mutex);
+
+extern struct sock *crypto_nlsk;
+
+struct crypto_dump_info {
+	struct sk_buff *in_skb;
+	struct sk_buff *out_skb;
+	u32 nlmsg_seq;
+	u16 nlmsg_flags;
+};
+
+static int crypto_report_aead(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat raead;
+	u64 v64;
+	u32 v32;
+
+	strncpy(raead.type, "aead", sizeof(raead.type));
+
+	v32 = atomic_read(&alg->encrypt_cnt);
+	raead.stat_encrypt_cnt = v32;
+	v64 = atomic64_read(&alg->encrypt_tlen);
+	raead.stat_encrypt_tlen = v64;
+	v32 = atomic_read(&alg->decrypt_cnt);
+	raead.stat_decrypt_cnt = v32;
+	v64 = atomic64_read(&alg->decrypt_tlen);
+	raead.stat_decrypt_tlen = v64;
+	v32 = atomic_read(&alg->aead_err_cnt);
+	raead.stat_aead_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_AEAD,
+		    sizeof(struct crypto_stat), &raead))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_cipher(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rcipher;
+	u64 v64;
+	u32 v32;
+
+	strlcpy(rcipher.type, "cipher", sizeof(rcipher.type));
+
+	v32 = atomic_read(&alg->encrypt_cnt);
+	rcipher.stat_encrypt_cnt = v32;
+	v64 = atomic64_read(&alg->encrypt_tlen);
+	rcipher.stat_encrypt_tlen = v64;
+	v32 = atomic_read(&alg->decrypt_cnt);
+	rcipher.stat_decrypt_cnt = v32;
+	v64 = atomic64_read(&alg->decrypt_tlen);
+	rcipher.stat_decrypt_tlen = v64;
+	v32 = atomic_read(&alg->cipher_err_cnt);
+	rcipher.stat_cipher_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_CIPHER,
+		    sizeof(struct crypto_stat), &rcipher))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_comp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rcomp;
+	u64 v64;
+	u32 v32;
+
+	strlcpy(rcomp.type, "compression", sizeof(rcomp.type));
+	v32 = atomic_read(&alg->compress_cnt);
+	rcomp.stat_compress_cnt = v32;
+	v64 = atomic64_read(&alg->compress_tlen);
+	rcomp.stat_compress_tlen = v64;
+	v32 = atomic_read(&alg->decompress_cnt);
+	rcomp.stat_decompress_cnt = v32;
+	v64 = atomic64_read(&alg->decompress_tlen);
+	rcomp.stat_decompress_tlen = v64;
+	v32 = atomic_read(&alg->cipher_err_cnt);
+	rcomp.stat_compress_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_COMPRESS,
+		    sizeof(struct crypto_stat), &rcomp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_acomp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat racomp;
+	u64 v64;
+	u32 v32;
+
+	strlcpy(racomp.type, "acomp", sizeof(racomp.type));
+	v32 = atomic_read(&alg->compress_cnt);
+	racomp.stat_compress_cnt = v32;
+	v64 = atomic64_read(&alg->compress_tlen);
+	racomp.stat_compress_tlen = v64;
+	v32 = atomic_read(&alg->decompress_cnt);
+	racomp.stat_decompress_cnt = v32;
+	v64 = atomic64_read(&alg->decompress_tlen);
+	racomp.stat_decompress_tlen = v64;
+	v32 = atomic_read(&alg->cipher_err_cnt);
+	racomp.stat_compress_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_ACOMP,
+		    sizeof(struct crypto_stat), &racomp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_akcipher(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rakcipher;
+	u64 v64;
+	u32 v32;
+
+	strncpy(rakcipher.type, "akcipher", sizeof(rakcipher.type));
+	v32 = atomic_read(&alg->encrypt_cnt);
+	rakcipher.stat_encrypt_cnt = v32;
+	v64 = atomic64_read(&alg->encrypt_tlen);
+	rakcipher.stat_encrypt_tlen = v64;
+	v32 = atomic_read(&alg->decrypt_cnt);
+	rakcipher.stat_decrypt_cnt = v32;
+	v64 = atomic64_read(&alg->decrypt_tlen);
+	rakcipher.stat_decrypt_tlen = v64;
+	v32 = atomic_read(&alg->sign_cnt);
+	rakcipher.stat_sign_cnt = v32;
+	v32 = atomic_read(&alg->verify_cnt);
+	rakcipher.stat_verify_cnt = v32;
+	v32 = atomic_read(&alg->akcipher_err_cnt);
+	rakcipher.stat_akcipher_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_AKCIPHER,
+		    sizeof(struct crypto_stat), &rakcipher))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_kpp(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rkpp;
+	u32 v;
+
+	strlcpy(rkpp.type, "kpp", sizeof(rkpp.type));
+
+	v = atomic_read(&alg->setsecret_cnt);
+	rkpp.stat_setsecret_cnt = v;
+	v = atomic_read(&alg->generate_public_key_cnt);
+	rkpp.stat_generate_public_key_cnt = v;
+	v = atomic_read(&alg->compute_shared_secret_cnt);
+	rkpp.stat_compute_shared_secret_cnt = v;
+	v = atomic_read(&alg->kpp_err_cnt);
+	rkpp.stat_kpp_err_cnt = v;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_KPP,
+		    sizeof(struct crypto_stat), &rkpp))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_ahash(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rhash;
+	u64 v64;
+	u32 v32;
+
+	strncpy(rhash.type, "ahash", sizeof(rhash.type));
+
+	v32 = atomic_read(&alg->hash_cnt);
+	rhash.stat_hash_cnt = v32;
+	v64 = atomic64_read(&alg->hash_tlen);
+	rhash.stat_hash_tlen = v64;
+	v32 = atomic_read(&alg->hash_err_cnt);
+	rhash.stat_hash_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_HASH,
+		    sizeof(struct crypto_stat), &rhash))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_shash(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rhash;
+	u64 v64;
+	u32 v32;
+
+	strncpy(rhash.type, "shash", sizeof(rhash.type));
+
+	v32 = atomic_read(&alg->hash_cnt);
+	rhash.stat_hash_cnt = v32;
+	v64 = atomic64_read(&alg->hash_tlen);
+	rhash.stat_hash_tlen = v64;
+	v32 = atomic_read(&alg->hash_err_cnt);
+	rhash.stat_hash_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_HASH,
+		    sizeof(struct crypto_stat), &rhash))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_report_rng(struct sk_buff *skb, struct crypto_alg *alg)
+{
+	struct crypto_stat rrng;
+	u64 v64;
+	u32 v32;
+
+	strncpy(rrng.type, "rng", sizeof(rrng.type));
+
+	v32 = atomic_read(&alg->generate_cnt);
+	rrng.stat_generate_cnt = v32;
+	v64 = atomic64_read(&alg->generate_tlen);
+	rrng.stat_generate_tlen = v64;
+	v32 = atomic_read(&alg->seed_cnt);
+	rrng.stat_seed_cnt = v32;
+	v32 = atomic_read(&alg->hash_err_cnt);
+	rrng.stat_rng_err_cnt = v32;
+
+	if (nla_put(skb, CRYPTOCFGA_STAT_RNG,
+		    sizeof(struct crypto_stat), &rrng))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_reportstat_one(struct crypto_alg *alg,
+				 struct crypto_user_alg *ualg,
+				 struct sk_buff *skb)
+{
+	strlcpy(ualg->cru_name, alg->cra_name, sizeof(ualg->cru_name));
+	strlcpy(ualg->cru_driver_name, alg->cra_driver_name,
+		sizeof(ualg->cru_driver_name));
+	strlcpy(ualg->cru_module_name, module_name(alg->cra_module),
+		sizeof(ualg->cru_module_name));
+
+	ualg->cru_type = 0;
+	ualg->cru_mask = 0;
+	ualg->cru_flags = alg->cra_flags;
+	ualg->cru_refcnt = refcount_read(&alg->cra_refcnt);
+
+	if (nla_put_u32(skb, CRYPTOCFGA_PRIORITY_VAL, alg->cra_priority))
+		goto nla_put_failure;
+	if (alg->cra_flags & CRYPTO_ALG_LARVAL) {
+		struct crypto_stat rl;
+
+		strlcpy(rl.type, "larval", sizeof(rl.type));
+		if (nla_put(skb, CRYPTOCFGA_STAT_LARVAL,
+			    sizeof(struct crypto_stat), &rl))
+			goto nla_put_failure;
+		goto out;
+	}
+
+	switch (alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL)) {
+	case CRYPTO_ALG_TYPE_AEAD:
+		if (crypto_report_aead(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_SKCIPHER:
+		if (crypto_report_cipher(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_BLKCIPHER:
+		if (crypto_report_cipher(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_CIPHER:
+		if (crypto_report_cipher(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_COMPRESS:
+		if (crypto_report_comp(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_ACOMPRESS:
+		if (crypto_report_acomp(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_SCOMPRESS:
+		if (crypto_report_acomp(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_AKCIPHER:
+		if (crypto_report_akcipher(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_KPP:
+		if (crypto_report_kpp(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_AHASH:
+		if (crypto_report_ahash(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_HASH:
+		if (crypto_report_shash(skb, alg))
+			goto nla_put_failure;
+		break;
+	case CRYPTO_ALG_TYPE_RNG:
+		if (crypto_report_rng(skb, alg))
+			goto nla_put_failure;
+		break;
+	default:
+		pr_err("ERROR: Unhandled alg %d in %s\n",
+		       alg->cra_flags & (CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_LARVAL),
+		       __func__);
+	}
+
+out:
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static int crypto_reportstat_alg(struct crypto_alg *alg,
+				 struct crypto_dump_info *info)
+{
+	struct sk_buff *in_skb = info->in_skb;
+	struct sk_buff *skb = info->out_skb;
+	struct nlmsghdr *nlh;
+	struct crypto_user_alg *ualg;
+	int err = 0;
+
+	nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, info->nlmsg_seq,
+			CRYPTO_MSG_GETSTAT, sizeof(*ualg), info->nlmsg_flags);
+	if (!nlh) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	ualg = nlmsg_data(nlh);
+
+	err = crypto_reportstat_one(alg, ualg, skb);
+	if (err) {
+		nlmsg_cancel(skb, nlh);
+		goto out;
+	}
+
+	nlmsg_end(skb, nlh);
+
+out:
+	return err;
+}
+
+int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
+		      struct nlattr **attrs)
+{
+	struct crypto_user_alg *p = nlmsg_data(in_nlh);
+	struct crypto_alg *alg;
+	struct sk_buff *skb;
+	struct crypto_dump_info info;
+	int err;
+
+	if (!null_terminated(p->cru_name) || !null_terminated(p->cru_driver_name))
+		return -EINVAL;
+
+	alg = crypto_alg_match(p, 0);
+	if (!alg)
+		return -ENOENT;
+
+	err = -ENOMEM;
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!skb)
+		goto drop_alg;
+
+	info.in_skb = in_skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = in_nlh->nlmsg_seq;
+	info.nlmsg_flags = 0;
+
+	err = crypto_reportstat_alg(alg, &info);
+
+drop_alg:
+	crypto_mod_put(alg);
+
+	if (err)
+		return err;
+
+	return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
+}
+
+int crypto_dump_reportstat(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct crypto_alg *alg;
+	struct crypto_dump_info info;
+	int err;
+
+	if (cb->args[0])
+		goto out;
+
+	cb->args[0] = 1;
+
+	info.in_skb = cb->skb;
+	info.out_skb = skb;
+	info.nlmsg_seq = cb->nlh->nlmsg_seq;
+	info.nlmsg_flags = NLM_F_MULTI;
+
+	list_for_each_entry(alg, &crypto_alg_list, cra_list) {
+		err = crypto_reportstat_alg(alg, &info);
+		if (err)
+			goto out_err;
+	}
+
+out:
+	return skb->len;
+out_err:
+	return err;
+}
+
+int crypto_dump_reportstat_done(struct netlink_callback *cb)
+{
+	return 0;
+}
+
+MODULE_LICENSE("GPL");
diff --git a/crypto/rng.c b/crypto/rng.c
index b4a618668161..547f16ecbfb0 100644
--- a/crypto/rng.c
+++ b/crypto/rng.c
@@ -50,6 +50,7 @@ int crypto_rng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen)
 	}
 
 	err = crypto_rng_alg(tfm)->seed(tfm, seed, slen);
+	crypto_stat_rng_seed(tfm, err);
 out:
 	kzfree(buf);
 	return err;
diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
index e328b52425a8..22e6f412c595 100644
--- a/include/crypto/acompress.h
+++ b/include/crypto/acompress.h
@@ -234,6 +234,34 @@ static inline void acomp_request_set_params(struct acomp_req *req,
 		req->flags |= CRYPTO_ACOMP_ALLOC_OUTPUT;
 }
 
+static inline void crypto_stat_compress(struct acomp_req *req, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&tfm->base.__crt_alg->compress_err_cnt);
+	} else {
+		atomic_inc(&tfm->base.__crt_alg->compress_cnt);
+		atomic64_add(req->slen, &tfm->base.__crt_alg->compress_tlen);
+	}
+#endif
+}
+
+static inline void crypto_stat_decompress(struct acomp_req *req, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&tfm->base.__crt_alg->compress_err_cnt);
+	} else {
+		atomic_inc(&tfm->base.__crt_alg->decompress_cnt);
+		atomic64_add(req->slen, &tfm->base.__crt_alg->decompress_tlen);
+	}
+#endif
+}
+
 /**
  * crypto_acomp_compress() -- Invoke asynchronous compress operation
  *
@@ -246,8 +274,11 @@ static inline void acomp_request_set_params(struct acomp_req *req,
 static inline int crypto_acomp_compress(struct acomp_req *req)
 {
 	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	int ret;
 
-	return tfm->compress(req);
+	ret = tfm->compress(req);
+	crypto_stat_compress(req, ret);
+	return ret;
 }
 
 /**
@@ -262,8 +293,11 @@ static inline int crypto_acomp_compress(struct acomp_req *req)
 static inline int crypto_acomp_decompress(struct acomp_req *req)
 {
 	struct crypto_acomp *tfm = crypto_acomp_reqtfm(req);
+	int ret;
 
-	return tfm->decompress(req);
+	ret = tfm->decompress(req);
+	crypto_stat_decompress(req, ret);
+	return ret;
 }
 
 #endif
diff --git a/include/crypto/aead.h b/include/crypto/aead.h
index 1e26f790b03f..0d765d7bfb82 100644
--- a/include/crypto/aead.h
+++ b/include/crypto/aead.h
@@ -306,6 +306,34 @@ static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req)
 	return __crypto_aead_cast(req->base.tfm);
 }
 
+static inline void crypto_stat_aead_encrypt(struct aead_request *req, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&tfm->base.__crt_alg->aead_err_cnt);
+	} else {
+		atomic_inc(&tfm->base.__crt_alg->encrypt_cnt);
+		atomic64_add(req->cryptlen, &tfm->base.__crt_alg->encrypt_tlen);
+	}
+#endif
+}
+
+static inline void crypto_stat_aead_decrypt(struct aead_request *req, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&tfm->base.__crt_alg->aead_err_cnt);
+	} else {
+		atomic_inc(&tfm->base.__crt_alg->decrypt_cnt);
+		atomic64_add(req->cryptlen, &tfm->base.__crt_alg->decrypt_tlen);
+	}
+#endif
+}
+
 /**
  * crypto_aead_encrypt() - encrypt plaintext
  * @req: reference to the aead_request handle that holds all information
@@ -328,11 +356,14 @@ static inline struct crypto_aead *crypto_aead_reqtfm(struct aead_request *req)
 static inline int crypto_aead_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	int ret;
 
 	if (crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY)
-		return -ENOKEY;
-
-	return crypto_aead_alg(aead)->encrypt(req);
+		ret = -ENOKEY;
+	else
+		ret = crypto_aead_alg(aead)->encrypt(req);
+	crypto_stat_aead_encrypt(req, ret);
+	return ret;
 }
 
 /**
@@ -360,14 +391,16 @@ static inline int crypto_aead_encrypt(struct aead_request *req)
 static inline int crypto_aead_decrypt(struct aead_request *req)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	int ret;
 
 	if (crypto_aead_get_flags(aead) & CRYPTO_TFM_NEED_KEY)
-		return -ENOKEY;
-
-	if (req->cryptlen < crypto_aead_authsize(aead))
-		return -EINVAL;
-
-	return crypto_aead_alg(aead)->decrypt(req);
+		ret = -ENOKEY;
+	else if (req->cryptlen < crypto_aead_authsize(aead))
+		ret = -EINVAL;
+	else
+		ret = crypto_aead_alg(aead)->decrypt(req);
+	crypto_stat_aead_decrypt(req, ret);
+	return ret;
 }
 
 /**
diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h
index b5e11de4d497..afac71119396 100644
--- a/include/crypto/akcipher.h
+++ b/include/crypto/akcipher.h
@@ -271,6 +271,62 @@ static inline unsigned int crypto_akcipher_maxsize(struct crypto_akcipher *tfm)
 	return alg->max_size(tfm);
 }
 
+static inline void crypto_stat_akcipher_encrypt(struct akcipher_request *req,
+						int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&tfm->base.__crt_alg->akcipher_err_cnt);
+	} else {
+		atomic_inc(&tfm->base.__crt_alg->encrypt_cnt);
+		atomic64_add(req->src_len, &tfm->base.__crt_alg->encrypt_tlen);
+	}
+#endif
+}
+
+static inline void crypto_stat_akcipher_decrypt(struct akcipher_request *req,
+						int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&tfm->base.__crt_alg->akcipher_err_cnt);
+	} else {
+		atomic_inc(&tfm->base.__crt_alg->decrypt_cnt);
+		atomic64_add(req->src_len, &tfm->base.__crt_alg->decrypt_tlen);
+	}
+#endif
+}
+
+static inline void crypto_stat_akcipher_sign(struct akcipher_request *req,
+					     int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY)
+		atomic_inc(&tfm->base.__crt_alg->akcipher_err_cnt);
+	else
+		atomic_inc(&tfm->base.__crt_alg->sign_cnt);
+#endif
+}
+
+static inline void crypto_stat_akcipher_verify(struct akcipher_request *req,
+					       int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY)
+		atomic_inc(&tfm->base.__crt_alg->akcipher_err_cnt);
+	else
+		atomic_inc(&tfm->base.__crt_alg->verify_cnt);
+#endif
+}
+
 /**
  * crypto_akcipher_encrypt() - Invoke public key encrypt operation
  *
@@ -285,8 +341,11 @@ static inline int crypto_akcipher_encrypt(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct akcipher_alg *alg = crypto_akcipher_alg(tfm);
+	int ret;
 
-	return alg->encrypt(req);
+	ret = alg->encrypt(req);
+	crypto_stat_akcipher_encrypt(req, ret);
+	return ret;
 }
 
 /**
@@ -303,8 +362,11 @@ static inline int crypto_akcipher_decrypt(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct akcipher_alg *alg = crypto_akcipher_alg(tfm);
+	int ret;
 
-	return alg->decrypt(req);
+	ret = alg->decrypt(req);
+	crypto_stat_akcipher_decrypt(req, ret);
+	return ret;
 }
 
 /**
@@ -321,8 +383,11 @@ static inline int crypto_akcipher_sign(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct akcipher_alg *alg = crypto_akcipher_alg(tfm);
+	int ret;
 
-	return alg->sign(req);
+	ret = alg->sign(req);
+	crypto_stat_akcipher_sign(req, ret);
+	return ret;
 }
 
 /**
@@ -339,8 +404,11 @@ static inline int crypto_akcipher_verify(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct akcipher_alg *alg = crypto_akcipher_alg(tfm);
+	int ret;
 
-	return alg->verify(req);
+	ret = alg->verify(req);
+	crypto_stat_akcipher_verify(req, ret);
+	return ret;
 }
 
 /**
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index 21587011ab0f..bc7796600338 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -412,6 +412,32 @@ static inline void *ahash_request_ctx(struct ahash_request *req)
 int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key,
 			unsigned int keylen);
 
+static inline void crypto_stat_ahash_update(struct ahash_request *req, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY)
+		atomic_inc(&tfm->base.__crt_alg->hash_err_cnt);
+	else
+		atomic64_add(req->nbytes, &tfm->base.__crt_alg->hash_tlen);
+#endif
+}
+
+static inline void crypto_stat_ahash_final(struct ahash_request *req, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&tfm->base.__crt_alg->hash_err_cnt);
+	} else {
+		atomic_inc(&tfm->base.__crt_alg->hash_cnt);
+		atomic64_add(req->nbytes, &tfm->base.__crt_alg->hash_tlen);
+	}
+#endif
+}
+
 /**
  * crypto_ahash_finup() - update and finalize message digest
  * @req: reference to the ahash_request handle that holds all information
@@ -526,7 +552,11 @@ static inline int crypto_ahash_init(struct ahash_request *req)
  */
 static inline int crypto_ahash_update(struct ahash_request *req)
 {
-	return crypto_ahash_reqtfm(req)->update(req);
+	int ret;
+
+	ret = crypto_ahash_reqtfm(req)->update(req);
+	crypto_stat_ahash_update(req, ret);
+	return ret;
 }
 
 /**
diff --git a/include/crypto/internal/cryptouser.h b/include/crypto/internal/cryptouser.h
new file mode 100644
index 000000000000..8db299c25566
--- /dev/null
+++ b/include/crypto/internal/cryptouser.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <net/netlink.h>
+
+struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact);
+
+int crypto_dump_reportstat(struct sk_buff *skb, struct netlink_callback *cb);
+int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct nlattr **attrs);
+int crypto_dump_reportstat_done(struct netlink_callback *cb);
diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h
index 1bde0a6514fa..f517ba6d3a27 100644
--- a/include/crypto/kpp.h
+++ b/include/crypto/kpp.h
@@ -268,6 +268,42 @@ struct kpp_secret {
 	unsigned short len;
 };
 
+static inline void crypto_stat_kpp_set_secret(struct crypto_kpp *tfm, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	if (ret)
+		atomic_inc(&tfm->base.__crt_alg->kpp_err_cnt);
+	else
+		atomic_inc(&tfm->base.__crt_alg->setsecret_cnt);
+#endif
+}
+
+static inline void crypto_stat_kpp_generate_public_key(struct kpp_request *req,
+						       int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+
+	if (ret)
+		atomic_inc(&tfm->base.__crt_alg->kpp_err_cnt);
+	else
+		atomic_inc(&tfm->base.__crt_alg->generate_public_key_cnt);
+#endif
+}
+
+static inline void crypto_stat_kpp_compute_shared_secret(struct kpp_request *req,
+							 int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+
+	if (ret)
+		atomic_inc(&tfm->base.__crt_alg->kpp_err_cnt);
+	else
+		atomic_inc(&tfm->base.__crt_alg->compute_shared_secret_cnt);
+#endif
+}
+
 /**
  * crypto_kpp_set_secret() - Invoke kpp operation
  *
@@ -287,8 +323,11 @@ static inline int crypto_kpp_set_secret(struct crypto_kpp *tfm,
 					const void *buffer, unsigned int len)
 {
 	struct kpp_alg *alg = crypto_kpp_alg(tfm);
+	int ret;
 
-	return alg->set_secret(tfm, buffer, len);
+	ret = alg->set_secret(tfm, buffer, len);
+	crypto_stat_kpp_set_secret(tfm, ret);
+	return ret;
 }
 
 /**
@@ -308,8 +347,11 @@ static inline int crypto_kpp_generate_public_key(struct kpp_request *req)
 {
 	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
 	struct kpp_alg *alg = crypto_kpp_alg(tfm);
+	int ret;
 
-	return alg->generate_public_key(req);
+	ret = alg->generate_public_key(req);
+	crypto_stat_kpp_generate_public_key(req, ret);
+	return ret;
 }
 
 /**
@@ -326,8 +368,11 @@ static inline int crypto_kpp_compute_shared_secret(struct kpp_request *req)
 {
 	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
 	struct kpp_alg *alg = crypto_kpp_alg(tfm);
+	int ret;
 
-	return alg->compute_shared_secret(req);
+	ret = alg->compute_shared_secret(req);
+	crypto_stat_kpp_compute_shared_secret(req, ret);
+	return ret;
 }
 
 /**
diff --git a/include/crypto/rng.h b/include/crypto/rng.h
index b95ede354a66..6d258f5b68f1 100644
--- a/include/crypto/rng.h
+++ b/include/crypto/rng.h
@@ -122,6 +122,29 @@ static inline void crypto_free_rng(struct crypto_rng *tfm)
 	crypto_destroy_tfm(tfm, crypto_rng_tfm(tfm));
 }
 
+static inline void crypto_stat_rng_seed(struct crypto_rng *tfm, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY)
+		atomic_inc(&tfm->base.__crt_alg->rng_err_cnt);
+	else
+		atomic_inc(&tfm->base.__crt_alg->seed_cnt);
+#endif
+}
+
+static inline void crypto_stat_rng_generate(struct crypto_rng *tfm,
+					    unsigned int dlen, int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&tfm->base.__crt_alg->rng_err_cnt);
+	} else {
+		atomic_inc(&tfm->base.__crt_alg->generate_cnt);
+		atomic64_add(dlen, &tfm->base.__crt_alg->generate_tlen);
+	}
+#endif
+}
+
 /**
  * crypto_rng_generate() - get random number
  * @tfm: cipher handle
@@ -140,7 +163,11 @@ static inline int crypto_rng_generate(struct crypto_rng *tfm,
 				      const u8 *src, unsigned int slen,
 				      u8 *dst, unsigned int dlen)
 {
-	return crypto_rng_alg(tfm)->generate(tfm, src, slen, dst, dlen);
+	int ret;
+
+	ret = crypto_rng_alg(tfm)->generate(tfm, src, slen, dst, dlen);
+	crypto_stat_rng_generate(tfm, dlen, ret);
+	return ret;
 }
 
 /**
diff --git a/include/crypto/skcipher.h b/include/crypto/skcipher.h
index 45ae894fda32..925f547cdcfa 100644
--- a/include/crypto/skcipher.h
+++ b/include/crypto/skcipher.h
@@ -486,6 +486,32 @@ static inline struct crypto_sync_skcipher *crypto_sync_skcipher_reqtfm(
 	return container_of(tfm, struct crypto_sync_skcipher, base);
 }
 
+static inline void crypto_stat_skcipher_encrypt(struct skcipher_request *req,
+						int ret, struct crypto_alg *alg)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&alg->cipher_err_cnt);
+	} else {
+		atomic_inc(&alg->encrypt_cnt);
+		atomic64_add(req->cryptlen, &alg->encrypt_tlen);
+	}
+#endif
+}
+
+static inline void crypto_stat_skcipher_decrypt(struct skcipher_request *req,
+						int ret, struct crypto_alg *alg)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&alg->cipher_err_cnt);
+	} else {
+		atomic_inc(&alg->decrypt_cnt);
+		atomic64_add(req->cryptlen, &alg->decrypt_tlen);
+	}
+#endif
+}
+
 /**
  * crypto_skcipher_encrypt() - encrypt plaintext
  * @req: reference to the skcipher_request handle that holds all information
@@ -500,11 +526,14 @@ static inline struct crypto_sync_skcipher *crypto_sync_skcipher_reqtfm(
 static inline int crypto_skcipher_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	int ret;
 
 	if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
-		return -ENOKEY;
-
-	return tfm->encrypt(req);
+		ret = -ENOKEY;
+	else
+		ret = tfm->encrypt(req);
+	crypto_stat_skcipher_encrypt(req, ret, tfm->base.__crt_alg);
+	return ret;
 }
 
 /**
@@ -521,11 +550,14 @@ static inline int crypto_skcipher_encrypt(struct skcipher_request *req)
 static inline int crypto_skcipher_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	int ret;
 
 	if (crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_NEED_KEY)
-		return -ENOKEY;
-
-	return tfm->decrypt(req);
+		ret = -ENOKEY;
+	else
+		ret = tfm->decrypt(req);
+	crypto_stat_skcipher_decrypt(req, ret, tfm->base.__crt_alg);
+	return ret;
 }
 
 /**
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index e8839d3a7559..3634ad6fe202 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -454,6 +454,33 @@ struct compress_alg {
  * @cra_refcnt: internally used
  * @cra_destroy: internally used
  *
+ * All following statistics are for this crypto_alg
+ * @encrypt_cnt:	number of encrypt requests
+ * @decrypt_cnt:	number of decrypt requests
+ * @compress_cnt:	number of compress requests
+ * @decompress_cnt:	number of decompress requests
+ * @generate_cnt:	number of RNG generate requests
+ * @seed_cnt:		number of times the rng was seeded
+ * @hash_cnt:		number of hash requests
+ * @sign_cnt:		number of sign requests
+ * @setsecret_cnt:	number of setsecrey operation
+ * @generate_public_key_cnt:	number of generate_public_key operation
+ * @verify_cnt:			number of verify operation
+ * @compute_shared_secret_cnt:	number of compute_shared_secret operation
+ * @encrypt_tlen:	total data size handled by encrypt requests
+ * @decrypt_tlen:	total data size handled by decrypt requests
+ * @compress_tlen:	total data size handled by compress requests
+ * @decompress_tlen:	total data size handled by decompress requests
+ * @generate_tlen:	total data size of generated data by the RNG
+ * @hash_tlen:		total data size hashed
+ * @akcipher_err_cnt:	number of error for akcipher requests
+ * @cipher_err_cnt:	number of error for akcipher requests
+ * @compress_err_cnt:	number of error for akcipher requests
+ * @aead_err_cnt:	number of error for akcipher requests
+ * @hash_err_cnt:	number of error for akcipher requests
+ * @rng_err_cnt:	number of error for akcipher requests
+ * @kpp_err_cnt:	number of error for akcipher requests
+ *
  * The struct crypto_alg describes a generic Crypto API algorithm and is common
  * for all of the transformations. Any variable not documented here shall not
  * be used by a cipher implementation as it is internal to the Crypto API.
@@ -487,6 +514,45 @@ struct crypto_alg {
 	void (*cra_destroy)(struct crypto_alg *alg);
 	
 	struct module *cra_module;
+
+	union {
+		atomic_t encrypt_cnt;
+		atomic_t compress_cnt;
+		atomic_t generate_cnt;
+		atomic_t hash_cnt;
+		atomic_t setsecret_cnt;
+	};
+	union {
+		atomic64_t encrypt_tlen;
+		atomic64_t compress_tlen;
+		atomic64_t generate_tlen;
+		atomic64_t hash_tlen;
+	};
+	union {
+		atomic_t akcipher_err_cnt;
+		atomic_t cipher_err_cnt;
+		atomic_t compress_err_cnt;
+		atomic_t aead_err_cnt;
+		atomic_t hash_err_cnt;
+		atomic_t rng_err_cnt;
+		atomic_t kpp_err_cnt;
+	};
+	union {
+		atomic_t decrypt_cnt;
+		atomic_t decompress_cnt;
+		atomic_t seed_cnt;
+		atomic_t generate_public_key_cnt;
+	};
+	union {
+		atomic64_t decrypt_tlen;
+		atomic64_t decompress_tlen;
+	};
+	union {
+		atomic_t verify_cnt;
+		atomic_t compute_shared_secret_cnt;
+	};
+	atomic_t sign_cnt;
+
 } CRYPTO_MINALIGN_ATTR;
 
 /*
@@ -907,6 +973,38 @@ static inline struct crypto_ablkcipher *crypto_ablkcipher_reqtfm(
 	return __crypto_ablkcipher_cast(req->base.tfm);
 }
 
+static inline void crypto_stat_ablkcipher_encrypt(struct ablkcipher_request *req,
+						  int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct ablkcipher_tfm *crt =
+		crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req));
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&crt->base->base.__crt_alg->cipher_err_cnt);
+	} else {
+		atomic_inc(&crt->base->base.__crt_alg->encrypt_cnt);
+		atomic64_add(req->nbytes, &crt->base->base.__crt_alg->encrypt_tlen);
+	}
+#endif
+}
+
+static inline void crypto_stat_ablkcipher_decrypt(struct ablkcipher_request *req,
+						  int ret)
+{
+#ifdef CONFIG_CRYPTO_STATS
+	struct ablkcipher_tfm *crt =
+		crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req));
+
+	if (ret && ret != -EINPROGRESS && ret != -EBUSY) {
+		atomic_inc(&crt->base->base.__crt_alg->cipher_err_cnt);
+	} else {
+		atomic_inc(&crt->base->base.__crt_alg->decrypt_cnt);
+		atomic64_add(req->nbytes, &crt->base->base.__crt_alg->decrypt_tlen);
+	}
+#endif
+}
+
 /**
  * crypto_ablkcipher_encrypt() - encrypt plaintext
  * @req: reference to the ablkcipher_request handle that holds all information
@@ -922,7 +1020,11 @@ static inline int crypto_ablkcipher_encrypt(struct ablkcipher_request *req)
 {
 	struct ablkcipher_tfm *crt =
 		crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req));
-	return crt->encrypt(req);
+	int ret;
+
+	ret = crt->encrypt(req);
+	crypto_stat_ablkcipher_encrypt(req, ret);
+	return ret;
 }
 
 /**
@@ -940,7 +1042,11 @@ static inline int crypto_ablkcipher_decrypt(struct ablkcipher_request *req)
 {
 	struct ablkcipher_tfm *crt =
 		crypto_ablkcipher_crt(crypto_ablkcipher_reqtfm(req));
-	return crt->decrypt(req);
+	int ret;
+
+	ret = crt->decrypt(req);
+	crypto_stat_ablkcipher_decrypt(req, ret);
+	return ret;
 }
 
 /**
diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h
index 19bf0ca6d635..6dafbc3e4414 100644
--- a/include/uapi/linux/cryptouser.h
+++ b/include/uapi/linux/cryptouser.h
@@ -29,6 +29,7 @@ enum {
 	CRYPTO_MSG_UPDATEALG,
 	CRYPTO_MSG_GETALG,
 	CRYPTO_MSG_DELRNG,
+	CRYPTO_MSG_GETSTAT,
 	__CRYPTO_MSG_MAX
 };
 #define CRYPTO_MSG_MAX (__CRYPTO_MSG_MAX - 1)
@@ -50,6 +51,16 @@ enum crypto_attr_type_t {
 	CRYPTOCFGA_REPORT_AKCIPHER,	/* struct crypto_report_akcipher */
 	CRYPTOCFGA_REPORT_KPP,		/* struct crypto_report_kpp */
 	CRYPTOCFGA_REPORT_ACOMP,	/* struct crypto_report_acomp */
+	CRYPTOCFGA_STAT_LARVAL,		/* struct crypto_stat */
+	CRYPTOCFGA_STAT_HASH,		/* struct crypto_stat */
+	CRYPTOCFGA_STAT_BLKCIPHER,	/* struct crypto_stat */
+	CRYPTOCFGA_STAT_AEAD,		/* struct crypto_stat */
+	CRYPTOCFGA_STAT_COMPRESS,	/* struct crypto_stat */
+	CRYPTOCFGA_STAT_RNG,		/* struct crypto_stat */
+	CRYPTOCFGA_STAT_CIPHER,		/* struct crypto_stat */
+	CRYPTOCFGA_STAT_AKCIPHER,	/* struct crypto_stat */
+	CRYPTOCFGA_STAT_KPP,		/* struct crypto_stat */
+	CRYPTOCFGA_STAT_ACOMP,		/* struct crypto_stat */
 	__CRYPTOCFGA_MAX
 
 #define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1)
@@ -65,6 +76,47 @@ struct crypto_user_alg {
 	__u32 cru_flags;
 };
 
+struct crypto_stat {
+	char type[CRYPTO_MAX_NAME];
+	union {
+		__u32 stat_encrypt_cnt;
+		__u32 stat_compress_cnt;
+		__u32 stat_generate_cnt;
+		__u32 stat_hash_cnt;
+		__u32 stat_setsecret_cnt;
+	};
+	union {
+		__u64 stat_encrypt_tlen;
+		__u64 stat_compress_tlen;
+		__u64 stat_generate_tlen;
+		__u64 stat_hash_tlen;
+	};
+	union {
+		__u32 stat_akcipher_err_cnt;
+		__u32 stat_cipher_err_cnt;
+		__u32 stat_compress_err_cnt;
+		__u32 stat_aead_err_cnt;
+		__u32 stat_hash_err_cnt;
+		__u32 stat_rng_err_cnt;
+		__u32 stat_kpp_err_cnt;
+	};
+	union {
+		__u32 stat_decrypt_cnt;
+		__u32 stat_decompress_cnt;
+		__u32 stat_seed_cnt;
+		__u32 stat_generate_public_key_cnt;
+	};
+	union {
+		__u64 stat_decrypt_tlen;
+		__u64 stat_decompress_tlen;
+	};
+	union {
+		__u32 stat_verify_cnt;
+		__u32 stat_compute_shared_secret_cnt;
+	};
+	__u32 stat_sign_cnt;
+};
+
 struct crypto_report_larval {
 	char type[CRYPTO_MAX_NAME];
 };
-- 
cgit v1.2.3


From eb2bccb70b979d996ecb769d692b92ff12eabbb7 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Wed, 19 Sep 2018 03:13:21 +0200
Subject: rtc: move rtc_add_group/s definitions

Move rtc_add_group and rtc_add_groups definition to rtc.h that is available
for all RTC drivers.

Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-core.h | 14 --------------
 include/linux/rtc.h    | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/rtc/rtc-core.h b/drivers/rtc/rtc-core.h
index ccc17a2e293d..0abf98983e13 100644
--- a/drivers/rtc/rtc-core.h
+++ b/drivers/rtc/rtc-core.h
@@ -40,23 +40,9 @@ static inline void rtc_proc_del_device(struct rtc_device *rtc)
 
 #ifdef CONFIG_RTC_INTF_SYSFS
 const struct attribute_group **rtc_get_dev_attribute_groups(void);
-int rtc_add_group(struct rtc_device *rtc, const struct attribute_group *grp);
-int rtc_add_groups(struct rtc_device *rtc, const struct attribute_group **grps);
 #else
 static inline const struct attribute_group **rtc_get_dev_attribute_groups(void)
 {
 	return NULL;
 }
-
-static inline
-int rtc_add_group(struct rtc_device *rtc, const struct attribute_group *grp)
-{
-	return 0;
-}
-
-static inline
-int rtc_add_groups(struct rtc_device *rtc, const struct attribute_group **grps)
-{
-	return 0;
-}
 #endif
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index faf00a1472d4..c8bb4a2b48c3 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -272,4 +272,20 @@ static inline int rtc_nvmem_register(struct rtc_device *rtc,
 static inline void rtc_nvmem_unregister(struct rtc_device *rtc) {}
 #endif
 
+#ifdef CONFIG_RTC_INTF_SYSFS
+int rtc_add_group(struct rtc_device *rtc, const struct attribute_group *grp);
+int rtc_add_groups(struct rtc_device *rtc, const struct attribute_group **grps);
+#else
+static inline
+int rtc_add_group(struct rtc_device *rtc, const struct attribute_group *grp)
+{
+	return 0;
+}
+
+static inline
+int rtc_add_groups(struct rtc_device *rtc, const struct attribute_group **grps)
+{
+	return 0;
+}
+#endif
 #endif /* _LINUX_RTC_H_ */
-- 
cgit v1.2.3


From 421c119f558761556afca6a62ad183bc2d8659e0 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 24 Sep 2018 14:10:04 +0200
Subject: netfilter: avoid erronous array bounds warning

Unfortunately some versions of gcc emit following warning:
  $ make net/xfrm/xfrm_output.o
  linux/compiler.h:252:20: warning: array subscript is above array bounds [-Warray-bounds]
  hook_head = rcu_dereference(net->nf.hooks_arp[hook]);
                            ^~~~~~~~~~~~~~~~~~~~~
xfrm_output_resume passes skb_dst(skb)->ops->family as its 'pf' arg so compiler
can't know that we'll never access hooks_arp[].
(NFPROTO_IPV4 or NFPROTO_IPV6 are only possible cases).

Avoid this by adding an explicit WARN_ON_ONCE() check.

This patch has no effect if the family is a compile-time constant as gcc
will remove the switch() construct entirely.

Reported-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 07efffd0c759..bbe99d2b28b4 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -215,6 +215,8 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
 		break;
 	case NFPROTO_ARP:
 #ifdef CONFIG_NETFILTER_FAMILY_ARP
+		if (WARN_ON_ONCE(hook >= ARRAY_SIZE(net->nf.hooks_arp)))
+			break;
 		hook_head = rcu_dereference(net->nf.hooks_arp[hook]);
 #endif
 		break;
-- 
cgit v1.2.3


From d7b9fd1669d4a4f38281c4e29f1408e1bdce09b3 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 21 Sep 2018 06:40:03 -0700
Subject: nvmem: provide nvmem_dev_name()

Kernel users don't have any means of checking the names of nvmem
devices. Add a routine that returns the name of the nvmem provider.

This will be useful for future nvmem notifier subscribers - otherwise
they can't check what device is being added/removed.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/core.c           | 13 +++++++++++++
 include/linux/nvmem-consumer.h |  8 ++++++++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index 6f064002f439..11afa3b6d551 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -1343,6 +1343,19 @@ int nvmem_device_write(struct nvmem_device *nvmem,
 }
 EXPORT_SYMBOL_GPL(nvmem_device_write);
 
+/**
+ * nvmem_dev_name() - Get the name of a given nvmem device.
+ *
+ * @nvmem: nvmem device.
+ *
+ * Return: name of the nvmem device.
+ */
+const char *nvmem_dev_name(struct nvmem_device *nvmem)
+{
+	return dev_name(&nvmem->dev);
+}
+EXPORT_SYMBOL_GPL(nvmem_dev_name);
+
 static int __init nvmem_init(void)
 {
 	return bus_register(&nvmem_bus_type);
diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h
index 4e85447f7860..0389fe00b177 100644
--- a/include/linux/nvmem-consumer.h
+++ b/include/linux/nvmem-consumer.h
@@ -55,6 +55,8 @@ ssize_t nvmem_device_cell_read(struct nvmem_device *nvmem,
 int nvmem_device_cell_write(struct nvmem_device *nvmem,
 			    struct nvmem_cell_info *info, void *buf);
 
+const char *nvmem_dev_name(struct nvmem_device *nvmem);
+
 #else
 
 static inline struct nvmem_cell *nvmem_cell_get(struct device *dev,
@@ -143,6 +145,12 @@ static inline int nvmem_device_write(struct nvmem_device *nvmem,
 {
 	return -ENOSYS;
 }
+
+static inline const char *nvmem_dev_name(struct nvmem_device *nvmem)
+{
+	return NULL;
+}
+
 #endif /* CONFIG_NVMEM */
 
 #if IS_ENABLED(CONFIG_NVMEM) && IS_ENABLED(CONFIG_OF)
-- 
cgit v1.2.3


From bf58e8820c48805394ec9e76339f0c4646050432 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 21 Sep 2018 06:40:13 -0700
Subject: nvmem: change the signature of nvmem_unregister()

We switched the nvmem framework to using kref instead of manually
checking the current number of users in nvmem_unregister() so this
function can no longer fail. We also converted all remaining users
that still checked the return value of nvmem_unregister() to using
devm_nvmem_register(). Make the routine return void.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/core.c           | 8 ++------
 include/linux/nvmem-provider.h | 9 +++------
 2 files changed, 5 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index 920b56e500cc..bd14d04782cd 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -553,20 +553,16 @@ static void nvmem_device_release(struct kref *kref)
  * nvmem_unregister() - Unregister previously registered nvmem device
  *
  * @nvmem: Pointer to previously registered nvmem device.
- *
- * Return: Will be an negative on error or a zero on success.
  */
-int nvmem_unregister(struct nvmem_device *nvmem)
+void nvmem_unregister(struct nvmem_device *nvmem)
 {
 	kref_put(&nvmem->refcnt, nvmem_device_release);
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(nvmem_unregister);
 
 static void devm_nvmem_release(struct device *dev, void *res)
 {
-	WARN_ON(nvmem_unregister(*(struct nvmem_device **)res));
+	nvmem_unregister(*(struct nvmem_device **)res);
 }
 
 /**
diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
index 24def6ad09bb..0f357d0c1e75 100644
--- a/include/linux/nvmem-provider.h
+++ b/include/linux/nvmem-provider.h
@@ -70,7 +70,7 @@ struct nvmem_config {
 #if IS_ENABLED(CONFIG_NVMEM)
 
 struct nvmem_device *nvmem_register(const struct nvmem_config *cfg);
-int nvmem_unregister(struct nvmem_device *nvmem);
+void nvmem_unregister(struct nvmem_device *nvmem);
 
 struct nvmem_device *devm_nvmem_register(struct device *dev,
 					 const struct nvmem_config *cfg);
@@ -87,10 +87,7 @@ static inline struct nvmem_device *nvmem_register(const struct nvmem_config *c)
 	return ERR_PTR(-ENOSYS);
 }
 
-static inline int nvmem_unregister(struct nvmem_device *nvmem)
-{
-	return -ENOSYS;
-}
+static inline void nvmem_unregister(struct nvmem_device *nvmem) {}
 
 static inline struct nvmem_device *
 devm_nvmem_register(struct device *dev, const struct nvmem_config *c)
@@ -101,7 +98,7 @@ devm_nvmem_register(struct device *dev, const struct nvmem_config *c)
 static inline int
 devm_nvmem_unregister(struct device *dev, struct nvmem_device *nvmem)
 {
-	return nvmem_unregister(nvmem);
+	return -ENOSYS;
 
 }
 
-- 
cgit v1.2.3


From b985f4cba6dbb3b60ad119c6f7e5cbad6f0d7e45 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 21 Sep 2018 06:40:15 -0700
Subject: nvmem: add support for cell info

Add new structs and routines allowing users to define nvmem cells from
machine code. This global list of entries is parsed when a provider
is registered and cells are associated with the relevant nvmem_device
struct.

A possible improvement for the future is to allow users to register
cell tables after the nvmem provider has been registered by updating
the cell list at each call to nvmem_(add|del)_cell_table().

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/core.c           | 72 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/nvmem-provider.h | 33 +++++++++++++------
 2 files changed, 96 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index ee794613024c..8e0108806e65 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -59,6 +59,9 @@ struct nvmem_cell {
 static DEFINE_MUTEX(nvmem_mutex);
 static DEFINE_IDA(nvmem_ida);
 
+static DEFINE_MUTEX(nvmem_cell_mutex);
+static LIST_HEAD(nvmem_cell_tables);
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key eeprom_lock_key;
 #endif
@@ -416,6 +419,43 @@ static int nvmem_setup_compat(struct nvmem_device *nvmem,
 	return 0;
 }
 
+static int nvmem_add_cells_from_table(struct nvmem_device *nvmem)
+{
+	const struct nvmem_cell_info *info;
+	struct nvmem_cell_table *table;
+	struct nvmem_cell *cell;
+	int rval = 0, i;
+
+	mutex_lock(&nvmem_cell_mutex);
+	list_for_each_entry(table, &nvmem_cell_tables, node) {
+		if (strcmp(nvmem_dev_name(nvmem), table->nvmem_name) == 0) {
+			for (i = 0; i < table->ncells; i++) {
+				info = &table->cells[i];
+
+				cell = kzalloc(sizeof(*cell), GFP_KERNEL);
+				if (!cell) {
+					rval = -ENOMEM;
+					goto out;
+				}
+
+				rval = nvmem_cell_info_to_nvmem_cell(nvmem,
+								     info,
+								     cell);
+				if (rval) {
+					kfree(cell);
+					goto out;
+				}
+
+				nvmem_cell_add(cell);
+			}
+		}
+	}
+
+out:
+	mutex_unlock(&nvmem_cell_mutex);
+	return rval;
+}
+
 /**
  * nvmem_register() - Register a nvmem device for given nvmem_config.
  * Also creates an binary entry in /sys/bus/nvmem/devices/dev-name/nvmem
@@ -502,8 +542,14 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
 			goto err_teardown_compat;
 	}
 
+	rval = nvmem_add_cells_from_table(nvmem);
+	if (rval)
+		goto err_remove_cells;
+
 	return nvmem;
 
+err_remove_cells:
+	nvmem_device_remove_all_cells(nvmem);
 err_teardown_compat:
 	if (config->compat)
 		device_remove_bin_file(nvmem->base_dev, &nvmem->eeprom);
@@ -1306,6 +1352,32 @@ int nvmem_device_write(struct nvmem_device *nvmem,
 }
 EXPORT_SYMBOL_GPL(nvmem_device_write);
 
+/**
+ * nvmem_add_cell_table() - register a table of cell info entries
+ *
+ * @table: table of cell info entries
+ */
+void nvmem_add_cell_table(struct nvmem_cell_table *table)
+{
+	mutex_lock(&nvmem_cell_mutex);
+	list_add_tail(&table->node, &nvmem_cell_tables);
+	mutex_unlock(&nvmem_cell_mutex);
+}
+EXPORT_SYMBOL_GPL(nvmem_add_cell_table);
+
+/**
+ * nvmem_del_cell_table() - remove a previously registered cell info table
+ *
+ * @table: table of cell info entries
+ */
+void nvmem_del_cell_table(struct nvmem_cell_table *table)
+{
+	mutex_lock(&nvmem_cell_mutex);
+	list_del(&table->node);
+	mutex_unlock(&nvmem_cell_mutex);
+}
+EXPORT_SYMBOL_GPL(nvmem_del_cell_table);
+
 /**
  * nvmem_dev_name() - Get the name of a given nvmem device.
  *
diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
index 0f357d0c1e75..5c9f205cac8f 100644
--- a/include/linux/nvmem-provider.h
+++ b/include/linux/nvmem-provider.h
@@ -67,6 +67,25 @@ struct nvmem_config {
 	struct device		*base_dev;
 };
 
+/**
+ * struct nvmem_cell_table - NVMEM cell definitions for given provider
+ *
+ * @nvmem_name:		Provider name.
+ * @cells:		Array of cell definitions.
+ * @ncells:		Number of cell definitions in the array.
+ * @node:		List node.
+ *
+ * This structure together with related helper functions is provided for users
+ * that don't can't access the nvmem provided structure but wish to register
+ * cell definitions for it e.g. board files registering an EEPROM device.
+ */
+struct nvmem_cell_table {
+	const char		*nvmem_name;
+	const struct nvmem_cell_info	*cells;
+	size_t			ncells;
+	struct list_head	node;
+};
+
 #if IS_ENABLED(CONFIG_NVMEM)
 
 struct nvmem_device *nvmem_register(const struct nvmem_config *cfg);
@@ -77,9 +96,9 @@ struct nvmem_device *devm_nvmem_register(struct device *dev,
 
 int devm_nvmem_unregister(struct device *dev, struct nvmem_device *nvmem);
 
-int nvmem_add_cells(struct nvmem_device *nvmem,
-		    const struct nvmem_cell_info *info,
-		    int ncells);
+void nvmem_add_cell_table(struct nvmem_cell_table *table);
+void nvmem_del_cell_table(struct nvmem_cell_table *table);
+
 #else
 
 static inline struct nvmem_device *nvmem_register(const struct nvmem_config *c)
@@ -102,12 +121,8 @@ devm_nvmem_unregister(struct device *dev, struct nvmem_device *nvmem)
 
 }
 
-static inline int nvmem_add_cells(struct nvmem_device *nvmem,
-				  const struct nvmem_cell_info *info,
-				  int ncells)
-{
-	return -ENOSYS;
-}
+static inline void nvmem_add_cell_table(struct nvmem_cell_table *table) {}
+static inline void nvmem_del_cell_table(struct nvmem_cell_table *table) {}
 
 #endif /* CONFIG_NVMEM */
 #endif  /* ifndef _LINUX_NVMEM_PROVIDER_H */
-- 
cgit v1.2.3


From 506157be06ba28137b18b7419a4dccfa244f4983 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 21 Sep 2018 06:40:17 -0700
Subject: nvmem: add support for cell lookups from machine code

Add a way for machine code users to associate devices with nvmem cells.

This restores the support for non-DT systems but following a different
approach. Cells must now be associated with devices using provided
routines and data structures before they can be retrieved using
nvmem_cell_get().

It's still possible to define cells statically in nvmem_config but
cells created this way still need to be associated with consumers using
lookup entries.

Note that nvmem_find() must be moved higher in the source file as we
want to call it from __nvmem_device_get() for devices that don't have
a device node.

The signature of __nvmem_device_get() is also changed as it's no longer
used to retrieve cells.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/core.c           | 129 ++++++++++++++++++++++++++++++++---------
 include/linux/nvmem-consumer.h |  28 +++++++++
 2 files changed, 131 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index 74b6b97680d5..9cc86d131e1e 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -62,6 +62,9 @@ static DEFINE_IDA(nvmem_ida);
 static DEFINE_MUTEX(nvmem_cell_mutex);
 static LIST_HEAD(nvmem_cell_tables);
 
+static DEFINE_MUTEX(nvmem_lookup_mutex);
+static LIST_HEAD(nvmem_lookup_list);
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key eeprom_lock_key;
 #endif
@@ -283,6 +286,18 @@ static struct nvmem_device *of_nvmem_find(struct device_node *nvmem_np)
 	return to_nvmem_device(d);
 }
 
+static struct nvmem_device *nvmem_find(const char *name)
+{
+	struct device *d;
+
+	d = bus_find_device_by_name(&nvmem_bus_type, NULL, name);
+
+	if (!d)
+		return NULL;
+
+	return to_nvmem_device(d);
+}
+
 static void nvmem_cell_drop(struct nvmem_cell *cell)
 {
 	mutex_lock(&nvmem_mutex);
@@ -472,6 +487,21 @@ nvmem_find_cell_by_index(struct nvmem_device *nvmem, int index)
 	return cell;
 }
 
+static struct nvmem_cell *
+nvmem_find_cell_by_name(struct nvmem_device *nvmem, const char *cell_id)
+{
+	struct nvmem_cell *cell = NULL;
+
+	mutex_lock(&nvmem_mutex);
+	list_for_each_entry(cell, &nvmem->cells, node) {
+		if (strcmp(cell_id, cell->name) == 0)
+			break;
+	}
+	mutex_unlock(&nvmem_mutex);
+
+	return cell;
+}
+
 static int nvmem_add_cells_from_of(struct nvmem_device *nvmem)
 {
 	struct device_node *parent, *child;
@@ -719,16 +749,12 @@ int devm_nvmem_unregister(struct device *dev, struct nvmem_device *nvmem)
 EXPORT_SYMBOL(devm_nvmem_unregister);
 
 static struct nvmem_device *__nvmem_device_get(struct device_node *np,
-					       struct nvmem_cell **cellp,
-					       const char *cell_id)
+					       const char *nvmem_name)
 {
 	struct nvmem_device *nvmem = NULL;
 
-	if (!np)
-		return ERR_PTR(-ENOENT);
-
 	mutex_lock(&nvmem_mutex);
-	nvmem = of_nvmem_find(np);
+	nvmem = np ? of_nvmem_find(np) : nvmem_find(nvmem_name);
 	mutex_unlock(&nvmem_mutex);
 	if (!nvmem)
 		return ERR_PTR(-EPROBE_DEFER);
@@ -752,18 +778,6 @@ static void __nvmem_device_put(struct nvmem_device *nvmem)
 	kref_put(&nvmem->refcnt, nvmem_device_release);
 }
 
-static struct nvmem_device *nvmem_find(const char *name)
-{
-	struct device *d;
-
-	d = bus_find_device_by_name(&nvmem_bus_type, NULL, name);
-
-	if (!d)
-		return ERR_PTR(-ENOENT);
-
-	return to_nvmem_device(d);
-}
-
 #if IS_ENABLED(CONFIG_OF)
 /**
  * of_nvmem_device_get() - Get nvmem device from a given id
@@ -786,7 +800,7 @@ struct nvmem_device *of_nvmem_device_get(struct device_node *np, const char *id)
 	if (!nvmem_np)
 		return ERR_PTR(-EINVAL);
 
-	return __nvmem_device_get(nvmem_np, NULL, NULL);
+	return __nvmem_device_get(nvmem_np, NULL);
 }
 EXPORT_SYMBOL_GPL(of_nvmem_device_get);
 #endif
@@ -890,15 +904,43 @@ struct nvmem_device *devm_nvmem_device_get(struct device *dev, const char *id)
 }
 EXPORT_SYMBOL_GPL(devm_nvmem_device_get);
 
-static struct nvmem_cell *nvmem_cell_get_from_list(const char *cell_id)
+static struct nvmem_cell *
+nvmem_cell_get_from_lookup(struct device *dev, const char *con_id)
 {
-	struct nvmem_cell *cell = NULL;
+	struct nvmem_cell *cell = ERR_PTR(-ENOENT);
+	struct nvmem_cell_lookup *lookup;
 	struct nvmem_device *nvmem;
+	const char *dev_id;
 
-	nvmem = __nvmem_device_get(NULL, &cell, cell_id);
-	if (IS_ERR(nvmem))
-		return ERR_CAST(nvmem);
+	if (!dev)
+		return ERR_PTR(-EINVAL);
+
+	dev_id = dev_name(dev);
+
+	mutex_lock(&nvmem_lookup_mutex);
+
+	list_for_each_entry(lookup, &nvmem_lookup_list, node) {
+		if ((strcmp(lookup->dev_id, dev_id) == 0) &&
+		    (strcmp(lookup->con_id, con_id) == 0)) {
+			/* This is the right entry. */
+			nvmem = __nvmem_device_get(NULL, lookup->nvmem_name);
+			if (!nvmem) {
+				/* Provider may not be registered yet. */
+				cell = ERR_PTR(-EPROBE_DEFER);
+				goto out;
+			}
+
+			cell = nvmem_find_cell_by_name(nvmem,
+						       lookup->cell_name);
+			if (!cell) {
+				__nvmem_device_put(nvmem);
+				goto out;
+			}
+		}
+	}
 
+out:
+	mutex_unlock(&nvmem_lookup_mutex);
 	return cell;
 }
 
@@ -935,7 +977,7 @@ struct nvmem_cell *of_nvmem_cell_get(struct device_node *np,
 	if (!nvmem_np)
 		return ERR_PTR(-EINVAL);
 
-	nvmem = __nvmem_device_get(nvmem_np, NULL, NULL);
+	nvmem = __nvmem_device_get(nvmem_np, NULL);
 	of_node_put(nvmem_np);
 	if (IS_ERR(nvmem))
 		return ERR_CAST(nvmem);
@@ -975,7 +1017,7 @@ struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *cell_id)
 	if (!cell_id)
 		return ERR_PTR(-EINVAL);
 
-	return nvmem_cell_get_from_list(cell_id);
+	return nvmem_cell_get_from_lookup(dev, cell_id);
 }
 EXPORT_SYMBOL_GPL(nvmem_cell_get);
 
@@ -1405,6 +1447,41 @@ void nvmem_del_cell_table(struct nvmem_cell_table *table)
 }
 EXPORT_SYMBOL_GPL(nvmem_del_cell_table);
 
+/**
+ * nvmem_add_cell_lookups() - register a list of cell lookup entries
+ *
+ * @entries: array of cell lookup entries
+ * @nentries: number of cell lookup entries in the array
+ */
+void nvmem_add_cell_lookups(struct nvmem_cell_lookup *entries, size_t nentries)
+{
+	int i;
+
+	mutex_lock(&nvmem_lookup_mutex);
+	for (i = 0; i < nentries; i++)
+		list_add_tail(&entries[i].node, &nvmem_lookup_list);
+	mutex_unlock(&nvmem_lookup_mutex);
+}
+EXPORT_SYMBOL_GPL(nvmem_add_cell_lookups);
+
+/**
+ * nvmem_del_cell_lookups() - remove a list of previously added cell lookup
+ *                            entries
+ *
+ * @entries: array of cell lookup entries
+ * @nentries: number of cell lookup entries in the array
+ */
+void nvmem_del_cell_lookups(struct nvmem_cell_lookup *entries, size_t nentries)
+{
+	int i;
+
+	mutex_lock(&nvmem_lookup_mutex);
+	for (i = 0; i < nentries; i++)
+		list_del(&entries[i].node);
+	mutex_unlock(&nvmem_lookup_mutex);
+}
+EXPORT_SYMBOL_GPL(nvmem_del_cell_lookups);
+
 /**
  * nvmem_dev_name() - Get the name of a given nvmem device.
  *
diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h
index 0389fe00b177..27eee3945405 100644
--- a/include/linux/nvmem-consumer.h
+++ b/include/linux/nvmem-consumer.h
@@ -29,6 +29,24 @@ struct nvmem_cell_info {
 	unsigned int		nbits;
 };
 
+/**
+ * struct nvmem_cell_lookup - cell lookup entry
+ *
+ * @nvmem_name:	Name of the provider.
+ * @cell_name:	Name of the nvmem cell as defined in the name field of
+ *		struct nvmem_cell_info.
+ * @dev_id:	Name of the consumer device that will be associated with
+ *		this cell.
+ * @con_id:	Connector id for this cell lookup.
+ */
+struct nvmem_cell_lookup {
+	const char		*nvmem_name;
+	const char		*cell_name;
+	const char		*dev_id;
+	const char		*con_id;
+	struct list_head	node;
+};
+
 #if IS_ENABLED(CONFIG_NVMEM)
 
 /* Cell based interface */
@@ -57,6 +75,11 @@ int nvmem_device_cell_write(struct nvmem_device *nvmem,
 
 const char *nvmem_dev_name(struct nvmem_device *nvmem);
 
+void nvmem_add_cell_lookups(struct nvmem_cell_lookup *entries,
+			    size_t nentries);
+void nvmem_del_cell_lookups(struct nvmem_cell_lookup *entries,
+			    size_t nentries);
+
 #else
 
 static inline struct nvmem_cell *nvmem_cell_get(struct device *dev,
@@ -151,6 +174,11 @@ static inline const char *nvmem_dev_name(struct nvmem_device *nvmem)
 	return NULL;
 }
 
+static inline void
+nvmem_add_cell_lookups(struct nvmem_cell_lookup *entries, size_t nentries) {}
+static inline void
+nvmem_del_cell_lookups(struct nvmem_cell_lookup *entries, size_t nentries) {}
+
 #endif /* CONFIG_NVMEM */
 
 #if IS_ENABLED(CONFIG_NVMEM) && IS_ENABLED(CONFIG_OF)
-- 
cgit v1.2.3


From bee1138bea15a640aaa9e9bb909af5b2762520e0 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 21 Sep 2018 06:40:19 -0700
Subject: nvmem: add a notifier chain

Add a blocking notifier chain with four events (add and remove for
both devices and cells) so that users can get notified about the
addition of nvmem resources they're waiting for.

We'll use this instead of the at24 setup callback in the mityomapl138
board file.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/core.c           | 36 ++++++++++++++++++++++++++++++++++++
 include/linux/nvmem-consumer.h | 21 +++++++++++++++++++++
 2 files changed, 57 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index 9cc86d131e1e..da441019b609 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -65,6 +65,8 @@ static LIST_HEAD(nvmem_cell_tables);
 static DEFINE_MUTEX(nvmem_lookup_mutex);
 static LIST_HEAD(nvmem_lookup_list);
 
+static BLOCKING_NOTIFIER_HEAD(nvmem_notifier);
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key eeprom_lock_key;
 #endif
@@ -300,6 +302,7 @@ static struct nvmem_device *nvmem_find(const char *name)
 
 static void nvmem_cell_drop(struct nvmem_cell *cell)
 {
+	blocking_notifier_call_chain(&nvmem_notifier, NVMEM_CELL_REMOVE, cell);
 	mutex_lock(&nvmem_mutex);
 	list_del(&cell->node);
 	mutex_unlock(&nvmem_mutex);
@@ -319,6 +322,7 @@ static void nvmem_cell_add(struct nvmem_cell *cell)
 	mutex_lock(&nvmem_mutex);
 	list_add_tail(&cell->node, &cell->nvmem->cells);
 	mutex_unlock(&nvmem_mutex);
+	blocking_notifier_call_chain(&nvmem_notifier, NVMEM_CELL_ADD, cell);
 }
 
 static int nvmem_cell_info_to_nvmem_cell(struct nvmem_device *nvmem,
@@ -434,6 +438,32 @@ static int nvmem_setup_compat(struct nvmem_device *nvmem,
 	return 0;
 }
 
+/**
+ * nvmem_register_notifier() - Register a notifier block for nvmem events.
+ *
+ * @nb: notifier block to be called on nvmem events.
+ *
+ * Return: 0 on success, negative error number on failure.
+ */
+int nvmem_register_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&nvmem_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(nvmem_register_notifier);
+
+/**
+ * nvmem_unregister_notifier() - Unregister a notifier block for nvmem events.
+ *
+ * @nb: notifier block to be unregistered.
+ *
+ * Return: 0 on success, negative error number on failure.
+ */
+int nvmem_unregister_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&nvmem_notifier, nb);
+}
+EXPORT_SYMBOL_GPL(nvmem_unregister_notifier);
+
 static int nvmem_add_cells_from_table(struct nvmem_device *nvmem)
 {
 	const struct nvmem_cell_info *info;
@@ -647,6 +677,10 @@ struct nvmem_device *nvmem_register(const struct nvmem_config *config)
 	if (rval)
 		goto err_remove_cells;
 
+	rval = blocking_notifier_call_chain(&nvmem_notifier, NVMEM_ADD, nvmem);
+	if (rval)
+		goto err_remove_cells;
+
 	return nvmem;
 
 err_remove_cells:
@@ -669,6 +703,8 @@ static void nvmem_device_release(struct kref *kref)
 
 	nvmem = container_of(kref, struct nvmem_device, refcnt);
 
+	blocking_notifier_call_chain(&nvmem_notifier, NVMEM_REMOVE, nvmem);
+
 	if (nvmem->flags & FLAG_COMPAT)
 		device_remove_bin_file(nvmem->base_dev, &nvmem->eeprom);
 
diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h
index 27eee3945405..0326b52e906b 100644
--- a/include/linux/nvmem-consumer.h
+++ b/include/linux/nvmem-consumer.h
@@ -14,6 +14,7 @@
 
 #include <linux/err.h>
 #include <linux/errno.h>
+#include <linux/notifier.h>
 
 struct device;
 struct device_node;
@@ -47,6 +48,13 @@ struct nvmem_cell_lookup {
 	struct list_head	node;
 };
 
+enum {
+	NVMEM_ADD = 1,
+	NVMEM_REMOVE,
+	NVMEM_CELL_ADD,
+	NVMEM_CELL_REMOVE,
+};
+
 #if IS_ENABLED(CONFIG_NVMEM)
 
 /* Cell based interface */
@@ -80,6 +88,9 @@ void nvmem_add_cell_lookups(struct nvmem_cell_lookup *entries,
 void nvmem_del_cell_lookups(struct nvmem_cell_lookup *entries,
 			    size_t nentries);
 
+int nvmem_register_notifier(struct notifier_block *nb);
+int nvmem_unregister_notifier(struct notifier_block *nb);
+
 #else
 
 static inline struct nvmem_cell *nvmem_cell_get(struct device *dev,
@@ -179,6 +190,16 @@ nvmem_add_cell_lookups(struct nvmem_cell_lookup *entries, size_t nentries) {}
 static inline void
 nvmem_del_cell_lookups(struct nvmem_cell_lookup *entries, size_t nentries) {}
 
+static inline int nvmem_register_notifier(struct notifier_block *nb)
+{
+	return -ENOSYS;
+}
+
+static inline int nvmem_unregister_notifier(struct notifier_block *nb)
+{
+	return -ENOSYS;
+}
+
 #endif /* CONFIG_NVMEM */
 
 #if IS_ENABLED(CONFIG_NVMEM) && IS_ENABLED(CONFIG_OF)
-- 
cgit v1.2.3


From b1c1db9883c276fe41a04e9ce8f89576a64b1ac0 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 21 Sep 2018 06:40:20 -0700
Subject: nvmem: use SPDX license identifiers

Use SPDX license identiefiers to core nvmem files and remove GPL 2.0
license boilerplate.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/core.c           | 10 +---------
 include/linux/nvmem-consumer.h |  5 +----
 include/linux/nvmem-provider.h |  5 +----
 3 files changed, 3 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index da441019b609..ea05219e60b4 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * nvmem framework core.
  *
  * Copyright (C) 2015 Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
  * Copyright (C) 2013 Maxime Ripard <maxime.ripard@free-electrons.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #include <linux/device.h>
diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h
index 0326b52e906b..d18caae2f7ac 100644
--- a/include/linux/nvmem-consumer.h
+++ b/include/linux/nvmem-consumer.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * nvmem framework consumer.
  *
  * Copyright (C) 2015 Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
  * Copyright (C) 2013 Maxime Ripard <maxime.ripard@free-electrons.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2.  This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
  */
 
 #ifndef _LINUX_NVMEM_CONSUMER_H
diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
index 5c9f205cac8f..8ae012f6545a 100644
--- a/include/linux/nvmem-provider.h
+++ b/include/linux/nvmem-provider.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * nvmem framework provider.
  *
  * Copyright (C) 2015 Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
  * Copyright (C) 2013 Maxime Ripard <maxime.ripard@free-electrons.com>
- *
- * This file is licensed under the terms of the GNU General Public
- * License version 2.  This program is licensed "as is" without any
- * warranty of any kind, whether express or implied.
  */
 
 #ifndef _LINUX_NVMEM_PROVIDER_H
-- 
cgit v1.2.3


From 165589f0cb52b34db12e15676a034b8f83dfa756 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 21 Sep 2018 06:40:21 -0700
Subject: nvmem: make the naming of arguments in nvmem_cell_get() consistent

The argument representing the cell name in the nvmem_cell_get() family
of functions is not consistend between function prototypes and
definitions. Name it 'id' in all those routines. This is in line with
other frameworks and can represent both the DT cell name from the
nvmem-cell-names property as well as the con_id field from cell
lookup entries.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/core.c           | 27 ++++++++++++++-------------
 include/linux/nvmem-consumer.h | 12 ++++++------
 2 files changed, 20 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index ea05219e60b4..ff21402fbd7d 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -977,16 +977,15 @@ out:
  * of_nvmem_cell_get() - Get a nvmem cell from given device node and cell id
  *
  * @np: Device tree node that uses the nvmem cell.
- * @name: nvmem cell name from nvmem-cell-names property, or NULL
- *	  for the cell at index 0 (the lone cell with no accompanying
- *	  nvmem-cell-names property).
+ * @id: nvmem cell name from nvmem-cell-names property, or NULL
+ *      for the cell at index 0 (the lone cell with no accompanying
+ *      nvmem-cell-names property).
  *
  * Return: Will be an ERR_PTR() on error or a valid pointer
  * to a struct nvmem_cell.  The nvmem_cell will be freed by the
  * nvmem_cell_put().
  */
-struct nvmem_cell *of_nvmem_cell_get(struct device_node *np,
-					    const char *name)
+struct nvmem_cell *of_nvmem_cell_get(struct device_node *np, const char *id)
 {
 	struct device_node *cell_np, *nvmem_np;
 	struct nvmem_device *nvmem;
@@ -994,8 +993,8 @@ struct nvmem_cell *of_nvmem_cell_get(struct device_node *np,
 	int index = 0;
 
 	/* if cell name exists, find index to the name */
-	if (name)
-		index = of_property_match_string(np, "nvmem-cell-names", name);
+	if (id)
+		index = of_property_match_string(np, "nvmem-cell-names", id);
 
 	cell_np = of_parse_phandle(np, "nvmem-cells", index);
 	if (!cell_np)
@@ -1025,27 +1024,29 @@ EXPORT_SYMBOL_GPL(of_nvmem_cell_get);
  * nvmem_cell_get() - Get nvmem cell of device form a given cell name
  *
  * @dev: Device that requests the nvmem cell.
- * @cell_id: nvmem cell name to get.
+ * @id: nvmem cell name to get (this corresponds with the name from the
+ *      nvmem-cell-names property for DT systems and with the con_id from
+ *      the lookup entry for non-DT systems).
  *
  * Return: Will be an ERR_PTR() on error or a valid pointer
  * to a struct nvmem_cell.  The nvmem_cell will be freed by the
  * nvmem_cell_put().
  */
-struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *cell_id)
+struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *id)
 {
 	struct nvmem_cell *cell;
 
 	if (dev->of_node) { /* try dt first */
-		cell = of_nvmem_cell_get(dev->of_node, cell_id);
+		cell = of_nvmem_cell_get(dev->of_node, id);
 		if (!IS_ERR(cell) || PTR_ERR(cell) == -EPROBE_DEFER)
 			return cell;
 	}
 
-	/* NULL cell_id only allowed for device tree; invalid otherwise */
-	if (!cell_id)
+	/* NULL cell id only allowed for device tree; invalid otherwise */
+	if (!id)
 		return ERR_PTR(-EINVAL);
 
-	return nvmem_cell_get_from_lookup(dev, cell_id);
+	return nvmem_cell_get_from_lookup(dev, id);
 }
 EXPORT_SYMBOL_GPL(nvmem_cell_get);
 
diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h
index d18caae2f7ac..e17617fa034f 100644
--- a/include/linux/nvmem-consumer.h
+++ b/include/linux/nvmem-consumer.h
@@ -55,8 +55,8 @@ enum {
 #if IS_ENABLED(CONFIG_NVMEM)
 
 /* Cell based interface */
-struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *name);
-struct nvmem_cell *devm_nvmem_cell_get(struct device *dev, const char *name);
+struct nvmem_cell *nvmem_cell_get(struct device *dev, const char *id);
+struct nvmem_cell *devm_nvmem_cell_get(struct device *dev, const char *id);
 void nvmem_cell_put(struct nvmem_cell *cell);
 void devm_nvmem_cell_put(struct device *dev, struct nvmem_cell *cell);
 void *nvmem_cell_read(struct nvmem_cell *cell, size_t *len);
@@ -91,13 +91,13 @@ int nvmem_unregister_notifier(struct notifier_block *nb);
 #else
 
 static inline struct nvmem_cell *nvmem_cell_get(struct device *dev,
-						const char *name)
+						const char *id)
 {
 	return ERR_PTR(-ENOSYS);
 }
 
 static inline struct nvmem_cell *devm_nvmem_cell_get(struct device *dev,
-				       const char *name)
+						     const char *id)
 {
 	return ERR_PTR(-ENOSYS);
 }
@@ -201,12 +201,12 @@ static inline int nvmem_unregister_notifier(struct notifier_block *nb)
 
 #if IS_ENABLED(CONFIG_NVMEM) && IS_ENABLED(CONFIG_OF)
 struct nvmem_cell *of_nvmem_cell_get(struct device_node *np,
-				     const char *name);
+				     const char *id);
 struct nvmem_device *of_nvmem_device_get(struct device_node *np,
 					 const char *name);
 #else
 static inline struct nvmem_cell *of_nvmem_cell_get(struct device_node *np,
-				     const char *name)
+						   const char *id)
 {
 	return ERR_PTR(-ENOSYS);
 }
-- 
cgit v1.2.3


From 20167b70c894f20cd01e2579fad206de440816ef Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 21 Sep 2018 06:40:22 -0700
Subject: nvmem: use EOPNOTSUPP instead of ENOSYS

Checkpatch emits warnings when using ENOSYS. Some of the frameworks
started using EOPNOTSUPP as return values for API functions when given
subsystem is disabled in Kconfig.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/nvmem-consumer.h | 30 +++++++++++++++---------------
 include/linux/nvmem-provider.h |  5 ++---
 2 files changed, 17 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nvmem-consumer.h b/include/linux/nvmem-consumer.h
index e17617fa034f..312bfa5efd80 100644
--- a/include/linux/nvmem-consumer.h
+++ b/include/linux/nvmem-consumer.h
@@ -93,13 +93,13 @@ int nvmem_unregister_notifier(struct notifier_block *nb);
 static inline struct nvmem_cell *nvmem_cell_get(struct device *dev,
 						const char *id)
 {
-	return ERR_PTR(-ENOSYS);
+	return ERR_PTR(-EOPNOTSUPP);
 }
 
 static inline struct nvmem_cell *devm_nvmem_cell_get(struct device *dev,
 						     const char *id)
 {
-	return ERR_PTR(-ENOSYS);
+	return ERR_PTR(-EOPNOTSUPP);
 }
 
 static inline void devm_nvmem_cell_put(struct device *dev,
@@ -113,31 +113,31 @@ static inline void nvmem_cell_put(struct nvmem_cell *cell)
 
 static inline void *nvmem_cell_read(struct nvmem_cell *cell, size_t *len)
 {
-	return ERR_PTR(-ENOSYS);
+	return ERR_PTR(-EOPNOTSUPP);
 }
 
 static inline int nvmem_cell_write(struct nvmem_cell *cell,
 				    const char *buf, size_t len)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 
 static inline int nvmem_cell_read_u32(struct device *dev,
 				      const char *cell_id, u32 *val)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 
 static inline struct nvmem_device *nvmem_device_get(struct device *dev,
 						    const char *name)
 {
-	return ERR_PTR(-ENOSYS);
+	return ERR_PTR(-EOPNOTSUPP);
 }
 
 static inline struct nvmem_device *devm_nvmem_device_get(struct device *dev,
 							 const char *name)
 {
-	return ERR_PTR(-ENOSYS);
+	return ERR_PTR(-EOPNOTSUPP);
 }
 
 static inline void nvmem_device_put(struct nvmem_device *nvmem)
@@ -153,28 +153,28 @@ static inline ssize_t nvmem_device_cell_read(struct nvmem_device *nvmem,
 					 struct nvmem_cell_info *info,
 					 void *buf)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 
 static inline int nvmem_device_cell_write(struct nvmem_device *nvmem,
 					  struct nvmem_cell_info *info,
 					  void *buf)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 
 static inline int nvmem_device_read(struct nvmem_device *nvmem,
 				    unsigned int offset, size_t bytes,
 				    void *buf)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 
 static inline int nvmem_device_write(struct nvmem_device *nvmem,
 				     unsigned int offset, size_t bytes,
 				     void *buf)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 
 static inline const char *nvmem_dev_name(struct nvmem_device *nvmem)
@@ -189,12 +189,12 @@ nvmem_del_cell_lookups(struct nvmem_cell_lookup *entries, size_t nentries) {}
 
 static inline int nvmem_register_notifier(struct notifier_block *nb)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 
 static inline int nvmem_unregister_notifier(struct notifier_block *nb)
 {
-	return -ENOSYS;
+	return -EOPNOTSUPP;
 }
 
 #endif /* CONFIG_NVMEM */
@@ -208,13 +208,13 @@ struct nvmem_device *of_nvmem_device_get(struct device_node *np,
 static inline struct nvmem_cell *of_nvmem_cell_get(struct device_node *np,
 						   const char *id)
 {
-	return ERR_PTR(-ENOSYS);
+	return ERR_PTR(-EOPNOTSUPP);
 }
 
 static inline struct nvmem_device *of_nvmem_device_get(struct device_node *np,
 						       const char *name)
 {
-	return ERR_PTR(-ENOSYS);
+	return ERR_PTR(-EOPNOTSUPP);
 }
 #endif /* CONFIG_NVMEM && CONFIG_OF */
 
diff --git a/include/linux/nvmem-provider.h b/include/linux/nvmem-provider.h
index 8ae012f6545a..1e3283c2af77 100644
--- a/include/linux/nvmem-provider.h
+++ b/include/linux/nvmem-provider.h
@@ -100,7 +100,7 @@ void nvmem_del_cell_table(struct nvmem_cell_table *table);
 
 static inline struct nvmem_device *nvmem_register(const struct nvmem_config *c)
 {
-	return ERR_PTR(-ENOSYS);
+	return ERR_PTR(-EOPNOTSUPP);
 }
 
 static inline void nvmem_unregister(struct nvmem_device *nvmem) {}
@@ -114,8 +114,7 @@ devm_nvmem_register(struct device *dev, const struct nvmem_config *c)
 static inline int
 devm_nvmem_unregister(struct device *dev, struct nvmem_device *nvmem)
 {
-	return -ENOSYS;
-
+	return -EOPNOTSUPP;
 }
 
 static inline void nvmem_add_cell_table(struct nvmem_cell_table *table) {}
-- 
cgit v1.2.3


From 494edd266b945f36908184433dd36eda1719cdb0 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Fri, 14 Sep 2018 11:27:46 +0300
Subject: regulator/mfd: Support ROHM BD71847 power management IC

BD71847 is reduced version of BD71837. DVS bucks 3 and 4 are
removed as is LDO7. Voltage ranges of some regulators are
expanded.

Add initial support for BD71847 with BD71837 driver.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/mfd/rohm-bd718x7.c            |   70 +--
 drivers/regulator/bd71837-regulator.c | 1055 +++++++++++++++++++++++----------
 include/linux/mfd/rohm-bd718x7.h      |  271 ++++-----
 3 files changed, 879 insertions(+), 517 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/rohm-bd718x7.c b/drivers/mfd/rohm-bd718x7.c
index 75c8ec659547..ce5aa5cd3545 100644
--- a/drivers/mfd/rohm-bd718x7.c
+++ b/drivers/mfd/rohm-bd718x7.c
@@ -7,21 +7,16 @@
 // Datasheet available from
 // https://www.rohm.com/datasheet/BD71837MWV/bd71837mwv-e
 
+#include <linux/gpio_keys.h>
 #include <linux/i2c.h>
 #include <linux/input.h>
 #include <linux/interrupt.h>
 #include <linux/mfd/rohm-bd718x7.h>
 #include <linux/mfd/core.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/regmap.h>
-
-/*
- * gpio_keys.h requires definiton of bool. It is brought in
- * by above includes. Keep this as last until gpio_keys.h gets fixed.
- */
-#include <linux/gpio_keys.h>
-
-static const u8 supported_revisions[] = { 0xA2 /* BD71837 */ };
+#include <linux/types.h>
 
 static struct gpio_keys_button button = {
 	.code = KEY_POWER,
@@ -41,8 +36,8 @@ static struct mfd_cell bd71837_mfd_cells[] = {
 		.platform_data = &bd718xx_powerkey_data,
 		.pdata_size = sizeof(bd718xx_powerkey_data),
 	},
-	{ .name = "bd71837-clk", },
-	{ .name = "bd71837-pmic", },
+	{ .name = "bd718xx-clk", },
+	{ .name = "bd718xx-pmic", },
 };
 
 static const struct regmap_irq bd71837_irqs[] = {
@@ -61,16 +56,16 @@ static struct regmap_irq_chip bd71837_irq_chip = {
 	.num_irqs = ARRAY_SIZE(bd71837_irqs),
 	.num_regs = 1,
 	.irq_reg_stride = 1,
-	.status_base = BD71837_REG_IRQ,
-	.mask_base = BD71837_REG_MIRQ,
-	.ack_base = BD71837_REG_IRQ,
+	.status_base = BD718XX_REG_IRQ,
+	.mask_base = BD718XX_REG_MIRQ,
+	.ack_base = BD718XX_REG_IRQ,
 	.init_ack_masked = true,
 	.mask_invert = false,
 };
 
 static const struct regmap_range pmic_status_range = {
-	.range_min = BD71837_REG_IRQ,
-	.range_max = BD71837_REG_POW_STATE,
+	.range_min = BD718XX_REG_IRQ,
+	.range_max = BD718XX_REG_POW_STATE,
 };
 
 static const struct regmap_access_table volatile_regs = {
@@ -82,7 +77,7 @@ static const struct regmap_config bd71837_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
 	.volatile_table = &volatile_regs,
-	.max_register = BD71837_MAX_REGISTER - 1,
+	.max_register = BD718XX_MAX_REGISTER - 1,
 	.cache_type = REGCACHE_RBTREE,
 };
 
@@ -90,8 +85,12 @@ static int bd71837_i2c_probe(struct i2c_client *i2c,
 			    const struct i2c_device_id *id)
 {
 	struct bd71837 *bd71837;
-	int ret, i;
-	unsigned int val;
+	int ret;
+
+	if (!i2c->irq) {
+		dev_err(&i2c->dev, "No IRQ configured\n");
+		return -EINVAL;
+	}
 
 	bd71837 = devm_kzalloc(&i2c->dev, sizeof(struct bd71837), GFP_KERNEL);
 
@@ -99,12 +98,8 @@ static int bd71837_i2c_probe(struct i2c_client *i2c,
 		return -ENOMEM;
 
 	bd71837->chip_irq = i2c->irq;
-
-	if (!bd71837->chip_irq) {
-		dev_err(&i2c->dev, "No IRQ configured\n");
-		return -EINVAL;
-	}
-
+	bd71837->chip_type = (unsigned int)(uintptr_t)
+				of_device_get_match_data(&i2c->dev);
 	bd71837->dev = &i2c->dev;
 	dev_set_drvdata(&i2c->dev, bd71837);
 
@@ -114,20 +109,6 @@ static int bd71837_i2c_probe(struct i2c_client *i2c,
 		return PTR_ERR(bd71837->regmap);
 	}
 
-	ret = regmap_read(bd71837->regmap, BD71837_REG_REV, &val);
-	if (ret) {
-		dev_err(&i2c->dev, "Read BD71837_REG_DEVICE failed\n");
-		return ret;
-	}
-	for (i = 0; i < ARRAY_SIZE(supported_revisions); i++)
-		if (supported_revisions[i] == val)
-			break;
-
-	if (i == ARRAY_SIZE(supported_revisions)) {
-		dev_err(&i2c->dev, "Unsupported chip revision\n");
-		return -ENODEV;
-	}
-
 	ret = devm_regmap_add_irq_chip(&i2c->dev, bd71837->regmap,
 				       bd71837->chip_irq, IRQF_ONESHOT, 0,
 				       &bd71837_irq_chip, &bd71837->irq_data);
@@ -138,7 +119,7 @@ static int bd71837_i2c_probe(struct i2c_client *i2c,
 
 	/* Configure short press to 10 milliseconds */
 	ret = regmap_update_bits(bd71837->regmap,
-				 BD71837_REG_PWRONCONFIG0,
+				 BD718XX_REG_PWRONCONFIG0,
 				 BD718XX_PWRBTN_PRESS_DURATION_MASK,
 				 BD718XX_PWRBTN_SHORT_PRESS_10MS);
 	if (ret) {
@@ -149,7 +130,7 @@ static int bd71837_i2c_probe(struct i2c_client *i2c,
 
 	/* Configure long press to 10 seconds */
 	ret = regmap_update_bits(bd71837->regmap,
-				 BD71837_REG_PWRONCONFIG1,
+				 BD718XX_REG_PWRONCONFIG1,
 				 BD718XX_PWRBTN_PRESS_DURATION_MASK,
 				 BD718XX_PWRBTN_LONG_PRESS_10S);
 
@@ -179,7 +160,14 @@ static int bd71837_i2c_probe(struct i2c_client *i2c,
 }
 
 static const struct of_device_id bd71837_of_match[] = {
-	{ .compatible = "rohm,bd71837", },
+	{
+		.compatible = "rohm,bd71837",
+		.data = (void *)BD718XX_TYPE_BD71837,
+	},
+	{
+		.compatible = "rohm,bd71847",
+		.data = (void *)BD718XX_TYPE_BD71847,
+	},
 	{ }
 };
 MODULE_DEVICE_TABLE(of, bd71837_of_match);
diff --git a/drivers/regulator/bd71837-regulator.c b/drivers/regulator/bd71837-regulator.c
index 4f4599acfebd..1dbba20ede3e 100644
--- a/drivers/regulator/bd71837-regulator.c
+++ b/drivers/regulator/bd71837-regulator.c
@@ -15,11 +15,11 @@
 #include <linux/regulator/of_regulator.h>
 #include <linux/slab.h>
 
-struct bd71837_pmic {
-	struct regulator_desc descs[BD71837_REGULATOR_CNT];
+struct bd718xx_pmic {
+	struct bd718xx_regulator_data *rdata;
 	struct bd71837 *mfd;
 	struct platform_device *pdev;
-	struct regulator_dev *rdev[BD71837_REGULATOR_CNT];
+	struct regulator_dev *rdev[BD718XX_REGULATOR_AMOUNT];
 };
 
 /*
@@ -33,7 +33,7 @@ struct bd71837_pmic {
 static int bd71837_buck1234_set_ramp_delay(struct regulator_dev *rdev,
 					   int ramp_delay)
 {
-	struct bd71837_pmic *pmic = rdev_get_drvdata(rdev);
+	struct bd718xx_pmic *pmic = rdev_get_drvdata(rdev);
 	struct bd71837 *mfd = pmic->mfd;
 	int id = rdev->desc->id;
 	unsigned int ramp_value = BUCK_RAMPRATE_10P00MV;
@@ -60,7 +60,7 @@ static int bd71837_buck1234_set_ramp_delay(struct regulator_dev *rdev,
 			rdev->desc->name, ramp_delay);
 	}
 
-	return regmap_update_bits(mfd->regmap, BD71837_REG_BUCK1_CTRL + id,
+	return regmap_update_bits(mfd->regmap, BD718XX_REG_BUCK1_CTRL + id,
 				  BUCK_RAMPRATE_MASK, ramp_value << 6);
 }
 
@@ -69,7 +69,7 @@ static int bd71837_buck1234_set_ramp_delay(struct regulator_dev *rdev,
  * is changed. Hence we return -EBUSY for these if voltage is changed
  * when BUCK/LDO is enabled.
  */
-static int bd71837_set_voltage_sel_restricted(struct regulator_dev *rdev,
+static int bd718xx_set_voltage_sel_restricted(struct regulator_dev *rdev,
 						    unsigned int sel)
 {
 	if (regulator_is_enabled_regmap(rdev))
@@ -78,45 +78,45 @@ static int bd71837_set_voltage_sel_restricted(struct regulator_dev *rdev,
 	return regulator_set_voltage_sel_regmap(rdev, sel);
 }
 
-static struct regulator_ops bd71837_ldo_regulator_ops = {
+static struct regulator_ops bd718xx_ldo_regulator_ops = {
 	.enable = regulator_enable_regmap,
 	.disable = regulator_disable_regmap,
 	.is_enabled = regulator_is_enabled_regmap,
 	.list_voltage = regulator_list_voltage_linear_range,
-	.set_voltage_sel = bd71837_set_voltage_sel_restricted,
+	.set_voltage_sel = bd718xx_set_voltage_sel_restricted,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
 };
 
-static struct regulator_ops bd71837_ldo_regulator_nolinear_ops = {
+static struct regulator_ops bd718xx_ldo_regulator_nolinear_ops = {
 	.enable = regulator_enable_regmap,
 	.disable = regulator_disable_regmap,
 	.is_enabled = regulator_is_enabled_regmap,
 	.list_voltage = regulator_list_voltage_table,
-	.set_voltage_sel = bd71837_set_voltage_sel_restricted,
+	.set_voltage_sel = bd718xx_set_voltage_sel_restricted,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
 };
 
-static struct regulator_ops bd71837_buck_regulator_ops = {
+static struct regulator_ops bd718xx_buck_regulator_ops = {
 	.enable = regulator_enable_regmap,
 	.disable = regulator_disable_regmap,
 	.is_enabled = regulator_is_enabled_regmap,
 	.list_voltage = regulator_list_voltage_linear_range,
-	.set_voltage_sel = bd71837_set_voltage_sel_restricted,
+	.set_voltage_sel = bd718xx_set_voltage_sel_restricted,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
 	.set_voltage_time_sel = regulator_set_voltage_time_sel,
 };
 
-static struct regulator_ops bd71837_buck_regulator_nolinear_ops = {
+static struct regulator_ops bd718xx_buck_regulator_nolinear_ops = {
 	.enable = regulator_enable_regmap,
 	.disable = regulator_disable_regmap,
 	.is_enabled = regulator_is_enabled_regmap,
 	.list_voltage = regulator_list_voltage_table,
-	.set_voltage_sel = bd71837_set_voltage_sel_restricted,
+	.set_voltage_sel = bd718xx_set_voltage_sel_restricted,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
 	.set_voltage_time_sel = regulator_set_voltage_time_sel,
 };
 
-static struct regulator_ops bd71837_buck1234_regulator_ops = {
+static struct regulator_ops bd718xx_dvs_buck_regulator_ops = {
 	.enable = regulator_enable_regmap,
 	.disable = regulator_disable_regmap,
 	.is_enabled = regulator_is_enabled_regmap,
@@ -128,24 +128,30 @@ static struct regulator_ops bd71837_buck1234_regulator_ops = {
 };
 
 /*
- * BUCK1/2/3/4
+ * BD71837 BUCK1/2/3/4
+ * BD71847 BUCK1/2
  * 0.70 to 1.30V (10mV step)
  */
-static const struct regulator_linear_range bd71837_buck1234_voltage_ranges[] = {
+static const struct regulator_linear_range bd718xx_dvs_buck_volts[] = {
 	REGULATOR_LINEAR_RANGE(700000, 0x00, 0x3C, 10000),
 	REGULATOR_LINEAR_RANGE(1300000, 0x3D, 0x3F, 0),
 };
 
 /*
- * BUCK5
- * 0.9V to 1.35V ()
+ * BD71837 BUCK5
+ * BD71847 BUCK3
+ * 0.7V to 1.35V ()
  */
-static const struct regulator_linear_range bd71837_buck5_voltage_ranges[] = {
+static const struct regulator_linear_range bd718xx_1st_nodvs_buck_volts[] = {
 	REGULATOR_LINEAR_RANGE(700000, 0x00, 0x03, 100000),
 	REGULATOR_LINEAR_RANGE(1050000, 0x04, 0x05, 50000),
 	REGULATOR_LINEAR_RANGE(1200000, 0x06, 0x07, 150000),
 };
 
+static const struct regulator_linear_range bd71847_buck4_voltage_ranges[] = {
+	REGULATOR_LINEAR_RANGE(3000000, 0x00, 0x03, 100000),
+};
+
 /*
  * BUCK6
  * 3.0V to 3.3V (step 100mV)
@@ -155,7 +161,8 @@ static const struct regulator_linear_range bd71837_buck6_voltage_ranges[] = {
 };
 
 /*
- * BUCK7
+ * BD71837 BUCK7
+ * BD71847 BUCK5
  * 000 = 1.605V
  * 001 = 1.695V
  * 010 = 1.755V
@@ -165,7 +172,7 @@ static const struct regulator_linear_range bd71837_buck6_voltage_ranges[] = {
  * 110 = 1.95V
  * 111 = 1.995V
  */
-static const unsigned int buck_7_volts[] = {
+static const unsigned int bd718xx_3rd_nodvs_buck_volts[] = {
 	1605000, 1695000, 1755000, 1800000, 1845000, 1905000, 1950000, 1995000
 };
 
@@ -173,16 +180,15 @@ static const unsigned int buck_7_volts[] = {
  * BUCK8
  * 0.8V to 1.40V (step 10mV)
  */
-static const struct regulator_linear_range bd71837_buck8_voltage_ranges[] = {
+static const struct regulator_linear_range bd718xx_4th_nodvs_buck_volts[] = {
 	REGULATOR_LINEAR_RANGE(800000, 0x00, 0x3C, 10000),
-	REGULATOR_LINEAR_RANGE(1400000, 0x3D, 0x3F, 0),
 };
 
 /*
  * LDO1
  * 3.0 to 3.3V (100mV step)
  */
-static const struct regulator_linear_range bd71837_ldo1_voltage_ranges[] = {
+static const struct regulator_linear_range bd718xx_ldo1_volts[] = {
 	REGULATOR_LINEAR_RANGE(3000000, 0x00, 0x03, 100000),
 };
 
@@ -198,7 +204,7 @@ static const unsigned int ldo_2_volts[] = {
  * LDO3
  * 1.8 to 3.3V (100mV step)
  */
-static const struct regulator_linear_range bd71837_ldo3_voltage_ranges[] = {
+static const struct regulator_linear_range bd718xx_ldo3_volts[] = {
 	REGULATOR_LINEAR_RANGE(1800000, 0x00, 0x0F, 100000),
 };
 
@@ -206,16 +212,15 @@ static const struct regulator_linear_range bd71837_ldo3_voltage_ranges[] = {
  * LDO4
  * 0.9 to 1.8V (100mV step)
  */
-static const struct regulator_linear_range bd71837_ldo4_voltage_ranges[] = {
+static const struct regulator_linear_range bd718xx_ldo4_volts[] = {
 	REGULATOR_LINEAR_RANGE(900000, 0x00, 0x09, 100000),
-	REGULATOR_LINEAR_RANGE(1800000, 0x0A, 0x0F, 0),
 };
 
 /*
- * LDO5
+ * LDO5 for BD71837
  * 1.8 to 3.3V (100mV step)
  */
-static const struct regulator_linear_range bd71837_ldo5_voltage_ranges[] = {
+static const struct regulator_linear_range bd718xx_ldo5_volts[] = {
 	REGULATOR_LINEAR_RANGE(1800000, 0x00, 0x0F, 100000),
 };
 
@@ -223,330 +228,722 @@ static const struct regulator_linear_range bd71837_ldo5_voltage_ranges[] = {
  * LDO6
  * 0.9 to 1.8V (100mV step)
  */
-static const struct regulator_linear_range bd71837_ldo6_voltage_ranges[] = {
+static const struct regulator_linear_range bd718xx_ldo6_volts[] = {
 	REGULATOR_LINEAR_RANGE(900000, 0x00, 0x09, 100000),
-	REGULATOR_LINEAR_RANGE(1800000, 0x0A, 0x0F, 0),
 };
 
 /*
  * LDO7
  * 1.8 to 3.3V (100mV step)
  */
-static const struct regulator_linear_range bd71837_ldo7_voltage_ranges[] = {
+static const struct regulator_linear_range bd71837_ldo7_volts[] = {
 	REGULATOR_LINEAR_RANGE(1800000, 0x00, 0x0F, 100000),
 };
 
-static const struct regulator_desc bd71837_regulators[] = {
+struct reg_init {
+	unsigned int reg;
+	unsigned int mask;
+	unsigned int val;
+};
+struct bd718xx_regulator_data {
+	struct regulator_desc desc;
+	const struct reg_init init;
+	const struct reg_init *additional_inits;
+	int additional_init_amnt;
+};
+
+/*
+ * There is a HW quirk in BD71837. The shutdown sequence timings for
+ * bucks/LDOs which are controlled via register interface are changed.
+ * At PMIC poweroff the voltage for BUCK6/7 is cut immediately at the
+ * beginning of shut-down sequence. As bucks 6 and 7 are parent
+ * supplies for LDO5 and LDO6 - this causes LDO5/6 voltage
+ * monitoring to errorneously detect under voltage and force PMIC to
+ * emergency state instead of poweroff. In order to avoid this we
+ * disable voltage monitoring for LDO5 and LDO6
+ */
+static const struct reg_init bd71837_ldo5_inits[] = {
+	{
+		.reg = BD718XX_REG_MVRFLTMASK2,
+		.mask = BD718XX_LDO5_VRMON80,
+		.val = BD718XX_LDO5_VRMON80,
+	},
+};
+
+static const struct reg_init bd71837_ldo6_inits[] = {
+	{
+		.reg = BD718XX_REG_MVRFLTMASK2,
+		.mask = BD718XX_LDO6_VRMON80,
+		.val = BD718XX_LDO6_VRMON80,
+	},
+};
+
+static const struct bd718xx_regulator_data bd71847_regulators[] = {
+	{
+		.desc = {
+			.name = "buck1",
+			.of_match = of_match_ptr("BUCK1"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_BUCK1,
+			.ops = &bd718xx_dvs_buck_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_DVS_BUCK_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_dvs_buck_volts,
+			.n_linear_ranges =
+				ARRAY_SIZE(bd718xx_dvs_buck_volts),
+			.vsel_reg = BD718XX_REG_BUCK1_VOLT_RUN,
+			.vsel_mask = DVS_BUCK_RUN_MASK,
+			.enable_reg = BD718XX_REG_BUCK1_CTRL,
+			.enable_mask = BD718XX_BUCK_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_BUCK1_CTRL,
+			.mask = BD718XX_BUCK_SEL,
+			.val = BD718XX_BUCK_SEL,
+		},
+	},
 	{
-		.name = "buck1",
-		.of_match = of_match_ptr("BUCK1"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD71837_BUCK1,
-		.ops = &bd71837_buck1234_regulator_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = BD71837_BUCK1_VOLTAGE_NUM,
-		.linear_ranges = bd71837_buck1234_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(bd71837_buck1234_voltage_ranges),
-		.vsel_reg = BD71837_REG_BUCK1_VOLT_RUN,
-		.vsel_mask = BUCK1_RUN_MASK,
-		.enable_reg = BD71837_REG_BUCK1_CTRL,
-		.enable_mask = BD71837_BUCK_EN,
-		.owner = THIS_MODULE,
+		.desc = {
+			.name = "buck2",
+			.of_match = of_match_ptr("BUCK2"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_BUCK2,
+			.ops = &bd718xx_dvs_buck_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_DVS_BUCK_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_dvs_buck_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd718xx_dvs_buck_volts),
+			.vsel_reg = BD718XX_REG_BUCK2_VOLT_RUN,
+			.vsel_mask = DVS_BUCK_RUN_MASK,
+			.enable_reg = BD718XX_REG_BUCK2_CTRL,
+			.enable_mask = BD718XX_BUCK_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_BUCK2_CTRL,
+			.mask = BD718XX_BUCK_SEL,
+			.val = BD718XX_BUCK_SEL,
+		},
 	},
 	{
-		.name = "buck2",
-		.of_match = of_match_ptr("BUCK2"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD71837_BUCK2,
-		.ops = &bd71837_buck1234_regulator_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = BD71837_BUCK2_VOLTAGE_NUM,
-		.linear_ranges = bd71837_buck1234_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(bd71837_buck1234_voltage_ranges),
-		.vsel_reg = BD71837_REG_BUCK2_VOLT_RUN,
-		.vsel_mask = BUCK2_RUN_MASK,
-		.enable_reg = BD71837_REG_BUCK2_CTRL,
-		.enable_mask = BD71837_BUCK_EN,
-		.owner = THIS_MODULE,
+		.desc = {
+			.name = "buck3",
+			.of_match = of_match_ptr("BUCK3"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_BUCK3,
+			.ops = &bd718xx_buck_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_1ST_NODVS_BUCK_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_1st_nodvs_buck_volts,
+			.n_linear_ranges =
+				ARRAY_SIZE(bd718xx_1st_nodvs_buck_volts),
+			.vsel_reg = BD718XX_REG_1ST_NODVS_BUCK_VOLT,
+			.vsel_mask = BD718XX_1ST_NODVS_BUCK_MASK,
+			.enable_reg = BD718XX_REG_1ST_NODVS_BUCK_CTRL,
+			.enable_mask = BD718XX_BUCK_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_1ST_NODVS_BUCK_CTRL,
+			.mask = BD718XX_BUCK_SEL,
+			.val = BD718XX_BUCK_SEL,
+		},
 	},
 	{
-		.name = "buck3",
-		.of_match = of_match_ptr("BUCK3"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD71837_BUCK3,
-		.ops = &bd71837_buck1234_regulator_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = BD71837_BUCK3_VOLTAGE_NUM,
-		.linear_ranges = bd71837_buck1234_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(bd71837_buck1234_voltage_ranges),
-		.vsel_reg = BD71837_REG_BUCK3_VOLT_RUN,
-		.vsel_mask = BUCK3_RUN_MASK,
-		.enable_reg = BD71837_REG_BUCK3_CTRL,
-		.enable_mask = BD71837_BUCK_EN,
-		.owner = THIS_MODULE,
+		.desc = {
+			.name = "buck4",
+			.of_match = of_match_ptr("BUCK4"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_BUCK4,
+			.ops = &bd718xx_buck_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD71847_BUCK4_VOLTAGE_NUM,
+			.linear_ranges = bd71847_buck4_voltage_ranges,
+			.n_linear_ranges =
+				ARRAY_SIZE(bd71847_buck4_voltage_ranges),
+			.enable_reg = BD718XX_REG_2ND_NODVS_BUCK_CTRL,
+			.vsel_reg = BD718XX_REG_2ND_NODVS_BUCK_VOLT,
+			.vsel_mask = BD71847_BUCK4_MASK,
+			.enable_mask = BD718XX_BUCK_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_2ND_NODVS_BUCK_CTRL,
+			.mask = BD718XX_BUCK_SEL,
+			.val = BD718XX_BUCK_SEL,
+		},
 	},
 	{
-		.name = "buck4",
-		.of_match = of_match_ptr("BUCK4"),
-		.regulators_node = of_match_ptr("regulators"),
-			.id = BD71837_BUCK4,
-		.ops = &bd71837_buck1234_regulator_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = BD71837_BUCK4_VOLTAGE_NUM,
-		.linear_ranges = bd71837_buck1234_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(bd71837_buck1234_voltage_ranges),
-		.vsel_reg = BD71837_REG_BUCK4_VOLT_RUN,
-		.vsel_mask = BUCK4_RUN_MASK,
-		.enable_reg = BD71837_REG_BUCK4_CTRL,
-		.enable_mask = BD71837_BUCK_EN,
-		.owner = THIS_MODULE,
+		.desc = {
+			.name = "buck5",
+			.of_match = of_match_ptr("BUCK5"),
+				.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_BUCK5,
+			.ops = &bd718xx_buck_regulator_nolinear_ops,
+			.type = REGULATOR_VOLTAGE,
+			.volt_table = &bd718xx_3rd_nodvs_buck_volts[0],
+			.n_voltages = ARRAY_SIZE(bd718xx_3rd_nodvs_buck_volts),
+			.vsel_reg = BD718XX_REG_3RD_NODVS_BUCK_VOLT,
+			.vsel_mask = BD718XX_3RD_NODVS_BUCK_MASK,
+			.enable_reg = BD718XX_REG_3RD_NODVS_BUCK_CTRL,
+			.enable_mask = BD718XX_BUCK_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_3RD_NODVS_BUCK_CTRL,
+			.mask = BD718XX_BUCK_SEL,
+			.val = BD718XX_BUCK_SEL,
+		},
 	},
 	{
-		.name = "buck5",
-		.of_match = of_match_ptr("BUCK5"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD71837_BUCK5,
-		.ops = &bd71837_buck_regulator_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = BD71837_BUCK5_VOLTAGE_NUM,
-		.linear_ranges = bd71837_buck5_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(bd71837_buck5_voltage_ranges),
-		.vsel_reg = BD71837_REG_BUCK5_VOLT,
-		.vsel_mask = BUCK5_MASK,
-		.enable_reg = BD71837_REG_BUCK5_CTRL,
-		.enable_mask = BD71837_BUCK_EN,
-		.owner = THIS_MODULE,
+		.desc = {
+			.name = "buck6",
+			.of_match = of_match_ptr("BUCK6"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_BUCK6,
+			.ops = &bd718xx_buck_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD71837_4TH_NODVS_BUCK_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_4th_nodvs_buck_volts,
+			.n_linear_ranges =
+				ARRAY_SIZE(bd718xx_4th_nodvs_buck_volts),
+			.vsel_reg = BD718XX_REG_4TH_NODVS_BUCK_VOLT,
+			.vsel_mask = BD718XX_4TH_NODVS_BUCK_MASK,
+			.enable_reg = BD718XX_REG_4TH_NODVS_BUCK_CTRL,
+			.enable_mask = BD718XX_BUCK_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_4TH_NODVS_BUCK_CTRL,
+			.mask = BD718XX_BUCK_SEL,
+			.val = BD718XX_BUCK_SEL,
+		},
 	},
 	{
-		.name = "buck6",
-		.of_match = of_match_ptr("BUCK6"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD71837_BUCK6,
-		.ops = &bd71837_buck_regulator_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = BD71837_BUCK6_VOLTAGE_NUM,
-		.linear_ranges = bd71837_buck6_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(bd71837_buck6_voltage_ranges),
-		.vsel_reg = BD71837_REG_BUCK6_VOLT,
-		.vsel_mask = BUCK6_MASK,
-		.enable_reg = BD71837_REG_BUCK6_CTRL,
-		.enable_mask = BD71837_BUCK_EN,
-		.owner = THIS_MODULE,
+		.desc = {
+			.name = "ldo1",
+			.of_match = of_match_ptr("LDO1"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_LDO1,
+			.ops = &bd718xx_ldo_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_LDO1_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_ldo1_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd718xx_ldo1_volts),
+			.vsel_reg = BD718XX_REG_LDO1_VOLT,
+			.vsel_mask = BD718XX_LDO1_MASK,
+			.enable_reg = BD718XX_REG_LDO1_VOLT,
+			.enable_mask = BD718XX_LDO_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_LDO1_VOLT,
+			.mask = BD718XX_LDO_SEL,
+			.val = BD718XX_LDO_SEL,
+		},
 	},
 	{
-		.name = "buck7",
-		.of_match = of_match_ptr("BUCK7"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD71837_BUCK7,
-		.ops = &bd71837_buck_regulator_nolinear_ops,
-		.type = REGULATOR_VOLTAGE,
-		.volt_table = &buck_7_volts[0],
-		.n_voltages = ARRAY_SIZE(buck_7_volts),
-		.vsel_reg = BD71837_REG_BUCK7_VOLT,
-		.vsel_mask = BUCK7_MASK,
-		.enable_reg = BD71837_REG_BUCK7_CTRL,
-		.enable_mask = BD71837_BUCK_EN,
-		.owner = THIS_MODULE,
+		.desc = {
+			.name = "ldo2",
+			.of_match = of_match_ptr("LDO2"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_LDO2,
+			.ops = &bd718xx_ldo_regulator_nolinear_ops,
+			.type = REGULATOR_VOLTAGE,
+			.volt_table = &ldo_2_volts[0],
+			.vsel_reg = BD718XX_REG_LDO2_VOLT,
+			.vsel_mask = BD718XX_LDO2_MASK,
+			.n_voltages = ARRAY_SIZE(ldo_2_volts),
+			.enable_reg = BD718XX_REG_LDO2_VOLT,
+			.enable_mask = BD718XX_LDO_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_LDO2_VOLT,
+			.mask = BD718XX_LDO_SEL,
+			.val = BD718XX_LDO_SEL,
+		},
 	},
 	{
-		.name = "buck8",
-		.of_match = of_match_ptr("BUCK8"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD71837_BUCK8,
-		.ops = &bd71837_buck_regulator_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = BD71837_BUCK8_VOLTAGE_NUM,
-		.linear_ranges = bd71837_buck8_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(bd71837_buck8_voltage_ranges),
-		.vsel_reg = BD71837_REG_BUCK8_VOLT,
-		.vsel_mask = BUCK8_MASK,
-		.enable_reg = BD71837_REG_BUCK8_CTRL,
-		.enable_mask = BD71837_BUCK_EN,
-		.owner = THIS_MODULE,
+		.desc = {
+			.name = "ldo3",
+			.of_match = of_match_ptr("LDO3"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_LDO3,
+			.ops = &bd718xx_ldo_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_LDO3_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_ldo3_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd718xx_ldo3_volts),
+			.vsel_reg = BD718XX_REG_LDO3_VOLT,
+			.vsel_mask = BD718XX_LDO3_MASK,
+			.enable_reg = BD718XX_REG_LDO3_VOLT,
+			.enable_mask = BD718XX_LDO_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_LDO3_VOLT,
+			.mask = BD718XX_LDO_SEL,
+			.val = BD718XX_LDO_SEL,
+		},
 	},
 	{
-		.name = "ldo1",
-		.of_match = of_match_ptr("LDO1"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD71837_LDO1,
-		.ops = &bd71837_ldo_regulator_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = BD71837_LDO1_VOLTAGE_NUM,
-		.linear_ranges = bd71837_ldo1_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(bd71837_ldo1_voltage_ranges),
-		.vsel_reg = BD71837_REG_LDO1_VOLT,
-		.vsel_mask = LDO1_MASK,
-		.enable_reg = BD71837_REG_LDO1_VOLT,
-		.enable_mask = BD71837_LDO_EN,
-		.owner = THIS_MODULE,
+		.desc = {
+			.name = "ldo4",
+			.of_match = of_match_ptr("LDO4"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_LDO4,
+			.ops = &bd718xx_ldo_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_LDO4_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_ldo4_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd718xx_ldo4_volts),
+			.vsel_reg = BD718XX_REG_LDO4_VOLT,
+			.vsel_mask = BD718XX_LDO4_MASK,
+			.enable_reg = BD718XX_REG_LDO4_VOLT,
+			.enable_mask = BD718XX_LDO_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_LDO4_VOLT,
+			.mask = BD718XX_LDO_SEL,
+			.val = BD718XX_LDO_SEL,
+		},
 	},
 	{
-		.name = "ldo2",
-		.of_match = of_match_ptr("LDO2"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD71837_LDO2,
-		.ops = &bd71837_ldo_regulator_nolinear_ops,
-		.type = REGULATOR_VOLTAGE,
-		.volt_table = &ldo_2_volts[0],
-		.n_voltages = ARRAY_SIZE(ldo_2_volts),
-		.vsel_reg = BD71837_REG_LDO2_VOLT,
-		.vsel_mask = LDO2_MASK,
-		.enable_reg = BD71837_REG_LDO2_VOLT,
-		.enable_mask = BD71837_LDO_EN,
-		.owner = THIS_MODULE,
+		.desc = {
+			.name = "ldo5",
+			.of_match = of_match_ptr("LDO5"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_LDO5,
+			.ops = &bd718xx_ldo_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_LDO5_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_ldo5_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd718xx_ldo5_volts),
+			.vsel_reg = BD718XX_REG_LDO5_VOLT,
+			.vsel_mask = BD71847_LDO5_MASK,
+			.enable_reg = BD718XX_REG_LDO5_VOLT,
+			.enable_mask = BD718XX_LDO_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_LDO5_VOLT,
+			.mask = BD718XX_LDO_SEL,
+			.val = BD718XX_LDO_SEL,
+		},
 	},
 	{
-		.name = "ldo3",
-		.of_match = of_match_ptr("LDO3"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD71837_LDO3,
-		.ops = &bd71837_ldo_regulator_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = BD71837_LDO3_VOLTAGE_NUM,
-		.linear_ranges = bd71837_ldo3_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(bd71837_ldo3_voltage_ranges),
-		.vsel_reg = BD71837_REG_LDO3_VOLT,
-		.vsel_mask = LDO3_MASK,
-		.enable_reg = BD71837_REG_LDO3_VOLT,
-		.enable_mask = BD71837_LDO_EN,
-		.owner = THIS_MODULE,
+		.desc = {
+			.name = "ldo6",
+			.of_match = of_match_ptr("LDO6"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_LDO6,
+			.ops = &bd718xx_ldo_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_LDO6_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_ldo6_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd718xx_ldo6_volts),
+			/* LDO6 is supplied by buck5 */
+			.supply_name = "buck5",
+			.vsel_reg = BD718XX_REG_LDO6_VOLT,
+			.vsel_mask = BD718XX_LDO6_MASK,
+			.enable_reg = BD718XX_REG_LDO6_VOLT,
+			.enable_mask = BD718XX_LDO_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_LDO6_VOLT,
+			.mask = BD718XX_LDO_SEL,
+			.val = BD718XX_LDO_SEL,
+		},
+	},
+};
+
+static const struct bd718xx_regulator_data bd71837_regulators[] = {
+	{
+		.desc = {
+			.name = "buck1",
+			.of_match = of_match_ptr("BUCK1"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_BUCK1,
+			.ops = &bd718xx_dvs_buck_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_DVS_BUCK_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_dvs_buck_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd718xx_dvs_buck_volts),
+			.vsel_reg = BD718XX_REG_BUCK1_VOLT_RUN,
+			.vsel_mask = DVS_BUCK_RUN_MASK,
+			.enable_reg = BD718XX_REG_BUCK1_CTRL,
+			.enable_mask = BD718XX_BUCK_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_BUCK1_CTRL,
+			.mask = BD718XX_BUCK_SEL,
+			.val = BD718XX_BUCK_SEL,
+		},
+	},
+	{
+		.desc = {
+			.name = "buck2",
+			.of_match = of_match_ptr("BUCK2"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_BUCK2,
+			.ops = &bd718xx_dvs_buck_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_DVS_BUCK_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_dvs_buck_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd718xx_dvs_buck_volts),
+			.vsel_reg = BD718XX_REG_BUCK2_VOLT_RUN,
+			.vsel_mask = DVS_BUCK_RUN_MASK,
+			.enable_reg = BD718XX_REG_BUCK2_CTRL,
+			.enable_mask = BD718XX_BUCK_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_BUCK2_CTRL,
+			.mask = BD718XX_BUCK_SEL,
+			.val = BD718XX_BUCK_SEL,
+		},
+	},
+	{
+		.desc = {
+			.name = "buck3",
+			.of_match = of_match_ptr("BUCK3"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_BUCK3,
+			.ops = &bd718xx_dvs_buck_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_DVS_BUCK_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_dvs_buck_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd718xx_dvs_buck_volts),
+			.vsel_reg = BD71837_REG_BUCK3_VOLT_RUN,
+			.vsel_mask = DVS_BUCK_RUN_MASK,
+			.enable_reg = BD71837_REG_BUCK3_CTRL,
+			.enable_mask = BD718XX_BUCK_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD71837_REG_BUCK3_CTRL,
+			.mask = BD718XX_BUCK_SEL,
+			.val = BD718XX_BUCK_SEL,
+		},
+	},
+	{
+		.desc = {
+			.name = "buck4",
+			.of_match = of_match_ptr("BUCK4"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_BUCK4,
+			.ops = &bd718xx_dvs_buck_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_DVS_BUCK_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_dvs_buck_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd718xx_dvs_buck_volts),
+			.vsel_reg = BD71837_REG_BUCK4_VOLT_RUN,
+			.vsel_mask = DVS_BUCK_RUN_MASK,
+			.enable_reg = BD71837_REG_BUCK4_CTRL,
+			.enable_mask = BD718XX_BUCK_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD71837_REG_BUCK4_CTRL,
+			.mask = BD718XX_BUCK_SEL,
+			.val = BD718XX_BUCK_SEL,
+		},
 	},
 	{
-		.name = "ldo4",
-		.of_match = of_match_ptr("LDO4"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD71837_LDO4,
-		.ops = &bd71837_ldo_regulator_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = BD71837_LDO4_VOLTAGE_NUM,
-		.linear_ranges = bd71837_ldo4_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(bd71837_ldo4_voltage_ranges),
-		.vsel_reg = BD71837_REG_LDO4_VOLT,
-		.vsel_mask = LDO4_MASK,
-		.enable_reg = BD71837_REG_LDO4_VOLT,
-		.enable_mask = BD71837_LDO_EN,
-		.owner = THIS_MODULE,
+		.desc = {
+			.name = "buck5",
+			.of_match = of_match_ptr("BUCK5"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_BUCK5,
+			.ops = &bd718xx_buck_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_1ST_NODVS_BUCK_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_1st_nodvs_buck_volts,
+			.n_linear_ranges =
+				ARRAY_SIZE(bd718xx_1st_nodvs_buck_volts),
+			.vsel_reg = BD718XX_REG_1ST_NODVS_BUCK_VOLT,
+			.vsel_mask = BD718XX_1ST_NODVS_BUCK_MASK,
+			.enable_reg = BD718XX_REG_1ST_NODVS_BUCK_CTRL,
+			.enable_mask = BD718XX_BUCK_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_1ST_NODVS_BUCK_CTRL,
+			.mask = BD718XX_BUCK_SEL,
+			.val = BD718XX_BUCK_SEL,
+		},
 	},
 	{
-		.name = "ldo5",
-		.of_match = of_match_ptr("LDO5"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD71837_LDO5,
-		.ops = &bd71837_ldo_regulator_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = BD71837_LDO5_VOLTAGE_NUM,
-		.linear_ranges = bd71837_ldo5_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(bd71837_ldo5_voltage_ranges),
-		/* LDO5 is supplied by buck6 */
-		.supply_name = "buck6",
-		.vsel_reg = BD71837_REG_LDO5_VOLT,
-		.vsel_mask = LDO5_MASK,
-		.enable_reg = BD71837_REG_LDO5_VOLT,
-		.enable_mask = BD71837_LDO_EN,
-		.owner = THIS_MODULE,
+		.desc = {
+			.name = "buck6",
+			.of_match = of_match_ptr("BUCK6"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_BUCK6,
+			.ops = &bd718xx_buck_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD71837_BUCK6_VOLTAGE_NUM,
+			.linear_ranges = bd71837_buck6_voltage_ranges,
+			.n_linear_ranges =
+				ARRAY_SIZE(bd71837_buck6_voltage_ranges),
+			.vsel_reg = BD718XX_REG_2ND_NODVS_BUCK_VOLT,
+			.vsel_mask = BD71837_BUCK6_MASK,
+			.enable_reg = BD718XX_REG_2ND_NODVS_BUCK_CTRL,
+			.enable_mask = BD718XX_BUCK_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_2ND_NODVS_BUCK_CTRL,
+			.mask = BD718XX_BUCK_SEL,
+			.val = BD718XX_BUCK_SEL,
+		},
 	},
 	{
-		.name = "ldo6",
-		.of_match = of_match_ptr("LDO6"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD71837_LDO6,
-		.ops = &bd71837_ldo_regulator_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = BD71837_LDO6_VOLTAGE_NUM,
-		.linear_ranges = bd71837_ldo6_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(bd71837_ldo6_voltage_ranges),
-		/* LDO6 is supplied by buck7 */
-		.supply_name = "buck7",
-		.vsel_reg = BD71837_REG_LDO6_VOLT,
-		.vsel_mask = LDO6_MASK,
-		.enable_reg = BD71837_REG_LDO6_VOLT,
-		.enable_mask = BD71837_LDO_EN,
-		.owner = THIS_MODULE,
+		.desc = {
+			.name = "buck7",
+			.of_match = of_match_ptr("BUCK7"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_BUCK7,
+			.ops = &bd718xx_buck_regulator_nolinear_ops,
+			.type = REGULATOR_VOLTAGE,
+			.volt_table = &bd718xx_3rd_nodvs_buck_volts[0],
+			.n_voltages = ARRAY_SIZE(bd718xx_3rd_nodvs_buck_volts),
+			.vsel_reg = BD718XX_REG_3RD_NODVS_BUCK_VOLT,
+			.vsel_mask = BD718XX_3RD_NODVS_BUCK_MASK,
+			.enable_reg = BD718XX_REG_3RD_NODVS_BUCK_CTRL,
+			.enable_mask = BD718XX_BUCK_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_3RD_NODVS_BUCK_CTRL,
+			.mask = BD718XX_BUCK_SEL,
+			.val = BD718XX_BUCK_SEL,
+		},
 	},
 	{
-		.name = "ldo7",
-		.of_match = of_match_ptr("LDO7"),
-		.regulators_node = of_match_ptr("regulators"),
-		.id = BD71837_LDO7,
-		.ops = &bd71837_ldo_regulator_ops,
-		.type = REGULATOR_VOLTAGE,
-		.n_voltages = BD71837_LDO7_VOLTAGE_NUM,
-		.linear_ranges = bd71837_ldo7_voltage_ranges,
-		.n_linear_ranges = ARRAY_SIZE(bd71837_ldo7_voltage_ranges),
-		.vsel_reg = BD71837_REG_LDO7_VOLT,
-		.vsel_mask = LDO7_MASK,
-		.enable_reg = BD71837_REG_LDO7_VOLT,
-		.enable_mask = BD71837_LDO_EN,
-		.owner = THIS_MODULE,
+		.desc = {
+			.name = "buck8",
+			.of_match = of_match_ptr("BUCK8"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_BUCK8,
+			.ops = &bd718xx_buck_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD71837_4TH_NODVS_BUCK_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_4th_nodvs_buck_volts,
+			.n_linear_ranges =
+				ARRAY_SIZE(bd718xx_4th_nodvs_buck_volts),
+			.vsel_reg = BD718XX_REG_4TH_NODVS_BUCK_VOLT,
+			.vsel_mask = BD718XX_4TH_NODVS_BUCK_MASK,
+			.enable_reg = BD718XX_REG_4TH_NODVS_BUCK_CTRL,
+			.enable_mask = BD718XX_BUCK_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_4TH_NODVS_BUCK_CTRL,
+			.mask = BD718XX_BUCK_SEL,
+			.val = BD718XX_BUCK_SEL,
+		},
+	},
+	{
+		.desc = {
+			.name = "ldo1",
+			.of_match = of_match_ptr("LDO1"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_LDO1,
+			.ops = &bd718xx_ldo_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_LDO1_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_ldo1_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd718xx_ldo1_volts),
+			.vsel_reg = BD718XX_REG_LDO1_VOLT,
+			.vsel_mask = BD718XX_LDO1_MASK,
+			.enable_reg = BD718XX_REG_LDO1_VOLT,
+			.enable_mask = BD718XX_LDO_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_LDO1_VOLT,
+			.mask = BD718XX_LDO_SEL,
+			.val = BD718XX_LDO_SEL,
+		},
+	},
+	{
+		.desc = {
+			.name = "ldo2",
+			.of_match = of_match_ptr("LDO2"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_LDO2,
+			.ops = &bd718xx_ldo_regulator_nolinear_ops,
+			.type = REGULATOR_VOLTAGE,
+			.volt_table = &ldo_2_volts[0],
+			.vsel_reg = BD718XX_REG_LDO2_VOLT,
+			.vsel_mask = BD718XX_LDO2_MASK,
+			.n_voltages = ARRAY_SIZE(ldo_2_volts),
+			.enable_reg = BD718XX_REG_LDO2_VOLT,
+			.enable_mask = BD718XX_LDO_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_LDO2_VOLT,
+			.mask = BD718XX_LDO_SEL,
+			.val = BD718XX_LDO_SEL,
+		},
+	},
+	{
+		.desc = {
+			.name = "ldo3",
+			.of_match = of_match_ptr("LDO3"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_LDO3,
+			.ops = &bd718xx_ldo_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_LDO3_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_ldo3_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd718xx_ldo3_volts),
+			.vsel_reg = BD718XX_REG_LDO3_VOLT,
+			.vsel_mask = BD718XX_LDO3_MASK,
+			.enable_reg = BD718XX_REG_LDO3_VOLT,
+			.enable_mask = BD718XX_LDO_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_LDO3_VOLT,
+			.mask = BD718XX_LDO_SEL,
+			.val = BD718XX_LDO_SEL,
+		},
+	},
+	{
+		.desc = {
+			.name = "ldo4",
+			.of_match = of_match_ptr("LDO4"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_LDO4,
+			.ops = &bd718xx_ldo_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_LDO4_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_ldo4_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd718xx_ldo4_volts),
+			.vsel_reg = BD718XX_REG_LDO4_VOLT,
+			.vsel_mask = BD718XX_LDO4_MASK,
+			.enable_reg = BD718XX_REG_LDO4_VOLT,
+			.enable_mask = BD718XX_LDO_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_LDO4_VOLT,
+			.mask = BD718XX_LDO_SEL,
+			.val = BD718XX_LDO_SEL,
+		},
+	},
+	{
+		.desc = {
+			.name = "ldo5",
+			.of_match = of_match_ptr("LDO5"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_LDO5,
+			.ops = &bd718xx_ldo_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_LDO5_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_ldo5_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd718xx_ldo5_volts),
+			/* LDO5 is supplied by buck6 */
+			.supply_name = "buck6",
+			.vsel_reg = BD718XX_REG_LDO5_VOLT,
+			.vsel_mask = BD71837_LDO5_MASK,
+			.enable_reg = BD718XX_REG_LDO5_VOLT,
+			.enable_mask = BD718XX_LDO_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_LDO5_VOLT,
+			.mask = BD718XX_LDO_SEL,
+			.val = BD718XX_LDO_SEL,
+		},
+		.additional_inits = bd71837_ldo5_inits,
+		.additional_init_amnt = ARRAY_SIZE(bd71837_ldo5_inits),
+	},
+	{
+		.desc = {
+			.name = "ldo6",
+			.of_match = of_match_ptr("LDO6"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_LDO6,
+			.ops = &bd718xx_ldo_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD718XX_LDO6_VOLTAGE_NUM,
+			.linear_ranges = bd718xx_ldo6_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd718xx_ldo6_volts),
+			/* LDO6 is supplied by buck7 */
+			.supply_name = "buck7",
+			.vsel_reg = BD718XX_REG_LDO6_VOLT,
+			.vsel_mask = BD718XX_LDO6_MASK,
+			.enable_reg = BD718XX_REG_LDO6_VOLT,
+			.enable_mask = BD718XX_LDO_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD718XX_REG_LDO6_VOLT,
+			.mask = BD718XX_LDO_SEL,
+			.val = BD718XX_LDO_SEL,
+		},
+		.additional_inits = bd71837_ldo6_inits,
+		.additional_init_amnt = ARRAY_SIZE(bd71837_ldo6_inits),
+	},
+	{
+		.desc = {
+			.name = "ldo7",
+			.of_match = of_match_ptr("LDO7"),
+			.regulators_node = of_match_ptr("regulators"),
+			.id = BD718XX_LDO7,
+			.ops = &bd718xx_ldo_regulator_ops,
+			.type = REGULATOR_VOLTAGE,
+			.n_voltages = BD71837_LDO7_VOLTAGE_NUM,
+			.linear_ranges = bd71837_ldo7_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd71837_ldo7_volts),
+			.vsel_reg = BD71837_REG_LDO7_VOLT,
+			.vsel_mask = BD71837_LDO7_MASK,
+			.enable_reg = BD71837_REG_LDO7_VOLT,
+			.enable_mask = BD718XX_LDO_EN,
+			.owner = THIS_MODULE,
+		},
+		.init = {
+			.reg = BD71837_REG_LDO7_VOLT,
+			.mask = BD718XX_LDO_SEL,
+			.val = BD718XX_LDO_SEL,
+		},
 	},
 };
 
-struct reg_init {
-	unsigned int reg;
-	unsigned int mask;
+struct bd718xx_pmic_inits {
+	const struct bd718xx_regulator_data (*r_datas)[];
+	unsigned int r_amount;
 };
 
 static int bd71837_probe(struct platform_device *pdev)
 {
-	struct bd71837_pmic *pmic;
+	struct bd718xx_pmic *pmic;
 	struct regulator_config config = { 0 };
-	struct reg_init pmic_regulator_inits[] = {
-		{
-			.reg = BD71837_REG_BUCK1_CTRL,
-			.mask = BD71837_BUCK_SEL,
-		}, {
-			.reg = BD71837_REG_BUCK2_CTRL,
-			.mask = BD71837_BUCK_SEL,
-		}, {
-			.reg = BD71837_REG_BUCK3_CTRL,
-			.mask = BD71837_BUCK_SEL,
-		}, {
-			.reg = BD71837_REG_BUCK4_CTRL,
-			.mask = BD71837_BUCK_SEL,
-		}, {
-			.reg = BD71837_REG_BUCK5_CTRL,
-			.mask = BD71837_BUCK_SEL,
-		}, {
-			.reg = BD71837_REG_BUCK6_CTRL,
-			.mask = BD71837_BUCK_SEL,
-		}, {
-			.reg = BD71837_REG_BUCK7_CTRL,
-			.mask = BD71837_BUCK_SEL,
-		}, {
-			.reg = BD71837_REG_BUCK8_CTRL,
-			.mask = BD71837_BUCK_SEL,
-		}, {
-			.reg = BD71837_REG_LDO1_VOLT,
-			.mask = BD71837_LDO_SEL,
-		}, {
-			.reg = BD71837_REG_LDO2_VOLT,
-			.mask = BD71837_LDO_SEL,
-		}, {
-			.reg = BD71837_REG_LDO3_VOLT,
-			.mask = BD71837_LDO_SEL,
-		}, {
-			.reg = BD71837_REG_LDO4_VOLT,
-			.mask = BD71837_LDO_SEL,
-		}, {
-			.reg = BD71837_REG_LDO5_VOLT,
-			.mask = BD71837_LDO_SEL,
-		}, {
-			.reg = BD71837_REG_LDO6_VOLT,
-			.mask = BD71837_LDO_SEL,
-		}, {
-			.reg = BD71837_REG_LDO7_VOLT,
-			.mask = BD71837_LDO_SEL,
-		}
+	struct bd718xx_pmic_inits pmic_regulators[] = {
+		[BD718XX_TYPE_BD71837] = {
+			.r_datas = &bd71837_regulators,
+			.r_amount = ARRAY_SIZE(bd71837_regulators),
+		},
+		[BD718XX_TYPE_BD71847] = {
+			.r_datas = &bd71847_regulators,
+			.r_amount = ARRAY_SIZE(bd71847_regulators),
+		},
 	};
 
-	int i, err;
+	int i, j, err;
 
 	pmic = devm_kzalloc(&pdev->dev, sizeof(*pmic), GFP_KERNEL);
 	if (!pmic)
 		return -ENOMEM;
 
-	memcpy(pmic->descs, bd71837_regulators, sizeof(pmic->descs));
-
 	pmic->pdev = pdev;
 	pmic->mfd = dev_get_drvdata(pdev->dev.parent);
 
@@ -555,44 +952,34 @@ static int bd71837_probe(struct platform_device *pdev)
 		err = -EINVAL;
 		goto err;
 	}
+	if (pmic->mfd->chip_type >= BD718XX_TYPE_AMOUNT ||
+	    !pmic_regulators[pmic->mfd->chip_type].r_datas) {
+		dev_err(&pdev->dev, "Unsupported chip type\n");
+		err = -EINVAL;
+		goto err;
+	}
+
 	platform_set_drvdata(pdev, pmic);
 
 	/* Register LOCK release */
-	err = regmap_update_bits(pmic->mfd->regmap, BD71837_REG_REGLOCK,
+	err = regmap_update_bits(pmic->mfd->regmap, BD718XX_REG_REGLOCK,
 				 (REGLOCK_PWRSEQ | REGLOCK_VREG), 0);
 	if (err) {
 		dev_err(&pmic->pdev->dev, "Failed to unlock PMIC (%d)\n", err);
 		goto err;
 	} else {
 		dev_dbg(&pmic->pdev->dev, "Unlocked lock register 0x%x\n",
-			BD71837_REG_REGLOCK);
-	}
-
-	/*
-	 * There is a HW quirk in BD71837. The shutdown sequence timings for
-	 * bucks/LDOs which are controlled via register interface are changed.
-	 * At PMIC poweroff the voltage for BUCK6/7 is cut immediately at the
-	 * beginning of shut-down sequence. As bucks 6 and 7 are parent
-	 * supplies for LDO5 and LDO6 - this causes LDO5/6 voltage
-	 * monitoring to errorneously detect under voltage and force PMIC to
-	 * emergency state instead of poweroff. In order to avoid this we
-	 * disable voltage monitoring for LDO5 and LDO6
-	 */
-	err = regmap_update_bits(pmic->mfd->regmap, BD718XX_REG_MVRFLTMASK2,
-				 BD718XX_LDO5_VRMON80 | BD718XX_LDO6_VRMON80,
-				 BD718XX_LDO5_VRMON80 | BD718XX_LDO6_VRMON80);
-	if (err) {
-		dev_err(&pmic->pdev->dev,
-			"Failed to disable voltage monitoring\n");
-		goto err;
+			BD718XX_REG_REGLOCK);
 	}
 
-	for (i = 0; i < ARRAY_SIZE(pmic_regulator_inits); i++) {
+	for (i = 0; i < pmic_regulators[pmic->mfd->chip_type].r_amount; i++) {
 
-		struct regulator_desc *desc;
+		const struct regulator_desc *desc;
 		struct regulator_dev *rdev;
+		const struct bd718xx_regulator_data *r;
 
-		desc = &pmic->descs[i];
+		r = &(*pmic_regulators[pmic->mfd->chip_type].r_datas)[i];
+		desc = &r->desc;
 
 		config.dev = pdev->dev.parent;
 		config.driver_data = pmic;
@@ -612,16 +999,26 @@ static int bd71837_probe(struct platform_device *pdev)
 		 * can now switch the control from PMIC state machine to the
 		 * register interface
 		 */
-		err = regmap_update_bits(pmic->mfd->regmap,
-					 pmic_regulator_inits[i].reg,
-					 pmic_regulator_inits[i].mask,
-					 0xFFFFFFFF);
+		err = regmap_update_bits(pmic->mfd->regmap, r->init.reg,
+					 r->init.mask, r->init.val);
 		if (err) {
 			dev_err(&pmic->pdev->dev,
 				"Failed to write BUCK/LDO SEL bit for (%s)\n",
 				desc->name);
 			goto err;
 		}
+		for (j = 0; j < r->additional_init_amnt; j++) {
+			err = regmap_update_bits(pmic->mfd->regmap,
+						 r->additional_inits[j].reg,
+						 r->additional_inits[j].mask,
+						 r->additional_inits[j].val);
+			if (err) {
+				dev_err(&pmic->pdev->dev,
+					"Buck (%s) initialization failed\n",
+					desc->name);
+				goto err;
+			}
+		}
 
 		pmic->rdev[i] = rdev;
 	}
@@ -632,7 +1029,7 @@ err:
 
 static struct platform_driver bd71837_regulator = {
 	.driver = {
-		.name = "bd71837-pmic",
+		.name = "bd718xx-pmic",
 	},
 	.probe = bd71837_probe,
 };
diff --git a/include/linux/mfd/rohm-bd718x7.h b/include/linux/mfd/rohm-bd718x7.h
index e8338e5dc10b..625e3607e069 100644
--- a/include/linux/mfd/rohm-bd718x7.h
+++ b/include/linux/mfd/rohm-bd718x7.h
@@ -7,106 +7,125 @@
 #include <linux/regmap.h>
 
 enum {
-	BD71837_BUCK1	=	0,
-	BD71837_BUCK2,
-	BD71837_BUCK3,
-	BD71837_BUCK4,
-	BD71837_BUCK5,
-	BD71837_BUCK6,
-	BD71837_BUCK7,
-	BD71837_BUCK8,
-	BD71837_LDO1,
-	BD71837_LDO2,
-	BD71837_LDO3,
-	BD71837_LDO4,
-	BD71837_LDO5,
-	BD71837_LDO6,
-	BD71837_LDO7,
-	BD71837_REGULATOR_CNT,
+	BD718XX_TYPE_BD71837 = 0,
+	BD718XX_TYPE_BD71847,
+	BD718XX_TYPE_AMOUNT
 };
 
-#define BD71837_BUCK1_VOLTAGE_NUM	0x40
-#define BD71837_BUCK2_VOLTAGE_NUM	0x40
-#define BD71837_BUCK3_VOLTAGE_NUM	0x40
-#define BD71837_BUCK4_VOLTAGE_NUM	0x40
+enum {
+	BD718XX_BUCK1 = 0,
+	BD718XX_BUCK2,
+	BD718XX_BUCK3,
+	BD718XX_BUCK4,
+	BD718XX_BUCK5,
+	BD718XX_BUCK6,
+	BD718XX_BUCK7,
+	BD718XX_BUCK8,
+	BD718XX_LDO1,
+	BD718XX_LDO2,
+	BD718XX_LDO3,
+	BD718XX_LDO4,
+	BD718XX_LDO5,
+	BD718XX_LDO6,
+	BD718XX_LDO7,
+	BD718XX_REGULATOR_AMOUNT,
+};
+
+/* Common voltage configurations
+ *
+ * Note, we support only one range of voltages for each buck/LDO until we
+ * get pickable ranges support. (See range selection bits for BUCK5 and
+ * LDO1. On BD71847 also the second no DVS buck and LDO5)
+ */
+
+#define BD718XX_DVS_BUCK_VOLTAGE_NUM		0x3D
+#define BD718XX_1ST_NODVS_BUCK_VOLTAGE_NUM	0x08
+#define BD71837_4TH_NODVS_BUCK_VOLTAGE_NUM	0x3D
+
+#define BD718XX_LDO1_VOLTAGE_NUM	0x04
+#define BD718XX_LDO2_VOLTAGE_NUM	0x02
+#define BD718XX_LDO3_VOLTAGE_NUM	0x10
+#define BD718XX_LDO4_VOLTAGE_NUM	0x0A
+#define BD718XX_LDO5_VOLTAGE_NUM	0x10
+#define BD718XX_LDO6_VOLTAGE_NUM	0x0A
 
-#define BD71837_BUCK5_VOLTAGE_NUM	0x08
+/* BD71837 specific voltage configurations */
 #define BD71837_BUCK6_VOLTAGE_NUM	0x04
 #define BD71837_BUCK7_VOLTAGE_NUM	0x08
-#define BD71837_BUCK8_VOLTAGE_NUM	0x40
-
-#define BD71837_LDO1_VOLTAGE_NUM	0x04
-#define BD71837_LDO2_VOLTAGE_NUM	0x02
-#define BD71837_LDO3_VOLTAGE_NUM	0x10
-#define BD71837_LDO4_VOLTAGE_NUM	0x10
-#define BD71837_LDO5_VOLTAGE_NUM	0x10
-#define BD71837_LDO6_VOLTAGE_NUM	0x10
 #define BD71837_LDO7_VOLTAGE_NUM	0x10
 
+/* BD71847 specific voltage configurations */
+#define BD71847_BUCK4_VOLTAGE_NUM	0x04
+
+/* Registers specific to BD71837 */
 enum {
-	BD71837_REG_REV                = 0x00,
-	BD71837_REG_SWRESET            = 0x01,
-	BD71837_REG_I2C_DEV            = 0x02,
-	BD71837_REG_PWRCTRL0           = 0x03,
-	BD71837_REG_PWRCTRL1           = 0x04,
-	BD71837_REG_BUCK1_CTRL         = 0x05,
-	BD71837_REG_BUCK2_CTRL         = 0x06,
-	BD71837_REG_BUCK3_CTRL         = 0x07,
-	BD71837_REG_BUCK4_CTRL         = 0x08,
-	BD71837_REG_BUCK5_CTRL         = 0x09,
-	BD71837_REG_BUCK6_CTRL         = 0x0A,
-	BD71837_REG_BUCK7_CTRL         = 0x0B,
-	BD71837_REG_BUCK8_CTRL         = 0x0C,
-	BD71837_REG_BUCK1_VOLT_RUN     = 0x0D,
-	BD71837_REG_BUCK1_VOLT_IDLE    = 0x0E,
-	BD71837_REG_BUCK1_VOLT_SUSP    = 0x0F,
-	BD71837_REG_BUCK2_VOLT_RUN     = 0x10,
-	BD71837_REG_BUCK2_VOLT_IDLE    = 0x11,
-	BD71837_REG_BUCK3_VOLT_RUN     = 0x12,
-	BD71837_REG_BUCK4_VOLT_RUN     = 0x13,
-	BD71837_REG_BUCK5_VOLT         = 0x14,
-	BD71837_REG_BUCK6_VOLT         = 0x15,
-	BD71837_REG_BUCK7_VOLT         = 0x16,
-	BD71837_REG_BUCK8_VOLT         = 0x17,
-	BD71837_REG_LDO1_VOLT          = 0x18,
-	BD71837_REG_LDO2_VOLT          = 0x19,
-	BD71837_REG_LDO3_VOLT          = 0x1A,
-	BD71837_REG_LDO4_VOLT          = 0x1B,
-	BD71837_REG_LDO5_VOLT          = 0x1C,
-	BD71837_REG_LDO6_VOLT          = 0x1D,
-	BD71837_REG_LDO7_VOLT          = 0x1E,
-	BD71837_REG_TRANS_COND0        = 0x1F,
-	BD71837_REG_TRANS_COND1        = 0x20,
-	BD71837_REG_VRFAULTEN          = 0x21,
-	BD718XX_REG_MVRFLTMASK0        = 0x22,
-	BD718XX_REG_MVRFLTMASK1        = 0x23,
-	BD718XX_REG_MVRFLTMASK2        = 0x24,
-	BD71837_REG_RCVCFG             = 0x25,
-	BD71837_REG_RCVNUM             = 0x26,
-	BD71837_REG_PWRONCONFIG0       = 0x27,
-	BD71837_REG_PWRONCONFIG1       = 0x28,
-	BD71837_REG_RESETSRC           = 0x29,
-	BD71837_REG_MIRQ               = 0x2A,
-	BD71837_REG_IRQ                = 0x2B,
-	BD71837_REG_IN_MON             = 0x2C,
-	BD71837_REG_POW_STATE          = 0x2D,
-	BD71837_REG_OUT32K             = 0x2E,
-	BD71837_REG_REGLOCK            = 0x2F,
-	BD71837_REG_OTPVER             = 0xFF,
-	BD71837_MAX_REGISTER           = 0x100,
+	BD71837_REG_BUCK3_CTRL =	0x07,
+	BD71837_REG_BUCK4_CTRL =	0x08,
+	BD71837_REG_BUCK3_VOLT_RUN =	0x12,
+	BD71837_REG_BUCK4_VOLT_RUN =	0x13,
+	BD71837_REG_LDO7_VOLT =		0x1E,
+};
+
+/* Registers common for BD71837 and BD71847 */
+enum {
+	BD718XX_REG_REV =			0x00,
+	BD718XX_REG_SWRESET =			0x01,
+	BD718XX_REG_I2C_DEV =			0x02,
+	BD718XX_REG_PWRCTRL0 =			0x03,
+	BD718XX_REG_PWRCTRL1 =			0x04,
+	BD718XX_REG_BUCK1_CTRL =		0x05,
+	BD718XX_REG_BUCK2_CTRL =		0x06,
+	BD718XX_REG_1ST_NODVS_BUCK_CTRL =	0x09,
+	BD718XX_REG_2ND_NODVS_BUCK_CTRL =	0x0A,
+	BD718XX_REG_3RD_NODVS_BUCK_CTRL =	0x0B,
+	BD718XX_REG_4TH_NODVS_BUCK_CTRL =	0x0C,
+	BD718XX_REG_BUCK1_VOLT_RUN =		0x0D,
+	BD718XX_REG_BUCK1_VOLT_IDLE =		0x0E,
+	BD718XX_REG_BUCK1_VOLT_SUSP =		0x0F,
+	BD718XX_REG_BUCK2_VOLT_RUN =		0x10,
+	BD718XX_REG_BUCK2_VOLT_IDLE =		0x11,
+	BD718XX_REG_1ST_NODVS_BUCK_VOLT =	0x14,
+	BD718XX_REG_2ND_NODVS_BUCK_VOLT =	0x15,
+	BD718XX_REG_3RD_NODVS_BUCK_VOLT =	0x16,
+	BD718XX_REG_4TH_NODVS_BUCK_VOLT =	0x17,
+	BD718XX_REG_LDO1_VOLT =			0x18,
+	BD718XX_REG_LDO2_VOLT =			0x19,
+	BD718XX_REG_LDO3_VOLT =			0x1A,
+	BD718XX_REG_LDO4_VOLT =			0x1B,
+	BD718XX_REG_LDO5_VOLT =			0x1C,
+	BD718XX_REG_LDO6_VOLT =			0x1D,
+	BD718XX_REG_TRANS_COND0 =		0x1F,
+	BD718XX_REG_TRANS_COND1 =		0x20,
+	BD718XX_REG_VRFAULTEN =			0x21,
+	BD718XX_REG_MVRFLTMASK0 =		0x22,
+	BD718XX_REG_MVRFLTMASK1 =		0x23,
+	BD718XX_REG_MVRFLTMASK2 =		0x24,
+	BD718XX_REG_RCVCFG =			0x25,
+	BD718XX_REG_RCVNUM =			0x26,
+	BD718XX_REG_PWRONCONFIG0 =		0x27,
+	BD718XX_REG_PWRONCONFIG1 =		0x28,
+	BD718XX_REG_RESETSRC =			0x29,
+	BD718XX_REG_MIRQ =			0x2A,
+	BD718XX_REG_IRQ =			0x2B,
+	BD718XX_REG_IN_MON =			0x2C,
+	BD718XX_REG_POW_STATE =			0x2D,
+	BD718XX_REG_OUT32K =			0x2E,
+	BD718XX_REG_REGLOCK =			0x2F,
+	BD718XX_REG_OTPVER =			0xFF,
+	BD718XX_MAX_REGISTER =			0x100,
 };
 
 #define REGLOCK_PWRSEQ	0x1
 #define REGLOCK_VREG	0x10
 
 /* Generic BUCK control masks */
-#define BD71837_BUCK_SEL	0x02
-#define BD71837_BUCK_EN		0x01
-#define BD71837_BUCK_RUN_ON	0x04
+#define BD718XX_BUCK_SEL	0x02
+#define BD718XX_BUCK_EN		0x01
+#define BD718XX_BUCK_RUN_ON	0x04
 
 /* Generic LDO masks */
-#define BD71837_LDO_SEL		0x80
-#define BD71837_LDO_EN		0x40
+#define BD718XX_LDO_SEL		0x80
+#define BD718XX_LDO_EN		0x40
 
 /* BD71837 BUCK ramp rate CTRL reg bits */
 #define BUCK_RAMPRATE_MASK	0xC0
@@ -115,49 +134,27 @@ enum {
 #define BUCK_RAMPRATE_2P50MV	0x2
 #define BUCK_RAMPRATE_1P25MV	0x3
 
-/* BD71837_REG_BUCK1_VOLT_RUN bits */
-#define BUCK1_RUN_MASK		0x3F
-#define BUCK1_RUN_DEFAULT	0x14
-
-/* BD71837_REG_BUCK1_VOLT_SUSP bits */
-#define BUCK1_SUSP_MASK		0x3F
-#define BUCK1_SUSP_DEFAULT	0x14
+#define DVS_BUCK_RUN_MASK	0x3F
+#define DVS_BUCK_SUSP_MASK	0x3F
+#define DVS_BUCK_IDLE_MASK	0x3F
 
-/* BD71837_REG_BUCK1_VOLT_IDLE bits */
-#define BUCK1_IDLE_MASK		0x3F
-#define BUCK1_IDLE_DEFAULT	0x14
+#define BD718XX_1ST_NODVS_BUCK_MASK	0x07
+#define BD71847_BUCK4_MASK		0x03
+#define BD71837_BUCK6_MASK		0x03
+#define BD718XX_3RD_NODVS_BUCK_MASK	0x07
+#define BD718XX_4TH_NODVS_BUCK_MASK	0x3F
 
-/* BD71837_REG_BUCK2_VOLT_RUN bits */
-#define BUCK2_RUN_MASK		0x3F
-#define BUCK2_RUN_DEFAULT	0x1E
+#define BD718XX_LDO1_MASK		0x03
+#define BD718XX_LDO2_MASK		0x20
+#define BD718XX_LDO3_MASK		0x0F
+#define BD718XX_LDO4_MASK		0x0F
+#define BD718XX_LDO6_MASK		0x0F
 
-/* BD71837_REG_BUCK2_VOLT_IDLE bits */
-#define BUCK2_IDLE_MASK		0x3F
-#define BUCK2_IDLE_DEFAULT	0x14
+#define BD71837_LDO5_MASK		0x0F
+#define BD71847_LDO5_MASK		0x0F
 
-/* BD71837_REG_BUCK3_VOLT_RUN bits */
-#define BUCK3_RUN_MASK		0x3F
-#define BUCK3_RUN_DEFAULT	0x1E
+#define BD71837_LDO7_MASK		0x0F
 
-/* BD71837_REG_BUCK4_VOLT_RUN bits */
-#define BUCK4_RUN_MASK		0x3F
-#define BUCK4_RUN_DEFAULT	0x1E
-
-/* BD71837_REG_BUCK5_VOLT bits */
-#define BUCK5_MASK		0x07
-#define BUCK5_DEFAULT		0x02
-
-/* BD71837_REG_BUCK6_VOLT bits */
-#define BUCK6_MASK		0x03
-#define BUCK6_DEFAULT		0x03
-
-/* BD71837_REG_BUCK7_VOLT bits */
-#define BUCK7_MASK		0x07
-#define BUCK7_DEFAULT		0x03
-
-/* BD71837_REG_BUCK8_VOLT bits */
-#define BUCK8_MASK		0x3F
-#define BUCK8_DEFAULT		0x1E
 
 /* BD718XX Voltage monitoring masks */
 #define BD718XX_BUCK1_VRMON80           0x1
@@ -221,27 +218,6 @@ enum {
 #define BD71837_INT_ON_REQ_MASK		0x2
 #define BD71837_INT_STBY_REQ_MASK	0x1
 
-/* BD71837_REG_LDO1_VOLT bits */
-#define LDO1_MASK		0x03
-
-/* BD71837_REG_LDO1_VOLT bits */
-#define LDO2_MASK		0x20
-
-/* BD71837_REG_LDO3_VOLT bits */
-#define LDO3_MASK		0x0F
-
-/* BD71837_REG_LDO4_VOLT bits */
-#define LDO4_MASK		0x0F
-
-/* BD71837_REG_LDO5_VOLT bits */
-#define LDO5_MASK		0x0F
-
-/* BD71837_REG_LDO6_VOLT bits */
-#define LDO6_MASK		0x0F
-
-/* BD71837_REG_LDO7_VOLT bits */
-#define LDO7_MASK		0x0F
-
 /* Register write induced reset settings */
 
 /*
@@ -341,10 +317,11 @@ enum {
 	BD718XX_PWRBTN_LONG_PRESS_15S
 };
 
-struct bd71837_pmic;
-struct bd71837_clk;
+struct bd718xx_pmic;
+struct bd718xx_clk;
 
 struct bd71837 {
+	unsigned int chip_type;
 	struct device *dev;
 	struct regmap *regmap;
 	unsigned long int id;
@@ -352,8 +329,8 @@ struct bd71837 {
 	int chip_irq;
 	struct regmap_irq_chip_data *irq_data;
 
-	struct bd71837_pmic *pmic;
-	struct bd71837_clk *clk;
+	struct bd718xx_pmic *pmic;
+	struct bd718xx_clk *clk;
 };
 
 #endif /* __LINUX_MFD_BD71837_H__ */
-- 
cgit v1.2.3


From 18e4b55fbd2069cee51ef9660b35c65ec13bee6d Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Fri, 14 Sep 2018 11:31:36 +0300
Subject: regulator: Support regulators where voltage ranges are selectable

For example ROHM BD71837 and ROHM BD71847 Power management ICs have
regulators which provide multiple linear ranges. Ranges can be
selected by individual non contagious bit in vsel register. Add
regmap helper functions for selecting ranges.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/core.c         |   5 +
 drivers/regulator/helpers.c      | 232 +++++++++++++++++++++++++++++++++++++++
 include/linux/regulator/driver.h |  20 +++-
 3 files changed, 256 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 9577d8941846..4b1755a8ef00 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -2783,6 +2783,11 @@ static int regulator_map_voltage(struct regulator_dev *rdev, int min_uV,
 	if (desc->ops->list_voltage == regulator_list_voltage_linear_range)
 		return regulator_map_voltage_linear_range(rdev, min_uV, max_uV);
 
+	if (desc->ops->list_voltage ==
+		regulator_list_voltage_pickable_linear_range)
+		return regulator_map_voltage_pickable_linear_range(rdev,
+							min_uV, max_uV);
+
 	return regulator_map_voltage_iterate(rdev, min_uV, max_uV);
 }
 
diff --git a/drivers/regulator/helpers.c b/drivers/regulator/helpers.c
index 2ae7c3ac5940..d2b9fc359318 100644
--- a/drivers/regulator/helpers.c
+++ b/drivers/regulator/helpers.c
@@ -103,6 +103,128 @@ int regulator_disable_regmap(struct regulator_dev *rdev)
 }
 EXPORT_SYMBOL_GPL(regulator_disable_regmap);
 
+static int regulator_range_selector_to_index(struct regulator_dev *rdev,
+					     unsigned int rval)
+{
+	int i;
+
+	if (!rdev->desc->linear_range_selectors)
+		return -EINVAL;
+
+	rval &= rdev->desc->vsel_range_mask;
+
+	for (i = 0; i < rdev->desc->n_linear_ranges; i++) {
+		if (rdev->desc->linear_range_selectors[i] == rval)
+			return i;
+	}
+	return -EINVAL;
+}
+
+/**
+ * regulator_get_voltage_sel_pickable_regmap - pickable range get_voltage_sel
+ *
+ * @rdev: regulator to operate on
+ *
+ * Regulators that use regmap for their register I/O and use pickable
+ * ranges can set the vsel_reg, vsel_mask, vsel_range_reg and vsel_range_mask
+ * fields in their descriptor and then use this as their get_voltage_vsel
+ * operation, saving some code.
+ */
+int regulator_get_voltage_sel_pickable_regmap(struct regulator_dev *rdev)
+{
+	unsigned int r_val;
+	int range;
+	unsigned int val;
+	int ret, i;
+	unsigned int voltages_in_range = 0;
+
+	if (!rdev->desc->linear_ranges)
+		return -EINVAL;
+
+	ret = regmap_read(rdev->regmap, rdev->desc->vsel_reg, &val);
+	if (ret != 0)
+		return ret;
+
+	ret = regmap_read(rdev->regmap, rdev->desc->vsel_range_reg, &r_val);
+	if (ret != 0)
+		return ret;
+
+	val &= rdev->desc->vsel_mask;
+	val >>= ffs(rdev->desc->vsel_mask) - 1;
+
+	range = regulator_range_selector_to_index(rdev, r_val);
+	if (range < 0)
+		return -EINVAL;
+
+	for (i = 0; i < range; i++)
+		voltages_in_range += (rdev->desc->linear_ranges[i].max_sel -
+				     rdev->desc->linear_ranges[i].min_sel) + 1;
+
+	return val + voltages_in_range;
+}
+EXPORT_SYMBOL_GPL(regulator_get_voltage_sel_pickable_regmap);
+
+/**
+ * regulator_set_voltage_sel_pickable_regmap - pickable range set_voltage_sel
+ *
+ * @rdev: regulator to operate on
+ * @sel: Selector to set
+ *
+ * Regulators that use regmap for their register I/O and use pickable
+ * ranges can set the vsel_reg, vsel_mask, vsel_range_reg and vsel_range_mask
+ * fields in their descriptor and then use this as their set_voltage_vsel
+ * operation, saving some code.
+ */
+int regulator_set_voltage_sel_pickable_regmap(struct regulator_dev *rdev,
+					      unsigned int sel)
+{
+	unsigned int range;
+	int ret, i;
+	unsigned int voltages_in_range = 0;
+
+	for (i = 0; i < rdev->desc->n_linear_ranges; i++) {
+		voltages_in_range = (rdev->desc->linear_ranges[i].max_sel -
+				     rdev->desc->linear_ranges[i].min_sel) + 1;
+		if (sel < voltages_in_range)
+			break;
+		sel -= voltages_in_range;
+	}
+
+	if (i == rdev->desc->n_linear_ranges)
+		return -EINVAL;
+
+	sel <<= ffs(rdev->desc->vsel_mask) - 1;
+	sel += rdev->desc->linear_ranges[i].min_sel;
+
+	range = rdev->desc->linear_range_selectors[i];
+
+	if (rdev->desc->vsel_reg == rdev->desc->vsel_range_reg) {
+		ret = regmap_update_bits(rdev->regmap,
+					 rdev->desc->vsel_reg,
+					 rdev->desc->vsel_range_mask |
+					 rdev->desc->vsel_mask, sel | range);
+	} else {
+		ret = regmap_update_bits(rdev->regmap,
+					 rdev->desc->vsel_range_reg,
+					 rdev->desc->vsel_range_mask, range);
+		if (ret)
+			return ret;
+
+		ret = regmap_update_bits(rdev->regmap, rdev->desc->vsel_reg,
+				  rdev->desc->vsel_mask, sel);
+	}
+
+	if (ret)
+		return ret;
+
+	if (rdev->desc->apply_bit)
+		ret = regmap_update_bits(rdev->regmap, rdev->desc->apply_reg,
+					 rdev->desc->apply_bit,
+					 rdev->desc->apply_bit);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_set_voltage_sel_pickable_regmap);
+
 /**
  * regulator_get_voltage_sel_regmap - standard get_voltage_sel for regmap users
  *
@@ -336,6 +458,76 @@ int regulator_map_voltage_linear_range(struct regulator_dev *rdev,
 }
 EXPORT_SYMBOL_GPL(regulator_map_voltage_linear_range);
 
+/**
+ * regulator_map_voltage_pickable_linear_range - map_voltage, pickable ranges
+ *
+ * @rdev: Regulator to operate on
+ * @min_uV: Lower bound for voltage
+ * @max_uV: Upper bound for voltage
+ *
+ * Drivers providing pickable linear_ranges in their descriptor can use
+ * this as their map_voltage() callback.
+ */
+int regulator_map_voltage_pickable_linear_range(struct regulator_dev *rdev,
+						int min_uV, int max_uV)
+{
+	const struct regulator_linear_range *range;
+	int ret = -EINVAL;
+	int voltage, i;
+	unsigned int selector = 0;
+
+	if (!rdev->desc->n_linear_ranges) {
+		BUG_ON(!rdev->desc->n_linear_ranges);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < rdev->desc->n_linear_ranges; i++) {
+		int linear_max_uV;
+
+		range = &rdev->desc->linear_ranges[i];
+		linear_max_uV = range->min_uV +
+			(range->max_sel - range->min_sel) * range->uV_step;
+
+		if (!(min_uV <= linear_max_uV && max_uV >= range->min_uV)) {
+			selector += (range->max_sel - range->min_sel + 1);
+			continue;
+		}
+
+		if (min_uV <= range->min_uV)
+			min_uV = range->min_uV;
+
+		/* range->uV_step == 0 means fixed voltage range */
+		if (range->uV_step == 0) {
+			ret = 0;
+		} else {
+			ret = DIV_ROUND_UP(min_uV - range->min_uV,
+					   range->uV_step);
+			if (ret < 0)
+				return ret;
+		}
+
+		ret += selector;
+
+		voltage = rdev->desc->ops->list_voltage(rdev, ret);
+
+		/*
+		 * Map back into a voltage to verify we're still in bounds.
+		 * We may have overlapping voltage ranges. Hence we don't
+		 * exit but retry until we have checked all ranges.
+		 */
+		if (voltage < min_uV || voltage > max_uV)
+			selector += (range->max_sel - range->min_sel + 1);
+		else
+			break;
+	}
+
+	if (i == rdev->desc->n_linear_ranges)
+		return -EINVAL;
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regulator_map_voltage_pickable_linear_range);
+
 /**
  * regulator_list_voltage_linear - List voltages with simple calculation
  *
@@ -360,6 +552,46 @@ int regulator_list_voltage_linear(struct regulator_dev *rdev,
 }
 EXPORT_SYMBOL_GPL(regulator_list_voltage_linear);
 
+/**
+ * regulator_list_voltage_pickable_linear_range - pickable range list voltages
+ *
+ * @rdev: Regulator device
+ * @selector: Selector to convert into a voltage
+ *
+ * list_voltage() operation, intended to be used by drivers utilizing pickable
+ * ranges helpers.
+ */
+int regulator_list_voltage_pickable_linear_range(struct regulator_dev *rdev,
+						 unsigned int selector)
+{
+	const struct regulator_linear_range *range;
+	int i;
+	unsigned int all_sels = 0;
+
+	if (!rdev->desc->n_linear_ranges) {
+		BUG_ON(!rdev->desc->n_linear_ranges);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < rdev->desc->n_linear_ranges; i++) {
+		unsigned int sels_in_range;
+
+		range = &rdev->desc->linear_ranges[i];
+
+		sels_in_range = range->max_sel - range->min_sel;
+
+		if (all_sels + sels_in_range >= selector) {
+			selector -= all_sels;
+			return range->min_uV + (range->uV_step * selector);
+		}
+
+		all_sels += (sels_in_range + 1);
+	}
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(regulator_list_voltage_pickable_linear_range);
+
 /**
  * regulator_list_voltage_linear_range - List voltages for linear ranges
  *
diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h
index 0fd8fbb74763..a9c030192147 100644
--- a/include/linux/regulator/driver.h
+++ b/include/linux/regulator/driver.h
@@ -271,9 +271,16 @@ enum regulator_type {
  * @ramp_delay: Time to settle down after voltage change (unit: uV/us)
  * @min_dropout_uV: The minimum dropout voltage this regulator can handle
  * @linear_ranges: A constant table of possible voltage ranges.
- * @n_linear_ranges: Number of entries in the @linear_ranges table.
+ * @linear_range_selectors: A constant table of voltage range selectors.
+ *			    If pickable ranges are used each range must
+ *			    have corresponding selector here.
+ * @n_linear_ranges: Number of entries in the @linear_ranges (and in
+ *		     linear_range_selectors if used) table(s).
  * @volt_table: Voltage mapping table (if table based mapping)
  *
+ * @vsel_range_reg: Register for range selector when using pickable ranges
+ *		    and regulator_regmap_X_voltage_X_pickable functions.
+ * @vsel_range_mask: Mask for register bitfield used for range selector
  * @vsel_reg: Register for selector when using regulator_regmap_X_voltage_
  * @vsel_mask: Mask for register bitfield used for selector
  * @csel_reg: Register for TPS65218 LS3 current regulator
@@ -338,10 +345,14 @@ struct regulator_desc {
 	int min_dropout_uV;
 
 	const struct regulator_linear_range *linear_ranges;
+	const unsigned int *linear_range_selectors;
+
 	int n_linear_ranges;
 
 	const unsigned int *volt_table;
 
+	unsigned int vsel_range_reg;
+	unsigned int vsel_range_mask;
 	unsigned int vsel_reg;
 	unsigned int vsel_mask;
 	unsigned int csel_reg;
@@ -498,18 +509,25 @@ int regulator_mode_to_status(unsigned int);
 
 int regulator_list_voltage_linear(struct regulator_dev *rdev,
 				  unsigned int selector);
+int regulator_list_voltage_pickable_linear_range(struct regulator_dev *rdev,
+						   unsigned int selector);
 int regulator_list_voltage_linear_range(struct regulator_dev *rdev,
 					unsigned int selector);
 int regulator_list_voltage_table(struct regulator_dev *rdev,
 				  unsigned int selector);
 int regulator_map_voltage_linear(struct regulator_dev *rdev,
 				  int min_uV, int max_uV);
+int regulator_map_voltage_pickable_linear_range(struct regulator_dev *rdev,
+						  int min_uV, int max_uV);
 int regulator_map_voltage_linear_range(struct regulator_dev *rdev,
 				       int min_uV, int max_uV);
 int regulator_map_voltage_iterate(struct regulator_dev *rdev,
 				  int min_uV, int max_uV);
 int regulator_map_voltage_ascend(struct regulator_dev *rdev,
 				  int min_uV, int max_uV);
+int regulator_get_voltage_sel_pickable_regmap(struct regulator_dev *rdev);
+int regulator_set_voltage_sel_pickable_regmap(struct regulator_dev *rdev,
+						unsigned int sel);
 int regulator_get_voltage_sel_regmap(struct regulator_dev *rdev);
 int regulator_set_voltage_sel_regmap(struct regulator_dev *rdev, unsigned sel);
 int regulator_is_enabled_regmap(struct regulator_dev *rdev);
-- 
cgit v1.2.3


From dd2be639f4a918b335818bf22a937956e552b957 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Fri, 14 Sep 2018 11:32:26 +0300
Subject: regulator/mfd: bd718xx: rename bd71837/bd71847 common instances

Rename parts of code that support both BD71837 and BD71847 to BD718XX.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/mfd/rohm-bd718x7.c            | 98 +++++++++++++++++------------------
 drivers/regulator/bd71837-regulator.c | 26 +++++-----
 include/linux/mfd/rohm-bd718x7.h      | 64 +++++++++++------------
 3 files changed, 94 insertions(+), 94 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/rohm-bd718x7.c b/drivers/mfd/rohm-bd718x7.c
index ce5aa5cd3545..161c8aac6d86 100644
--- a/drivers/mfd/rohm-bd718x7.c
+++ b/drivers/mfd/rohm-bd718x7.c
@@ -2,9 +2,9 @@
 //
 // Copyright (C) 2018 ROHM Semiconductors
 //
-// ROHM BD71837MWV PMIC driver
+// ROHM BD71837MWV and BD71847MWV PMIC driver
 //
-// Datasheet available from
+// Datasheet for BD71837MWV available from
 // https://www.rohm.com/datasheet/BD71837MWV/bd71837mwv-e
 
 #include <linux/gpio_keys.h>
@@ -30,7 +30,7 @@ static struct gpio_keys_platform_data bd718xx_powerkey_data = {
 	.name = "bd718xx-pwrkey",
 };
 
-static struct mfd_cell bd71837_mfd_cells[] = {
+static struct mfd_cell bd718xx_mfd_cells[] = {
 	{
 		.name = "gpio-keys",
 		.platform_data = &bd718xx_powerkey_data,
@@ -40,20 +40,20 @@ static struct mfd_cell bd71837_mfd_cells[] = {
 	{ .name = "bd718xx-pmic", },
 };
 
-static const struct regmap_irq bd71837_irqs[] = {
-	REGMAP_IRQ_REG(BD71837_INT_SWRST, 0, BD71837_INT_SWRST_MASK),
-	REGMAP_IRQ_REG(BD71837_INT_PWRBTN_S, 0, BD71837_INT_PWRBTN_S_MASK),
-	REGMAP_IRQ_REG(BD71837_INT_PWRBTN_L, 0, BD71837_INT_PWRBTN_L_MASK),
-	REGMAP_IRQ_REG(BD71837_INT_PWRBTN, 0, BD71837_INT_PWRBTN_MASK),
-	REGMAP_IRQ_REG(BD71837_INT_WDOG, 0, BD71837_INT_WDOG_MASK),
-	REGMAP_IRQ_REG(BD71837_INT_ON_REQ, 0, BD71837_INT_ON_REQ_MASK),
-	REGMAP_IRQ_REG(BD71837_INT_STBY_REQ, 0, BD71837_INT_STBY_REQ_MASK),
+static const struct regmap_irq bd718xx_irqs[] = {
+	REGMAP_IRQ_REG(BD718XX_INT_SWRST, 0, BD718XX_INT_SWRST_MASK),
+	REGMAP_IRQ_REG(BD718XX_INT_PWRBTN_S, 0, BD718XX_INT_PWRBTN_S_MASK),
+	REGMAP_IRQ_REG(BD718XX_INT_PWRBTN_L, 0, BD718XX_INT_PWRBTN_L_MASK),
+	REGMAP_IRQ_REG(BD718XX_INT_PWRBTN, 0, BD718XX_INT_PWRBTN_MASK),
+	REGMAP_IRQ_REG(BD718XX_INT_WDOG, 0, BD718XX_INT_WDOG_MASK),
+	REGMAP_IRQ_REG(BD718XX_INT_ON_REQ, 0, BD718XX_INT_ON_REQ_MASK),
+	REGMAP_IRQ_REG(BD718XX_INT_STBY_REQ, 0, BD718XX_INT_STBY_REQ_MASK),
 };
 
-static struct regmap_irq_chip bd71837_irq_chip = {
-	.name = "bd71837-irq",
-	.irqs = bd71837_irqs,
-	.num_irqs = ARRAY_SIZE(bd71837_irqs),
+static struct regmap_irq_chip bd718xx_irq_chip = {
+	.name = "bd718xx-irq",
+	.irqs = bd718xx_irqs,
+	.num_irqs = ARRAY_SIZE(bd718xx_irqs),
 	.num_regs = 1,
 	.irq_reg_stride = 1,
 	.status_base = BD718XX_REG_IRQ,
@@ -73,7 +73,7 @@ static const struct regmap_access_table volatile_regs = {
 	.n_yes_ranges = 1,
 };
 
-static const struct regmap_config bd71837_regmap_config = {
+static const struct regmap_config bd718xx_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
 	.volatile_table = &volatile_regs,
@@ -81,10 +81,10 @@ static const struct regmap_config bd71837_regmap_config = {
 	.cache_type = REGCACHE_RBTREE,
 };
 
-static int bd71837_i2c_probe(struct i2c_client *i2c,
+static int bd718xx_i2c_probe(struct i2c_client *i2c,
 			    const struct i2c_device_id *id)
 {
-	struct bd71837 *bd71837;
+	struct bd718xx *bd718xx;
 	int ret;
 
 	if (!i2c->irq) {
@@ -92,33 +92,33 @@ static int bd71837_i2c_probe(struct i2c_client *i2c,
 		return -EINVAL;
 	}
 
-	bd71837 = devm_kzalloc(&i2c->dev, sizeof(struct bd71837), GFP_KERNEL);
+	bd718xx = devm_kzalloc(&i2c->dev, sizeof(struct bd718xx), GFP_KERNEL);
 
-	if (!bd71837)
+	if (!bd718xx)
 		return -ENOMEM;
 
-	bd71837->chip_irq = i2c->irq;
-	bd71837->chip_type = (unsigned int)(uintptr_t)
+	bd718xx->chip_irq = i2c->irq;
+	bd718xx->chip_type = (unsigned int)
 				of_device_get_match_data(&i2c->dev);
-	bd71837->dev = &i2c->dev;
-	dev_set_drvdata(&i2c->dev, bd71837);
+	bd718xx->dev = &i2c->dev;
+	dev_set_drvdata(&i2c->dev, bd718xx);
 
-	bd71837->regmap = devm_regmap_init_i2c(i2c, &bd71837_regmap_config);
-	if (IS_ERR(bd71837->regmap)) {
+	bd718xx->regmap = devm_regmap_init_i2c(i2c, &bd718xx_regmap_config);
+	if (IS_ERR(bd718xx->regmap)) {
 		dev_err(&i2c->dev, "regmap initialization failed\n");
-		return PTR_ERR(bd71837->regmap);
+		return PTR_ERR(bd718xx->regmap);
 	}
 
-	ret = devm_regmap_add_irq_chip(&i2c->dev, bd71837->regmap,
-				       bd71837->chip_irq, IRQF_ONESHOT, 0,
-				       &bd71837_irq_chip, &bd71837->irq_data);
+	ret = devm_regmap_add_irq_chip(&i2c->dev, bd718xx->regmap,
+				       bd718xx->chip_irq, IRQF_ONESHOT, 0,
+				       &bd718xx_irq_chip, &bd718xx->irq_data);
 	if (ret) {
 		dev_err(&i2c->dev, "Failed to add irq_chip\n");
 		return ret;
 	}
 
 	/* Configure short press to 10 milliseconds */
-	ret = regmap_update_bits(bd71837->regmap,
+	ret = regmap_update_bits(bd718xx->regmap,
 				 BD718XX_REG_PWRONCONFIG0,
 				 BD718XX_PWRBTN_PRESS_DURATION_MASK,
 				 BD718XX_PWRBTN_SHORT_PRESS_10MS);
@@ -129,7 +129,7 @@ static int bd71837_i2c_probe(struct i2c_client *i2c,
 	}
 
 	/* Configure long press to 10 seconds */
-	ret = regmap_update_bits(bd71837->regmap,
+	ret = regmap_update_bits(bd718xx->regmap,
 				 BD718XX_REG_PWRONCONFIG1,
 				 BD718XX_PWRBTN_PRESS_DURATION_MASK,
 				 BD718XX_PWRBTN_LONG_PRESS_10S);
@@ -140,7 +140,7 @@ static int bd71837_i2c_probe(struct i2c_client *i2c,
 		return ret;
 	}
 
-	ret = regmap_irq_get_virq(bd71837->irq_data, BD71837_INT_PWRBTN_S);
+	ret = regmap_irq_get_virq(bd718xx->irq_data, BD718XX_INT_PWRBTN_S);
 
 	if (ret < 0) {
 		dev_err(&i2c->dev, "Failed to get the IRQ\n");
@@ -149,17 +149,17 @@ static int bd71837_i2c_probe(struct i2c_client *i2c,
 
 	button.irq = ret;
 
-	ret = devm_mfd_add_devices(bd71837->dev, PLATFORM_DEVID_AUTO,
-				   bd71837_mfd_cells,
-				   ARRAY_SIZE(bd71837_mfd_cells), NULL, 0,
-				   regmap_irq_get_domain(bd71837->irq_data));
+	ret = devm_mfd_add_devices(bd718xx->dev, PLATFORM_DEVID_AUTO,
+				   bd718xx_mfd_cells,
+				   ARRAY_SIZE(bd718xx_mfd_cells), NULL, 0,
+				   regmap_irq_get_domain(bd718xx->irq_data));
 	if (ret)
 		dev_err(&i2c->dev, "Failed to create subdevices\n");
 
 	return ret;
 }
 
-static const struct of_device_id bd71837_of_match[] = {
+static const struct of_device_id bd718xx_of_match[] = {
 	{
 		.compatible = "rohm,bd71837",
 		.data = (void *)BD718XX_TYPE_BD71837,
@@ -170,30 +170,30 @@ static const struct of_device_id bd71837_of_match[] = {
 	},
 	{ }
 };
-MODULE_DEVICE_TABLE(of, bd71837_of_match);
+MODULE_DEVICE_TABLE(of, bd718xx_of_match);
 
-static struct i2c_driver bd71837_i2c_driver = {
+static struct i2c_driver bd718xx_i2c_driver = {
 	.driver = {
 		.name = "rohm-bd718x7",
-		.of_match_table = bd71837_of_match,
+		.of_match_table = bd718xx_of_match,
 	},
-	.probe = bd71837_i2c_probe,
+	.probe = bd718xx_i2c_probe,
 };
 
-static int __init bd71837_i2c_init(void)
+static int __init bd718xx_i2c_init(void)
 {
-	return i2c_add_driver(&bd71837_i2c_driver);
+	return i2c_add_driver(&bd718xx_i2c_driver);
 }
 
 /* Initialise early so consumer devices can complete system boot */
-subsys_initcall(bd71837_i2c_init);
+subsys_initcall(bd718xx_i2c_init);
 
-static void __exit bd71837_i2c_exit(void)
+static void __exit bd718xx_i2c_exit(void)
 {
-	i2c_del_driver(&bd71837_i2c_driver);
+	i2c_del_driver(&bd718xx_i2c_driver);
 }
-module_exit(bd71837_i2c_exit);
+module_exit(bd718xx_i2c_exit);
 
 MODULE_AUTHOR("Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>");
-MODULE_DESCRIPTION("ROHM BD71837 Power Management IC driver");
+MODULE_DESCRIPTION("ROHM BD71837/BD71847 Power Management IC driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/regulator/bd71837-regulator.c b/drivers/regulator/bd71837-regulator.c
index 1dbba20ede3e..8e89334f94d1 100644
--- a/drivers/regulator/bd71837-regulator.c
+++ b/drivers/regulator/bd71837-regulator.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (C) 2018 ROHM Semiconductors
-// bd71837-regulator.c ROHM BD71837MWV regulator driver
+// bd71837-regulator.c ROHM BD71837MWV/BD71847MWV regulator driver
 
 #include <linux/delay.h>
 #include <linux/err.h>
@@ -17,7 +17,7 @@
 
 struct bd718xx_pmic {
 	struct bd718xx_regulator_data *rdata;
-	struct bd71837 *mfd;
+	struct bd718xx *mfd;
 	struct platform_device *pdev;
 	struct regulator_dev *rdev[BD718XX_REGULATOR_AMOUNT];
 };
@@ -30,11 +30,11 @@ struct bd718xx_pmic {
  * 10: 2.50mV/usec	10mV 4uS
  * 11: 1.25mV/usec	10mV 8uS
  */
-static int bd71837_buck1234_set_ramp_delay(struct regulator_dev *rdev,
+static int bd718xx_buck1234_set_ramp_delay(struct regulator_dev *rdev,
 					   int ramp_delay)
 {
 	struct bd718xx_pmic *pmic = rdev_get_drvdata(rdev);
-	struct bd71837 *mfd = pmic->mfd;
+	struct bd718xx *mfd = pmic->mfd;
 	int id = rdev->desc->id;
 	unsigned int ramp_value = BUCK_RAMPRATE_10P00MV;
 
@@ -124,7 +124,7 @@ static struct regulator_ops bd718xx_dvs_buck_regulator_ops = {
 	.set_voltage_sel = regulator_set_voltage_sel_regmap,
 	.get_voltage_sel = regulator_get_voltage_sel_regmap,
 	.set_voltage_time_sel = regulator_set_voltage_time_sel,
-	.set_ramp_delay = bd71837_buck1234_set_ramp_delay,
+	.set_ramp_delay = bd718xx_buck1234_set_ramp_delay,
 };
 
 /*
@@ -378,7 +378,7 @@ static const struct bd718xx_regulator_data bd71847_regulators[] = {
 		.desc = {
 			.name = "buck5",
 			.of_match = of_match_ptr("BUCK5"),
-				.regulators_node = of_match_ptr("regulators"),
+			.regulators_node = of_match_ptr("regulators"),
 			.id = BD718XX_BUCK5,
 			.ops = &bd718xx_buck_regulator_nolinear_ops,
 			.type = REGULATOR_VOLTAGE,
@@ -404,7 +404,7 @@ static const struct bd718xx_regulator_data bd71847_regulators[] = {
 			.id = BD718XX_BUCK6,
 			.ops = &bd718xx_buck_regulator_ops,
 			.type = REGULATOR_VOLTAGE,
-			.n_voltages = BD71837_4TH_NODVS_BUCK_VOLTAGE_NUM,
+			.n_voltages = BD718XX_4TH_NODVS_BUCK_VOLTAGE_NUM,
 			.linear_ranges = bd718xx_4th_nodvs_buck_volts,
 			.n_linear_ranges =
 				ARRAY_SIZE(bd718xx_4th_nodvs_buck_volts),
@@ -732,7 +732,7 @@ static const struct bd718xx_regulator_data bd71837_regulators[] = {
 			.id = BD718XX_BUCK8,
 			.ops = &bd718xx_buck_regulator_ops,
 			.type = REGULATOR_VOLTAGE,
-			.n_voltages = BD71837_4TH_NODVS_BUCK_VOLTAGE_NUM,
+			.n_voltages = BD718XX_4TH_NODVS_BUCK_VOLTAGE_NUM,
 			.linear_ranges = bd718xx_4th_nodvs_buck_volts,
 			.n_linear_ranges =
 				ARRAY_SIZE(bd718xx_4th_nodvs_buck_volts),
@@ -923,7 +923,7 @@ struct bd718xx_pmic_inits {
 	unsigned int r_amount;
 };
 
-static int bd71837_probe(struct platform_device *pdev)
+static int bd718xx_probe(struct platform_device *pdev)
 {
 	struct bd718xx_pmic *pmic;
 	struct regulator_config config = { 0 };
@@ -1027,15 +1027,15 @@ err:
 	return err;
 }
 
-static struct platform_driver bd71837_regulator = {
+static struct platform_driver bd718xx_regulator = {
 	.driver = {
 		.name = "bd718xx-pmic",
 	},
-	.probe = bd71837_probe,
+	.probe = bd718xx_probe,
 };
 
-module_platform_driver(bd71837_regulator);
+module_platform_driver(bd718xx_regulator);
 
 MODULE_AUTHOR("Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>");
-MODULE_DESCRIPTION("BD71837 voltage regulator driver");
+MODULE_DESCRIPTION("BD71837/BD71847 voltage regulator driver");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/rohm-bd718x7.h b/include/linux/mfd/rohm-bd718x7.h
index 625e3607e069..fed5fed75732 100644
--- a/include/linux/mfd/rohm-bd718x7.h
+++ b/include/linux/mfd/rohm-bd718x7.h
@@ -1,8 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0-or-later */
 /* Copyright (C) 2018 ROHM Semiconductors */
 
-#ifndef __LINUX_MFD_BD71837_H__
-#define __LINUX_MFD_BD71837_H__
+#ifndef __LINUX_MFD_BD718XX_H__
+#define __LINUX_MFD_BD718XX_H__
 
 #include <linux/regmap.h>
 
@@ -40,7 +40,7 @@ enum {
 
 #define BD718XX_DVS_BUCK_VOLTAGE_NUM		0x3D
 #define BD718XX_1ST_NODVS_BUCK_VOLTAGE_NUM	0x08
-#define BD71837_4TH_NODVS_BUCK_VOLTAGE_NUM	0x3D
+#define BD718XX_4TH_NODVS_BUCK_VOLTAGE_NUM	0x3D
 
 #define BD718XX_LDO1_VOLTAGE_NUM	0x04
 #define BD718XX_LDO2_VOLTAGE_NUM	0x02
@@ -183,7 +183,7 @@ enum {
 #define BD71837_BUCK4_VRMON130          0x80
 #define BD71837_LDO7_VRMON80            0x40
 
-/* BD71837_REG_IRQ bits */
+/* BD718XX_REG_IRQ bits */
 #define IRQ_SWRST		0x40
 #define IRQ_PWRON_S		0x20
 #define IRQ_PWRON_L		0x10
@@ -192,31 +192,31 @@ enum {
 #define IRQ_ON_REQ		0x02
 #define IRQ_STBY_REQ		0x01
 
-/* BD71837_REG_OUT32K bits */
-#define BD71837_OUT32K_EN	0x01
+/* BD718XX_REG_OUT32K bits */
+#define BD718XX_OUT32K_EN	0x01
 
-/* BD71837 gated clock rate */
-#define BD71837_CLK_RATE 32768
+/* BD7183XX gated clock rate */
+#define BD718XX_CLK_RATE 32768
 
-/* ROHM BD71837 irqs */
+/* ROHM BD718XX irqs */
 enum {
-	BD71837_INT_STBY_REQ,
-	BD71837_INT_ON_REQ,
-	BD71837_INT_WDOG,
-	BD71837_INT_PWRBTN,
-	BD71837_INT_PWRBTN_L,
-	BD71837_INT_PWRBTN_S,
-	BD71837_INT_SWRST
+	BD718XX_INT_STBY_REQ,
+	BD718XX_INT_ON_REQ,
+	BD718XX_INT_WDOG,
+	BD718XX_INT_PWRBTN,
+	BD718XX_INT_PWRBTN_L,
+	BD718XX_INT_PWRBTN_S,
+	BD718XX_INT_SWRST
 };
 
-/* ROHM BD71837 interrupt masks */
-#define BD71837_INT_SWRST_MASK		0x40
-#define BD71837_INT_PWRBTN_S_MASK	0x20
-#define BD71837_INT_PWRBTN_L_MASK	0x10
-#define BD71837_INT_PWRBTN_MASK		0x8
-#define BD71837_INT_WDOG_MASK		0x4
-#define BD71837_INT_ON_REQ_MASK		0x2
-#define BD71837_INT_STBY_REQ_MASK	0x1
+/* ROHM BD718XX interrupt masks */
+#define BD718XX_INT_SWRST_MASK		0x40
+#define BD718XX_INT_PWRBTN_S_MASK	0x20
+#define BD718XX_INT_PWRBTN_L_MASK	0x10
+#define BD718XX_INT_PWRBTN_MASK		0x8
+#define BD718XX_INT_WDOG_MASK		0x4
+#define BD718XX_INT_ON_REQ_MASK		0x2
+#define BD718XX_INT_STBY_REQ_MASK	0x1
 
 /* Register write induced reset settings */
 
@@ -226,13 +226,13 @@ enum {
  * write 1 to it we will trigger the action. So always write 0 to it when
  * changning SWRESET action - no matter what we read from it.
  */
-#define BD71837_SWRESET_TYPE_MASK	7
-#define BD71837_SWRESET_TYPE_DISABLED	0
-#define BD71837_SWRESET_TYPE_COLD	4
-#define BD71837_SWRESET_TYPE_WARM	6
+#define BD718XX_SWRESET_TYPE_MASK	7
+#define BD718XX_SWRESET_TYPE_DISABLED	0
+#define BD718XX_SWRESET_TYPE_COLD	4
+#define BD718XX_SWRESET_TYPE_WARM	6
 
-#define BD71837_SWRESET_RESET_MASK	1
-#define BD71837_SWRESET_RESET		1
+#define BD718XX_SWRESET_RESET_MASK	1
+#define BD718XX_SWRESET_RESET		1
 
 /* Poweroff state transition conditions */
 
@@ -320,7 +320,7 @@ enum {
 struct bd718xx_pmic;
 struct bd718xx_clk;
 
-struct bd71837 {
+struct bd718xx {
 	unsigned int chip_type;
 	struct device *dev;
 	struct regmap *regmap;
@@ -333,4 +333,4 @@ struct bd71837 {
 	struct bd718xx_clk *clk;
 };
 
-#endif /* __LINUX_MFD_BD71837_H__ */
+#endif /* __LINUX_MFD_BD718XX_H__ */
-- 
cgit v1.2.3


From a4bfc2c28a21f4d5274d813b20fd015a9dc9bcfa Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Fri, 14 Sep 2018 11:33:11 +0300
Subject: regulator: bd718XX use pickable ranges

Few regulators in BD71837 and BD71847 can output voltages from
different voltage ranges. Register interface is arranged so that
used range is selected by toggling bits which are not next to actual
voltage selection bits. Then the voltage inside selected range is
determined by voltage selection bits (as usual). Support BD71837
and BD71847 selectible range voltages using new pickable ranges
helpers.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/bd71837-regulator.c | 153 +++++++++++++++++++++++++++-------
 include/linux/mfd/rohm-bd718x7.h      |  32 +++----
 2 files changed, 143 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/bd71837-regulator.c b/drivers/regulator/bd71837-regulator.c
index 8e89334f94d1..d2522d4e1505 100644
--- a/drivers/regulator/bd71837-regulator.c
+++ b/drivers/regulator/bd71837-regulator.c
@@ -78,6 +78,34 @@ static int bd718xx_set_voltage_sel_restricted(struct regulator_dev *rdev,
 	return regulator_set_voltage_sel_regmap(rdev, sel);
 }
 
+static int bd718xx_set_voltage_sel_pickable_restricted(
+		struct regulator_dev *rdev, unsigned int sel)
+{
+	if (regulator_is_enabled_regmap(rdev))
+		return -EBUSY;
+
+	return regulator_set_voltage_sel_pickable_regmap(rdev, sel);
+}
+
+static struct regulator_ops bd718xx_pickable_range_ldo_ops = {
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.list_voltage = regulator_list_voltage_pickable_linear_range,
+	.set_voltage_sel = bd718xx_set_voltage_sel_pickable_restricted,
+	.get_voltage_sel = regulator_get_voltage_sel_pickable_regmap,
+};
+
+static struct regulator_ops bd718xx_pickable_range_buck_ops = {
+	.enable = regulator_enable_regmap,
+	.disable = regulator_disable_regmap,
+	.is_enabled = regulator_is_enabled_regmap,
+	.list_voltage = regulator_list_voltage_pickable_linear_range,
+	.set_voltage_sel = bd718xx_set_voltage_sel_pickable_restricted,
+	.get_voltage_sel = regulator_get_voltage_sel_pickable_regmap,
+	.set_voltage_time_sel = regulator_set_voltage_time_sel,
+};
+
 static struct regulator_ops bd718xx_ldo_regulator_ops = {
 	.enable = regulator_enable_regmap,
 	.disable = regulator_disable_regmap,
@@ -139,24 +167,61 @@ static const struct regulator_linear_range bd718xx_dvs_buck_volts[] = {
 
 /*
  * BD71837 BUCK5
+ * 0.7V to 1.35V  (range 0)
+ * and
+ * 0.675 to 1.325 (range 1)
+ */
+static const struct regulator_linear_range bd71837_buck5_volts[] = {
+	/* Ranges when VOLT_SEL bit is 0 */
+	REGULATOR_LINEAR_RANGE(700000, 0x00, 0x03, 100000),
+	REGULATOR_LINEAR_RANGE(1050000, 0x04, 0x05, 50000),
+	REGULATOR_LINEAR_RANGE(1200000, 0x06, 0x07, 150000),
+	/* Ranges when VOLT_SEL bit is 1  */
+	REGULATOR_LINEAR_RANGE(675000, 0x0, 0x3, 100000),
+	REGULATOR_LINEAR_RANGE(1025000, 0x4, 0x5, 50000),
+	REGULATOR_LINEAR_RANGE(1175000, 0x6, 0x7, 150000),
+};
+
+/*
+ * Range selector for first 3 linear ranges is 0x0
+ * and 0x1 for last 3 ranges.
+ */
+static const unsigned int bd71837_buck5_volt_range_sel[] = {
+	0x0, 0x0, 0x0, 0x80, 0x80, 0x80
+};
+
+/*
  * BD71847 BUCK3
- * 0.7V to 1.35V ()
  */
-static const struct regulator_linear_range bd718xx_1st_nodvs_buck_volts[] = {
+static const struct regulator_linear_range bd71847_buck3_volts[] = {
+	/* Ranges when VOLT_SEL bits are 00 */
 	REGULATOR_LINEAR_RANGE(700000, 0x00, 0x03, 100000),
 	REGULATOR_LINEAR_RANGE(1050000, 0x04, 0x05, 50000),
 	REGULATOR_LINEAR_RANGE(1200000, 0x06, 0x07, 150000),
+	/* Ranges when VOLT_SEL bits are 01 */
+	REGULATOR_LINEAR_RANGE(550000, 0x0, 0x7, 50000),
+	/* Ranges when VOLT_SEL bits are 11 */
+	REGULATOR_LINEAR_RANGE(675000, 0x0, 0x3, 100000),
+	REGULATOR_LINEAR_RANGE(1025000, 0x4, 0x5, 50000),
+	REGULATOR_LINEAR_RANGE(1175000, 0x6, 0x7, 150000),
+};
+
+static const unsigned int bd71847_buck3_volt_range_sel[] = {
+	0x0, 0x0, 0x0, 0x40, 0x80, 0x80, 0x80
 };
 
-static const struct regulator_linear_range bd71847_buck4_voltage_ranges[] = {
+static const struct regulator_linear_range bd71847_buck4_volts[] = {
 	REGULATOR_LINEAR_RANGE(3000000, 0x00, 0x03, 100000),
+	REGULATOR_LINEAR_RANGE(2600000, 0x00, 0x03, 100000),
 };
 
+static const unsigned int bd71847_buck4_volt_range_sel[] = { 0x0, 0x40 };
+
 /*
  * BUCK6
  * 3.0V to 3.3V (step 100mV)
  */
-static const struct regulator_linear_range bd71837_buck6_voltage_ranges[] = {
+static const struct regulator_linear_range bd71837_buck6_volts[] = {
 	REGULATOR_LINEAR_RANGE(3000000, 0x00, 0x03, 100000),
 };
 
@@ -190,8 +255,11 @@ static const struct regulator_linear_range bd718xx_4th_nodvs_buck_volts[] = {
  */
 static const struct regulator_linear_range bd718xx_ldo1_volts[] = {
 	REGULATOR_LINEAR_RANGE(3000000, 0x00, 0x03, 100000),
+	REGULATOR_LINEAR_RANGE(1600000, 0x00, 0x03, 100000),
 };
 
+static const unsigned int bd718xx_ldo1_volt_range_sel[] = { 0x0, 0x20 };
+
 /*
  * LDO2
  * 0.8 or 0.9V
@@ -220,10 +288,21 @@ static const struct regulator_linear_range bd718xx_ldo4_volts[] = {
  * LDO5 for BD71837
  * 1.8 to 3.3V (100mV step)
  */
-static const struct regulator_linear_range bd718xx_ldo5_volts[] = {
+static const struct regulator_linear_range bd71837_ldo5_volts[] = {
 	REGULATOR_LINEAR_RANGE(1800000, 0x00, 0x0F, 100000),
 };
 
+/*
+ * LDO5 for BD71837
+ * 1.8 to 3.3V (100mV step)
+ */
+static const struct regulator_linear_range bd71847_ldo5_volts[] = {
+	REGULATOR_LINEAR_RANGE(1800000, 0x00, 0x0F, 100000),
+	REGULATOR_LINEAR_RANGE(800000, 0x00, 0x0F, 100000),
+};
+
+static const unsigned int bd71847_ldo5_volt_range_sel[] = { 0x0, 0x20 };
+
 /*
  * LDO6
  * 0.9 to 1.8V (100mV step)
@@ -332,14 +411,17 @@ static const struct bd718xx_regulator_data bd71847_regulators[] = {
 			.of_match = of_match_ptr("BUCK3"),
 			.regulators_node = of_match_ptr("regulators"),
 			.id = BD718XX_BUCK3,
-			.ops = &bd718xx_buck_regulator_ops,
+			.ops = &bd718xx_pickable_range_buck_ops,
 			.type = REGULATOR_VOLTAGE,
-			.n_voltages = BD718XX_1ST_NODVS_BUCK_VOLTAGE_NUM,
-			.linear_ranges = bd718xx_1st_nodvs_buck_volts,
+			.n_voltages = BD71847_BUCK3_VOLTAGE_NUM,
+			.linear_ranges = bd71847_buck3_volts,
 			.n_linear_ranges =
-				ARRAY_SIZE(bd718xx_1st_nodvs_buck_volts),
+				ARRAY_SIZE(bd71847_buck3_volts),
 			.vsel_reg = BD718XX_REG_1ST_NODVS_BUCK_VOLT,
 			.vsel_mask = BD718XX_1ST_NODVS_BUCK_MASK,
+			.vsel_range_reg = BD718XX_REG_1ST_NODVS_BUCK_VOLT,
+			.vsel_range_mask = BD71847_BUCK3_RANGE_MASK,
+			.linear_range_selectors = bd71847_buck3_volt_range_sel,
 			.enable_reg = BD718XX_REG_1ST_NODVS_BUCK_CTRL,
 			.enable_mask = BD718XX_BUCK_EN,
 			.owner = THIS_MODULE,
@@ -356,15 +438,18 @@ static const struct bd718xx_regulator_data bd71847_regulators[] = {
 			.of_match = of_match_ptr("BUCK4"),
 			.regulators_node = of_match_ptr("regulators"),
 			.id = BD718XX_BUCK4,
-			.ops = &bd718xx_buck_regulator_ops,
+			.ops = &bd718xx_pickable_range_buck_ops,
 			.type = REGULATOR_VOLTAGE,
 			.n_voltages = BD71847_BUCK4_VOLTAGE_NUM,
-			.linear_ranges = bd71847_buck4_voltage_ranges,
+			.linear_ranges = bd71847_buck4_volts,
 			.n_linear_ranges =
-				ARRAY_SIZE(bd71847_buck4_voltage_ranges),
+				ARRAY_SIZE(bd71847_buck4_volts),
 			.enable_reg = BD718XX_REG_2ND_NODVS_BUCK_CTRL,
 			.vsel_reg = BD718XX_REG_2ND_NODVS_BUCK_VOLT,
 			.vsel_mask = BD71847_BUCK4_MASK,
+			.vsel_range_reg = BD718XX_REG_2ND_NODVS_BUCK_VOLT,
+			.vsel_range_mask = BD71847_BUCK4_RANGE_MASK,
+			.linear_range_selectors = bd71847_buck4_volt_range_sel,
 			.enable_mask = BD718XX_BUCK_EN,
 			.owner = THIS_MODULE,
 		},
@@ -426,13 +511,16 @@ static const struct bd718xx_regulator_data bd71847_regulators[] = {
 			.of_match = of_match_ptr("LDO1"),
 			.regulators_node = of_match_ptr("regulators"),
 			.id = BD718XX_LDO1,
-			.ops = &bd718xx_ldo_regulator_ops,
+			.ops = &bd718xx_pickable_range_ldo_ops,
 			.type = REGULATOR_VOLTAGE,
 			.n_voltages = BD718XX_LDO1_VOLTAGE_NUM,
 			.linear_ranges = bd718xx_ldo1_volts,
 			.n_linear_ranges = ARRAY_SIZE(bd718xx_ldo1_volts),
 			.vsel_reg = BD718XX_REG_LDO1_VOLT,
 			.vsel_mask = BD718XX_LDO1_MASK,
+			.vsel_range_reg = BD718XX_REG_LDO1_VOLT,
+			.vsel_range_mask = BD718XX_LDO1_RANGE_MASK,
+			.linear_range_selectors = bd718xx_ldo1_volt_range_sel,
 			.enable_reg = BD718XX_REG_LDO1_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
 			.owner = THIS_MODULE,
@@ -517,13 +605,16 @@ static const struct bd718xx_regulator_data bd71847_regulators[] = {
 			.of_match = of_match_ptr("LDO5"),
 			.regulators_node = of_match_ptr("regulators"),
 			.id = BD718XX_LDO5,
-			.ops = &bd718xx_ldo_regulator_ops,
+			.ops = &bd718xx_pickable_range_ldo_ops,
 			.type = REGULATOR_VOLTAGE,
-			.n_voltages = BD718XX_LDO5_VOLTAGE_NUM,
-			.linear_ranges = bd718xx_ldo5_volts,
-			.n_linear_ranges = ARRAY_SIZE(bd718xx_ldo5_volts),
+			.n_voltages = BD71847_LDO5_VOLTAGE_NUM,
+			.linear_ranges = bd71847_ldo5_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd71847_ldo5_volts),
 			.vsel_reg = BD718XX_REG_LDO5_VOLT,
 			.vsel_mask = BD71847_LDO5_MASK,
+			.vsel_range_reg = BD718XX_REG_LDO5_VOLT,
+			.vsel_range_mask = BD71847_LDO5_RANGE_MASK,
+			.linear_range_selectors = bd71847_ldo5_volt_range_sel,
 			.enable_reg = BD718XX_REG_LDO5_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
 			.owner = THIS_MODULE,
@@ -660,14 +751,17 @@ static const struct bd718xx_regulator_data bd71837_regulators[] = {
 			.of_match = of_match_ptr("BUCK5"),
 			.regulators_node = of_match_ptr("regulators"),
 			.id = BD718XX_BUCK5,
-			.ops = &bd718xx_buck_regulator_ops,
+			.ops = &bd718xx_pickable_range_buck_ops,
 			.type = REGULATOR_VOLTAGE,
-			.n_voltages = BD718XX_1ST_NODVS_BUCK_VOLTAGE_NUM,
-			.linear_ranges = bd718xx_1st_nodvs_buck_volts,
+			.n_voltages = BD71837_BUCK5_VOLTAGE_NUM,
+			.linear_ranges = bd71837_buck5_volts,
 			.n_linear_ranges =
-				ARRAY_SIZE(bd718xx_1st_nodvs_buck_volts),
+				ARRAY_SIZE(bd71837_buck5_volts),
 			.vsel_reg = BD718XX_REG_1ST_NODVS_BUCK_VOLT,
-			.vsel_mask = BD718XX_1ST_NODVS_BUCK_MASK,
+			.vsel_mask = BD71837_BUCK5_MASK,
+			.vsel_range_reg = BD718XX_REG_1ST_NODVS_BUCK_VOLT,
+			.vsel_range_mask = BD71837_BUCK5_RANGE_MASK,
+			.linear_range_selectors = bd71837_buck5_volt_range_sel,
 			.enable_reg = BD718XX_REG_1ST_NODVS_BUCK_CTRL,
 			.enable_mask = BD718XX_BUCK_EN,
 			.owner = THIS_MODULE,
@@ -687,9 +781,9 @@ static const struct bd718xx_regulator_data bd71837_regulators[] = {
 			.ops = &bd718xx_buck_regulator_ops,
 			.type = REGULATOR_VOLTAGE,
 			.n_voltages = BD71837_BUCK6_VOLTAGE_NUM,
-			.linear_ranges = bd71837_buck6_voltage_ranges,
+			.linear_ranges = bd71837_buck6_volts,
 			.n_linear_ranges =
-				ARRAY_SIZE(bd71837_buck6_voltage_ranges),
+				ARRAY_SIZE(bd71837_buck6_volts),
 			.vsel_reg = BD718XX_REG_2ND_NODVS_BUCK_VOLT,
 			.vsel_mask = BD71837_BUCK6_MASK,
 			.enable_reg = BD718XX_REG_2ND_NODVS_BUCK_CTRL,
@@ -754,13 +848,16 @@ static const struct bd718xx_regulator_data bd71837_regulators[] = {
 			.of_match = of_match_ptr("LDO1"),
 			.regulators_node = of_match_ptr("regulators"),
 			.id = BD718XX_LDO1,
-			.ops = &bd718xx_ldo_regulator_ops,
+			.ops = &bd718xx_pickable_range_ldo_ops,
 			.type = REGULATOR_VOLTAGE,
 			.n_voltages = BD718XX_LDO1_VOLTAGE_NUM,
 			.linear_ranges = bd718xx_ldo1_volts,
 			.n_linear_ranges = ARRAY_SIZE(bd718xx_ldo1_volts),
 			.vsel_reg = BD718XX_REG_LDO1_VOLT,
 			.vsel_mask = BD718XX_LDO1_MASK,
+			.vsel_range_reg = BD718XX_REG_LDO1_VOLT,
+			.vsel_range_mask = BD718XX_LDO1_RANGE_MASK,
+			.linear_range_selectors = bd718xx_ldo1_volt_range_sel,
 			.enable_reg = BD718XX_REG_LDO1_VOLT,
 			.enable_mask = BD718XX_LDO_EN,
 			.owner = THIS_MODULE,
@@ -847,9 +944,9 @@ static const struct bd718xx_regulator_data bd71837_regulators[] = {
 			.id = BD718XX_LDO5,
 			.ops = &bd718xx_ldo_regulator_ops,
 			.type = REGULATOR_VOLTAGE,
-			.n_voltages = BD718XX_LDO5_VOLTAGE_NUM,
-			.linear_ranges = bd718xx_ldo5_volts,
-			.n_linear_ranges = ARRAY_SIZE(bd718xx_ldo5_volts),
+			.n_voltages = BD71837_LDO5_VOLTAGE_NUM,
+			.linear_ranges = bd71837_ldo5_volts,
+			.n_linear_ranges = ARRAY_SIZE(bd71837_ldo5_volts),
 			/* LDO5 is supplied by buck6 */
 			.supply_name = "buck6",
 			.vsel_reg = BD718XX_REG_LDO5_VOLT,
diff --git a/include/linux/mfd/rohm-bd718x7.h b/include/linux/mfd/rohm-bd718x7.h
index fed5fed75732..26acf9a92498 100644
--- a/include/linux/mfd/rohm-bd718x7.h
+++ b/include/linux/mfd/rohm-bd718x7.h
@@ -31,31 +31,27 @@ enum {
 	BD718XX_REGULATOR_AMOUNT,
 };
 
-/* Common voltage configurations
- *
- * Note, we support only one range of voltages for each buck/LDO until we
- * get pickable ranges support. (See range selection bits for BUCK5 and
- * LDO1. On BD71847 also the second no DVS buck and LDO5)
- */
-
+/* Common voltage configurations */
 #define BD718XX_DVS_BUCK_VOLTAGE_NUM		0x3D
-#define BD718XX_1ST_NODVS_BUCK_VOLTAGE_NUM	0x08
 #define BD718XX_4TH_NODVS_BUCK_VOLTAGE_NUM	0x3D
 
-#define BD718XX_LDO1_VOLTAGE_NUM	0x04
+#define BD718XX_LDO1_VOLTAGE_NUM	0x08
 #define BD718XX_LDO2_VOLTAGE_NUM	0x02
 #define BD718XX_LDO3_VOLTAGE_NUM	0x10
 #define BD718XX_LDO4_VOLTAGE_NUM	0x0A
-#define BD718XX_LDO5_VOLTAGE_NUM	0x10
 #define BD718XX_LDO6_VOLTAGE_NUM	0x0A
 
 /* BD71837 specific voltage configurations */
+#define BD71837_BUCK5_VOLTAGE_NUM	0x10
 #define BD71837_BUCK6_VOLTAGE_NUM	0x04
 #define BD71837_BUCK7_VOLTAGE_NUM	0x08
+#define BD71837_LDO5_VOLTAGE_NUM	0x10
 #define BD71837_LDO7_VOLTAGE_NUM	0x10
 
 /* BD71847 specific voltage configurations */
-#define BD71847_BUCK4_VOLTAGE_NUM	0x04
+#define BD71847_BUCK3_VOLTAGE_NUM	0x18
+#define BD71847_BUCK4_VOLTAGE_NUM	0x08
+#define BD71847_LDO5_VOLTAGE_NUM	0x20
 
 /* Registers specific to BD71837 */
 enum {
@@ -139,12 +135,20 @@ enum {
 #define DVS_BUCK_IDLE_MASK	0x3F
 
 #define BD718XX_1ST_NODVS_BUCK_MASK	0x07
-#define BD71847_BUCK4_MASK		0x03
-#define BD71837_BUCK6_MASK		0x03
 #define BD718XX_3RD_NODVS_BUCK_MASK	0x07
 #define BD718XX_4TH_NODVS_BUCK_MASK	0x3F
 
+#define BD71847_BUCK3_MASK		0x07
+#define BD71847_BUCK3_RANGE_MASK	0xC0
+#define BD71847_BUCK4_MASK		0x03
+#define BD71847_BUCK4_RANGE_MASK	0x40
+
+#define BD71837_BUCK5_MASK		0x07
+#define BD71837_BUCK5_RANGE_MASK	0x80
+#define BD71837_BUCK6_MASK		0x03
+
 #define BD718XX_LDO1_MASK		0x03
+#define BD718XX_LDO1_RANGE_MASK		0x20
 #define BD718XX_LDO2_MASK		0x20
 #define BD718XX_LDO3_MASK		0x0F
 #define BD718XX_LDO4_MASK		0x0F
@@ -152,10 +156,10 @@ enum {
 
 #define BD71837_LDO5_MASK		0x0F
 #define BD71847_LDO5_MASK		0x0F
+#define BD71847_LDO5_RANGE_MASK		0x20
 
 #define BD71837_LDO7_MASK		0x0F
 
-
 /* BD718XX Voltage monitoring masks */
 #define BD718XX_BUCK1_VRMON80           0x1
 #define BD718XX_BUCK1_VRMON130          0x2
-- 
cgit v1.2.3


From fef912bf860e8e7e48a2bfb978a356bba743a8b7 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Fri, 28 Sep 2018 08:17:19 +0200
Subject: block: genhd: add 'groups' argument to device_add_disk

Update device_add_disk() to take an 'groups' argument so that
individual drivers can register a device with additional sysfs
attributes.
This avoids race condition the driver would otherwise have if these
groups were to be created with sysfs_add_groups().

Signed-off-by: Martin Wilck <martin.wilck@suse.com>
Signed-off-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 arch/um/drivers/ubd_kern.c          |  2 +-
 block/genhd.c                       | 19 ++++++++++++++-----
 drivers/block/floppy.c              |  2 +-
 drivers/block/mtip32xx/mtip32xx.c   |  2 +-
 drivers/block/ps3disk.c             |  2 +-
 drivers/block/ps3vram.c             |  2 +-
 drivers/block/rsxx/dev.c            |  2 +-
 drivers/block/skd_main.c            |  2 +-
 drivers/block/sunvdc.c              |  2 +-
 drivers/block/virtio_blk.c          |  2 +-
 drivers/block/xen-blkfront.c        |  2 +-
 drivers/ide/ide-cd.c                |  2 +-
 drivers/ide/ide-gd.c                |  2 +-
 drivers/memstick/core/ms_block.c    |  2 +-
 drivers/memstick/core/mspro_block.c |  2 +-
 drivers/mmc/core/block.c            |  2 +-
 drivers/mtd/mtd_blkdevs.c           |  2 +-
 drivers/nvdimm/blk.c                |  2 +-
 drivers/nvdimm/btt.c                |  2 +-
 drivers/nvdimm/pmem.c               |  2 +-
 drivers/nvme/host/core.c            |  2 +-
 drivers/nvme/host/multipath.c       |  2 +-
 drivers/s390/block/dasd_genhd.c     |  2 +-
 drivers/s390/block/dcssblk.c        |  2 +-
 drivers/s390/block/scm_blk.c        |  2 +-
 drivers/scsi/sd.c                   |  2 +-
 drivers/scsi/sr.c                   |  2 +-
 include/linux/genhd.h               |  5 +++--
 28 files changed, 43 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 83c470364dfb..6ee4c56032f7 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -891,7 +891,7 @@ static int ubd_disk_register(int major, u64 size, int unit,
 
 	disk->private_data = &ubd_devs[unit];
 	disk->queue = ubd_devs[unit].queue;
-	device_add_disk(parent, disk);
+	device_add_disk(parent, disk, NULL);
 
 	*disk_out = disk;
 	return 0;
diff --git a/block/genhd.c b/block/genhd.c
index 8cc719a37b32..ef0936184d69 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -567,7 +567,8 @@ static int exact_lock(dev_t devt, void *data)
 	return 0;
 }
 
-static void register_disk(struct device *parent, struct gendisk *disk)
+static void register_disk(struct device *parent, struct gendisk *disk,
+			  const struct attribute_group **groups)
 {
 	struct device *ddev = disk_to_dev(disk);
 	struct block_device *bdev;
@@ -582,6 +583,10 @@ static void register_disk(struct device *parent, struct gendisk *disk)
 	/* delay uevents, until we scanned partition table */
 	dev_set_uevent_suppress(ddev, 1);
 
+	if (groups) {
+		WARN_ON(ddev->groups);
+		ddev->groups = groups;
+	}
 	if (device_add(ddev))
 		return;
 	if (!sysfs_deprecated) {
@@ -647,6 +652,7 @@ exit:
  * __device_add_disk - add disk information to kernel list
  * @parent: parent device for the disk
  * @disk: per-device partitioning information
+ * @groups: Additional per-device sysfs groups
  * @register_queue: register the queue if set to true
  *
  * This function registers the partitioning information in @disk
@@ -655,6 +661,7 @@ exit:
  * FIXME: error handling
  */
 static void __device_add_disk(struct device *parent, struct gendisk *disk,
+			      const struct attribute_group **groups,
 			      bool register_queue)
 {
 	dev_t devt;
@@ -698,7 +705,7 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
 		blk_register_region(disk_devt(disk), disk->minors, NULL,
 				    exact_match, exact_lock, disk);
 	}
-	register_disk(parent, disk);
+	register_disk(parent, disk, groups);
 	if (register_queue)
 		blk_register_queue(disk);
 
@@ -712,15 +719,17 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
 	blk_integrity_add(disk);
 }
 
-void device_add_disk(struct device *parent, struct gendisk *disk)
+void device_add_disk(struct device *parent, struct gendisk *disk,
+		     const struct attribute_group **groups)
+
 {
-	__device_add_disk(parent, disk, true);
+	__device_add_disk(parent, disk, groups, true);
 }
 EXPORT_SYMBOL(device_add_disk);
 
 void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk)
 {
-	__device_add_disk(parent, disk, false);
+	__device_add_disk(parent, disk, NULL, false);
 }
 EXPORT_SYMBOL(device_add_disk_no_queue_reg);
 
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 48f622728ce6..1bc99e9dfaee 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4676,7 +4676,7 @@ static int __init do_floppy_init(void)
 		/* to be cleaned up... */
 		disks[drive]->private_data = (void *)(long)drive;
 		disks[drive]->flags |= GENHD_FL_REMOVABLE;
-		device_add_disk(&floppy_device[drive].dev, disks[drive]);
+		device_add_disk(&floppy_device[drive].dev, disks[drive], NULL);
 	}
 
 	return 0;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index d0666f5ce003..1d7d48d8a205 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3861,7 +3861,7 @@ skip_create_disk:
 	set_capacity(dd->disk, capacity);
 
 	/* Enable the block device and add it to /dev */
-	device_add_disk(&dd->pdev->dev, dd->disk);
+	device_add_disk(&dd->pdev->dev, dd->disk, NULL);
 
 	dd->bdev = bdget_disk(dd->disk, 0);
 	/*
diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c
index afe1508d82c6..29a4419e8ba3 100644
--- a/drivers/block/ps3disk.c
+++ b/drivers/block/ps3disk.c
@@ -500,7 +500,7 @@ static int ps3disk_probe(struct ps3_system_bus_device *_dev)
 		 gendisk->disk_name, priv->model, priv->raw_capacity >> 11,
 		 get_capacity(gendisk) >> 11);
 
-	device_add_disk(&dev->sbd.core, gendisk);
+	device_add_disk(&dev->sbd.core, gendisk, NULL);
 	return 0;
 
 fail_cleanup_queue:
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index 1e3d5de9d838..c0c50816a10b 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -769,7 +769,7 @@ static int ps3vram_probe(struct ps3_system_bus_device *dev)
 	dev_info(&dev->core, "%s: Using %lu MiB of GPU memory\n",
 		 gendisk->disk_name, get_capacity(gendisk) >> 11);
 
-	device_add_disk(&dev->core, gendisk);
+	device_add_disk(&dev->core, gendisk, NULL);
 	return 0;
 
 fail_cleanup_queue:
diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c
index 1a92f9e65937..3894aa0f350b 100644
--- a/drivers/block/rsxx/dev.c
+++ b/drivers/block/rsxx/dev.c
@@ -226,7 +226,7 @@ int rsxx_attach_dev(struct rsxx_cardinfo *card)
 			set_capacity(card->gendisk, card->size8 >> 9);
 		else
 			set_capacity(card->gendisk, 0);
-		device_add_disk(CARD_TO_DEV(card), card->gendisk);
+		device_add_disk(CARD_TO_DEV(card), card->gendisk, NULL);
 		card->bdev_attached = 1;
 	}
 
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 87b9e7fbf062..a85c9a622c41 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -3104,7 +3104,7 @@ static int skd_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 static int skd_bdev_attach(struct device *parent, struct skd_device *skdev)
 {
 	dev_dbg(&skdev->pdev->dev, "add_disk\n");
-	device_add_disk(parent, skdev->disk);
+	device_add_disk(parent, skdev->disk, NULL);
 	return 0;
 }
 
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 5ca56bfae63c..09409edce384 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -850,7 +850,7 @@ static int probe_disk(struct vdc_port *port)
 	       port->vdisk_size, (port->vdisk_size >> (20 - 9)),
 	       port->vio.ver.major, port->vio.ver.minor);
 
-	device_add_disk(&port->vio.vdev->dev, g);
+	device_add_disk(&port->vio.vdev->dev, g, NULL);
 
 	return 0;
 }
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 23752dc99b00..fe80560000a1 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -780,7 +780,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 	virtblk_update_capacity(vblk, false);
 	virtio_device_ready(vdev);
 
-	device_add_disk(&vdev->dev, vblk->disk);
+	device_add_disk(&vdev->dev, vblk->disk, NULL);
 	err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
 	if (err)
 		goto out_del_disk;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a71d817e900d..e5e40272d233 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2420,7 +2420,7 @@ static void blkfront_connect(struct blkfront_info *info)
 	for (i = 0; i < info->nr_rings; i++)
 		kick_pending_request_queues(&info->rinfo[i]);
 
-	device_add_disk(&info->xbdev->dev, info->gd);
+	device_add_disk(&info->xbdev->dev, info->gd, NULL);
 
 	info->is_ready = 1;
 	return;
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 44a7a255ef74..f9b59d41813f 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1784,7 +1784,7 @@ static int ide_cd_probe(ide_drive_t *drive)
 	ide_cd_read_toc(drive);
 	g->fops = &idecd_ops;
 	g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
-	device_add_disk(&drive->gendev, g);
+	device_add_disk(&drive->gendev, g, NULL);
 	return 0;
 
 out_free_disk:
diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c
index e823394ed543..04e008e8f6f9 100644
--- a/drivers/ide/ide-gd.c
+++ b/drivers/ide/ide-gd.c
@@ -416,7 +416,7 @@ static int ide_gd_probe(ide_drive_t *drive)
 	if (drive->dev_flags & IDE_DFLAG_REMOVABLE)
 		g->flags = GENHD_FL_REMOVABLE;
 	g->fops = &ide_gd_ops;
-	device_add_disk(&drive->gendev, g);
+	device_add_disk(&drive->gendev, g, NULL);
 	return 0;
 
 out_free_disk:
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
index 716fc8ed31d3..8a02f11076f9 100644
--- a/drivers/memstick/core/ms_block.c
+++ b/drivers/memstick/core/ms_block.c
@@ -2146,7 +2146,7 @@ static int msb_init_disk(struct memstick_dev *card)
 		set_disk_ro(msb->disk, 1);
 
 	msb_start(card);
-	device_add_disk(&card->dev, msb->disk);
+	device_add_disk(&card->dev, msb->disk, NULL);
 	dbg("Disk added");
 	return 0;
 
diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c
index 5ee932631fae..0cd30dcb6801 100644
--- a/drivers/memstick/core/mspro_block.c
+++ b/drivers/memstick/core/mspro_block.c
@@ -1236,7 +1236,7 @@ static int mspro_block_init_disk(struct memstick_dev *card)
 	set_capacity(msb->disk, capacity);
 	dev_dbg(&card->dev, "capacity set %ld\n", capacity);
 
-	device_add_disk(&card->dev, msb->disk);
+	device_add_disk(&card->dev, msb->disk, NULL);
 	msb->active = 1;
 	return 0;
 
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index a0b9102c4c6e..de8e1a8be690 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -2698,7 +2698,7 @@ static int mmc_add_disk(struct mmc_blk_data *md)
 	int ret;
 	struct mmc_card *card = md->queue.card;
 
-	device_add_disk(md->parent, md->disk);
+	device_add_disk(md->parent, md->disk, NULL);
 	md->force_ro.show = force_ro_show;
 	md->force_ro.store = force_ro_store;
 	sysfs_attr_init(&md->force_ro.attr);
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 29c0bfd74e8a..6a41dfa3c36b 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -447,7 +447,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 	if (new->readonly)
 		set_disk_ro(gd, 1);
 
-	device_add_disk(&new->mtd->dev, gd);
+	device_add_disk(&new->mtd->dev, gd, NULL);
 
 	if (new->disk_attributes) {
 		ret = sysfs_create_group(&disk_to_dev(gd)->kobj,
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 62e9cb167aad..db45c6bbb7bb 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -290,7 +290,7 @@ static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
 	}
 
 	set_capacity(disk, available_disk_size >> SECTOR_SHIFT);
-	device_add_disk(dev, disk);
+	device_add_disk(dev, disk, NULL);
 	revalidate_disk(disk);
 	return 0;
 }
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 0360c015f658..b123b0dcf274 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -1556,7 +1556,7 @@ static int btt_blk_init(struct btt *btt)
 		}
 	}
 	set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
-	device_add_disk(&btt->nd_btt->dev, btt->btt_disk);
+	device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL);
 	btt->nd_btt->size = btt->nlba * (u64)btt->sector_size;
 	revalidate_disk(btt->btt_disk);
 
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 6071e2942053..a75d10c23d80 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -474,7 +474,7 @@ static int pmem_attach_disk(struct device *dev,
 	gendev = disk_to_dev(disk);
 	gendev->groups = pmem_attribute_groups;
 
-	device_add_disk(dev, disk);
+	device_add_disk(dev, disk, NULL);
 	if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
 		return -ENOMEM;
 
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index dd8ec1dd9219..0e824e8c8fd7 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3099,7 +3099,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 	nvme_get_ctrl(ctrl);
 
-	device_add_disk(ctrl->device, ns->disk);
+	device_add_disk(ctrl->device, ns->disk, NULL);
 	if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
 					&nvme_ns_id_attr_group))
 		pr_warn("%s: failed to create sysfs group for identification\n",
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 5a9562881d4e..477af51d01e8 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -283,7 +283,7 @@ static void nvme_mpath_set_live(struct nvme_ns *ns)
 		return;
 
 	if (!(head->disk->flags & GENHD_FL_UP)) {
-		device_add_disk(&head->subsys->dev, head->disk);
+		device_add_disk(&head->subsys->dev, head->disk, NULL);
 		if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
 				&nvme_ns_id_attr_group))
 			dev_warn(&head->subsys->dev,
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index 7036a6c6f86f..5542d9eadfe0 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -76,7 +76,7 @@ int dasd_gendisk_alloc(struct dasd_block *block)
 	gdp->queue = block->request_queue;
 	block->gdp = gdp;
 	set_capacity(block->gdp, 0);
-	device_add_disk(&base->cdev->dev, block->gdp);
+	device_add_disk(&base->cdev->dev, block->gdp, NULL);
 	return 0;
 }
 
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index 23e526cda5c1..4e8aedd50cb0 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -685,7 +685,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
 	}
 
 	get_device(&dev_info->dev);
-	device_add_disk(&dev_info->dev, dev_info->gd);
+	device_add_disk(&dev_info->dev, dev_info->gd, NULL);
 
 	switch (dev_info->segment_type) {
 		case SEG_TYPE_SR:
diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 98f66b7b6794..e01889394c84 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -500,7 +500,7 @@ int scm_blk_dev_setup(struct scm_blk_dev *bdev, struct scm_device *scmdev)
 
 	/* 512 byte sectors */
 	set_capacity(bdev->gendisk, scmdev->size >> 9);
-	device_add_disk(&scmdev->dev, bdev->gendisk);
+	device_add_disk(&scmdev->dev, bdev->gendisk, NULL);
 	return 0;
 
 out_queue:
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 64514e8359e4..67ed5906b462 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3272,7 +3272,7 @@ static void sd_probe_async(void *data, async_cookie_t cookie)
 	}
 
 	blk_pm_runtime_init(sdp->request_queue, dev);
-	device_add_disk(dev, gd);
+	device_add_disk(dev, gd, NULL);
 	if (sdkp->capacity)
 		sd_dif_config_host(sdkp);
 
diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c
index 4f07b3410595..54dd70ae9731 100644
--- a/drivers/scsi/sr.c
+++ b/drivers/scsi/sr.c
@@ -759,7 +759,7 @@ static int sr_probe(struct device *dev)
 
 	dev_set_drvdata(dev, cd);
 	disk->flags |= GENHD_FL_REMOVABLE;
-	device_add_disk(&sdev->sdev_gendev, disk);
+	device_add_disk(&sdev->sdev_gendev, disk, NULL);
 
 	sdev_printk(KERN_DEBUG, sdev,
 		    "Attached scsi CD-ROM %s\n", cd->cdi.name);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 57864422a2c8..0b820ff05839 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -399,10 +399,11 @@ static inline void free_part_info(struct hd_struct *part)
 extern void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part);
 
 /* block/genhd.c */
-extern void device_add_disk(struct device *parent, struct gendisk *disk);
+extern void device_add_disk(struct device *parent, struct gendisk *disk,
+			    const struct attribute_group **groups);
 static inline void add_disk(struct gendisk *disk)
 {
-	device_add_disk(NULL, disk);
+	device_add_disk(NULL, disk, NULL);
 }
 extern void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk);
 static inline void add_disk_no_queue_reg(struct gendisk *disk)
-- 
cgit v1.2.3


From d6112f8def514e019658bcc9b57d53acdb71ca3f Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@linux.intel.com>
Date: Fri, 7 Sep 2018 09:16:51 +0300
Subject: PCI: Add support for Immediate Readiness
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PCIe r4.0, sec 7.5.1.1.4 defines a new bit in the Status Register:

  Immediate Readiness – This optional bit, when Set, indicates the Function
  is guaranteed to be ready to successfully complete valid configuration
  accesses at any time following any reset that the host is capable of
  issuing Configuration Requests to this Function.

  When this bit is Set, for accesses to this Function, software is exempt
  from all requirements to delay configuration accesses following any type
  of reset, including but not limited to the timing requirements defined in
  Section 6.6.

This means that all delays after a Conventional or Function Reset can be
skipped.

This patch reads such bit and caches its value in a flag inside struct
pci_dev to be checked later if we should delay or can skip delays after a
reset.  While at that, also move the explicit msleep(100) call from
pcie_flr() and pci_af_flr() to pci_dev_wait().

Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
[bhelgaas: rename PCI_STATUS_IMMEDIATE to PCI_STATUS_IMM_READY]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.c             | 13 ++++++++++++-
 include/linux/pci.h           |  1 +
 include/uapi/linux/pci_regs.h |  1 +
 3 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 1835f3a7aa8d..ee7c2f4eef9b 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -999,7 +999,7 @@ static void __pci_start_power_transition(struct pci_dev *dev, pci_power_t state)
 		 * because have already delayed for the bridge.
 		 */
 		if (dev->runtime_d3cold) {
-			if (dev->d3cold_delay)
+			if (dev->d3cold_delay && !dev->imm_ready)
 				msleep(dev->d3cold_delay);
 			/*
 			 * When powering on a bridge from D3cold, the
@@ -2644,6 +2644,7 @@ EXPORT_SYMBOL_GPL(pci_d3cold_disable);
 void pci_pm_init(struct pci_dev *dev)
 {
 	int pm;
+	u16 status;
 	u16 pmc;
 
 	pm_runtime_forbid(&dev->dev);
@@ -2706,6 +2707,10 @@ void pci_pm_init(struct pci_dev *dev)
 		/* Disable the PME# generation functionality */
 		pci_pme_active(dev, false);
 	}
+
+	pci_read_config_word(dev, PCI_STATUS, &status);
+	if (status & PCI_STATUS_IMM_READY)
+		dev->imm_ready = 1;
 }
 
 static unsigned long pci_ea_flags(struct pci_dev *dev, u8 prop)
@@ -4376,6 +4381,9 @@ int pcie_flr(struct pci_dev *dev)
 
 	pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);
 
+	if (dev->imm_ready)
+		return 0;
+
 	/*
 	 * Per PCIe r4.0, sec 6.6.2, a device must complete an FLR within
 	 * 100ms, but may silently discard requests while the FLR is in
@@ -4417,6 +4425,9 @@ static int pci_af_flr(struct pci_dev *dev, int probe)
 
 	pci_write_config_byte(dev, pos + PCI_AF_CTRL, PCI_AF_CTRL_FLR);
 
+	if (dev->imm_ready)
+		return 0;
+
 	/*
 	 * Per Advanced Capabilities for Conventional PCI ECN, 13 April 2006,
 	 * updated 27 July 2006; a device must complete an FLR within
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 6925828f9f25..60da5d7d4310 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -325,6 +325,7 @@ struct pci_dev {
 	pci_power_t	current_state;	/* Current operating state. In ACPI,
 					   this is D0-D3, D0 being fully
 					   functional, and D3 being off. */
+	unsigned int	imm_ready:1;	/* Supports Immediate Readiness */
 	u8		pm_cap;		/* PM capability offset */
 	unsigned int	pme_support:5;	/* Bitmask of states from which PME#
 					   can be generated */
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index ee556ccc93f4..e1e9888c85e6 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -52,6 +52,7 @@
 #define  PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */
 
 #define PCI_STATUS		0x06	/* 16 bits */
+#define  PCI_STATUS_IMM_READY	0x01	/* Immediate Readiness */
 #define  PCI_STATUS_INTERRUPT	0x08	/* Interrupt status */
 #define  PCI_STATUS_CAP_LIST	0x10	/* Support Capability List */
 #define  PCI_STATUS_66MHZ	0x20	/* Support 66 MHz PCI 2.1 bus */
-- 
cgit v1.2.3


From f1f207e43b8a49ac2ee3c36a64de1f84651c6079 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Wed, 22 Aug 2018 15:04:40 -0500
Subject: of: Add cpu node iterator for_each_of_cpu_node()

Iterating thru cpu nodes is a common pattern. Create a common iterator
which can find child nodes either by node name or device_type == cpu.
Using the former will allow for eventually dropping device_type
properties which are deprecated for FDT.

Cc: Frank Rowand <frowand.list@gmail.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/of/base.c  | 39 +++++++++++++++++++++++++++++++++++++++
 include/linux/of.h | 11 +++++++++++
 2 files changed, 50 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/of/base.c b/drivers/of/base.c
index a055cd1ef96d..4807db0a35b3 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -741,6 +741,45 @@ struct device_node *of_get_next_available_child(const struct device_node *node,
 }
 EXPORT_SYMBOL(of_get_next_available_child);
 
+/**
+ *	of_get_next_cpu_node - Iterate on cpu nodes
+ *	@prev:	previous child of the /cpus node, or NULL to get first
+ *
+ *	Returns a cpu node pointer with refcount incremented, use of_node_put()
+ *	on it when done. Returns NULL when prev is the last child. Decrements
+ *	the refcount of prev.
+ */
+struct device_node *of_get_next_cpu_node(struct device_node *prev)
+{
+	struct device_node *next = NULL;
+	unsigned long flags;
+	struct device_node *node;
+
+	if (!prev)
+		node = of_find_node_by_path("/cpus");
+
+	raw_spin_lock_irqsave(&devtree_lock, flags);
+	if (prev)
+		next = prev->sibling;
+	else if (node) {
+		next = node->child;
+		of_node_put(node);
+	}
+	for (; next; next = next->sibling) {
+		if (!(of_node_name_eq(next, "cpu") ||
+		      (next->type && !of_node_cmp(next->type, "cpu"))))
+			continue;
+		if (!__of_device_is_available(next))
+			continue;
+		if (of_node_get(next))
+			break;
+	}
+	of_node_put(prev);
+	raw_spin_unlock_irqrestore(&devtree_lock, flags);
+	return next;
+}
+EXPORT_SYMBOL(of_get_next_cpu_node);
+
 /**
  * of_get_compatible_child - Find compatible child node
  * @parent:	parent node
diff --git a/include/linux/of.h b/include/linux/of.h
index 99b0ebf49632..1aca0dbd35df 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -353,6 +353,8 @@ extern const void *of_get_property(const struct device_node *node,
 				const char *name,
 				int *lenp);
 extern struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
+extern struct device_node *of_get_next_cpu_node(struct device_node *prev);
+
 #define for_each_property_of_node(dn, pp) \
 	for (pp = dn->properties; pp != NULL; pp = pp->next)
 
@@ -754,6 +756,11 @@ static inline struct device_node *of_get_cpu_node(int cpu,
 	return NULL;
 }
 
+static inline struct device_node *of_get_next_cpu_node(struct device_node *prev)
+{
+	return NULL;
+}
+
 static inline int of_n_addr_cells(struct device_node *np)
 {
 	return 0;
@@ -1217,6 +1224,10 @@ static inline int of_property_read_s32(const struct device_node *np,
 	for (child = of_get_next_available_child(parent, NULL); child != NULL; \
 	     child = of_get_next_available_child(parent, child))
 
+#define for_each_of_cpu_node(cpu) \
+	for (cpu = of_get_next_cpu_node(NULL); cpu != NULL; \
+	     cpu = of_get_next_cpu_node(cpu))
+
 #define for_each_node_with_property(dn, prop_name) \
 	for (dn = of_find_node_with_property(NULL, prop_name); dn; \
 	     dn = of_find_node_with_property(dn, prop_name))
-- 
cgit v1.2.3


From 9e288cefcc551c7b5b04f8abc7099d3451a70f5f Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Tue, 25 Sep 2018 09:34:05 +0200
Subject: clk: renesas: Convert to SPDX identifiers

This patch updates license to use SPDX-License-Identifier
instead of verbose license text.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
[rebased against clk-spdx]
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/renesas/Kconfig             |  2 ++
 drivers/clk/renesas/clk-emev2.c         | 14 +-------------
 drivers/clk/renesas/clk-mstp.c          |  5 +----
 drivers/clk/renesas/clk-r8a73a4.c       |  5 +----
 drivers/clk/renesas/clk-r8a7740.c       |  5 +----
 drivers/clk/renesas/clk-r8a7778.c       |  5 +----
 drivers/clk/renesas/clk-r8a7779.c       |  5 +----
 drivers/clk/renesas/clk-rcar-gen2.c     |  5 +----
 drivers/clk/renesas/clk-rz.c            |  5 +----
 drivers/clk/renesas/clk-sh73a0.c        |  5 +----
 drivers/clk/renesas/r8a7743-cpg-mssr.c  |  5 +----
 drivers/clk/renesas/r8a7745-cpg-mssr.c  |  5 +----
 drivers/clk/renesas/r8a7790-cpg-mssr.c  |  5 +----
 drivers/clk/renesas/r8a7791-cpg-mssr.c  |  5 +----
 drivers/clk/renesas/r8a7792-cpg-mssr.c  |  5 +----
 drivers/clk/renesas/r8a7794-cpg-mssr.c  |  5 +----
 drivers/clk/renesas/r8a77970-cpg-mssr.c |  5 +----
 drivers/clk/renesas/rcar-gen2-cpg.c     |  5 +----
 drivers/clk/renesas/rcar-gen2-cpg.h     |  7 ++-----
 drivers/clk/renesas/rcar-gen3-cpg.h     |  7 ++-----
 drivers/clk/renesas/renesas-cpg-mssr.h  |  4 ++--
 include/linux/clk/renesas.h             |  8 ++------
 22 files changed, 27 insertions(+), 95 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/renesas/Kconfig b/drivers/clk/renesas/Kconfig
index 9022bbe1297e..ab2110f96225 100644
--- a/drivers/clk/renesas/Kconfig
+++ b/drivers/clk/renesas/Kconfig
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
 config CLK_RENESAS
 	bool "Renesas SoC clock support" if COMPILE_TEST && !ARCH_RENESAS
 	default y if ARCH_RENESAS
diff --git a/drivers/clk/renesas/clk-emev2.c b/drivers/clk/renesas/clk-emev2.c
index a91825471c79..7f3b5f9631bd 100644
--- a/drivers/clk/renesas/clk-emev2.c
+++ b/drivers/clk/renesas/clk-emev2.c
@@ -1,21 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * EMMA Mobile EV2 common clock framework support
  *
  * Copyright (C) 2013 Takashi Yoshii <takashi.yoshii.ze@renesas.com>
  * Copyright (C) 2012 Magnus Damm
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
 #include <linux/clk-provider.h>
 #include <linux/clkdev.h>
diff --git a/drivers/clk/renesas/clk-mstp.c b/drivers/clk/renesas/clk-mstp.c
index e82adcb16a52..7df14bb1cbbc 100644
--- a/drivers/clk/renesas/clk-mstp.c
+++ b/drivers/clk/renesas/clk-mstp.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * R-Car MSTP clocks
  *
@@ -5,10 +6,6 @@
  * Copyright (C) 2015 Glider bvba
  *
  * Contact: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
  */
 
 #include <linux/clk.h>
diff --git a/drivers/clk/renesas/clk-r8a73a4.c b/drivers/clk/renesas/clk-r8a73a4.c
index 7b903ce4c901..e1a9e89defdd 100644
--- a/drivers/clk/renesas/clk-r8a73a4.c
+++ b/drivers/clk/renesas/clk-r8a73a4.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * r8a73a4 Core CPG Clocks
  *
  * Copyright (C) 2014  Ulrich Hecht
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
  */
 
 #include <linux/clk-provider.h>
diff --git a/drivers/clk/renesas/clk-r8a7740.c b/drivers/clk/renesas/clk-r8a7740.c
index a7a30d2eca41..d68171263137 100644
--- a/drivers/clk/renesas/clk-r8a7740.c
+++ b/drivers/clk/renesas/clk-r8a7740.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * r8a7740 Core CPG Clocks
  *
  * Copyright (C) 2014  Ulrich Hecht
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
  */
 
 #include <linux/clk-provider.h>
diff --git a/drivers/clk/renesas/clk-r8a7778.c b/drivers/clk/renesas/clk-r8a7778.c
index 886a8380e912..f986ba34661d 100644
--- a/drivers/clk/renesas/clk-r8a7778.c
+++ b/drivers/clk/renesas/clk-r8a7778.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * r8a7778 Core CPG Clocks
  *
  * Copyright (C) 2014  Ulrich Hecht
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
  */
 
 #include <linux/clk-provider.h>
diff --git a/drivers/clk/renesas/clk-r8a7779.c b/drivers/clk/renesas/clk-r8a7779.c
index 5adcca4656c3..437d92cb0630 100644
--- a/drivers/clk/renesas/clk-r8a7779.c
+++ b/drivers/clk/renesas/clk-r8a7779.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * r8a7779 Core CPG Clocks
  *
  * Copyright (C) 2013, 2014 Horms Solutions Ltd.
  *
  * Contact: Simon Horman <horms@verge.net.au>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
  */
 
 #include <linux/clk-provider.h>
diff --git a/drivers/clk/renesas/clk-rcar-gen2.c b/drivers/clk/renesas/clk-rcar-gen2.c
index bccd62f2cb09..738b723045ce 100644
--- a/drivers/clk/renesas/clk-rcar-gen2.c
+++ b/drivers/clk/renesas/clk-rcar-gen2.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * rcar_gen2 Core CPG Clocks
  *
  * Copyright (C) 2013  Ideas On Board SPRL
  *
  * Contact: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
  */
 
 #include <linux/clk-provider.h>
diff --git a/drivers/clk/renesas/clk-rz.c b/drivers/clk/renesas/clk-rz.c
index ac2f86d626b6..7fe8729bf66c 100644
--- a/drivers/clk/renesas/clk-rz.c
+++ b/drivers/clk/renesas/clk-rz.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * RZ/A1 Core CPG Clocks
  *
  * Copyright (C) 2013 Ideas On Board SPRL
  * Copyright (C) 2014 Wolfram Sang, Sang Engineering <wsa@sang-engineering.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
  */
 
 #include <linux/clk-provider.h>
diff --git a/drivers/clk/renesas/clk-sh73a0.c b/drivers/clk/renesas/clk-sh73a0.c
index bab33610eb6c..576259237666 100644
--- a/drivers/clk/renesas/clk-sh73a0.c
+++ b/drivers/clk/renesas/clk-sh73a0.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * sh73a0 Core CPG Clocks
  *
  * Copyright (C) 2014  Ulrich Hecht
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
  */
 
 #include <linux/clk-provider.h>
diff --git a/drivers/clk/renesas/r8a7743-cpg-mssr.c b/drivers/clk/renesas/r8a7743-cpg-mssr.c
index 011c170ec3f9..f880b9cb72e0 100644
--- a/drivers/clk/renesas/r8a7743-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7743-cpg-mssr.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * r8a7743 Clock Pulse Generator / Module Standby and Software Reset
  *
  * Copyright (C) 2016 Cogent Embedded Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation; of the License.
  */
 
 #include <linux/device.h>
diff --git a/drivers/clk/renesas/r8a7745-cpg-mssr.c b/drivers/clk/renesas/r8a7745-cpg-mssr.c
index 4b0a9243b748..493874e5ebee 100644
--- a/drivers/clk/renesas/r8a7745-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7745-cpg-mssr.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * r8a7745 Clock Pulse Generator / Module Standby and Software Reset
  *
  * Copyright (C) 2016 Cogent Embedded Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation; of the License.
  */
 
 #include <linux/device.h>
diff --git a/drivers/clk/renesas/r8a7790-cpg-mssr.c b/drivers/clk/renesas/r8a7790-cpg-mssr.c
index f936cb74b681..c57cb93f8315 100644
--- a/drivers/clk/renesas/r8a7790-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7790-cpg-mssr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * r8a7790 Clock Pulse Generator / Module Standby and Software Reset
  *
@@ -6,10 +7,6 @@
  * Based on clk-rcar-gen2.c
  *
  * Copyright (C) 2013 Ideas On Board SPRL
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
  */
 
 #include <linux/device.h>
diff --git a/drivers/clk/renesas/r8a7791-cpg-mssr.c b/drivers/clk/renesas/r8a7791-cpg-mssr.c
index 1b91f03b7598..65702debcabb 100644
--- a/drivers/clk/renesas/r8a7791-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7791-cpg-mssr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * r8a7791 Clock Pulse Generator / Module Standby and Software Reset
  *
@@ -6,10 +7,6 @@
  * Based on clk-rcar-gen2.c
  *
  * Copyright (C) 2013 Ideas On Board SPRL
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
  */
 
 #include <linux/device.h>
diff --git a/drivers/clk/renesas/r8a7792-cpg-mssr.c b/drivers/clk/renesas/r8a7792-cpg-mssr.c
index 493e07859f5f..cf8b84a3a060 100644
--- a/drivers/clk/renesas/r8a7792-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7792-cpg-mssr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * r8a7792 Clock Pulse Generator / Module Standby and Software Reset
  *
@@ -6,10 +7,6 @@
  * Based on clk-rcar-gen2.c
  *
  * Copyright (C) 2013 Ideas On Board SPRL
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
  */
 
 #include <linux/device.h>
diff --git a/drivers/clk/renesas/r8a7794-cpg-mssr.c b/drivers/clk/renesas/r8a7794-cpg-mssr.c
index 088f4b79fdfc..c1948693c5c1 100644
--- a/drivers/clk/renesas/r8a7794-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a7794-cpg-mssr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * r8a7794 Clock Pulse Generator / Module Standby and Software Reset
  *
@@ -6,10 +7,6 @@
  * Based on clk-rcar-gen2.c
  *
  * Copyright (C) 2013 Ideas On Board SPRL
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
  */
 
 #include <linux/device.h>
diff --git a/drivers/clk/renesas/r8a77970-cpg-mssr.c b/drivers/clk/renesas/r8a77970-cpg-mssr.c
index f55842917e8d..38509873d06c 100644
--- a/drivers/clk/renesas/r8a77970-cpg-mssr.c
+++ b/drivers/clk/renesas/r8a77970-cpg-mssr.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * r8a77970 Clock Pulse Generator / Module Standby and Software Reset
  *
@@ -6,10 +7,6 @@
  * Based on r8a7795-cpg-mssr.c
  *
  * Copyright (C) 2015 Glider bvba
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
  */
 
 #include <linux/device.h>
diff --git a/drivers/clk/renesas/rcar-gen2-cpg.c b/drivers/clk/renesas/rcar-gen2-cpg.c
index daf88bc2cdae..f596a2dafcf4 100644
--- a/drivers/clk/renesas/rcar-gen2-cpg.c
+++ b/drivers/clk/renesas/rcar-gen2-cpg.c
@@ -1,11 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * R-Car Gen2 Clock Pulse Generator
  *
  * Copyright (C) 2016 Cogent Embedded Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
  */
 
 #include <linux/bug.h>
diff --git a/drivers/clk/renesas/rcar-gen2-cpg.h b/drivers/clk/renesas/rcar-gen2-cpg.h
index 020a3baad015..bff9551c7a38 100644
--- a/drivers/clk/renesas/rcar-gen2-cpg.h
+++ b/drivers/clk/renesas/rcar-gen2-cpg.h
@@ -1,11 +1,8 @@
-/*
+/* SPDX-License-Identifier: GPL-2.0
+ *
  * R-Car Gen2 Clock Pulse Generator
  *
  * Copyright (C) 2016 Cogent Embedded Inc.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation; version 2 of the License.
  */
 
 #ifndef __CLK_RENESAS_RCAR_GEN2_CPG_H__
diff --git a/drivers/clk/renesas/rcar-gen3-cpg.h b/drivers/clk/renesas/rcar-gen3-cpg.h
index ea4f8fc3c4c9..6ebc4b42c5a3 100644
--- a/drivers/clk/renesas/rcar-gen3-cpg.h
+++ b/drivers/clk/renesas/rcar-gen3-cpg.h
@@ -1,11 +1,8 @@
-/*
+/* SPDX-License-Identifier: GPL-2.0
+ *
  * R-Car Gen3 Clock Pulse Generator
  *
  * Copyright (C) 2015-2016 Glider bvba
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
  */
 
 #ifndef __CLK_RENESAS_RCAR_GEN3_CPG_H__
diff --git a/drivers/clk/renesas/renesas-cpg-mssr.h b/drivers/clk/renesas/renesas-cpg-mssr.h
index 59b1b30c1dba..d52d1f8da9d7 100644
--- a/drivers/clk/renesas/renesas-cpg-mssr.h
+++ b/drivers/clk/renesas/renesas-cpg-mssr.h
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
+/* SPDX-License-Identifier: GPL-2.0
+ *
  * Renesas Clock Pulse Generator / Module Standby and Software Reset
  *
  * Copyright (C) 2015 Glider bvba
diff --git a/include/linux/clk/renesas.h b/include/linux/clk/renesas.h
index 9ebf1f8243bb..0ebbe2f0b45e 100644
--- a/include/linux/clk/renesas.h
+++ b/include/linux/clk/renesas.h
@@ -1,14 +1,10 @@
-/*
+/* SPDX-License-Identifier: GPL-2.0+
+ *
  * Copyright 2013 Ideas On Board SPRL
  * Copyright 2013, 2014 Horms Solutions Ltd.
  *
  * Contact: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
  * Contact: Simon Horman <horms@verge.net.au>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __LINUX_CLK_RENESAS_H_
-- 
cgit v1.2.3


From 3d0186bb068e6cc6c23dc1d2f0b1cf64894c40ea Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Sat, 16 Jun 2018 17:32:07 -0400
Subject: Update email address

Redirect some older email addresses that are in the git logs.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 .mailmap                           | 7 +++++++
 MAINTAINERS                        | 6 +++---
 arch/parisc/kernel/syscall.S       | 2 +-
 drivers/input/keyboard/hilkbd.c    | 2 +-
 drivers/pci/hotplug/acpiphp.h      | 2 +-
 drivers/pci/hotplug/acpiphp_core.c | 4 ++--
 drivers/pci/hotplug/acpiphp_glue.c | 2 +-
 fs/isofs/dir.c                     | 2 +-
 include/linux/xarray.h             | 2 +-
 9 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/.mailmap b/.mailmap
index 285e09645b31..89f532caf639 100644
--- a/.mailmap
+++ b/.mailmap
@@ -119,6 +119,13 @@ Mark Brown <broonie@sirena.org.uk>
 Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
 Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
+Matthew Wilcox <willy@infradead.org> <matthew.r.wilcox@intel.com>
+Matthew Wilcox <willy@infradead.org> <matthew@wil.cx>
+Matthew Wilcox <willy@infradead.org> <mawilcox@linuxonhyperv.com>
+Matthew Wilcox <willy@infradead.org> <mawilcox@microsoft.com>
+Matthew Wilcox <willy@infradead.org> <willy@debian.org>
+Matthew Wilcox <willy@infradead.org> <willy@linux.intel.com>
+Matthew Wilcox <willy@infradead.org> <willy@parisc-linux.org>
 Matthieu CASTET <castet.matthieu@free.fr>
 Mauro Carvalho Chehab <mchehab@kernel.org> <mchehab@brturbo.com.br>
 Mauro Carvalho Chehab <mchehab@kernel.org> <maurochehab@gmail.com>
diff --git a/MAINTAINERS b/MAINTAINERS
index a255240d1452..612e5dbf3431 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -536,7 +536,7 @@ F:	Documentation/hwmon/adt7475
 F:	drivers/hwmon/adt7475.c
 
 ADVANSYS SCSI DRIVER
-M:	Matthew Wilcox <matthew@wil.cx>
+M:	Matthew Wilcox <willy@infradead.org>
 M:	Hannes Reinecke <hare@suse.com>
 L:	linux-scsi@vger.kernel.org
 S:	Maintained
@@ -4364,7 +4364,7 @@ S:	Maintained
 F:	drivers/i2c/busses/i2c-diolan-u2c.c
 
 FILESYSTEM DIRECT ACCESS (DAX)
-M:	Matthew Wilcox <mawilcox@microsoft.com>
+M:	Matthew Wilcox <willy@infradead.org>
 M:	Ross Zwisler <zwisler@kernel.org>
 M:	Jan Kara <jack@suse.cz>
 L:	linux-fsdevel@vger.kernel.org
@@ -8626,7 +8626,7 @@ F:	drivers/message/fusion/
 F:	drivers/scsi/mpt3sas/
 
 LSILOGIC/SYMBIOS/NCR 53C8XX and 53C1010 PCI-SCSI drivers
-M:	Matthew Wilcox <matthew@wil.cx>
+M:	Matthew Wilcox <willy@infradead.org>
 L:	linux-scsi@vger.kernel.org
 S:	Maintained
 F:	drivers/scsi/sym53c8xx_2/
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index f453997a7b8f..a9bc90dc4ae7 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -2,7 +2,7 @@
  * Linux/PA-RISC Project (http://www.parisc-linux.org/)
  * 
  * System call entry code / Linux gateway page
- * Copyright (c) Matthew Wilcox 1999 <willy@bofh.ai>
+ * Copyright (c) Matthew Wilcox 1999 <willy@infradead.org>
  * Licensed under the GNU GPL.
  * thanks to Philipp Rumpf, Mike Shaver and various others
  * sorry about the wall, puffin..
diff --git a/drivers/input/keyboard/hilkbd.c b/drivers/input/keyboard/hilkbd.c
index 5c7afdec192c..f5c5ae8b6c06 100644
--- a/drivers/input/keyboard/hilkbd.c
+++ b/drivers/input/keyboard/hilkbd.c
@@ -2,7 +2,7 @@
  *  linux/drivers/hil/hilkbd.c
  *
  *  Copyright (C) 1998 Philip Blundell <philb@gnu.org>
- *  Copyright (C) 1999 Matthew Wilcox <willy@bofh.ai>
+ *  Copyright (C) 1999 Matthew Wilcox <willy@infradead.org>
  *  Copyright (C) 1999-2007 Helge Deller <deller@gmx.de>
  *
  *  Very basic HP Human Interface Loop (HIL) driver.
diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h
index e438a2d734f2..14cef4cf592b 100644
--- a/drivers/pci/hotplug/acpiphp.h
+++ b/drivers/pci/hotplug/acpiphp.h
@@ -8,7 +8,7 @@
  * Copyright (C) 2002 Hiroshi Aono (h-aono@ap.jp.nec.com)
  * Copyright (C) 2002,2003 Takayoshi Kochi (t-kochi@bq.jp.nec.com)
  * Copyright (C) 2002,2003 NEC Corporation
- * Copyright (C) 2003-2005 Matthew Wilcox (matthew.wilcox@hp.com)
+ * Copyright (C) 2003-2005 Matthew Wilcox (willy@infradead.org)
  * Copyright (C) 2003-2005 Hewlett Packard
  *
  * All rights reserved.
diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c
index ad32ffbc4b91..0447b169e7ff 100644
--- a/drivers/pci/hotplug/acpiphp_core.c
+++ b/drivers/pci/hotplug/acpiphp_core.c
@@ -8,7 +8,7 @@
  * Copyright (C) 2002 Hiroshi Aono (h-aono@ap.jp.nec.com)
  * Copyright (C) 2002,2003 Takayoshi Kochi (t-kochi@bq.jp.nec.com)
  * Copyright (C) 2002,2003 NEC Corporation
- * Copyright (C) 2003-2005 Matthew Wilcox (matthew.wilcox@hp.com)
+ * Copyright (C) 2003-2005 Matthew Wilcox (willy@infradead.org)
  * Copyright (C) 2003-2005 Hewlett Packard
  *
  * All rights reserved.
@@ -40,7 +40,7 @@ bool acpiphp_disabled;
 static struct acpiphp_attention_info *attention_info;
 
 #define DRIVER_VERSION	"0.5"
-#define DRIVER_AUTHOR	"Greg Kroah-Hartman <gregkh@us.ibm.com>, Takayoshi Kochi <t-kochi@bq.jp.nec.com>, Matthew Wilcox <willy@hp.com>"
+#define DRIVER_AUTHOR	"Greg Kroah-Hartman <gregkh@us.ibm.com>, Takayoshi Kochi <t-kochi@bq.jp.nec.com>, Matthew Wilcox <willy@infradead.org>"
 #define DRIVER_DESC	"ACPI Hot Plug PCI Controller Driver"
 
 MODULE_AUTHOR(DRIVER_AUTHOR);
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 12afa7fdf77e..e4c46637f32f 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -5,7 +5,7 @@
  * Copyright (C) 2002,2003 Takayoshi Kochi (t-kochi@bq.jp.nec.com)
  * Copyright (C) 2002 Hiroshi Aono (h-aono@ap.jp.nec.com)
  * Copyright (C) 2002,2003 NEC Corporation
- * Copyright (C) 2003-2005 Matthew Wilcox (matthew.wilcox@hp.com)
+ * Copyright (C) 2003-2005 Matthew Wilcox (willy@infradead.org)
  * Copyright (C) 2003-2005 Hewlett Packard
  * Copyright (C) 2005 Rajesh Shah (rajesh.shah@intel.com)
  * Copyright (C) 2005 Intel Corporation
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 947ce22f5b3c..f0fe641893a5 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -46,7 +46,7 @@ int isofs_name_translate(struct iso_directory_record *de, char *new, struct inod
 	return i;
 }
 
-/* Acorn extensions written by Matthew Wilcox <willy@bofh.ai> 1998 */
+/* Acorn extensions written by Matthew Wilcox <willy@infradead.org> 1998 */
 int get_acorn_filename(struct iso_directory_record *de,
 			    char *retname, struct inode *inode)
 {
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 2dfc8006fe64..9e4c86853fa4 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -4,7 +4,7 @@
 /*
  * eXtensible Arrays
  * Copyright (c) 2017 Microsoft Corporation
- * Author: Matthew Wilcox <mawilcox@microsoft.com>
+ * Author: Matthew Wilcox <willy@infradead.org>
  */
 
 #include <linux/spinlock.h>
-- 
cgit v1.2.3


From 3159f943aafdbacb2f94c38fdaadabf2bbde2a14 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 3 Nov 2017 13:30:42 -0400
Subject: xarray: Replace exceptional entries

Introduce xarray value entries and tagged pointers to replace radix
tree exceptional entries.  This is a slight change in encoding to allow
the use of an extra bit (we can now store BITS_PER_LONG - 1 bits in a
value entry).  It is also a change in emphasis; exceptional entries are
intimidating and different.  As the comment explains, you can choose
to store values or pointers in the xarray and they are both first-class
citizens.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Josef Bacik <jbacik@fb.com>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h |   4 +-
 arch/powerpc/include/asm/nohash/64/pgtable.h |   4 +-
 drivers/gpu/drm/i915/i915_gem.c              |  17 ++--
 drivers/staging/erofs/utils.c                |  18 ++---
 fs/btrfs/compression.c                       |   2 +-
 fs/dax.c                                     | 112 +++++++++++++--------------
 fs/proc/task_mmu.c                           |   2 +-
 include/linux/radix-tree.h                   |  36 ++-------
 include/linux/swapops.h                      |  19 ++---
 include/linux/xarray.h                       | 102 ++++++++++++++++++++++++
 lib/idr.c                                    |  60 ++++++--------
 lib/radix-tree.c                             |  21 +++--
 mm/filemap.c                                 |  10 +--
 mm/khugepaged.c                              |   2 +-
 mm/madvise.c                                 |   2 +-
 mm/memcontrol.c                              |   2 +-
 mm/mincore.c                                 |   2 +-
 mm/readahead.c                               |   2 +-
 mm/shmem.c                                   |  10 +--
 mm/swap.c                                    |   2 +-
 mm/truncate.c                                |  12 +--
 mm/workingset.c                              |  13 ++--
 tools/testing/radix-tree/idr-test.c          |   6 +-
 tools/testing/radix-tree/linux/radix-tree.h  |   1 -
 tools/testing/radix-tree/multiorder.c        |  47 ++++++-----
 tools/testing/radix-tree/test.c              |   2 +-
 26 files changed, 278 insertions(+), 232 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 2fdc865ca374..62039e557ac0 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -723,9 +723,7 @@ static inline bool pte_user(pte_t pte)
 	BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
 	BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY);	\
 	} while (0)
-/*
- * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
- */
+
 #define SWP_TYPE_BITS 5
 #define __swp_type(x)		(((x).val >> _PAGE_BIT_SWAP_TYPE) \
 				& ((1UL << SWP_TYPE_BITS) - 1))
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 7cd6809f4d33..05765c2d2c1f 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -313,9 +313,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
 #define MAX_SWAPFILES_CHECK() do { \
 	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
 	} while (0)
-/*
- * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
- */
+
 #define SWP_TYPE_BITS 5
 #define __swp_type(x)		(((x).val >> _PAGE_BIT_SWAP_TYPE) \
 				& ((1UL << SWP_TYPE_BITS) - 1))
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fcc73a6ab503..316730b45f84 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -5996,7 +5996,8 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
 	count = __sg_page_count(sg);
 
 	while (idx + count <= n) {
-		unsigned long exception, i;
+		void *entry;
+		unsigned long i;
 		int ret;
 
 		/* If we cannot allocate and insert this entry, or the
@@ -6011,12 +6012,9 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj,
 		if (ret && ret != -EEXIST)
 			goto scan;
 
-		exception =
-			RADIX_TREE_EXCEPTIONAL_ENTRY |
-			idx << RADIX_TREE_EXCEPTIONAL_SHIFT;
+		entry = xa_mk_value(idx);
 		for (i = 1; i < count; i++) {
-			ret = radix_tree_insert(&iter->radix, idx + i,
-						(void *)exception);
+			ret = radix_tree_insert(&iter->radix, idx + i, entry);
 			if (ret && ret != -EEXIST)
 				goto scan;
 		}
@@ -6054,15 +6052,14 @@ lookup:
 	GEM_BUG_ON(!sg);
 
 	/* If this index is in the middle of multi-page sg entry,
-	 * the radixtree will contain an exceptional entry that points
+	 * the radix tree will contain a value entry that points
 	 * to the start of that range. We will return the pointer to
 	 * the base page and the offset of this page within the
 	 * sg entry's range.
 	 */
 	*offset = 0;
-	if (unlikely(radix_tree_exception(sg))) {
-		unsigned long base =
-			(unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
+	if (unlikely(xa_is_value(sg))) {
+		unsigned long base = xa_to_value(sg);
 
 		sg = radix_tree_lookup(&iter->radix, base);
 		GEM_BUG_ON(!sg);
diff --git a/drivers/staging/erofs/utils.c b/drivers/staging/erofs/utils.c
index 595cf90af9bb..bdee9bd09f11 100644
--- a/drivers/staging/erofs/utils.c
+++ b/drivers/staging/erofs/utils.c
@@ -35,7 +35,6 @@ static atomic_long_t erofs_global_shrink_cnt;
 
 #ifdef CONFIG_EROFS_FS_ZIP
 
-/* radix_tree and the future XArray both don't use tagptr_t yet */
 struct erofs_workgroup *erofs_find_workgroup(
 	struct super_block *sb, pgoff_t index, bool *tag)
 {
@@ -47,9 +46,8 @@ repeat:
 	rcu_read_lock();
 	grp = radix_tree_lookup(&sbi->workstn_tree, index);
 	if (grp != NULL) {
-		*tag = radix_tree_exceptional_entry(grp);
-		grp = (void *)((unsigned long)grp &
-			~RADIX_TREE_EXCEPTIONAL_ENTRY);
+		*tag = xa_pointer_tag(grp);
+		grp = xa_untag_pointer(grp);
 
 		if (erofs_workgroup_get(grp, &oldcount)) {
 			/* prefer to relax rcu read side */
@@ -83,9 +81,7 @@ int erofs_register_workgroup(struct super_block *sb,
 	sbi = EROFS_SB(sb);
 	erofs_workstn_lock(sbi);
 
-	if (tag)
-		grp = (void *)((unsigned long)grp |
-			1UL << RADIX_TREE_EXCEPTIONAL_SHIFT);
+	grp = xa_tag_pointer(grp, tag);
 
 	err = radix_tree_insert(&sbi->workstn_tree,
 		grp->index, grp);
@@ -131,9 +127,7 @@ repeat:
 
 	for (i = 0; i < found; ++i) {
 		int cnt;
-		struct erofs_workgroup *grp = (void *)
-			((unsigned long)batch[i] &
-				~RADIX_TREE_EXCEPTIONAL_ENTRY);
+		struct erofs_workgroup *grp = xa_untag_pointer(batch[i]);
 
 		first_index = grp->index + 1;
 
@@ -150,8 +144,8 @@ repeat:
 #endif
 			continue;
 
-		if (radix_tree_delete(&sbi->workstn_tree,
-			grp->index) != grp) {
+		if (xa_untag_pointer(radix_tree_delete(&sbi->workstn_tree,
+			grp->index)) != grp) {
 #ifdef EROFS_FS_HAS_MANAGED_CACHE
 skip:
 			erofs_workgroup_unfreeze(grp, 1);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 9bfa66592aa7..fd25e125303c 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -440,7 +440,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 		rcu_read_lock();
 		page = radix_tree_lookup(&mapping->i_pages, pg_index);
 		rcu_read_unlock();
-		if (page && !radix_tree_exceptional_entry(page)) {
+		if (page && !xa_is_value(page)) {
 			misses++;
 			if (misses > 4)
 				break;
diff --git a/fs/dax.c b/fs/dax.c
index b68ce484e1be..ebcec36335eb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -59,56 +59,57 @@ static int __init init_dax_wait_table(void)
 fs_initcall(init_dax_wait_table);
 
 /*
- * We use lowest available bit in exceptional entry for locking, one bit for
- * the entry size (PMD) and two more to tell us if the entry is a zero page or
- * an empty entry that is just used for locking.  In total four special bits.
+ * DAX pagecache entries use XArray value entries so they can't be mistaken
+ * for pages.  We use one bit for locking, one bit for the entry size (PMD)
+ * and two more to tell us if the entry is a zero page or an empty entry that
+ * is just used for locking.  In total four special bits.
  *
  * If the PMD bit isn't set the entry has size PAGE_SIZE, and if the ZERO_PAGE
  * and EMPTY bits aren't set the entry is a normal DAX entry with a filesystem
  * block allocation.
  */
-#define RADIX_DAX_SHIFT		(RADIX_TREE_EXCEPTIONAL_SHIFT + 4)
-#define RADIX_DAX_ENTRY_LOCK	(1 << RADIX_TREE_EXCEPTIONAL_SHIFT)
-#define RADIX_DAX_PMD		(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 1))
-#define RADIX_DAX_ZERO_PAGE	(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
-#define RADIX_DAX_EMPTY		(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
+#define DAX_SHIFT	(4)
+#define DAX_LOCKED	(1UL << 0)
+#define DAX_PMD		(1UL << 1)
+#define DAX_ZERO_PAGE	(1UL << 2)
+#define DAX_EMPTY	(1UL << 3)
 
 static unsigned long dax_radix_pfn(void *entry)
 {
-	return (unsigned long)entry >> RADIX_DAX_SHIFT;
+	return xa_to_value(entry) >> DAX_SHIFT;
 }
 
 static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags)
 {
-	return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
-			(pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK);
+	return xa_mk_value(flags | ((unsigned long)pfn << DAX_SHIFT) |
+			DAX_LOCKED);
 }
 
 static unsigned int dax_radix_order(void *entry)
 {
-	if ((unsigned long)entry & RADIX_DAX_PMD)
+	if (xa_to_value(entry) & DAX_PMD)
 		return PMD_SHIFT - PAGE_SHIFT;
 	return 0;
 }
 
 static int dax_is_pmd_entry(void *entry)
 {
-	return (unsigned long)entry & RADIX_DAX_PMD;
+	return xa_to_value(entry) & DAX_PMD;
 }
 
 static int dax_is_pte_entry(void *entry)
 {
-	return !((unsigned long)entry & RADIX_DAX_PMD);
+	return !(xa_to_value(entry) & DAX_PMD);
 }
 
 static int dax_is_zero_entry(void *entry)
 {
-	return (unsigned long)entry & RADIX_DAX_ZERO_PAGE;
+	return xa_to_value(entry) & DAX_ZERO_PAGE;
 }
 
 static int dax_is_empty_entry(void *entry)
 {
-	return (unsigned long)entry & RADIX_DAX_EMPTY;
+	return xa_to_value(entry) & DAX_EMPTY;
 }
 
 /*
@@ -186,9 +187,9 @@ static void dax_wake_mapping_entry_waiter(struct address_space *mapping,
  */
 static inline int slot_locked(struct address_space *mapping, void **slot)
 {
-	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
-	return entry & RADIX_DAX_ENTRY_LOCK;
+	unsigned long entry = xa_to_value(
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock));
+	return entry & DAX_LOCKED;
 }
 
 /*
@@ -196,12 +197,11 @@ static inline int slot_locked(struct address_space *mapping, void **slot)
  */
 static inline void *lock_slot(struct address_space *mapping, void **slot)
 {
-	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
-
-	entry |= RADIX_DAX_ENTRY_LOCK;
-	radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
-	return (void *)entry;
+	unsigned long v = xa_to_value(
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock));
+	void *entry = xa_mk_value(v | DAX_LOCKED);
+	radix_tree_replace_slot(&mapping->i_pages, slot, entry);
+	return entry;
 }
 
 /*
@@ -209,17 +209,16 @@ static inline void *lock_slot(struct address_space *mapping, void **slot)
  */
 static inline void *unlock_slot(struct address_space *mapping, void **slot)
 {
-	unsigned long entry = (unsigned long)
-		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock);
-
-	entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
-	radix_tree_replace_slot(&mapping->i_pages, slot, (void *)entry);
-	return (void *)entry;
+	unsigned long v = xa_to_value(
+		radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock));
+	void *entry = xa_mk_value(v & ~DAX_LOCKED);
+	radix_tree_replace_slot(&mapping->i_pages, slot, entry);
+	return entry;
 }
 
 /*
  * Lookup entry in radix tree, wait for it to become unlocked if it is
- * exceptional entry and return it. The caller must call
+ * a DAX entry and return it. The caller must call
  * put_unlocked_mapping_entry() when he decided not to lock the entry or
  * put_locked_mapping_entry() when he locked the entry and now wants to
  * unlock it.
@@ -242,7 +241,7 @@ static void *__get_unlocked_mapping_entry(struct address_space *mapping,
 		entry = __radix_tree_lookup(&mapping->i_pages, index, NULL,
 					  &slot);
 		if (!entry ||
-		    WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)) ||
+		    WARN_ON_ONCE(!xa_is_value(entry)) ||
 		    !slot_locked(mapping, slot)) {
 			if (slotp)
 				*slotp = slot;
@@ -283,7 +282,7 @@ static void unlock_mapping_entry(struct address_space *mapping, pgoff_t index)
 
 	xa_lock_irq(&mapping->i_pages);
 	entry = __radix_tree_lookup(&mapping->i_pages, index, NULL, &slot);
-	if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry) ||
+	if (WARN_ON_ONCE(!entry || !xa_is_value(entry) ||
 			 !slot_locked(mapping, slot))) {
 		xa_unlock_irq(&mapping->i_pages);
 		return;
@@ -472,12 +471,11 @@ void dax_unlock_mapping_entry(struct page *page)
 }
 
 /*
- * Find radix tree entry at given index. If it points to an exceptional entry,
- * return it with the radix tree entry locked. If the radix tree doesn't
- * contain given index, create an empty exceptional entry for the index and
- * return with it locked.
+ * Find radix tree entry at given index. If it is a DAX entry, return it
+ * with the radix tree entry locked. If the radix tree doesn't contain the
+ * given index, create an empty entry for the index and return with it locked.
  *
- * When requesting an entry with size RADIX_DAX_PMD, grab_mapping_entry() will
+ * When requesting an entry with size DAX_PMD, grab_mapping_entry() will
  * either return that locked entry or will return an error.  This error will
  * happen if there are any 4k entries within the 2MiB range that we are
  * requesting.
@@ -507,13 +505,13 @@ restart:
 	xa_lock_irq(&mapping->i_pages);
 	entry = get_unlocked_mapping_entry(mapping, index, &slot);
 
-	if (WARN_ON_ONCE(entry && !radix_tree_exceptional_entry(entry))) {
+	if (WARN_ON_ONCE(entry && !xa_is_value(entry))) {
 		entry = ERR_PTR(-EIO);
 		goto out_unlock;
 	}
 
 	if (entry) {
-		if (size_flag & RADIX_DAX_PMD) {
+		if (size_flag & DAX_PMD) {
 			if (dax_is_pte_entry(entry)) {
 				put_unlocked_mapping_entry(mapping, index,
 						entry);
@@ -584,7 +582,7 @@ restart:
 					true);
 		}
 
-		entry = dax_radix_locked_entry(0, size_flag | RADIX_DAX_EMPTY);
+		entry = dax_radix_locked_entry(0, size_flag | DAX_EMPTY);
 
 		err = __radix_tree_insert(&mapping->i_pages, index,
 				dax_radix_order(entry), entry);
@@ -673,8 +671,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
 			if (index >= end)
 				break;
 
-			if (WARN_ON_ONCE(
-			     !radix_tree_exceptional_entry(pvec_ent)))
+			if (WARN_ON_ONCE(!xa_is_value(pvec_ent)))
 				continue;
 
 			xa_lock_irq(&mapping->i_pages);
@@ -713,7 +710,7 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping,
 
 	xa_lock_irq(pages);
 	entry = get_unlocked_mapping_entry(mapping, index, NULL);
-	if (!entry || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry)))
+	if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
 		goto out;
 	if (!trunc &&
 	    (radix_tree_tag_get(pages, index, PAGECACHE_TAG_DIRTY) ||
@@ -729,8 +726,8 @@ out:
 	return ret;
 }
 /*
- * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree
- * entry to get unlocked before deleting it.
+ * Delete DAX entry at @index from @mapping.  Wait for it
+ * to be unlocked before deleting it.
  */
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 {
@@ -740,7 +737,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 	 * This gets called from truncate / punch_hole path. As such, the caller
 	 * must hold locks protecting against concurrent modifications of the
 	 * radix tree (usually fs-private i_mmap_sem for writing). Since the
-	 * caller has seen exceptional entry for this index, we better find it
+	 * caller has seen a DAX entry for this index, we better find it
 	 * at that index as well...
 	 */
 	WARN_ON_ONCE(!ret);
@@ -748,7 +745,7 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 }
 
 /*
- * Invalidate exceptional DAX entry if it is clean.
+ * Invalidate DAX entry if it is clean.
  */
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 				      pgoff_t index)
@@ -802,7 +799,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
 	if (dirty)
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 
-	if (dax_is_zero_entry(entry) && !(flags & RADIX_DAX_ZERO_PAGE)) {
+	if (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE)) {
 		/* we are replacing a zero page with block mapping */
 		if (dax_is_pmd_entry(entry))
 			unmap_mapping_pages(mapping, index & ~PG_PMD_COLOUR,
@@ -940,13 +937,13 @@ static int dax_writeback_one(struct dax_device *dax_dev,
 	 * A page got tagged dirty in DAX mapping? Something is seriously
 	 * wrong.
 	 */
-	if (WARN_ON(!radix_tree_exceptional_entry(entry)))
+	if (WARN_ON(!xa_is_value(entry)))
 		return -EIO;
 
 	xa_lock_irq(pages);
 	entry2 = get_unlocked_mapping_entry(mapping, index, &slot);
 	/* Entry got punched out / reallocated? */
-	if (!entry2 || WARN_ON_ONCE(!radix_tree_exceptional_entry(entry2)))
+	if (!entry2 || WARN_ON_ONCE(!xa_is_value(entry2)))
 		goto put_unlocked;
 	/*
 	 * Entry got reallocated elsewhere? No need to writeback. We have to
@@ -1123,8 +1120,9 @@ static vm_fault_t dax_load_hole(struct address_space *mapping, void *entry,
 	pfn_t pfn = pfn_to_pfn_t(my_zero_pfn(vaddr));
 	vm_fault_t ret;
 
-	dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE,
-			false);
+	dax_insert_mapping_entry(mapping, vmf, entry, pfn,
+			DAX_ZERO_PAGE, false);
+
 	ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
 	trace_dax_load_hole(inode, vmf, ret);
 	return ret;
@@ -1514,7 +1512,7 @@ static vm_fault_t dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
 
 	pfn = page_to_pfn_t(zero_page);
 	ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
-			RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
+			DAX_PMD | DAX_ZERO_PAGE, false);
 
 	ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
 	if (!pmd_none(*(vmf->pmd))) {
@@ -1597,7 +1595,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 	 * is already in the tree, for instance), it will return -EEXIST and
 	 * we just fall back to 4k entries.
 	 */
-	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
+	entry = grab_mapping_entry(mapping, pgoff, DAX_PMD);
 	if (IS_ERR(entry))
 		goto fallback;
 
@@ -1635,7 +1633,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 			goto finish_iomap;
 
 		entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
-						RADIX_DAX_PMD, write && !sync);
+						DAX_PMD, write && !sync);
 
 		/*
 		 * If we are doing synchronous page fault and inode needs fsync,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5ea1d64cb0b4..669abb617321 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -521,7 +521,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 		if (!page)
 			return;
 
-		if (radix_tree_exceptional_entry(page))
+		if (xa_is_value(page))
 			mss->swap += PAGE_SIZE;
 		else
 			put_page(page);
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 34149e8b5f73..e9e76ab4fbbf 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -28,34 +28,26 @@
 #include <linux/rcupdate.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
+#include <linux/xarray.h>
 
 /*
  * The bottom two bits of the slot determine how the remaining bits in the
  * slot are interpreted:
  *
  * 00 - data pointer
- * 01 - internal entry
- * 10 - exceptional entry
- * 11 - this bit combination is currently unused/reserved
+ * 10 - internal entry
+ * x1 - value entry
  *
  * The internal entry may be a pointer to the next level in the tree, a
  * sibling entry, or an indicator that the entry in this slot has been moved
  * to another location in the tree and the lookup should be restarted.  While
  * NULL fits the 'data pointer' pattern, it means that there is no entry in
  * the tree for this index (no matter what level of the tree it is found at).
- * This means that you cannot store NULL in the tree as a value for the index.
+ * This means that storing a NULL entry in the tree is the same as deleting
+ * the entry from the tree.
  */
 #define RADIX_TREE_ENTRY_MASK		3UL
-#define RADIX_TREE_INTERNAL_NODE	1UL
-
-/*
- * Most users of the radix tree store pointers but shmem/tmpfs stores swap
- * entries in the same tree.  They are marked as exceptional entries to
- * distinguish them from pointers to struct page.
- * EXCEPTIONAL_ENTRY tests the bit, EXCEPTIONAL_SHIFT shifts content past it.
- */
-#define RADIX_TREE_EXCEPTIONAL_ENTRY	2
-#define RADIX_TREE_EXCEPTIONAL_SHIFT	2
+#define RADIX_TREE_INTERNAL_NODE	2UL
 
 static inline bool radix_tree_is_internal_node(void *ptr)
 {
@@ -83,11 +75,10 @@ static inline bool radix_tree_is_internal_node(void *ptr)
 
 /*
  * @count is the count of every non-NULL element in the ->slots array
- * whether that is an exceptional entry, a retry entry, a user pointer,
+ * whether that is a value entry, a retry entry, a user pointer,
  * a sibling entry or a pointer to the next level of the tree.
  * @exceptional is the count of every element in ->slots which is
- * either radix_tree_exceptional_entry() or is a sibling entry for an
- * exceptional entry.
+ * either a value entry or a sibling of a value entry.
  */
 struct radix_tree_node {
 	unsigned char	shift;		/* Bits remaining in each slot */
@@ -268,17 +259,6 @@ static inline int radix_tree_deref_retry(void *arg)
 	return unlikely(radix_tree_is_internal_node(arg));
 }
 
-/**
- * radix_tree_exceptional_entry	- radix_tree_deref_slot gave exceptional entry?
- * @arg:	value returned by radix_tree_deref_slot
- * Returns:	0 if well-aligned pointer, non-0 if exceptional entry.
- */
-static inline int radix_tree_exceptional_entry(void *arg)
-{
-	/* Not unlikely because radix_tree_exception often tested first */
-	return (unsigned long)arg & RADIX_TREE_EXCEPTIONAL_ENTRY;
-}
-
 /**
  * radix_tree_exception	- radix_tree_deref_slot returned either exception?
  * @arg:	value returned by radix_tree_deref_slot
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 22af9d8a84ae..4d961668e5fc 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -18,9 +18,8 @@
  *
  * swp_entry_t's are *never* stored anywhere in their arch-dependent format.
  */
-#define SWP_TYPE_SHIFT(e)	((sizeof(e.val) * 8) - \
-			(MAX_SWAPFILES_SHIFT + RADIX_TREE_EXCEPTIONAL_SHIFT))
-#define SWP_OFFSET_MASK(e)	((1UL << SWP_TYPE_SHIFT(e)) - 1)
+#define SWP_TYPE_SHIFT	(BITS_PER_XA_VALUE - MAX_SWAPFILES_SHIFT)
+#define SWP_OFFSET_MASK	((1UL << SWP_TYPE_SHIFT) - 1)
 
 /*
  * Store a type+offset into a swp_entry_t in an arch-independent format
@@ -29,8 +28,7 @@ static inline swp_entry_t swp_entry(unsigned long type, pgoff_t offset)
 {
 	swp_entry_t ret;
 
-	ret.val = (type << SWP_TYPE_SHIFT(ret)) |
-			(offset & SWP_OFFSET_MASK(ret));
+	ret.val = (type << SWP_TYPE_SHIFT) | (offset & SWP_OFFSET_MASK);
 	return ret;
 }
 
@@ -40,7 +38,7 @@ static inline swp_entry_t swp_entry(unsigned long type, pgoff_t offset)
  */
 static inline unsigned swp_type(swp_entry_t entry)
 {
-	return (entry.val >> SWP_TYPE_SHIFT(entry));
+	return (entry.val >> SWP_TYPE_SHIFT);
 }
 
 /*
@@ -49,7 +47,7 @@ static inline unsigned swp_type(swp_entry_t entry)
  */
 static inline pgoff_t swp_offset(swp_entry_t entry)
 {
-	return entry.val & SWP_OFFSET_MASK(entry);
+	return entry.val & SWP_OFFSET_MASK;
 }
 
 #ifdef CONFIG_MMU
@@ -90,16 +88,13 @@ static inline swp_entry_t radix_to_swp_entry(void *arg)
 {
 	swp_entry_t entry;
 
-	entry.val = (unsigned long)arg >> RADIX_TREE_EXCEPTIONAL_SHIFT;
+	entry.val = xa_to_value(arg);
 	return entry;
 }
 
 static inline void *swp_to_radix_entry(swp_entry_t entry)
 {
-	unsigned long value;
-
-	value = entry.val << RADIX_TREE_EXCEPTIONAL_SHIFT;
-	return (void *)(value | RADIX_TREE_EXCEPTIONAL_ENTRY);
+	return xa_mk_value(entry.val);
 }
 
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 9e4c86853fa4..5c8acfc4ff55 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -5,9 +5,111 @@
  * eXtensible Arrays
  * Copyright (c) 2017 Microsoft Corporation
  * Author: Matthew Wilcox <willy@infradead.org>
+ *
+ * See Documentation/core-api/xarray.rst for how to use the XArray.
  */
 
+#include <linux/bug.h>
 #include <linux/spinlock.h>
+#include <linux/types.h>
+
+/*
+ * The bottom two bits of the entry determine how the XArray interprets
+ * the contents:
+ *
+ * 00: Pointer entry
+ * 10: Internal entry
+ * x1: Value entry or tagged pointer
+ *
+ * Attempting to store internal entries in the XArray is a bug.
+ */
+
+#define BITS_PER_XA_VALUE	(BITS_PER_LONG - 1)
+
+/**
+ * xa_mk_value() - Create an XArray entry from an integer.
+ * @v: Value to store in XArray.
+ *
+ * Context: Any context.
+ * Return: An entry suitable for storing in the XArray.
+ */
+static inline void *xa_mk_value(unsigned long v)
+{
+	WARN_ON((long)v < 0);
+	return (void *)((v << 1) | 1);
+}
+
+/**
+ * xa_to_value() - Get value stored in an XArray entry.
+ * @entry: XArray entry.
+ *
+ * Context: Any context.
+ * Return: The value stored in the XArray entry.
+ */
+static inline unsigned long xa_to_value(const void *entry)
+{
+	return (unsigned long)entry >> 1;
+}
+
+/**
+ * xa_is_value() - Determine if an entry is a value.
+ * @entry: XArray entry.
+ *
+ * Context: Any context.
+ * Return: True if the entry is a value, false if it is a pointer.
+ */
+static inline bool xa_is_value(const void *entry)
+{
+	return (unsigned long)entry & 1;
+}
+
+/**
+ * xa_tag_pointer() - Create an XArray entry for a tagged pointer.
+ * @p: Plain pointer.
+ * @tag: Tag value (0, 1 or 3).
+ *
+ * If the user of the XArray prefers, they can tag their pointers instead
+ * of storing value entries.  Three tags are available (0, 1 and 3).
+ * These are distinct from the xa_mark_t as they are not replicated up
+ * through the array and cannot be searched for.
+ *
+ * Context: Any context.
+ * Return: An XArray entry.
+ */
+static inline void *xa_tag_pointer(void *p, unsigned long tag)
+{
+	return (void *)((unsigned long)p | tag);
+}
+
+/**
+ * xa_untag_pointer() - Turn an XArray entry into a plain pointer.
+ * @entry: XArray entry.
+ *
+ * If you have stored a tagged pointer in the XArray, call this function
+ * to get the untagged version of the pointer.
+ *
+ * Context: Any context.
+ * Return: A pointer.
+ */
+static inline void *xa_untag_pointer(void *entry)
+{
+	return (void *)((unsigned long)entry & ~3UL);
+}
+
+/**
+ * xa_pointer_tag() - Get the tag stored in an XArray entry.
+ * @entry: XArray entry.
+ *
+ * If you have stored a tagged pointer in the XArray, call this function
+ * to get the tag of that pointer.
+ *
+ * Context: Any context.
+ * Return: A tag.
+ */
+static inline unsigned int xa_pointer_tag(void *entry)
+{
+	return (unsigned long)entry & 3UL;
+}
 
 #define xa_trylock(xa)		spin_trylock(&(xa)->xa_lock)
 #define xa_lock(xa)		spin_lock(&(xa)->xa_lock)
diff --git a/lib/idr.c b/lib/idr.c
index 729e381e23b4..88419fbc5737 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -338,11 +338,8 @@ EXPORT_SYMBOL(idr_replace);
  * by the number of bits in the leaf bitmap before doing a radix tree lookup.
  *
  * As an optimisation, if there are only a few low bits set in any given
- * leaf, instead of allocating a 128-byte bitmap, we use the 'exceptional
- * entry' functionality of the radix tree to store BITS_PER_LONG - 2 bits
- * directly in the entry.  By being really tricksy, we could store
- * BITS_PER_LONG - 1 bits, but there're diminishing returns after optimising
- * for 0-3 allocated IDs.
+ * leaf, instead of allocating a 128-byte bitmap, we store the bits
+ * directly in the entry.
  *
  * We allow the radix tree 'exceptional' count to get out of date.  Nothing
  * in the IDA nor the radix tree code checks it.  If it becomes important
@@ -366,12 +363,11 @@ static int ida_get_new_above(struct ida *ida, int start)
 	struct radix_tree_iter iter;
 	struct ida_bitmap *bitmap;
 	unsigned long index;
-	unsigned bit, ebit;
+	unsigned bit;
 	int new;
 
 	index = start / IDA_BITMAP_BITS;
 	bit = start % IDA_BITMAP_BITS;
-	ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT;
 
 	slot = radix_tree_iter_init(&iter, index);
 	for (;;) {
@@ -386,25 +382,23 @@ static int ida_get_new_above(struct ida *ida, int start)
 				return PTR_ERR(slot);
 			}
 		}
-		if (iter.index > index) {
+		if (iter.index > index)
 			bit = 0;
-			ebit = RADIX_TREE_EXCEPTIONAL_SHIFT;
-		}
 		new = iter.index * IDA_BITMAP_BITS;
 		bitmap = rcu_dereference_raw(*slot);
-		if (radix_tree_exception(bitmap)) {
-			unsigned long tmp = (unsigned long)bitmap;
-			ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit);
-			if (ebit < BITS_PER_LONG) {
-				tmp |= 1UL << ebit;
-				rcu_assign_pointer(*slot, (void *)tmp);
-				return new + ebit -
-					RADIX_TREE_EXCEPTIONAL_SHIFT;
+		if (xa_is_value(bitmap)) {
+			unsigned long tmp = xa_to_value(bitmap);
+			int vbit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE,
+							bit);
+			if (vbit < BITS_PER_XA_VALUE) {
+				tmp |= 1UL << vbit;
+				rcu_assign_pointer(*slot, xa_mk_value(tmp));
+				return new + vbit;
 			}
 			bitmap = this_cpu_xchg(ida_bitmap, NULL);
 			if (!bitmap)
 				return -EAGAIN;
-			bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT;
+			bitmap->bitmap[0] = tmp;
 			rcu_assign_pointer(*slot, bitmap);
 		}
 
@@ -425,17 +419,14 @@ static int ida_get_new_above(struct ida *ida, int start)
 			new += bit;
 			if (new < 0)
 				return -ENOSPC;
-			if (ebit < BITS_PER_LONG) {
-				bitmap = (void *)((1UL << ebit) |
-						RADIX_TREE_EXCEPTIONAL_ENTRY);
-				radix_tree_iter_replace(root, &iter, slot,
-						bitmap);
-				return new;
+			if (bit < BITS_PER_XA_VALUE) {
+				bitmap = xa_mk_value(1UL << bit);
+			} else {
+				bitmap = this_cpu_xchg(ida_bitmap, NULL);
+				if (!bitmap)
+					return -EAGAIN;
+				__set_bit(bit, bitmap->bitmap);
 			}
-			bitmap = this_cpu_xchg(ida_bitmap, NULL);
-			if (!bitmap)
-				return -EAGAIN;
-			__set_bit(bit, bitmap->bitmap);
 			radix_tree_iter_replace(root, &iter, slot, bitmap);
 		}
 
@@ -457,9 +448,9 @@ static void ida_remove(struct ida *ida, int id)
 		goto err;
 
 	bitmap = rcu_dereference_raw(*slot);
-	if (radix_tree_exception(bitmap)) {
+	if (xa_is_value(bitmap)) {
 		btmp = (unsigned long *)slot;
-		offset += RADIX_TREE_EXCEPTIONAL_SHIFT;
+		offset += 1; /* Intimate knowledge of the value encoding */
 		if (offset >= BITS_PER_LONG)
 			goto err;
 	} else {
@@ -470,9 +461,8 @@ static void ida_remove(struct ida *ida, int id)
 
 	__clear_bit(offset, btmp);
 	radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE);
-	if (radix_tree_exception(bitmap)) {
-		if (rcu_dereference_raw(*slot) ==
-					(void *)RADIX_TREE_EXCEPTIONAL_ENTRY)
+	if (xa_is_value(bitmap)) {
+		if (xa_to_value(rcu_dereference_raw(*slot)) == 0)
 			radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
 	} else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) {
 		kfree(bitmap);
@@ -503,7 +493,7 @@ void ida_destroy(struct ida *ida)
 	xa_lock_irqsave(&ida->ida_rt, flags);
 	radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) {
 		struct ida_bitmap *bitmap = rcu_dereference_raw(*slot);
-		if (!radix_tree_exception(bitmap))
+		if (!xa_is_value(bitmap))
 			kfree(bitmap);
 		radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
 	}
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index a904a8ddd174..b6c0e7f3a894 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -340,14 +340,12 @@ static void dump_ida_node(void *entry, unsigned long index)
 		for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
 			dump_ida_node(node->slots[i],
 					index | (i << node->shift));
-	} else if (radix_tree_exceptional_entry(entry)) {
+	} else if (xa_is_value(entry)) {
 		pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n",
 				entry, (int)(index & RADIX_TREE_MAP_MASK),
 				index * IDA_BITMAP_BITS,
-				index * IDA_BITMAP_BITS + BITS_PER_LONG -
-					RADIX_TREE_EXCEPTIONAL_SHIFT,
-				(unsigned long)entry >>
-					RADIX_TREE_EXCEPTIONAL_SHIFT);
+				index * IDA_BITMAP_BITS + BITS_PER_XA_VALUE,
+				xa_to_value(entry));
 	} else {
 		struct ida_bitmap *bitmap = entry;
 
@@ -656,7 +654,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
 		BUG_ON(shift > BITS_PER_LONG);
 		if (radix_tree_is_internal_node(entry)) {
 			entry_to_node(entry)->parent = node;
-		} else if (radix_tree_exceptional_entry(entry)) {
+		} else if (xa_is_value(entry)) {
 			/* Moving an exceptional root->rnode to a node */
 			node->exceptional = 1;
 		}
@@ -955,12 +953,12 @@ static inline int insert_entries(struct radix_tree_node *node,
 					!is_sibling_entry(node, old) &&
 					(old != RADIX_TREE_RETRY))
 			radix_tree_free_nodes(old);
-		if (radix_tree_exceptional_entry(old))
+		if (xa_is_value(old))
 			node->exceptional--;
 	}
 	if (node) {
 		node->count += n;
-		if (radix_tree_exceptional_entry(item))
+		if (xa_is_value(item))
 			node->exceptional += n;
 	}
 	return n;
@@ -974,7 +972,7 @@ static inline int insert_entries(struct radix_tree_node *node,
 	rcu_assign_pointer(*slot, item);
 	if (node) {
 		node->count++;
-		if (radix_tree_exceptional_entry(item))
+		if (xa_is_value(item))
 			node->exceptional++;
 	}
 	return 1;
@@ -1190,8 +1188,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
 			  radix_tree_update_node_t update_node)
 {
 	void *old = rcu_dereference_raw(*slot);
-	int exceptional = !!radix_tree_exceptional_entry(item) -
-				!!radix_tree_exceptional_entry(old);
+	int exceptional = !!xa_is_value(item) - !!xa_is_value(old);
 	int count = calculate_count(root, node, slot, item, old);
 
 	/*
@@ -1992,7 +1989,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root,
 				struct radix_tree_node *node, void __rcu **slot)
 {
 	void *old = rcu_dereference_raw(*slot);
-	int exceptional = radix_tree_exceptional_entry(old) ? -1 : 0;
+	int exceptional = xa_is_value(old) ? -1 : 0;
 	unsigned offset = get_slot_offset(node, slot);
 	int tag;
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 52517f28e6f4..4de14e75c4ec 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -127,7 +127,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
 
 		p = radix_tree_deref_slot_protected(slot,
 						    &mapping->i_pages.xa_lock);
-		if (!radix_tree_exceptional_entry(p))
+		if (!xa_is_value(p))
 			return -EEXIST;
 
 		mapping->nrexceptional--;
@@ -336,7 +336,7 @@ page_cache_tree_delete_batch(struct address_space *mapping,
 			break;
 		page = radix_tree_deref_slot_protected(slot,
 						       &mapping->i_pages.xa_lock);
-		if (radix_tree_exceptional_entry(page))
+		if (xa_is_value(page))
 			continue;
 		if (!tail_pages) {
 			/*
@@ -1355,7 +1355,7 @@ pgoff_t page_cache_next_hole(struct address_space *mapping,
 		struct page *page;
 
 		page = radix_tree_lookup(&mapping->i_pages, index);
-		if (!page || radix_tree_exceptional_entry(page))
+		if (!page || xa_is_value(page))
 			break;
 		index++;
 		if (index == 0)
@@ -1396,7 +1396,7 @@ pgoff_t page_cache_prev_hole(struct address_space *mapping,
 		struct page *page;
 
 		page = radix_tree_lookup(&mapping->i_pages, index);
-		if (!page || radix_tree_exceptional_entry(page))
+		if (!page || xa_is_value(page))
 			break;
 		index--;
 		if (index == ULONG_MAX)
@@ -1539,7 +1539,7 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
 
 repeat:
 	page = find_get_entry(mapping, offset);
-	if (radix_tree_exceptional_entry(page))
+	if (xa_is_value(page))
 		page = NULL;
 	if (!page)
 		goto no_page;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index a31d740e6cd1..4820e4adf853 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1369,7 +1369,7 @@ static void collapse_shmem(struct mm_struct *mm,
 
 		page = radix_tree_deref_slot_protected(slot,
 				&mapping->i_pages.xa_lock);
-		if (radix_tree_exceptional_entry(page) || !PageUptodate(page)) {
+		if (xa_is_value(page) || !PageUptodate(page)) {
 			xa_unlock_irq(&mapping->i_pages);
 			/* swap in or instantiate fallocated page */
 			if (shmem_getpage(mapping->host, index, &page,
diff --git a/mm/madvise.c b/mm/madvise.c
index 972a9eaa898b..9d802566c494 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -251,7 +251,7 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
 		index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
 
 		page = find_get_entry(mapping, index);
-		if (!radix_tree_exceptional_entry(page)) {
+		if (!xa_is_value(page)) {
 			if (page)
 				put_page(page);
 			continue;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e79cb59552d9..29d9d1a69b36 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4750,7 +4750,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
 	/* shmem/tmpfs may report page out on swap: account for that too. */
 	if (shmem_mapping(mapping)) {
 		page = find_get_entry(mapping, pgoff);
-		if (radix_tree_exceptional_entry(page)) {
+		if (xa_is_value(page)) {
 			swp_entry_t swp = radix_to_swp_entry(page);
 			if (do_memsw_account())
 				*entry = swp;
diff --git a/mm/mincore.c b/mm/mincore.c
index fc37afe226e6..4985965aa20a 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -66,7 +66,7 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
 		 * shmem/tmpfs may return swap: account for swapcache
 		 * page too.
 		 */
-		if (radix_tree_exceptional_entry(page)) {
+		if (xa_is_value(page)) {
 			swp_entry_t swp = radix_to_swp_entry(page);
 			page = find_get_page(swap_address_space(swp),
 					     swp_offset(swp));
diff --git a/mm/readahead.c b/mm/readahead.c
index 4e630143a0ba..de657077d41d 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -179,7 +179,7 @@ unsigned int __do_page_cache_readahead(struct address_space *mapping,
 		rcu_read_lock();
 		page = radix_tree_lookup(&mapping->i_pages, page_offset);
 		rcu_read_unlock();
-		if (page && !radix_tree_exceptional_entry(page)) {
+		if (page && !xa_is_value(page)) {
 			/*
 			 * Page already present?  Kick off the current batch of
 			 * contiguous pages before continuing with the next
diff --git a/mm/shmem.c b/mm/shmem.c
index 446942677cd4..c1062760fe41 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -709,7 +709,7 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
 			continue;
 		}
 
-		if (radix_tree_exceptional_entry(page))
+		if (xa_is_value(page))
 			swapped++;
 
 		if (need_resched()) {
@@ -824,7 +824,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			if (index >= end)
 				break;
 
-			if (radix_tree_exceptional_entry(page)) {
+			if (xa_is_value(page)) {
 				if (unfalloc)
 					continue;
 				nr_swaps_freed += !shmem_free_swap(mapping,
@@ -921,7 +921,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			if (index >= end)
 				break;
 
-			if (radix_tree_exceptional_entry(page)) {
+			if (xa_is_value(page)) {
 				if (unfalloc)
 					continue;
 				if (shmem_free_swap(mapping, index, page)) {
@@ -1643,7 +1643,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 repeat:
 	swap.val = 0;
 	page = find_lock_entry(mapping, index);
-	if (radix_tree_exceptional_entry(page)) {
+	if (xa_is_value(page)) {
 		swap = radix_to_swp_entry(page);
 		page = NULL;
 	}
@@ -2578,7 +2578,7 @@ static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
 				index = indices[i];
 			}
 			page = pvec.pages[i];
-			if (page && !radix_tree_exceptional_entry(page)) {
+			if (page && !xa_is_value(page)) {
 				if (!PageUptodate(page))
 					page = NULL;
 			}
diff --git a/mm/swap.c b/mm/swap.c
index 26fc9b5f1b6c..4c5c7fcc6e46 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -965,7 +965,7 @@ void pagevec_remove_exceptionals(struct pagevec *pvec)
 
 	for (i = 0, j = 0; i < pagevec_count(pvec); i++) {
 		struct page *page = pvec->pages[i];
-		if (!radix_tree_exceptional_entry(page))
+		if (!xa_is_value(page))
 			pvec->pages[j++] = page;
 	}
 	pvec->nr = j;
diff --git a/mm/truncate.c b/mm/truncate.c
index 1d2fb2dca96f..ed778555c9f3 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -70,7 +70,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
 		return;
 
 	for (j = 0; j < pagevec_count(pvec); j++)
-		if (radix_tree_exceptional_entry(pvec->pages[j]))
+		if (xa_is_value(pvec->pages[j]))
 			break;
 
 	if (j == pagevec_count(pvec))
@@ -85,7 +85,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
 		struct page *page = pvec->pages[i];
 		pgoff_t index = indices[i];
 
-		if (!radix_tree_exceptional_entry(page)) {
+		if (!xa_is_value(page)) {
 			pvec->pages[j++] = page;
 			continue;
 		}
@@ -347,7 +347,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			if (index >= end)
 				break;
 
-			if (radix_tree_exceptional_entry(page))
+			if (xa_is_value(page))
 				continue;
 
 			if (!trylock_page(page))
@@ -442,7 +442,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 				break;
 			}
 
-			if (radix_tree_exceptional_entry(page))
+			if (xa_is_value(page))
 				continue;
 
 			lock_page(page);
@@ -561,7 +561,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 			if (index > end)
 				break;
 
-			if (radix_tree_exceptional_entry(page)) {
+			if (xa_is_value(page)) {
 				invalidate_exceptional_entry(mapping, index,
 							     page);
 				continue;
@@ -692,7 +692,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 			if (index > end)
 				break;
 
-			if (radix_tree_exceptional_entry(page)) {
+			if (xa_is_value(page)) {
 				if (!invalidate_exceptional_entry2(mapping,
 								   index, page))
 					ret = -EBUSY;
diff --git a/mm/workingset.c b/mm/workingset.c
index 4516dd790129..bb109b3afac2 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -155,8 +155,8 @@
  * refault distance will immediately activate the refaulting page.
  */
 
-#define EVICTION_SHIFT	(RADIX_TREE_EXCEPTIONAL_ENTRY + \
-			 NODES_SHIFT +	\
+#define EVICTION_SHIFT	((BITS_PER_LONG - BITS_PER_XA_VALUE) +	\
+			 NODES_SHIFT +				\
 			 MEM_CGROUP_ID_SHIFT)
 #define EVICTION_MASK	(~0UL >> EVICTION_SHIFT)
 
@@ -173,20 +173,19 @@ static unsigned int bucket_order __read_mostly;
 static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction)
 {
 	eviction >>= bucket_order;
+	eviction &= EVICTION_MASK;
 	eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
 	eviction = (eviction << NODES_SHIFT) | pgdat->node_id;
-	eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT);
 
-	return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY);
+	return xa_mk_value(eviction);
 }
 
 static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
 			  unsigned long *evictionp)
 {
-	unsigned long entry = (unsigned long)shadow;
+	unsigned long entry = xa_to_value(shadow);
 	int memcgid, nid;
 
-	entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT;
 	nid = entry & ((1UL << NODES_SHIFT) - 1);
 	entry >>= NODES_SHIFT;
 	memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1);
@@ -453,7 +452,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 		goto out_invalid;
 	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
 		if (node->slots[i]) {
-			if (WARN_ON_ONCE(!radix_tree_exceptional_entry(node->slots[i])))
+			if (WARN_ON_ONCE(!xa_is_value(node->slots[i])))
 				goto out_invalid;
 			if (WARN_ON_ONCE(!node->exceptional))
 				goto out_invalid;
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index f620c831a4b5..a5a4494922a6 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -19,7 +19,7 @@
 
 #include "test.h"
 
-#define DUMMY_PTR	((void *)0x12)
+#define DUMMY_PTR	((void *)0x10)
 
 int item_idr_free(int id, void *p, void *data)
 {
@@ -411,11 +411,11 @@ void ida_check_conv_user(void)
 		int id = ida_alloc(&ida, GFP_NOWAIT);
 		if (id == -ENOMEM) {
 			IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) !=
-					BITS_PER_LONG - 2);
+					BITS_PER_XA_VALUE);
 			id = ida_alloc(&ida, GFP_KERNEL);
 		} else {
 			IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) ==
-					BITS_PER_LONG - 2);
+					BITS_PER_XA_VALUE);
 		}
 		IDA_BUG_ON(&ida, id != i);
 	}
diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h
index 24f13d27a8da..d1635a5bef02 100644
--- a/tools/testing/radix-tree/linux/radix-tree.h
+++ b/tools/testing/radix-tree/linux/radix-tree.h
@@ -2,7 +2,6 @@
 #ifndef _TEST_RADIX_TREE_H
 #define _TEST_RADIX_TREE_H
 
-#include "generated/map-shift.h"
 #include "../../../../include/linux/radix-tree.h"
 
 extern int kmalloc_verbose;
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 7bf405638b0b..2b4f4dba1882 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -39,12 +39,11 @@ static void __multiorder_tag_test(int index, int order)
 
 	/*
 	 * Verify we get collisions for covered indices.  We try and fail to
-	 * insert an exceptional entry so we don't leak memory via
+	 * insert a value entry so we don't leak memory via
 	 * item_insert_order().
 	 */
 	for_each_index(i, base, order) {
-		err = __radix_tree_insert(&tree, i, order,
-				(void *)(0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY));
+		err = __radix_tree_insert(&tree, i, order, xa_mk_value(0xA0));
 		assert(err == -EEXIST);
 	}
 
@@ -380,8 +379,8 @@ static void multiorder_join1(unsigned long index,
 }
 
 /*
- * Check that the accounting of exceptional entries is handled correctly
- * by joining an exceptional entry to a normal pointer.
+ * Check that the accounting of value entries is handled correctly
+ * by joining a value entry to a normal pointer.
  */
 static void multiorder_join2(unsigned order1, unsigned order2)
 {
@@ -391,9 +390,9 @@ static void multiorder_join2(unsigned order1, unsigned order2)
 	void *item2;
 
 	item_insert_order(&tree, 0, order2);
-	radix_tree_insert(&tree, 1 << order2, (void *)0x12UL);
+	radix_tree_insert(&tree, 1 << order2, xa_mk_value(5));
 	item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
-	assert(item2 == (void *)0x12UL);
+	assert(item2 == xa_mk_value(5));
 	assert(node->exceptional == 1);
 
 	item2 = radix_tree_lookup(&tree, 0);
@@ -407,7 +406,7 @@ static void multiorder_join2(unsigned order1, unsigned order2)
 }
 
 /*
- * This test revealed an accounting bug for exceptional entries at one point.
+ * This test revealed an accounting bug for value entries at one point.
  * Nodes were being freed back into the pool with an elevated exception count
  * by radix_tree_join() and then radix_tree_split() was failing to zero the
  * count of exceptional entries.
@@ -421,16 +420,16 @@ static void multiorder_join3(unsigned int order)
 	unsigned long i;
 
 	for (i = 0; i < (1 << order); i++) {
-		radix_tree_insert(&tree, i, (void *)0x12UL);
+		radix_tree_insert(&tree, i, xa_mk_value(5));
 	}
 
-	radix_tree_join(&tree, 0, order, (void *)0x16UL);
+	radix_tree_join(&tree, 0, order, xa_mk_value(7));
 	rcu_barrier();
 
 	radix_tree_split(&tree, 0, 0);
 
 	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		radix_tree_iter_replace(&tree, &iter, slot, (void *)0x12UL);
+		radix_tree_iter_replace(&tree, &iter, slot, xa_mk_value(5));
 	}
 
 	__radix_tree_lookup(&tree, 0, &node, NULL);
@@ -517,10 +516,10 @@ static void __multiorder_split2(int old_order, int new_order)
 	struct radix_tree_node *node;
 	void *item;
 
-	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
+	__radix_tree_insert(&tree, 0, old_order, xa_mk_value(5));
 
 	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item == (void *)0x12);
+	assert(item == xa_mk_value(5));
 	assert(node->exceptional > 0);
 
 	radix_tree_split(&tree, 0, new_order);
@@ -530,7 +529,7 @@ static void __multiorder_split2(int old_order, int new_order)
 	}
 
 	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item != (void *)0x12);
+	assert(item != xa_mk_value(5));
 	assert(node->exceptional == 0);
 
 	item_kill_tree(&tree);
@@ -544,40 +543,40 @@ static void __multiorder_split3(int old_order, int new_order)
 	struct radix_tree_node *node;
 	void *item;
 
-	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
+	__radix_tree_insert(&tree, 0, old_order, xa_mk_value(5));
 
 	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item == (void *)0x12);
+	assert(item == xa_mk_value(5));
 	assert(node->exceptional > 0);
 
 	radix_tree_split(&tree, 0, new_order);
 	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		radix_tree_iter_replace(&tree, &iter, slot, (void *)0x16);
+		radix_tree_iter_replace(&tree, &iter, slot, xa_mk_value(7));
 	}
 
 	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item == (void *)0x16);
+	assert(item == xa_mk_value(7));
 	assert(node->exceptional > 0);
 
 	item_kill_tree(&tree);
 
-	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
+	__radix_tree_insert(&tree, 0, old_order, xa_mk_value(5));
 
 	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item == (void *)0x12);
+	assert(item == xa_mk_value(5));
 	assert(node->exceptional > 0);
 
 	radix_tree_split(&tree, 0, new_order);
 	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
 		if (iter.index == (1 << new_order))
 			radix_tree_iter_replace(&tree, &iter, slot,
-						(void *)0x16);
+						xa_mk_value(7));
 		else
 			radix_tree_iter_replace(&tree, &iter, slot, NULL);
 	}
 
 	item = __radix_tree_lookup(&tree, 1 << new_order, &node, NULL);
-	assert(item == (void *)0x16);
+	assert(item == xa_mk_value(7));
 	assert(node->count == node->exceptional);
 	do {
 		node = node->parent;
@@ -610,13 +609,13 @@ static void multiorder_account(void)
 
 	item_insert_order(&tree, 0, 5);
 
-	__radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12);
+	__radix_tree_insert(&tree, 1 << 5, 5, xa_mk_value(5));
 	__radix_tree_lookup(&tree, 0, &node, NULL);
 	assert(node->count == node->exceptional * 2);
 	radix_tree_delete(&tree, 1 << 5);
 	assert(node->exceptional == 0);
 
-	__radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12);
+	__radix_tree_insert(&tree, 1 << 5, 5, xa_mk_value(5));
 	__radix_tree_lookup(&tree, 1 << 5, &node, &slot);
 	assert(node->count == node->exceptional * 2);
 	__radix_tree_replace(&tree, node, slot, NULL, NULL);
diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c
index def6015570b2..62de66c314b7 100644
--- a/tools/testing/radix-tree/test.c
+++ b/tools/testing/radix-tree/test.c
@@ -295,7 +295,7 @@ void item_kill_tree(struct radix_tree_root *root)
 	int nfound;
 
 	radix_tree_for_each_slot(slot, root, &iter, 0) {
-		if (radix_tree_exceptional_entry(*slot))
+		if (xa_is_value(*slot))
 			radix_tree_delete(root, iter.index);
 	}
 
-- 
cgit v1.2.3


From 02c02bf12c5d838603eed44195d3e91f094e2ab2 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 3 Nov 2017 23:09:45 -0400
Subject: xarray: Change definition of sibling entries

Instead of storing a pointer to the slot containing the canonical entry,
store the offset of the slot.  Produces slightly more efficient code
(~300 bytes) and simplifies the implementation.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Josef Bacik <jbacik@fb.com>
---
 include/linux/radix-tree.h                    |  5 +-
 include/linux/xarray.h                        | 92 +++++++++++++++++++++++++++
 lib/Kconfig                                   |  7 ++
 lib/radix-tree.c                              | 64 ++++++-------------
 tools/testing/radix-tree/Makefile             |  2 +-
 tools/testing/radix-tree/generated/autoconf.h |  1 +
 6 files changed, 121 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index e9e76ab4fbbf..60f3d8eb2cb7 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -59,10 +59,7 @@ static inline bool radix_tree_is_internal_node(void *ptr)
 
 #define RADIX_TREE_MAX_TAGS 3
 
-#ifndef RADIX_TREE_MAP_SHIFT
-#define RADIX_TREE_MAP_SHIFT	(CONFIG_BASE_SMALL ? 4 : 6)
-#endif
-
+#define RADIX_TREE_MAP_SHIFT	XA_CHUNK_SHIFT
 #define RADIX_TREE_MAP_SIZE	(1UL << RADIX_TREE_MAP_SHIFT)
 #define RADIX_TREE_MAP_MASK	(RADIX_TREE_MAP_SIZE-1)
 
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 5c8acfc4ff55..4d1cd7a083e8 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -22,6 +22,12 @@
  * x1: Value entry or tagged pointer
  *
  * Attempting to store internal entries in the XArray is a bug.
+ *
+ * Most internal entries are pointers to the next node in the tree.
+ * The following internal entries have a special meaning:
+ *
+ * 0-62: Sibling entries
+ * 256: Retry entry
  */
 
 #define BITS_PER_XA_VALUE	(BITS_PER_LONG - 1)
@@ -111,6 +117,42 @@ static inline unsigned int xa_pointer_tag(void *entry)
 	return (unsigned long)entry & 3UL;
 }
 
+/*
+ * xa_mk_internal() - Create an internal entry.
+ * @v: Value to turn into an internal entry.
+ *
+ * Context: Any context.
+ * Return: An XArray internal entry corresponding to this value.
+ */
+static inline void *xa_mk_internal(unsigned long v)
+{
+	return (void *)((v << 2) | 2);
+}
+
+/*
+ * xa_to_internal() - Extract the value from an internal entry.
+ * @entry: XArray entry.
+ *
+ * Context: Any context.
+ * Return: The value which was stored in the internal entry.
+ */
+static inline unsigned long xa_to_internal(const void *entry)
+{
+	return (unsigned long)entry >> 2;
+}
+
+/*
+ * xa_is_internal() - Is the entry an internal entry?
+ * @entry: XArray entry.
+ *
+ * Context: Any context.
+ * Return: %true if the entry is an internal entry.
+ */
+static inline bool xa_is_internal(const void *entry)
+{
+	return ((unsigned long)entry & 3) == 2;
+}
+
 #define xa_trylock(xa)		spin_trylock(&(xa)->xa_lock)
 #define xa_lock(xa)		spin_lock(&(xa)->xa_lock)
 #define xa_unlock(xa)		spin_unlock(&(xa)->xa_lock)
@@ -123,4 +165,54 @@ static inline unsigned int xa_pointer_tag(void *entry)
 #define xa_unlock_irqrestore(xa, flags) \
 				spin_unlock_irqrestore(&(xa)->xa_lock, flags)
 
+/* Everything below here is the Advanced API.  Proceed with caution. */
+
+/*
+ * The xarray is constructed out of a set of 'chunks' of pointers.  Choosing
+ * the best chunk size requires some tradeoffs.  A power of two recommends
+ * itself so that we can walk the tree based purely on shifts and masks.
+ * Generally, the larger the better; as the number of slots per level of the
+ * tree increases, the less tall the tree needs to be.  But that needs to be
+ * balanced against the memory consumption of each node.  On a 64-bit system,
+ * xa_node is currently 576 bytes, and we get 7 of them per 4kB page.  If we
+ * doubled the number of slots per node, we'd get only 3 nodes per 4kB page.
+ */
+#ifndef XA_CHUNK_SHIFT
+#define XA_CHUNK_SHIFT		(CONFIG_BASE_SMALL ? 4 : 6)
+#endif
+#define XA_CHUNK_SIZE		(1UL << XA_CHUNK_SHIFT)
+#define XA_CHUNK_MASK		(XA_CHUNK_SIZE - 1)
+
+/* Private */
+static inline bool xa_is_node(const void *entry)
+{
+	return xa_is_internal(entry) && (unsigned long)entry > 4096;
+}
+
+/* Private */
+static inline void *xa_mk_sibling(unsigned int offset)
+{
+	return xa_mk_internal(offset);
+}
+
+/* Private */
+static inline unsigned long xa_to_sibling(const void *entry)
+{
+	return xa_to_internal(entry);
+}
+
+/**
+ * xa_is_sibling() - Is the entry a sibling entry?
+ * @entry: Entry retrieved from the XArray
+ *
+ * Return: %true if the entry is a sibling entry.
+ */
+static inline bool xa_is_sibling(const void *entry)
+{
+	return IS_ENABLED(CONFIG_XARRAY_MULTI) && xa_is_internal(entry) &&
+		(entry < xa_mk_sibling(XA_CHUNK_SIZE - 1));
+}
+
+#define XA_RETRY_ENTRY		xa_mk_internal(256)
+
 #endif /* _LINUX_XARRAY_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index a3928d4438b5..40bfa6ccd294 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -399,8 +399,15 @@ config INTERVAL_TREE
 
 	  for more information.
 
+config XARRAY_MULTI
+	bool
+	help
+	  Support entries which occupy multiple consecutive indices in the
+	  XArray.
+
 config RADIX_TREE_MULTIORDER
 	bool
+	select XARRAY_MULTI
 
 config ASSOCIATIVE_ARRAY
 	bool
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index b6c0e7f3a894..59b28111eabc 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -38,6 +38,7 @@
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/xarray.h>
 
 
 /* Number of nodes in fully populated tree of given height */
@@ -98,24 +99,7 @@ static inline void *node_to_entry(void *ptr)
 	return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
 }
 
-#define RADIX_TREE_RETRY	node_to_entry(NULL)
-
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-/* Sibling slots point directly to another slot in the same node */
-static inline
-bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
-{
-	void __rcu **ptr = node;
-	return (parent->slots <= ptr) &&
-			(ptr < parent->slots + RADIX_TREE_MAP_SIZE);
-}
-#else
-static inline
-bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
-{
-	return false;
-}
-#endif
+#define RADIX_TREE_RETRY	XA_RETRY_ENTRY
 
 static inline unsigned long
 get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
@@ -129,16 +113,10 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
 	unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
 	void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);
 
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-	if (radix_tree_is_internal_node(entry)) {
-		if (is_sibling_entry(parent, entry)) {
-			void __rcu **sibentry;
-			sibentry = (void __rcu **) entry_to_node(entry);
-			offset = get_slot_offset(parent, sibentry);
-			entry = rcu_dereference_raw(*sibentry);
-		}
+	if (xa_is_sibling(entry)) {
+		offset = xa_to_sibling(entry);
+		entry = rcu_dereference_raw(parent->slots[offset]);
 	}
-#endif
 
 	*nodep = (void *)entry;
 	return offset;
@@ -300,10 +278,10 @@ static void dump_node(struct radix_tree_node *node, unsigned long index)
 		} else if (!radix_tree_is_internal_node(entry)) {
 			pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n",
 					entry, i, first, last, node);
-		} else if (is_sibling_entry(node, entry)) {
+		} else if (xa_is_sibling(entry)) {
 			pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n",
 					entry, i, first, last, node,
-					*(void **)entry_to_node(entry));
+					node->slots[xa_to_sibling(entry)]);
 		} else {
 			dump_node(entry_to_node(entry), first);
 		}
@@ -881,8 +859,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
 
 	for (;;) {
 		void *entry = rcu_dereference_raw(child->slots[offset]);
-		if (radix_tree_is_internal_node(entry) && child->shift &&
-				!is_sibling_entry(child, entry)) {
+		if (xa_is_node(entry) && child->shift) {
 			child = entry_to_node(entry);
 			offset = 0;
 			continue;
@@ -904,7 +881,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
 static inline int insert_entries(struct radix_tree_node *node,
 		void __rcu **slot, void *item, unsigned order, bool replace)
 {
-	struct radix_tree_node *child;
+	void *sibling;
 	unsigned i, n, tag, offset, tags = 0;
 
 	if (node) {
@@ -922,7 +899,7 @@ static inline int insert_entries(struct radix_tree_node *node,
 		offset = offset & ~(n - 1);
 		slot = &node->slots[offset];
 	}
-	child = node_to_entry(slot);
+	sibling = xa_mk_sibling(offset);
 
 	for (i = 0; i < n; i++) {
 		if (slot[i]) {
@@ -939,7 +916,7 @@ static inline int insert_entries(struct radix_tree_node *node,
 	for (i = 0; i < n; i++) {
 		struct radix_tree_node *old = rcu_dereference_raw(slot[i]);
 		if (i) {
-			rcu_assign_pointer(slot[i], child);
+			rcu_assign_pointer(slot[i], sibling);
 			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
 				if (tags & (1 << tag))
 					tag_clear(node, tag, offset + i);
@@ -949,9 +926,7 @@ static inline int insert_entries(struct radix_tree_node *node,
 				if (tags & (1 << tag))
 					tag_set(node, tag, offset);
 		}
-		if (radix_tree_is_internal_node(old) &&
-					!is_sibling_entry(node, old) &&
-					(old != RADIX_TREE_RETRY))
+		if (xa_is_node(old))
 			radix_tree_free_nodes(old);
 		if (xa_is_value(old))
 			node->exceptional--;
@@ -1112,10 +1087,10 @@ static inline void replace_sibling_entries(struct radix_tree_node *node,
 				void __rcu **slot, int count, int exceptional)
 {
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
-	void *ptr = node_to_entry(slot);
-	unsigned offset = get_slot_offset(node, slot) + 1;
+	unsigned offset = get_slot_offset(node, slot);
+	void *ptr = xa_mk_sibling(offset);
 
-	while (offset < RADIX_TREE_MAP_SIZE) {
+	while (++offset < RADIX_TREE_MAP_SIZE) {
 		if (rcu_dereference_raw(node->slots[offset]) != ptr)
 			break;
 		if (count < 0) {
@@ -1123,7 +1098,6 @@ static inline void replace_sibling_entries(struct radix_tree_node *node,
 			node->count--;
 		}
 		node->exceptional += exceptional;
-		offset++;
 	}
 #endif
 }
@@ -1319,8 +1293,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index,
 			tags |= 1 << tag;
 
 	for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
-		if (!is_sibling_entry(parent,
-				rcu_dereference_raw(parent->slots[end])))
+		if (!xa_is_sibling(rcu_dereference_raw(parent->slots[end])))
 			break;
 		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
 			if (tags & (1 << tag))
@@ -1618,7 +1591,7 @@ static void __rcu **skip_siblings(struct radix_tree_node **nodep,
 {
 	while (iter->index < iter->next_index) {
 		*nodep = rcu_dereference_raw(*slot);
-		if (*nodep && !is_sibling_entry(iter->node, *nodep))
+		if (*nodep && !xa_is_sibling(*nodep))
 			return slot;
 		slot++;
 		iter->index = __radix_tree_iter_add(iter, 1);
@@ -1769,7 +1742,7 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
 				while (++offset	< RADIX_TREE_MAP_SIZE) {
 					void *slot = rcu_dereference_raw(
 							node->slots[offset]);
-					if (is_sibling_entry(node, slot))
+					if (xa_is_sibling(slot))
 						continue;
 					if (slot)
 						break;
@@ -2283,6 +2256,7 @@ void __init radix_tree_init(void)
 
 	BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
 	BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
+	BUILD_BUG_ON(XA_CHUNK_SIZE > 255);
 	radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
 			sizeof(struct radix_tree_node), 0,
 			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 37baecc3766f..12adcf9ffe86 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -46,6 +46,6 @@ idr.c: ../../../lib/idr.c
 
 generated/map-shift.h:
 	@if ! grep -qws $(SHIFT) generated/map-shift.h; then		\
-		echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" >		\
+		echo "#define XA_CHUNK_SHIFT $(SHIFT)" >		\
 				generated/map-shift.h;			\
 	fi
diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h
index cf88dc5b8832..ca8e03ad19ac 100644
--- a/tools/testing/radix-tree/generated/autoconf.h
+++ b/tools/testing/radix-tree/generated/autoconf.h
@@ -1 +1,2 @@
 #define CONFIG_RADIX_TREE_MULTIORDER 1
+#define CONFIG_XARRAY_MULTI 1
-- 
cgit v1.2.3


From 492ecf6d6598e7c3b0b1372653ed21da50ddfb09 Mon Sep 17 00:00:00 2001
From: Alan Tull <atull@kernel.org>
Date: Wed, 12 Sep 2018 09:43:25 -0500
Subject: docs: fpga: document fpga manager flags

Add flags #defines to kerneldoc documentation in a
useful place.

Signed-off-by: Alan Tull <atull@kernel.org>
Acked-by: Moritz Fischer <mdf@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/fpga/fpga-mgr.rst |  5 +++++
 include/linux/fpga/fpga-mgr.h              | 20 ++++++++++++++------
 2 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/fpga/fpga-mgr.rst b/Documentation/driver-api/fpga/fpga-mgr.rst
index 4b3825da48d9..82b6dbbd31cd 100644
--- a/Documentation/driver-api/fpga/fpga-mgr.rst
+++ b/Documentation/driver-api/fpga/fpga-mgr.rst
@@ -184,6 +184,11 @@ API for implementing a new FPGA Manager driver
 API for programming an FPGA
 ---------------------------
 
+FPGA Manager flags
+
+.. kernel-doc:: include/linux/fpga/fpga-mgr.h
+   :doc: FPGA Manager flags
+
 .. kernel-doc:: include/linux/fpga/fpga-mgr.h
    :functions: fpga_image_info
 
diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index 8942e61f0028..8ab5df769923 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -53,12 +53,20 @@ enum fpga_mgr_states {
 	FPGA_MGR_STATE_OPERATING,
 };
 
-/*
- * FPGA Manager flags
- * FPGA_MGR_PARTIAL_RECONFIG: do partial reconfiguration if supported
- * FPGA_MGR_EXTERNAL_CONFIG: FPGA has been configured prior to Linux booting
- * FPGA_MGR_BITSTREAM_LSB_FIRST: SPI bitstream bit order is LSB first
- * FPGA_MGR_COMPRESSED_BITSTREAM: FPGA bitstream is compressed
+/**
+ * DOC: FPGA Manager flags
+ *
+ * Flags used in the &fpga_image_info->flags field
+ *
+ * %FPGA_MGR_PARTIAL_RECONFIG: do partial reconfiguration if supported
+ *
+ * %FPGA_MGR_EXTERNAL_CONFIG: FPGA has been configured prior to Linux booting
+ *
+ * %FPGA_MGR_ENCRYPTED_BITSTREAM: indicates bitstream is encrypted
+ *
+ * %FPGA_MGR_BITSTREAM_LSB_FIRST: SPI bitstream bit order is LSB first
+ *
+ * %FPGA_MGR_COMPRESSED_BITSTREAM: FPGA bitstream is compressed
  */
 #define FPGA_MGR_PARTIAL_RECONFIG	BIT(0)
 #define FPGA_MGR_EXTERNAL_CONFIG	BIT(1)
-- 
cgit v1.2.3


From 29efbc6aea9d9bd9aa9870a9afc1882046303cf9 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Thu, 30 Aug 2018 19:07:27 +0200
Subject: Compiler Attributes: remove unused attributes

__optimize and __deprecate_for_modules are unused in
the whole kernel tree. Simply drop them.

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler-gcc.h   | 2 --
 include/linux/compiler.h       | 4 ----
 include/linux/compiler_types.h | 1 -
 3 files changed, 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 4d36b27214fd..1302b425e625 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -81,8 +81,6 @@
 
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
 
-#define __optimize(level)	__attribute__((__optimize__(level)))
-
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 
 #ifndef __CHECKER__
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 681d866efb1e..7c0157d50964 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -301,10 +301,6 @@ static inline void *offset_to_ptr(const int *off)
 
 #endif /* __ASSEMBLY__ */
 
-#ifndef __optimize
-# define __optimize(level)
-#endif
-
 /* Compile time object size, -1 for unknown */
 #ifndef __compiletime_object_size
 # define __compiletime_object_size(obj) -1
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index db192becfec4..a19562cb047c 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -108,7 +108,6 @@ struct ftrace_likely_data {
 
 /* Don't. Just don't. */
 #define __deprecated
-#define __deprecated_for_modules
 
 #endif /* __KERNEL__ */
 
-- 
cgit v1.2.3


From 5c67a52f3da0f0d22764f2daec417702695a8112 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Thu, 30 Aug 2018 19:13:37 +0200
Subject: Compiler Attributes: always use the extra-underscores syntax

The attribute syntax optionally allows to surround attribute names
with "__" in order to avoid collisions with macros of the same name
(see https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html).

This homogenizes all attributes to use the syntax with underscores.
While there are currently only a handful of cases of some TUs defining
macros like "error" which may collide with the attributes,
this should prevent futures surprises.

This has been done only for "standard" attributes supported by
the major compilers. In other words, those of third-party tools
(e.g. sparse, plugins...) have not been changed for the moment.

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler-clang.h |  2 +-
 include/linux/compiler-gcc.h   | 12 ++++++------
 include/linux/compiler-intel.h |  2 +-
 include/linux/compiler.h       |  8 ++++----
 include/linux/compiler_types.h | 42 +++++++++++++++++++++---------------------
 5 files changed, 33 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index b1ce500fe8b3..d11cad61ba5c 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -21,7 +21,7 @@
 #define __SANITIZE_ADDRESS__
 #endif
 
-#define __no_sanitize_address __attribute__((no_sanitize("address")))
+#define __no_sanitize_address __attribute__((__no_sanitize__("address")))
 
 /*
  * Not all versions of clang implement the the type-generic versions
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 1302b425e625..7a1de7683df5 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -76,7 +76,7 @@
 #endif
 
 #ifdef RETPOLINE
-#define __noretpoline __attribute__((indirect_branch("keep")))
+#define __noretpoline __attribute__((__indirect_branch__("keep")))
 #endif
 
 #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
@@ -84,8 +84,8 @@
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 
 #ifndef __CHECKER__
-#define __compiletime_warning(message) __attribute__((warning(message)))
-#define __compiletime_error(message) __attribute__((error(message)))
+#define __compiletime_warning(message) __attribute__((__warning__(message)))
+#define __compiletime_error(message) __attribute__((__error__(message)))
 
 #ifdef LATENT_ENTROPY_PLUGIN
 #define __latent_entropy __attribute__((latent_entropy))
@@ -134,7 +134,7 @@
  * optimizer that something else uses this function or variable, thus preventing
  * this.
  */
-#define __visible	__attribute__((externally_visible))
+#define __visible	__attribute__((__externally_visible__))
 
 /* gcc version specific checks */
 
@@ -191,7 +191,7 @@
  * should not be applied to that function.
  * Conflicts with inlining: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
  */
-#define __no_sanitize_address __attribute__((no_sanitize_address))
+#define __no_sanitize_address __attribute__((__no_sanitize_address__))
 #endif
 
 #if GCC_VERSION >= 50100
@@ -199,7 +199,7 @@
  * Mark structures as requiring designated initializers.
  * https://gcc.gnu.org/onlinedocs/gcc/Designated-Inits.html
  */
-#define __designated_init __attribute__((designated_init))
+#define __designated_init __attribute__((__designated_init__))
 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
 #endif
 
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index 4c7f9befa9f6..fef8bb3e53ef 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -42,4 +42,4 @@
  * and may be redefined here because they should not be shared with other
  * compilers, like clang.
  */
-#define __visible	__attribute__((externally_visible))
+#define __visible	__attribute__((__externally_visible__))
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 7c0157d50964..ec4a28bad2c6 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -24,7 +24,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 			long ______r;					\
 			static struct ftrace_likely_data		\
 				__attribute__((__aligned__(4)))		\
-				__attribute__((section("_ftrace_annotated_branch"))) \
+				__attribute__((__section__("_ftrace_annotated_branch"))) \
 				______f = {				\
 				.data.func = __func__,			\
 				.data.file = __FILE__,			\
@@ -60,7 +60,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 		int ______r;						\
 		static struct ftrace_branch_data			\
 			__attribute__((__aligned__(4)))			\
-			__attribute__((section("_ftrace_branch")))	\
+			__attribute__((__section__("_ftrace_branch")))	\
 			______f = {					\
 				.func = __func__,			\
 				.file = __FILE__,			\
@@ -146,7 +146,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 	extern typeof(sym) sym;					\
 	static const unsigned long __kentry_##sym		\
 	__used							\
-	__attribute__((section("___kentry" "+" #sym ), used))	\
+	__attribute__((__section__("___kentry" "+" #sym ), used))	\
 	= (unsigned long)&sym;
 #endif
 
@@ -287,7 +287,7 @@ unsigned long read_word_at_a_time(const void *addr)
  * visible to the compiler.
  */
 #define __ADDRESSABLE(sym) \
-	static void * __attribute__((section(".discard.addressable"), used)) \
+	static void * __attribute__((__section__(".discard.addressable"), used)) \
 		__PASTE(__addressable_##sym, __LINE__) = (void *)&sym;
 
 /**
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index a19562cb047c..8fbdd47dd3d0 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -195,26 +195,26 @@ struct ftrace_likely_data {
  * would be.
  * [...]
  */
-#define __pure			__attribute__((pure))
-#define __aligned(x)		__attribute__((aligned(x)))
-#define __aligned_largest	__attribute__((aligned))
-#define __printf(a, b)		__attribute__((format(printf, a, b)))
-#define __scanf(a, b)		__attribute__((format(scanf, a, b)))
-#define __maybe_unused		__attribute__((unused))
-#define __always_unused		__attribute__((unused))
-#define __mode(x)		__attribute__((mode(x)))
+#define __pure			__attribute__((__pure__))
+#define __aligned(x)		__attribute__((__aligned__(x)))
+#define __aligned_largest	__attribute__((__aligned__))
+#define __printf(a, b)		__attribute__((__format__(printf, a, b)))
+#define __scanf(a, b)		__attribute__((__format__(scanf, a, b)))
+#define __maybe_unused		__attribute__((__unused__))
+#define __always_unused		__attribute__((__unused__))
+#define __mode(x)		__attribute__((__mode__(x)))
 #define __malloc		__attribute__((__malloc__))
 #define __used			__attribute__((__used__))
-#define __noreturn		__attribute__((noreturn))
-#define __packed		__attribute__((packed))
-#define __weak			__attribute__((weak))
-#define __alias(symbol)		__attribute__((alias(#symbol)))
-#define __cold			__attribute__((cold))
+#define __noreturn		__attribute__((__noreturn__))
+#define __packed		__attribute__((__packed__))
+#define __weak			__attribute__((__weak__))
+#define __alias(symbol)		__attribute__((__alias__(#symbol)))
+#define __cold			__attribute__((__cold__))
 #define __section(S)		__attribute__((__section__(#S)))
 
 
 #ifdef CONFIG_ENABLE_MUST_CHECK
-#define __must_check		__attribute__((warn_unused_result))
+#define __must_check		__attribute__((__warn_unused_result__))
 #else
 #define __must_check
 #endif
@@ -222,7 +222,7 @@ struct ftrace_likely_data {
 #if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
 #define notrace			__attribute__((hotpatch(0, 0)))
 #else
-#define notrace			__attribute__((no_instrument_function))
+#define notrace			__attribute__((__no_instrument_function__))
 #endif
 
 /*
@@ -231,7 +231,7 @@ struct ftrace_likely_data {
  * stack and frame pointer being set up and there is no chance to
  * restore the lr register to the value before mcount was called.
  */
-#define __naked			__attribute__((naked)) notrace
+#define __naked			__attribute__((__naked__)) notrace
 
 #define __compiler_offsetof(a, b)	__builtin_offsetof(a, b)
 
@@ -242,7 +242,7 @@ struct ftrace_likely_data {
  * defined so the gnu89 semantics are the default.
  */
 #ifdef __GNUC_STDC_INLINE__
-# define __gnu_inline	__attribute__((gnu_inline))
+# define __gnu_inline	__attribute__((__gnu_inline__))
 #else
 # define __gnu_inline
 #endif
@@ -262,17 +262,17 @@ struct ftrace_likely_data {
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \
 	!defined(CONFIG_OPTIMIZE_INLINING)
 #define inline \
-	inline __attribute__((always_inline, unused)) notrace __gnu_inline
+	inline __attribute__((__always_inline__, __unused__)) notrace __gnu_inline
 #else
-#define inline inline	__attribute__((unused)) notrace __gnu_inline
+#define inline inline	__attribute__((__unused__)) notrace __gnu_inline
 #endif
 
 #define __inline__ inline
 #define __inline inline
-#define noinline	__attribute__((noinline))
+#define noinline	__attribute__((__noinline__))
 
 #ifndef __always_inline
-#define __always_inline inline __attribute__((always_inline))
+#define __always_inline inline __attribute__((__always_inline__))
 #endif
 
 /*
-- 
cgit v1.2.3


From c2c640aa04cc4e6caf0ff17ff18b3784e0c99566 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Thu, 30 Aug 2018 19:45:01 +0200
Subject: Compiler Attributes: remove unneeded tests

Attributes const and always_inline have tests around them
which are unneeded, since they are supported by gcc >= 4.6,
clang >= 3 and icc >= 13. https://godbolt.org/z/DFPq37

In the case of gnu_inline, we do not need to test for
__GNUC_STDC_INLINE__ because, regardless of the current
inlining behavior, we can simply always force the old
GCC inlining behavior by using the attribute in all cases.

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler_types.h | 23 +++--------------------
 1 file changed, 3 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 8fbdd47dd3d0..5ff9cda893f4 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -158,10 +158,6 @@ struct ftrace_likely_data {
 	(sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \
 	 sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
 
-#ifndef __attribute_const__
-#define __attribute_const__	__attribute__((__const__))
-#endif
-
 #ifndef __noclone
 #define __noclone
 #endif
@@ -196,6 +192,7 @@ struct ftrace_likely_data {
  * [...]
  */
 #define __pure			__attribute__((__pure__))
+#define __attribute_const__	__attribute__((__const__))
 #define __aligned(x)		__attribute__((__aligned__(x)))
 #define __aligned_largest	__attribute__((__aligned__))
 #define __printf(a, b)		__attribute__((__format__(printf, a, b)))
@@ -211,6 +208,8 @@ struct ftrace_likely_data {
 #define __alias(symbol)		__attribute__((__alias__(#symbol)))
 #define __cold			__attribute__((__cold__))
 #define __section(S)		__attribute__((__section__(#S)))
+#define __always_inline		inline __attribute__((__always_inline__))
+#define __gnu_inline		__attribute__((__gnu_inline__))
 
 
 #ifdef CONFIG_ENABLE_MUST_CHECK
@@ -235,18 +234,6 @@ struct ftrace_likely_data {
 
 #define __compiler_offsetof(a, b)	__builtin_offsetof(a, b)
 
-/*
- * Feature detection for gnu_inline (gnu89 extern inline semantics). Either
- * __GNUC_STDC_INLINE__ is defined (not using gnu89 extern inline semantics,
- * and we opt in to the gnu89 semantics), or __GNUC_STDC_INLINE__ is not
- * defined so the gnu89 semantics are the default.
- */
-#ifdef __GNUC_STDC_INLINE__
-# define __gnu_inline	__attribute__((__gnu_inline__))
-#else
-# define __gnu_inline
-#endif
-
 /*
  * Force always-inline if the user requests it so via the .config.
  * GCC does not warn about unused static inline functions for
@@ -271,10 +258,6 @@ struct ftrace_likely_data {
 #define __inline inline
 #define noinline	__attribute__((__noinline__))
 
-#ifndef __always_inline
-#define __always_inline inline __attribute__((__always_inline__))
-#endif
-
 /*
  * Rather then using noinline to prevent stack consumption, use
  * noinline_for_stack instead.  For documentation reasons.
-- 
cgit v1.2.3


From ec0bbef66f867854691d5af18c2231d746958e0e Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Thu, 30 Aug 2018 19:25:14 +0200
Subject: Compiler Attributes: homogenize __must_be_array

Different definitions of __must_be_array:

  * gcc: disabled for __CHECKER__

  * clang: same definition as gcc's, but without __CHECKER__

  * intel: the comment claims __builtin_types_compatible_p()
    is unsupported; but icc seems to support it since 13.0.1
    (released in 2012). See https://godbolt.org/z/S0l6QQ

Therefore, we can remove all of them and have a single definition
in compiler.h

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler-clang.h | 1 -
 include/linux/compiler-gcc.h   | 7 -------
 include/linux/compiler-intel.h | 3 ---
 include/linux/compiler.h       | 7 +++++++
 4 files changed, 7 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index d11cad61ba5c..fa9532f8d885 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -41,6 +41,5 @@
  * compilers, like ICC.
  */
 #define barrier() __asm__ __volatile__("" : : : "memory")
-#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
 #define __assume_aligned(a, ...)	\
 	__attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 7a1de7683df5..3b32bbfa5a49 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -68,13 +68,6 @@
  */
 #define uninitialized_var(x) x = x
 
-#ifdef __CHECKER__
-#define __must_be_array(a)	0
-#else
-/* &a[0] degrades to a pointer: a different type from an array */
-#define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
-#endif
-
 #ifdef RETPOLINE
 #define __noretpoline __attribute__((__indirect_branch__("keep")))
 #endif
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index fef8bb3e53ef..6004b4588bd4 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -29,9 +29,6 @@
  */
 #define OPTIMIZER_HIDE_VAR(var) barrier()
 
-/* Intel ECC compiler doesn't support __builtin_types_compatible_p() */
-#define __must_be_array(a) 0
-
 #endif
 
 /* icc has this, but it's called _bswap16 */
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index ec4a28bad2c6..165b1d5683ed 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -357,4 +357,11 @@ static inline void *offset_to_ptr(const int *off)
 	compiletime_assert(__native_word(t),				\
 		"Need native word sized stores/loads for atomicity.")
 
+#ifdef __CHECKER__
+#define __must_be_array(a)	0
+#else
+/* &a[0] degrades to a pointer: a different type from an array */
+#define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+#endif
+
 #endif /* __LINUX_COMPILER_H */
-- 
cgit v1.2.3


From 989bd5000f36052df604888ed12bb6ef390786b7 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Fri, 31 Aug 2018 18:00:16 +0200
Subject: Compiler Attributes: remove unneeded sparse (__CHECKER__) tests

Sparse knows about a few more attributes now, so we can remove
the __CHECKER__ conditions from them (which, in turn, allow us
to move some of them later on to compiler_attributes.h).

  * assume_aligned: since sparse's commit ffc860b ("sparse:
    ignore __assume_aligned__ attribute"), included in 0.5.1

  * error: since sparse's commit 0a04210 ("sparse: Add 'error'
    to ignored attributes"), included in 0.5.0

  * hotpatch: since sparse's commit 6043210 ("sparse/parse.c:
    ignore hotpatch attribute"), included in 0.5.1

  * warning: since sparse's commit 977365d ("Avoid "attribute
    'warning': unknown attribute" warning"), included in 0.4.2

On top of that, __must_be_array does not need it either because:

  * Even ancient versions of sparse do not have a problem

  * BUILD_BUG_ON_ZERO() is currently disabled for __CHECKER__

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler-gcc.h   | 6 ++----
 include/linux/compiler.h       | 4 ----
 include/linux/compiler_types.h | 2 +-
 3 files changed, 3 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 3b32bbfa5a49..1ca6a51cfaa9 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -76,14 +76,12 @@
 
 #define __compiletime_object_size(obj) __builtin_object_size(obj, 0)
 
-#ifndef __CHECKER__
 #define __compiletime_warning(message) __attribute__((__warning__(message)))
 #define __compiletime_error(message) __attribute__((__error__(message)))
 
-#ifdef LATENT_ENTROPY_PLUGIN
+#if defined(LATENT_ENTROPY_PLUGIN) && !defined(__CHECKER__)
 #define __latent_entropy __attribute__((latent_entropy))
 #endif
-#endif /* __CHECKER__ */
 
 /*
  * calling noreturn functions, __builtin_unreachable() and __builtin_trap()
@@ -131,7 +129,7 @@
 
 /* gcc version specific checks */
 
-#if GCC_VERSION >= 40900 && !defined(__CHECKER__)
+#if GCC_VERSION >= 40900
 /*
  * __assume_aligned(n, k): Tell the optimizer that the returned
  * pointer can be assumed to be k modulo n. The second argument is
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 165b1d5683ed..4030a2940d6b 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -357,11 +357,7 @@ static inline void *offset_to_ptr(const int *off)
 	compiletime_assert(__native_word(t),				\
 		"Need native word sized stores/loads for atomicity.")
 
-#ifdef __CHECKER__
-#define __must_be_array(a)	0
-#else
 /* &a[0] degrades to a pointer: a different type from an array */
 #define __must_be_array(a)	BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
-#endif
 
 #endif /* __LINUX_COMPILER_H */
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 5ff9cda893f4..a3eceb3ad1b3 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -218,7 +218,7 @@ struct ftrace_likely_data {
 #define __must_check
 #endif
 
-#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__)
+#if defined(CC_USING_HOTPATCH)
 #define notrace			__attribute__((hotpatch(0, 0)))
 #else
 #define notrace			__attribute__((__no_instrument_function__))
-- 
cgit v1.2.3


From 66dbeef915f275dad6c8656b31667ef9640f5639 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Mon, 3 Sep 2018 18:34:18 +0200
Subject: Compiler Attributes: add missing SPDX ID in compiler_types.h

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler_types.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index a3eceb3ad1b3..ed7c0e4a180e 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __LINUX_COMPILER_TYPES_H
 #define __LINUX_COMPILER_TYPES_H
 
-- 
cgit v1.2.3


From a3f8a30f3f0079c7edfc72e329eee8594fb3e3cb Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Thu, 30 Aug 2018 20:36:59 +0200
Subject: Compiler Attributes: use feature checks instead of version checks

Instead of using version checks per-compiler to define (or not)
each attribute, use __has_attribute to test for them, following
the cleanup started with commit 815f0ddb346c
("include/linux/compiler*.h: make compiler-*.h mutually exclusive"),
which is supported on gcc >= 5, clang >= 2.9 and icc >= 17.
In the meantime, to support 4.6 <= gcc < 5, we implement
__has_attribute by hand.

All the attributes that can be unconditionally defined and directly
map to compiler attribute(s) (even if optional) have been moved
to a new file include/linux/compiler_attributes.h

In an effort to make the file as regular as possible, comments
stating the purpose of attributes have been removed. Instead,
links to the compiler docs have been added (i.e. to gcc and,
if available, to clang as well). In addition, they have been sorted.

Finally, if an attribute is optional (i.e. if it is guarded
by __has_attribute), the reason has been stated for future reference.

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler-clang.h      |   4 -
 include/linux/compiler-gcc.h        |  51 --------
 include/linux/compiler-intel.h      |   6 -
 include/linux/compiler_attributes.h | 244 ++++++++++++++++++++++++++++++++++++
 include/linux/compiler_types.h      |  74 ++---------
 5 files changed, 254 insertions(+), 125 deletions(-)
 create mode 100644 include/linux/compiler_attributes.h

(limited to 'include/linux')

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index fa9532f8d885..3e7dafb3ea80 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -21,8 +21,6 @@
 #define __SANITIZE_ADDRESS__
 #endif
 
-#define __no_sanitize_address __attribute__((__no_sanitize__("address")))
-
 /*
  * Not all versions of clang implement the the type-generic versions
  * of the builtin overflow checkers. Fortunately, clang implements
@@ -41,5 +39,3 @@
  * compilers, like ICC.
  */
 #define barrier() __asm__ __volatile__("" : : : "memory")
-#define __assume_aligned(a, ...)	\
-	__attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 1ca6a51cfaa9..cfac027e1625 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -108,9 +108,6 @@
 		__builtin_unreachable();	\
 	} while (0)
 
-/* Mark a function definition as prohibited from being cloned. */
-#define __noclone	__attribute__((__noclone__, __optimize__("no-tracer")))
-
 #if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__)
 #define __randomize_layout __attribute__((randomize_layout))
 #define __no_randomize_layout __attribute__((no_randomize_layout))
@@ -119,32 +116,6 @@
 #define randomized_struct_fields_end	} __randomize_layout;
 #endif
 
-/*
- * When used with Link Time Optimization, gcc can optimize away C functions or
- * variables which are referenced only from assembly code.  __visible tells the
- * optimizer that something else uses this function or variable, thus preventing
- * this.
- */
-#define __visible	__attribute__((__externally_visible__))
-
-/* gcc version specific checks */
-
-#if GCC_VERSION >= 40900
-/*
- * __assume_aligned(n, k): Tell the optimizer that the returned
- * pointer can be assumed to be k modulo n. The second argument is
- * optional (default 0), so we use a variadic macro to make the
- * shorthand.
- *
- * Beware: Do not apply this to functions which may return
- * ERR_PTRs. Also, it is probably unwise to apply it to functions
- * returning extra information in the low bits (but in that case the
- * compiler should see some alignment anyway, when the return value is
- * massaged by 'flags = ptr & 3; ptr &= ~3;').
- */
-#define __assume_aligned(a, ...) __attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
-#endif
-
 /*
  * GCC 'asm goto' miscompiles certain code sequences:
  *
@@ -176,32 +147,10 @@
 #define KASAN_ABI_VERSION 3
 #endif
 
-#if GCC_VERSION >= 40902
-/*
- * Tell the compiler that address safety instrumentation (KASAN)
- * should not be applied to that function.
- * Conflicts with inlining: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
- */
-#define __no_sanitize_address __attribute__((__no_sanitize_address__))
-#endif
-
 #if GCC_VERSION >= 50100
-/*
- * Mark structures as requiring designated initializers.
- * https://gcc.gnu.org/onlinedocs/gcc/Designated-Inits.html
- */
-#define __designated_init __attribute__((__designated_init__))
 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
 #endif
 
-#if !defined(__noclone)
-#define __noclone	/* not needed */
-#endif
-
-#if !defined(__no_sanitize_address)
-#define __no_sanitize_address
-#endif
-
 /*
  * Turn individual warnings and errors on and off locally, depending
  * on version.
diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h
index 6004b4588bd4..517bd14e1222 100644
--- a/include/linux/compiler-intel.h
+++ b/include/linux/compiler-intel.h
@@ -34,9 +34,3 @@
 /* icc has this, but it's called _bswap16 */
 #define __HAVE_BUILTIN_BSWAP16__
 #define __builtin_bswap16 _bswap16
-
-/* The following are for compatibility with GCC, from compiler-gcc.h,
- * and may be redefined here because they should not be shared with other
- * compilers, like clang.
- */
-#define __visible	__attribute__((__externally_visible__))
diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
new file mode 100644
index 000000000000..f0f9fc398440
--- /dev/null
+++ b/include/linux/compiler_attributes.h
@@ -0,0 +1,244 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_COMPILER_ATTRIBUTES_H
+#define __LINUX_COMPILER_ATTRIBUTES_H
+
+/*
+ * The attributes in this file are unconditionally defined and they directly
+ * map to compiler attribute(s) -- except those that are optional.
+ *
+ * Any other "attributes" (i.e. those that depend on a configuration option,
+ * on a compiler, on an architecture, on plugins, on other attributes...)
+ * should be defined elsewhere (e.g. compiler_types.h or compiler-*.h).
+ *
+ * This file is meant to be sorted (by actual attribute name,
+ * not by #define identifier). Use the __attribute__((__name__)) syntax
+ * (i.e. with underscores) to avoid future collisions with other macros.
+ * If an attribute is optional, state the reason in the comment.
+ */
+
+/*
+ * To check for optional attributes, we use __has_attribute, which is supported
+ * on gcc >= 5, clang >= 2.9 and icc >= 17. In the meantime, to support
+ * 4.6 <= gcc < 5, we implement __has_attribute by hand.
+ *
+ * sparse does not support __has_attribute (yet) and defines __GNUC_MINOR__
+ * depending on the compiler used to build it; however, these attributes have
+ * no semantic effects for sparse, so it does not matter. Also note that,
+ * in order to avoid sparse's warnings, even the unsupported ones must be
+ * defined to 0.
+ */
+#ifndef __has_attribute
+# define __has_attribute(x) __GCC4_has_attribute_##x
+# define __GCC4_has_attribute___assume_aligned__      (__GNUC_MINOR__ >= 9)
+# define __GCC4_has_attribute___designated_init__     0
+# define __GCC4_has_attribute___externally_visible__  1
+# define __GCC4_has_attribute___noclone__             1
+# define __GCC4_has_attribute___optimize__            1
+# define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8)
+#endif
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-alias-function-attribute
+ */
+#define __alias(symbol)                 __attribute__((__alias__(#symbol)))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-aligned-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-aligned-type-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-aligned-variable-attribute
+ */
+#define __aligned(x)                    __attribute__((__aligned__(x)))
+#define __aligned_largest               __attribute__((__aligned__))
+
+/*
+ * Note: users of __always_inline currently do not write "inline" themselves,
+ * which seems to be required by gcc to apply the attribute according
+ * to its docs (and also "warning: always_inline function might not be
+ * inlinable [-Wattributes]" is emitted).
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-always_005finline-function-attribute
+ * clang: mentioned
+ */
+#define __always_inline                 inline __attribute__((__always_inline__))
+
+/*
+ * The second argument is optional (default 0), so we use a variadic macro
+ * to make the shorthand.
+ *
+ * Beware: Do not apply this to functions which may return
+ * ERR_PTRs. Also, it is probably unwise to apply it to functions
+ * returning extra information in the low bits (but in that case the
+ * compiler should see some alignment anyway, when the return value is
+ * massaged by 'flags = ptr & 3; ptr &= ~3;').
+ *
+ * Optional: only supported since gcc >= 4.9
+ * Optional: not supported by icc
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-assume_005faligned-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#assume-aligned
+ */
+#if __has_attribute(__assume_aligned__)
+# define __assume_aligned(a, ...)       __attribute__((__assume_aligned__(a, ## __VA_ARGS__)))
+#else
+# define __assume_aligned(a, ...)
+#endif
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-cold-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-cold-label-attribute
+ */
+#define __cold                          __attribute__((__cold__))
+
+/*
+ * Note the long name.
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-const-function-attribute
+ */
+#define __attribute_const__             __attribute__((__const__))
+
+/*
+ * Don't. Just don't. See commit 771c035372a0 ("deprecate the '__deprecated'
+ * attribute warnings entirely and for good") for more information.
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-deprecated-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-deprecated-type-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-deprecated-variable-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Enumerator-Attributes.html#index-deprecated-enumerator-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#deprecated
+ */
+#define __deprecated
+
+/*
+ * Optional: only supported since gcc >= 5.1
+ * Optional: not supported by clang
+ * Optional: not supported by icc
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-designated_005finit-type-attribute
+ */
+#if __has_attribute(__designated_init__)
+# define __designated_init              __attribute__((__designated_init__))
+#else
+# define __designated_init
+#endif
+
+/*
+ * Optional: not supported by clang
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-externally_005fvisible-function-attribute
+ */
+#if __has_attribute(__externally_visible__)
+# define __visible                      __attribute__((__externally_visible__))
+#else
+# define __visible
+#endif
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-format-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#format
+ */
+#define __printf(a, b)                  __attribute__((__format__(printf, a, b)))
+#define __scanf(a, b)                   __attribute__((__format__(scanf, a, b)))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-gnu_005finline-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#gnu-inline
+ */
+#define __gnu_inline                    __attribute__((__gnu_inline__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-malloc-function-attribute
+ */
+#define __malloc                        __attribute__((__malloc__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-mode-type-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-mode-variable-attribute
+ */
+#define __mode(x)                       __attribute__((__mode__(x)))
+
+/*
+ * Optional: not supported by clang
+ * Note: icc does not recognize gcc's no-tracer
+ *
+ *  gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noclone-function-attribute
+ *  gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-optimize-function-attribute
+ */
+#if __has_attribute(__noclone__)
+# if __has_attribute(__optimize__)
+#  define __noclone                     __attribute__((__noclone__, __optimize__("no-tracer")))
+# else
+#  define __noclone                     __attribute__((__noclone__))
+# endif
+#else
+# define __noclone
+#endif
+
+/*
+ * Note the missing underscores.
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noinline-function-attribute
+ * clang: mentioned
+ */
+#define   noinline                      __attribute__((__noinline__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noreturn-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#noreturn
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#id1
+ */
+#define __noreturn                      __attribute__((__noreturn__))
+
+/*
+ * Optional: only supported since gcc >= 4.8
+ * Optional: not supported by icc
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-no_005fsanitize_005faddress-function-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#no-sanitize-address-no-address-safety-analysis
+ */
+#if __has_attribute(__no_sanitize_address__)
+# define __no_sanitize_address          __attribute__((__no_sanitize_address__))
+#else
+# define __no_sanitize_address
+#endif
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-packed-type-attribute
+ * clang: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-packed-variable-attribute
+ */
+#define __packed                        __attribute__((__packed__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-pure-function-attribute
+ */
+#define __pure                          __attribute__((__pure__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-section-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-section-variable-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#section-declspec-allocate
+ */
+#define __section(S)                    __attribute__((__section__(#S)))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-unused-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html#index-unused-type-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-unused-variable-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Label-Attributes.html#index-unused-label-attribute
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#maybe-unused-unused
+ */
+#define __always_unused                 __attribute__((__unused__))
+#define __maybe_unused                  __attribute__((__unused__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-used-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-used-variable-attribute
+ */
+#define __used                          __attribute__((__used__))
+
+/*
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-weak-function-attribute
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-weak-variable-attribute
+ */
+#define __weak                          __attribute__((__weak__))
+
+#endif /* __LINUX_COMPILER_ATTRIBUTES_H */
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index ed7c0e4a180e..3439d7d0249a 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -55,6 +55,9 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 
 #ifdef __KERNEL__
 
+/* Attributes */
+#include <linux/compiler_attributes.h>
+
 /* Compiler specific macros. */
 #ifdef __clang__
 #include <linux/compiler-clang.h>
@@ -79,12 +82,6 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 #include <asm/compiler.h>
 #endif
 
-/*
- * Generic compiler-independent macros required for kernel
- * build go below this comment. Actual compiler/compiler version
- * specific implementations come from the above header files
- */
-
 struct ftrace_branch_data {
 	const char *func;
 	const char *file;
@@ -107,9 +104,6 @@ struct ftrace_likely_data {
 	unsigned long			constant;
 };
 
-/* Don't. Just don't. */
-#define __deprecated
-
 #endif /* __KERNEL__ */
 
 #endif /* __ASSEMBLY__ */
@@ -119,10 +113,6 @@ struct ftrace_likely_data {
  * compilers. We don't consider that to be an error, so set them to nothing.
  * For example, some of them are for compiler specific plugins.
  */
-#ifndef __designated_init
-# define __designated_init
-#endif
-
 #ifndef __latent_entropy
 # define __latent_entropy
 #endif
@@ -140,17 +130,6 @@ struct ftrace_likely_data {
 # define randomized_struct_fields_end
 #endif
 
-#ifndef __visible
-#define __visible
-#endif
-
-/*
- * Assume alignment of return value.
- */
-#ifndef __assume_aligned
-#define __assume_aligned(a, ...)
-#endif
-
 /* Are two types/vars the same type (ignoring qualifiers)? */
 #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
 
@@ -159,10 +138,6 @@ struct ftrace_likely_data {
 	(sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \
 	 sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
 
-#ifndef __noclone
-#define __noclone
-#endif
-
 /* Helpers for emitting diagnostics in pragmas. */
 #ifndef __diag
 #define __diag(string)
@@ -182,37 +157,6 @@ struct ftrace_likely_data {
 #define __diag_error(compiler, version, option, comment) \
 	__diag_ ## compiler(version, error, option)
 
-/*
- * From the GCC manual:
- *
- * Many functions have no effects except the return value and their
- * return value depends only on the parameters and/or global
- * variables.  Such a function can be subject to common subexpression
- * elimination and loop optimization just as an arithmetic operator
- * would be.
- * [...]
- */
-#define __pure			__attribute__((__pure__))
-#define __attribute_const__	__attribute__((__const__))
-#define __aligned(x)		__attribute__((__aligned__(x)))
-#define __aligned_largest	__attribute__((__aligned__))
-#define __printf(a, b)		__attribute__((__format__(printf, a, b)))
-#define __scanf(a, b)		__attribute__((__format__(scanf, a, b)))
-#define __maybe_unused		__attribute__((__unused__))
-#define __always_unused		__attribute__((__unused__))
-#define __mode(x)		__attribute__((__mode__(x)))
-#define __malloc		__attribute__((__malloc__))
-#define __used			__attribute__((__used__))
-#define __noreturn		__attribute__((__noreturn__))
-#define __packed		__attribute__((__packed__))
-#define __weak			__attribute__((__weak__))
-#define __alias(symbol)		__attribute__((__alias__(#symbol)))
-#define __cold			__attribute__((__cold__))
-#define __section(S)		__attribute__((__section__(#S)))
-#define __always_inline		inline __attribute__((__always_inline__))
-#define __gnu_inline		__attribute__((__gnu_inline__))
-
-
 #ifdef CONFIG_ENABLE_MUST_CHECK
 #define __must_check		__attribute__((__warn_unused_result__))
 #else
@@ -246,18 +190,20 @@ struct ftrace_likely_data {
  * semantics rather than c99. This prevents multiple symbol definition errors
  * of extern inline functions at link time.
  * A lot of inline functions can cause havoc with function tracing.
+ * Do not use __always_inline here, since currently it expands to inline again
+ * (which would break users of __always_inline).
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \
 	!defined(CONFIG_OPTIMIZE_INLINING)
-#define inline \
-	inline __attribute__((__always_inline__, __unused__)) notrace __gnu_inline
+#define inline inline __attribute__((__always_inline__)) __gnu_inline \
+	__maybe_unused notrace
 #else
-#define inline inline	__attribute__((__unused__)) notrace __gnu_inline
+#define inline inline                                    __gnu_inline \
+	__maybe_unused notrace
 #endif
 
 #define __inline__ inline
-#define __inline inline
-#define noinline	__attribute__((__noinline__))
+#define __inline   inline
 
 /*
  * Rather then using noinline to prevent stack consumption, use
-- 
cgit v1.2.3


From 06e3727e02f9ee9cf571692cd5c74fc5a8a2af52 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Mon, 3 Sep 2018 19:22:13 +0200
Subject: Compiler Attributes: KENTRY used twice the "used" attribute

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 4030a2940d6b..17ee9165ca51 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -146,7 +146,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 	extern typeof(sym) sym;					\
 	static const unsigned long __kentry_##sym		\
 	__used							\
-	__attribute__((__section__("___kentry" "+" #sym ), used))	\
+	__attribute__((__section__("___kentry" "+" #sym )))	\
 	= (unsigned long)&sym;
 #endif
 
-- 
cgit v1.2.3


From e04462fb82f8dd98288c0e7ab1eec79c92537d25 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Mon, 3 Sep 2018 19:17:50 +0200
Subject: Compiler Attributes: remove uses of __attribute__ from compiler.h

Suggested-by: Nick Desaulniers <ndesaulniers@google.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 17ee9165ca51..b5fb034fa6fa 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -23,8 +23,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 #define __branch_check__(x, expect, is_constant) ({			\
 			long ______r;					\
 			static struct ftrace_likely_data		\
-				__attribute__((__aligned__(4)))		\
-				__attribute__((__section__("_ftrace_annotated_branch"))) \
+				__aligned(4)				\
+				__section("_ftrace_annotated_branch")	\
 				______f = {				\
 				.data.func = __func__,			\
 				.data.file = __FILE__,			\
@@ -59,8 +59,8 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 	({								\
 		int ______r;						\
 		static struct ftrace_branch_data			\
-			__attribute__((__aligned__(4)))			\
-			__attribute__((__section__("_ftrace_branch")))	\
+			__aligned(4)					\
+			__section("_ftrace_branch")			\
 			______f = {					\
 				.func = __func__,			\
 				.file = __FILE__,			\
@@ -146,7 +146,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 	extern typeof(sym) sym;					\
 	static const unsigned long __kentry_##sym		\
 	__used							\
-	__attribute__((__section__("___kentry" "+" #sym )))	\
+	__section("___kentry" "+" #sym )			\
 	= (unsigned long)&sym;
 #endif
 
@@ -287,7 +287,7 @@ unsigned long read_word_at_a_time(const void *addr)
  * visible to the compiler.
  */
 #define __ADDRESSABLE(sym) \
-	static void * __attribute__((__section__(".discard.addressable"), used)) \
+	static void * __section(".discard.addressable") __used \
 		__PASTE(__addressable_##sym, __LINE__) = (void *)&sym;
 
 /**
-- 
cgit v1.2.3


From 92676236917d8e2e0de1d8612686d3d04a6a245b Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Wed, 19 Sep 2018 01:06:24 +0200
Subject: Compiler Attributes: add support for __nonstring (gcc >= 8)

From the GCC manual:

  nonstring

    The nonstring variable attribute specifies that an object or member
    declaration with type array of char, signed char, or unsigned char,
    or pointer to such a type is intended to store character arrays that
    do not necessarily contain a terminating NUL. This is useful in detecting
    uses of such arrays or pointers with functions that expect NUL-terminated
    strings, and to avoid warnings when such an array or pointer is used as
    an argument to a bounded string manipulation function such as strncpy.

  https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html

This attribute can be used for documentation purposes (i.e. replacing
comments), but it is most helpful when the following warnings are enabled:

  -Wstringop-overflow

    Warn for calls to string manipulation functions such as memcpy and
    strcpy that are determined to overflow the destination buffer.

    [...]

  -Wstringop-truncation

    Warn for calls to bounded string manipulation functions such as
    strncat, strncpy, and stpncpy that may either truncate the copied
    string or leave the destination unchanged.

    [...]

    In situations where a character array is intended to store a sequence
    of bytes with no terminating NUL such an array may be annotated with
    attribute nonstring to avoid this warning. Such arrays, however,
    are not suitable arguments to functions that expect NUL-terminated
    strings. To help detect accidental misuses of such arrays GCC issues
    warnings unless it can prove that the use is safe.

  https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html

Tested-by: Sedat Dilek <sedat.dilek@gmail.com> # on top of v4.19-rc5, clang 7
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler_attributes.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index f0f9fc398440..6b28c1b7310c 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -34,6 +34,7 @@
 # define __GCC4_has_attribute___externally_visible__  1
 # define __GCC4_has_attribute___noclone__             1
 # define __GCC4_has_attribute___optimize__            1
+# define __GCC4_has_attribute___nonstring__           0
 # define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8)
 #endif
 
@@ -181,6 +182,19 @@
  */
 #define   noinline                      __attribute__((__noinline__))
 
+/*
+ * Optional: only supported since gcc >= 8
+ * Optional: not supported by clang
+ * Optional: not supported by icc
+ *
+ *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Variable-Attributes.html#index-nonstring-variable-attribute
+ */
+#if __has_attribute(__nonstring__)
+# define __nonstring                    __attribute__((__nonstring__))
+#else
+# define __nonstring
+#endif
+
 /*
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noreturn-function-attribute
  * clang: https://clang.llvm.org/docs/AttributeReference.html#noreturn
-- 
cgit v1.2.3


From 9dc6edcf676fe188430e8b119f91280bbf285163 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Wed, 22 Aug 2018 14:24:16 -0400
Subject: SUNRPC: Clean up initialisation of the struct rpc_rqst

Move the initialisation back into xprt.c.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h |  1 -
 net/sunrpc/clnt.c           |  1 -
 net/sunrpc/xprt.c           | 91 +++++++++++++++++++++++++--------------------
 3 files changed, 51 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 336fd1a19cca..3d80524e92d6 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -325,7 +325,6 @@ struct xprt_class {
 struct rpc_xprt		*xprt_create_transport(struct xprt_create *args);
 void			xprt_connect(struct rpc_task *task);
 void			xprt_reserve(struct rpc_task *task);
-void			xprt_request_init(struct rpc_task *task);
 void			xprt_retry_reserve(struct rpc_task *task);
 int			xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
 int			xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8ea2f5fadd96..bc9d020bf71f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1558,7 +1558,6 @@ call_reserveresult(struct rpc_task *task)
 	task->tk_status = 0;
 	if (status >= 0) {
 		if (task->tk_rqstp) {
-			xprt_request_init(task);
 			task->tk_action = call_refresh;
 			return;
 		}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index a8db2e3f8904..6aa09edc9567 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1250,6 +1250,55 @@ void xprt_free(struct rpc_xprt *xprt)
 }
 EXPORT_SYMBOL_GPL(xprt_free);
 
+static __be32
+xprt_alloc_xid(struct rpc_xprt *xprt)
+{
+	__be32 xid;
+
+	spin_lock(&xprt->reserve_lock);
+	xid = (__force __be32)xprt->xid++;
+	spin_unlock(&xprt->reserve_lock);
+	return xid;
+}
+
+static void
+xprt_init_xid(struct rpc_xprt *xprt)
+{
+	xprt->xid = prandom_u32();
+}
+
+static void
+xprt_request_init(struct rpc_task *task)
+{
+	struct rpc_xprt *xprt = task->tk_xprt;
+	struct rpc_rqst	*req = task->tk_rqstp;
+
+	INIT_LIST_HEAD(&req->rq_list);
+	req->rq_timeout = task->tk_client->cl_timeout->to_initval;
+	req->rq_task	= task;
+	req->rq_xprt    = xprt;
+	req->rq_buffer  = NULL;
+	req->rq_xid	= xprt_alloc_xid(xprt);
+	req->rq_connect_cookie = xprt->connect_cookie - 1;
+	req->rq_bytes_sent = 0;
+	req->rq_snd_buf.len = 0;
+	req->rq_snd_buf.buflen = 0;
+	req->rq_rcv_buf.len = 0;
+	req->rq_rcv_buf.buflen = 0;
+	req->rq_release_snd_buf = NULL;
+	xprt_reset_majortimeo(req);
+	dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
+			req, ntohl(req->rq_xid));
+}
+
+static void
+xprt_do_reserve(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+	xprt->ops->alloc_slot(xprt, task);
+	if (task->tk_rqstp != NULL)
+		xprt_request_init(task);
+}
+
 /**
  * xprt_reserve - allocate an RPC request slot
  * @task: RPC task requesting a slot allocation
@@ -1269,7 +1318,7 @@ void xprt_reserve(struct rpc_task *task)
 	task->tk_timeout = 0;
 	task->tk_status = -EAGAIN;
 	if (!xprt_throttle_congested(xprt, task))
-		xprt->ops->alloc_slot(xprt, task);
+		xprt_do_reserve(xprt, task);
 }
 
 /**
@@ -1291,45 +1340,7 @@ void xprt_retry_reserve(struct rpc_task *task)
 
 	task->tk_timeout = 0;
 	task->tk_status = -EAGAIN;
-	xprt->ops->alloc_slot(xprt, task);
-}
-
-static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
-{
-	__be32 xid;
-
-	spin_lock(&xprt->reserve_lock);
-	xid = (__force __be32)xprt->xid++;
-	spin_unlock(&xprt->reserve_lock);
-	return xid;
-}
-
-static inline void xprt_init_xid(struct rpc_xprt *xprt)
-{
-	xprt->xid = prandom_u32();
-}
-
-void xprt_request_init(struct rpc_task *task)
-{
-	struct rpc_xprt *xprt = task->tk_xprt;
-	struct rpc_rqst	*req = task->tk_rqstp;
-
-	INIT_LIST_HEAD(&req->rq_list);
-	req->rq_timeout = task->tk_client->cl_timeout->to_initval;
-	req->rq_task	= task;
-	req->rq_xprt    = xprt;
-	req->rq_buffer  = NULL;
-	req->rq_xid	= xprt_alloc_xid(xprt);
-	req->rq_connect_cookie = xprt->connect_cookie - 1;
-	req->rq_bytes_sent = 0;
-	req->rq_snd_buf.len = 0;
-	req->rq_snd_buf.buflen = 0;
-	req->rq_rcv_buf.len = 0;
-	req->rq_rcv_buf.buflen = 0;
-	req->rq_release_snd_buf = NULL;
-	xprt_reset_majortimeo(req);
-	dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
-			req, ntohl(req->rq_xid));
+	xprt_do_reserve(xprt, task);
 }
 
 /**
-- 
cgit v1.2.3


From 3021a5bbbf0aa0252f2993b84ee903a0eca0b690 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 14 Aug 2018 13:50:21 -0400
Subject: SUNRPC: The transmitted message must lie in the RPCSEC window of
 validity

If a message has been encoded using RPCSEC_GSS, the server is
maintaining a window of sequence numbers that it considers valid.
The client should normally be tracking that window, and needs to
verify that the sequence number used by the message being transmitted
still lies inside the window of validity.

So far, we've been able to assume this condition would be realised
automatically, since the client has been encoding the message only
after taking the socket lock. Once we change that condition, we
will need the explicit check.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/auth.h     |  2 ++
 include/linux/sunrpc/auth_gss.h |  1 +
 net/sunrpc/auth.c               | 10 ++++++++++
 net/sunrpc/auth_gss/auth_gss.c  | 41 +++++++++++++++++++++++++++++++++++++++++
 net/sunrpc/clnt.c               |  3 +++
 net/sunrpc/xprt.c               |  7 +++++++
 6 files changed, 64 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 58a6765c1c5e..2c97a3933ef9 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -157,6 +157,7 @@ struct rpc_credops {
 	int			(*crkey_timeout)(struct rpc_cred *);
 	bool			(*crkey_to_expire)(struct rpc_cred *);
 	char *			(*crstringify_acceptor)(struct rpc_cred *);
+	bool			(*crneed_reencode)(struct rpc_task *);
 };
 
 extern const struct rpc_authops	authunix_ops;
@@ -192,6 +193,7 @@ __be32 *		rpcauth_marshcred(struct rpc_task *, __be32 *);
 __be32 *		rpcauth_checkverf(struct rpc_task *, __be32 *);
 int			rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj);
 int			rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj);
+bool			rpcauth_xmit_need_reencode(struct rpc_task *task);
 int			rpcauth_refreshcred(struct rpc_task *);
 void			rpcauth_invalcred(struct rpc_task *);
 int			rpcauth_uptodatecred(struct rpc_task *);
diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index 0c9eac351aab..30427b729070 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -70,6 +70,7 @@ struct gss_cl_ctx {
 	refcount_t		count;
 	enum rpc_gss_proc	gc_proc;
 	u32			gc_seq;
+	u32			gc_seq_xmit;
 	spinlock_t		gc_seq_lock;
 	struct gss_ctx		*gc_gss_ctx;
 	struct xdr_netobj	gc_wire_ctx;
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 305ecea92170..59df5cdba0ac 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -817,6 +817,16 @@ rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp,
 	return rpcauth_unwrap_req_decode(decode, rqstp, data, obj);
 }
 
+bool
+rpcauth_xmit_need_reencode(struct rpc_task *task)
+{
+	struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+
+	if (!cred || !cred->cr_ops->crneed_reencode)
+		return false;
+	return cred->cr_ops->crneed_reencode(task);
+}
+
 int
 rpcauth_refreshcred(struct rpc_task *task)
 {
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 21c0aa0a0d1d..c898a7c75e84 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1984,6 +1984,46 @@ gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
 	return decode(rqstp, &xdr, obj);
 }
 
+static bool
+gss_seq_is_newer(u32 new, u32 old)
+{
+	return (s32)(new - old) > 0;
+}
+
+static bool
+gss_xmit_need_reencode(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+	struct rpc_cred *cred = req->rq_cred;
+	struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
+	u32 win, seq_xmit;
+	bool ret = true;
+
+	if (!ctx)
+		return true;
+
+	if (gss_seq_is_newer(req->rq_seqno, READ_ONCE(ctx->gc_seq)))
+		goto out;
+
+	seq_xmit = READ_ONCE(ctx->gc_seq_xmit);
+	while (gss_seq_is_newer(req->rq_seqno, seq_xmit)) {
+		u32 tmp = seq_xmit;
+
+		seq_xmit = cmpxchg(&ctx->gc_seq_xmit, tmp, req->rq_seqno);
+		if (seq_xmit == tmp) {
+			ret = false;
+			goto out;
+		}
+	}
+
+	win = ctx->gc_win;
+	if (win > 0)
+		ret = !gss_seq_is_newer(req->rq_seqno, seq_xmit - win);
+out:
+	gss_put_ctx(ctx);
+	return ret;
+}
+
 static int
 gss_unwrap_resp(struct rpc_task *task,
 		kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj)
@@ -2052,6 +2092,7 @@ static const struct rpc_credops gss_credops = {
 	.crunwrap_resp		= gss_unwrap_resp,
 	.crkey_timeout		= gss_key_timeout,
 	.crstringify_acceptor	= gss_stringify_acceptor,
+	.crneed_reencode	= gss_xmit_need_reencode,
 };
 
 static const struct rpc_credops gss_nullops = {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 4f1ec8013332..d41b5ac1d4e8 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2184,6 +2184,9 @@ call_status(struct rpc_task *task)
 		/* shutdown or soft timeout */
 		rpc_exit(task, status);
 		break;
+	case -EBADMSG:
+		task->tk_action = call_transmit;
+		break;
 	default:
 		if (clnt->cl_chatty)
 			printk("%s: RPC call returned error %d\n",
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 6aa09edc9567..3973e10ea2bd 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1014,6 +1014,13 @@ void xprt_transmit(struct rpc_task *task)
 	dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
 
 	if (!req->rq_reply_bytes_recvd) {
+
+		/* Verify that our message lies in the RPCSEC_GSS window */
+		if (!req->rq_bytes_sent && rpcauth_xmit_need_reencode(task)) {
+			task->tk_status = -EBADMSG;
+			return;
+		}
+
 		if (list_empty(&req->rq_list) && rpc_reply_expected(task)) {
 			/*
 			 * Add to the list only if we're expecting a reply
-- 
cgit v1.2.3


From 7ebbbc6e7bd023903daa5bd95726edf2d60b559c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 28 Aug 2018 09:00:27 -0400
Subject: SUNRPC: Simplify identification of when the message send/receive is
 complete

Add states to indicate that the message send and receive are not yet
complete.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/sched.h |  6 ++++--
 net/sunrpc/clnt.c            | 19 +++++++------------
 net/sunrpc/xprt.c            | 17 ++++++++++++++---
 3 files changed, 25 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 592653becd91..9e655df70131 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -140,8 +140,10 @@ struct rpc_task_setup {
 #define RPC_TASK_RUNNING	0
 #define RPC_TASK_QUEUED		1
 #define RPC_TASK_ACTIVE		2
-#define RPC_TASK_MSG_RECV	3
-#define RPC_TASK_MSG_RECV_WAIT	4
+#define RPC_TASK_NEED_XMIT	3
+#define RPC_TASK_NEED_RECV	4
+#define RPC_TASK_MSG_RECV	5
+#define RPC_TASK_MSG_RECV_WAIT	6
 
 #define RPC_IS_RUNNING(t)	test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
 #define rpc_set_running(t)	set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d41b5ac1d4e8..e5ac35e803ad 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1156,6 +1156,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
 	 */
 	xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
 			xbufp->tail[0].iov_len;
+	set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
 
 	task->tk_action = call_bc_transmit;
 	atomic_inc(&task->tk_count);
@@ -1720,17 +1721,10 @@ call_allocate(struct rpc_task *task)
 	rpc_exit(task, -ERESTARTSYS);
 }
 
-static inline int
+static int
 rpc_task_need_encode(struct rpc_task *task)
 {
-	return task->tk_rqstp->rq_snd_buf.len == 0;
-}
-
-static inline void
-rpc_task_force_reencode(struct rpc_task *task)
-{
-	task->tk_rqstp->rq_snd_buf.len = 0;
-	task->tk_rqstp->rq_bytes_sent = 0;
+	return test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) == 0;
 }
 
 /*
@@ -1765,6 +1759,8 @@ rpc_xdr_encode(struct rpc_task *task)
 
 	task->tk_status = rpcauth_wrap_req(task, encode, req, p,
 			task->tk_msg.rpc_argp);
+	if (task->tk_status == 0)
+		set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
 }
 
 /*
@@ -1999,7 +1995,6 @@ call_transmit_status(struct rpc_task *task)
 	 */
 	if (task->tk_status == 0) {
 		xprt_end_transmit(task);
-		rpc_task_force_reencode(task);
 		return;
 	}
 
@@ -2010,7 +2005,6 @@ call_transmit_status(struct rpc_task *task)
 	default:
 		dprint_status(task);
 		xprt_end_transmit(task);
-		rpc_task_force_reencode(task);
 		break;
 		/*
 		 * Special cases: if we've been waiting on the
@@ -2038,7 +2032,7 @@ call_transmit_status(struct rpc_task *task)
 	case -EADDRINUSE:
 	case -ENOTCONN:
 	case -EPIPE:
-		rpc_task_force_reencode(task);
+		break;
 	}
 }
 
@@ -2185,6 +2179,7 @@ call_status(struct rpc_task *task)
 		rpc_exit(task, status);
 		break;
 	case -EBADMSG:
+		clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
 		task->tk_action = call_transmit;
 		break;
 	default:
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 3973e10ea2bd..45d580cd93ac 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -936,10 +936,18 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
 	/* req->rq_reply_bytes_recvd */
 	smp_wmb();
 	req->rq_reply_bytes_recvd = copied;
+	clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate);
 	rpc_wake_up_queued_task(&xprt->pending, task);
 }
 EXPORT_SYMBOL_GPL(xprt_complete_rqst);
 
+static bool
+xprt_request_data_received(struct rpc_task *task)
+{
+	return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) &&
+		task->tk_rqstp->rq_reply_bytes_recvd != 0;
+}
+
 static void xprt_timer(struct rpc_task *task)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
@@ -1031,12 +1039,13 @@ void xprt_transmit(struct rpc_task *task)
 			/* Add request to the receive list */
 			spin_lock(&xprt->recv_lock);
 			list_add_tail(&req->rq_list, &xprt->recv);
+			set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate);
 			spin_unlock(&xprt->recv_lock);
 			xprt_reset_majortimeo(req);
 			/* Turn off autodisconnect */
 			del_singleshot_timer_sync(&xprt->timer);
 		}
-	} else if (!req->rq_bytes_sent)
+	} else if (xprt_request_data_received(task) && !req->rq_bytes_sent)
 		return;
 
 	connect_cookie = xprt->connect_cookie;
@@ -1046,9 +1055,11 @@ void xprt_transmit(struct rpc_task *task)
 		task->tk_status = status;
 		return;
 	}
+
 	xprt_inject_disconnect(xprt);
 
 	dprintk("RPC: %5u xmit complete\n", task->tk_pid);
+	clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
 	task->tk_flags |= RPC_TASK_SENT;
 	spin_lock_bh(&xprt->transport_lock);
 
@@ -1062,14 +1073,14 @@ void xprt_transmit(struct rpc_task *task)
 	spin_unlock_bh(&xprt->transport_lock);
 
 	req->rq_connect_cookie = connect_cookie;
-	if (rpc_reply_expected(task) && !READ_ONCE(req->rq_reply_bytes_recvd)) {
+	if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) {
 		/*
 		 * Sleep on the pending queue if we're expecting a reply.
 		 * The spinlock ensures atomicity between the test of
 		 * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
 		 */
 		spin_lock(&xprt->recv_lock);
-		if (!req->rq_reply_bytes_recvd) {
+		if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) {
 			rpc_sleep_on(&xprt->pending, task, xprt_timer);
 			/*
 			 * Send an extra queue wakeup call if the
-- 
cgit v1.2.3


From d1109aa56c71e19fc117e75bff11384fc7279a3b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 13 Aug 2018 15:48:42 -0400
Subject: SUNRPC: Rename TCP receive-specific state variables

Since we will want to introduce similar TCP state variables for the
transmission of requests, let's rename the existing ones to label
that they are for the receive side.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprtsock.h |  16 ++--
 include/trace/events/sunrpc.h   |  10 +--
 net/sunrpc/xprtsock.c           | 178 ++++++++++++++++++++--------------------
 3 files changed, 103 insertions(+), 101 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index ae0f99b9b965..90d5ca8e65f4 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -30,15 +30,17 @@ struct sock_xprt {
 	/*
 	 * State of TCP reply receive
 	 */
-	__be32			tcp_fraghdr,
-				tcp_xid,
-				tcp_calldir;
+	struct {
+		__be32		fraghdr,
+				xid,
+				calldir;
 
-	u32			tcp_offset,
-				tcp_reclen;
+		u32		offset,
+				len;
 
-	unsigned long		tcp_copied,
-				tcp_flags;
+		unsigned long	copied,
+				flags;
+	} recv;
 
 	/*
 	 * Connection of transports
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index bbb08a3ef5cc..0aa347194e0f 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -525,11 +525,11 @@ TRACE_EVENT(xs_tcp_data_recv,
 	TP_fast_assign(
 		__assign_str(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]);
 		__assign_str(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]);
-		__entry->xid = be32_to_cpu(xs->tcp_xid);
-		__entry->flags = xs->tcp_flags;
-		__entry->copied = xs->tcp_copied;
-		__entry->reclen = xs->tcp_reclen;
-		__entry->offset = xs->tcp_offset;
+		__entry->xid = be32_to_cpu(xs->recv.xid);
+		__entry->flags = xs->recv.flags;
+		__entry->copied = xs->recv.copied;
+		__entry->reclen = xs->recv.len;
+		__entry->offset = xs->recv.offset;
 	),
 
 	TP_printk("peer=[%s]:%s xid=0x%08x flags=%s copied=%lu reclen=%u offset=%lu",
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 6b7539c0466e..cd7d093721ae 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1169,42 +1169,42 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea
 	size_t len, used;
 	char *p;
 
-	p = ((char *) &transport->tcp_fraghdr) + transport->tcp_offset;
-	len = sizeof(transport->tcp_fraghdr) - transport->tcp_offset;
+	p = ((char *) &transport->recv.fraghdr) + transport->recv.offset;
+	len = sizeof(transport->recv.fraghdr) - transport->recv.offset;
 	used = xdr_skb_read_bits(desc, p, len);
-	transport->tcp_offset += used;
+	transport->recv.offset += used;
 	if (used != len)
 		return;
 
-	transport->tcp_reclen = ntohl(transport->tcp_fraghdr);
-	if (transport->tcp_reclen & RPC_LAST_STREAM_FRAGMENT)
-		transport->tcp_flags |= TCP_RCV_LAST_FRAG;
+	transport->recv.len = ntohl(transport->recv.fraghdr);
+	if (transport->recv.len & RPC_LAST_STREAM_FRAGMENT)
+		transport->recv.flags |= TCP_RCV_LAST_FRAG;
 	else
-		transport->tcp_flags &= ~TCP_RCV_LAST_FRAG;
-	transport->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK;
+		transport->recv.flags &= ~TCP_RCV_LAST_FRAG;
+	transport->recv.len &= RPC_FRAGMENT_SIZE_MASK;
 
-	transport->tcp_flags &= ~TCP_RCV_COPY_FRAGHDR;
-	transport->tcp_offset = 0;
+	transport->recv.flags &= ~TCP_RCV_COPY_FRAGHDR;
+	transport->recv.offset = 0;
 
 	/* Sanity check of the record length */
-	if (unlikely(transport->tcp_reclen < 8)) {
+	if (unlikely(transport->recv.len < 8)) {
 		dprintk("RPC:       invalid TCP record fragment length\n");
 		xs_tcp_force_close(xprt);
 		return;
 	}
 	dprintk("RPC:       reading TCP record fragment of length %d\n",
-			transport->tcp_reclen);
+			transport->recv.len);
 }
 
 static void xs_tcp_check_fraghdr(struct sock_xprt *transport)
 {
-	if (transport->tcp_offset == transport->tcp_reclen) {
-		transport->tcp_flags |= TCP_RCV_COPY_FRAGHDR;
-		transport->tcp_offset = 0;
-		if (transport->tcp_flags & TCP_RCV_LAST_FRAG) {
-			transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
-			transport->tcp_flags |= TCP_RCV_COPY_XID;
-			transport->tcp_copied = 0;
+	if (transport->recv.offset == transport->recv.len) {
+		transport->recv.flags |= TCP_RCV_COPY_FRAGHDR;
+		transport->recv.offset = 0;
+		if (transport->recv.flags & TCP_RCV_LAST_FRAG) {
+			transport->recv.flags &= ~TCP_RCV_COPY_DATA;
+			transport->recv.flags |= TCP_RCV_COPY_XID;
+			transport->recv.copied = 0;
 		}
 	}
 }
@@ -1214,20 +1214,20 @@ static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_r
 	size_t len, used;
 	char *p;
 
-	len = sizeof(transport->tcp_xid) - transport->tcp_offset;
+	len = sizeof(transport->recv.xid) - transport->recv.offset;
 	dprintk("RPC:       reading XID (%zu bytes)\n", len);
-	p = ((char *) &transport->tcp_xid) + transport->tcp_offset;
+	p = ((char *) &transport->recv.xid) + transport->recv.offset;
 	used = xdr_skb_read_bits(desc, p, len);
-	transport->tcp_offset += used;
+	transport->recv.offset += used;
 	if (used != len)
 		return;
-	transport->tcp_flags &= ~TCP_RCV_COPY_XID;
-	transport->tcp_flags |= TCP_RCV_READ_CALLDIR;
-	transport->tcp_copied = 4;
+	transport->recv.flags &= ~TCP_RCV_COPY_XID;
+	transport->recv.flags |= TCP_RCV_READ_CALLDIR;
+	transport->recv.copied = 4;
 	dprintk("RPC:       reading %s XID %08x\n",
-			(transport->tcp_flags & TCP_RPC_REPLY) ? "reply for"
+			(transport->recv.flags & TCP_RPC_REPLY) ? "reply for"
 							      : "request with",
-			ntohl(transport->tcp_xid));
+			ntohl(transport->recv.xid));
 	xs_tcp_check_fraghdr(transport);
 }
 
@@ -1239,34 +1239,34 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport,
 	char *p;
 
 	/*
-	 * We want transport->tcp_offset to be 8 at the end of this routine
+	 * We want transport->recv.offset to be 8 at the end of this routine
 	 * (4 bytes for the xid and 4 bytes for the call/reply flag).
 	 * When this function is called for the first time,
-	 * transport->tcp_offset is 4 (after having already read the xid).
+	 * transport->recv.offset is 4 (after having already read the xid).
 	 */
-	offset = transport->tcp_offset - sizeof(transport->tcp_xid);
-	len = sizeof(transport->tcp_calldir) - offset;
+	offset = transport->recv.offset - sizeof(transport->recv.xid);
+	len = sizeof(transport->recv.calldir) - offset;
 	dprintk("RPC:       reading CALL/REPLY flag (%zu bytes)\n", len);
-	p = ((char *) &transport->tcp_calldir) + offset;
+	p = ((char *) &transport->recv.calldir) + offset;
 	used = xdr_skb_read_bits(desc, p, len);
-	transport->tcp_offset += used;
+	transport->recv.offset += used;
 	if (used != len)
 		return;
-	transport->tcp_flags &= ~TCP_RCV_READ_CALLDIR;
+	transport->recv.flags &= ~TCP_RCV_READ_CALLDIR;
 	/*
 	 * We don't yet have the XDR buffer, so we will write the calldir
 	 * out after we get the buffer from the 'struct rpc_rqst'
 	 */
-	switch (ntohl(transport->tcp_calldir)) {
+	switch (ntohl(transport->recv.calldir)) {
 	case RPC_REPLY:
-		transport->tcp_flags |= TCP_RCV_COPY_CALLDIR;
-		transport->tcp_flags |= TCP_RCV_COPY_DATA;
-		transport->tcp_flags |= TCP_RPC_REPLY;
+		transport->recv.flags |= TCP_RCV_COPY_CALLDIR;
+		transport->recv.flags |= TCP_RCV_COPY_DATA;
+		transport->recv.flags |= TCP_RPC_REPLY;
 		break;
 	case RPC_CALL:
-		transport->tcp_flags |= TCP_RCV_COPY_CALLDIR;
-		transport->tcp_flags |= TCP_RCV_COPY_DATA;
-		transport->tcp_flags &= ~TCP_RPC_REPLY;
+		transport->recv.flags |= TCP_RCV_COPY_CALLDIR;
+		transport->recv.flags |= TCP_RCV_COPY_DATA;
+		transport->recv.flags &= ~TCP_RPC_REPLY;
 		break;
 	default:
 		dprintk("RPC:       invalid request message type\n");
@@ -1287,21 +1287,21 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
 
 	rcvbuf = &req->rq_private_buf;
 
-	if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) {
+	if (transport->recv.flags & TCP_RCV_COPY_CALLDIR) {
 		/*
 		 * Save the RPC direction in the XDR buffer
 		 */
-		memcpy(rcvbuf->head[0].iov_base + transport->tcp_copied,
-			&transport->tcp_calldir,
-			sizeof(transport->tcp_calldir));
-		transport->tcp_copied += sizeof(transport->tcp_calldir);
-		transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR;
+		memcpy(rcvbuf->head[0].iov_base + transport->recv.copied,
+			&transport->recv.calldir,
+			sizeof(transport->recv.calldir));
+		transport->recv.copied += sizeof(transport->recv.calldir);
+		transport->recv.flags &= ~TCP_RCV_COPY_CALLDIR;
 	}
 
 	len = desc->count;
-	if (len > transport->tcp_reclen - transport->tcp_offset)
-		desc->count = transport->tcp_reclen - transport->tcp_offset;
-	r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
+	if (len > transport->recv.len - transport->recv.offset)
+		desc->count = transport->recv.len - transport->recv.offset;
+	r = xdr_partial_copy_from_skb(rcvbuf, transport->recv.copied,
 					  desc, xdr_skb_read_bits);
 
 	if (desc->count) {
@@ -1314,31 +1314,31 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
 		 * Any remaining data from this record will
 		 * be discarded.
 		 */
-		transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
+		transport->recv.flags &= ~TCP_RCV_COPY_DATA;
 		dprintk("RPC:       XID %08x truncated request\n",
-				ntohl(transport->tcp_xid));
-		dprintk("RPC:       xprt = %p, tcp_copied = %lu, "
-				"tcp_offset = %u, tcp_reclen = %u\n",
-				xprt, transport->tcp_copied,
-				transport->tcp_offset, transport->tcp_reclen);
+				ntohl(transport->recv.xid));
+		dprintk("RPC:       xprt = %p, recv.copied = %lu, "
+				"recv.offset = %u, recv.len = %u\n",
+				xprt, transport->recv.copied,
+				transport->recv.offset, transport->recv.len);
 		return;
 	}
 
-	transport->tcp_copied += r;
-	transport->tcp_offset += r;
+	transport->recv.copied += r;
+	transport->recv.offset += r;
 	desc->count = len - r;
 
 	dprintk("RPC:       XID %08x read %zd bytes\n",
-			ntohl(transport->tcp_xid), r);
-	dprintk("RPC:       xprt = %p, tcp_copied = %lu, tcp_offset = %u, "
-			"tcp_reclen = %u\n", xprt, transport->tcp_copied,
-			transport->tcp_offset, transport->tcp_reclen);
-
-	if (transport->tcp_copied == req->rq_private_buf.buflen)
-		transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
-	else if (transport->tcp_offset == transport->tcp_reclen) {
-		if (transport->tcp_flags & TCP_RCV_LAST_FRAG)
-			transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
+			ntohl(transport->recv.xid), r);
+	dprintk("RPC:       xprt = %p, recv.copied = %lu, recv.offset = %u, "
+			"recv.len = %u\n", xprt, transport->recv.copied,
+			transport->recv.offset, transport->recv.len);
+
+	if (transport->recv.copied == req->rq_private_buf.buflen)
+		transport->recv.flags &= ~TCP_RCV_COPY_DATA;
+	else if (transport->recv.offset == transport->recv.len) {
+		if (transport->recv.flags & TCP_RCV_LAST_FRAG)
+			transport->recv.flags &= ~TCP_RCV_COPY_DATA;
 	}
 }
 
@@ -1353,14 +1353,14 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
 				container_of(xprt, struct sock_xprt, xprt);
 	struct rpc_rqst *req;
 
-	dprintk("RPC:       read reply XID %08x\n", ntohl(transport->tcp_xid));
+	dprintk("RPC:       read reply XID %08x\n", ntohl(transport->recv.xid));
 
 	/* Find and lock the request corresponding to this xid */
 	spin_lock(&xprt->recv_lock);
-	req = xprt_lookup_rqst(xprt, transport->tcp_xid);
+	req = xprt_lookup_rqst(xprt, transport->recv.xid);
 	if (!req) {
 		dprintk("RPC:       XID %08x request not found!\n",
-				ntohl(transport->tcp_xid));
+				ntohl(transport->recv.xid));
 		spin_unlock(&xprt->recv_lock);
 		return -1;
 	}
@@ -1370,8 +1370,8 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
 	xs_tcp_read_common(xprt, desc, req);
 
 	spin_lock(&xprt->recv_lock);
-	if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
-		xprt_complete_rqst(req->rq_task, transport->tcp_copied);
+	if (!(transport->recv.flags & TCP_RCV_COPY_DATA))
+		xprt_complete_rqst(req->rq_task, transport->recv.copied);
 	xprt_unpin_rqst(req);
 	spin_unlock(&xprt->recv_lock);
 	return 0;
@@ -1393,7 +1393,7 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
 	struct rpc_rqst *req;
 
 	/* Look up the request corresponding to the given XID */
-	req = xprt_lookup_bc_request(xprt, transport->tcp_xid);
+	req = xprt_lookup_bc_request(xprt, transport->recv.xid);
 	if (req == NULL) {
 		printk(KERN_WARNING "Callback slot table overflowed\n");
 		xprt_force_disconnect(xprt);
@@ -1403,8 +1403,8 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
 	dprintk("RPC:       read callback  XID %08x\n", ntohl(req->rq_xid));
 	xs_tcp_read_common(xprt, desc, req);
 
-	if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
-		xprt_complete_bc_request(req, transport->tcp_copied);
+	if (!(transport->recv.flags & TCP_RCV_COPY_DATA))
+		xprt_complete_bc_request(req, transport->recv.copied);
 
 	return 0;
 }
@@ -1415,7 +1415,7 @@ static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
 	struct sock_xprt *transport =
 				container_of(xprt, struct sock_xprt, xprt);
 
-	return (transport->tcp_flags & TCP_RPC_REPLY) ?
+	return (transport->recv.flags & TCP_RPC_REPLY) ?
 		xs_tcp_read_reply(xprt, desc) :
 		xs_tcp_read_callback(xprt, desc);
 }
@@ -1458,9 +1458,9 @@ static void xs_tcp_read_data(struct rpc_xprt *xprt,
 	else {
 		/*
 		 * The transport_lock protects the request handling.
-		 * There's no need to hold it to update the tcp_flags.
+		 * There's no need to hold it to update the recv.flags.
 		 */
-		transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
+		transport->recv.flags &= ~TCP_RCV_COPY_DATA;
 	}
 }
 
@@ -1468,12 +1468,12 @@ static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_s
 {
 	size_t len;
 
-	len = transport->tcp_reclen - transport->tcp_offset;
+	len = transport->recv.len - transport->recv.offset;
 	if (len > desc->count)
 		len = desc->count;
 	desc->count -= len;
 	desc->offset += len;
-	transport->tcp_offset += len;
+	transport->recv.offset += len;
 	dprintk("RPC:       discarded %zu bytes\n", len);
 	xs_tcp_check_fraghdr(transport);
 }
@@ -1494,22 +1494,22 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns
 		trace_xs_tcp_data_recv(transport);
 		/* Read in a new fragment marker if necessary */
 		/* Can we ever really expect to get completely empty fragments? */
-		if (transport->tcp_flags & TCP_RCV_COPY_FRAGHDR) {
+		if (transport->recv.flags & TCP_RCV_COPY_FRAGHDR) {
 			xs_tcp_read_fraghdr(xprt, &desc);
 			continue;
 		}
 		/* Read in the xid if necessary */
-		if (transport->tcp_flags & TCP_RCV_COPY_XID) {
+		if (transport->recv.flags & TCP_RCV_COPY_XID) {
 			xs_tcp_read_xid(transport, &desc);
 			continue;
 		}
 		/* Read in the call/reply flag */
-		if (transport->tcp_flags & TCP_RCV_READ_CALLDIR) {
+		if (transport->recv.flags & TCP_RCV_READ_CALLDIR) {
 			xs_tcp_read_calldir(transport, &desc);
 			continue;
 		}
 		/* Read in the request data */
-		if (transport->tcp_flags & TCP_RCV_COPY_DATA) {
+		if (transport->recv.flags & TCP_RCV_COPY_DATA) {
 			xs_tcp_read_data(xprt, &desc);
 			continue;
 		}
@@ -1602,10 +1602,10 @@ static void xs_tcp_state_change(struct sock *sk)
 		if (!xprt_test_and_set_connected(xprt)) {
 
 			/* Reset TCP record info */
-			transport->tcp_offset = 0;
-			transport->tcp_reclen = 0;
-			transport->tcp_copied = 0;
-			transport->tcp_flags =
+			transport->recv.offset = 0;
+			transport->recv.len = 0;
+			transport->recv.copied = 0;
+			transport->recv.flags =
 				TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
 			xprt->connect_cookie++;
 			clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
-- 
cgit v1.2.3


From 6c7a64e5a44dbc6d073b83a56a48d0a4099f1dd2 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 13 Aug 2018 16:54:57 -0400
Subject: SUNRPC: Add socket transmit queue offset tracking

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprtsock.h |  7 +++++++
 net/sunrpc/xprtsock.c           | 40 ++++++++++++++++++++++------------------
 2 files changed, 29 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index 90d5ca8e65f4..005cfb6e7238 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -42,6 +42,13 @@ struct sock_xprt {
 				flags;
 	} recv;
 
+	/*
+	 * State of TCP transmit queue
+	 */
+	struct {
+		u32		offset;
+	} xmit;
+
 	/*
 	 * Connection of transports
 	 */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ec1e3f93e707..629cc45e1e6c 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -461,7 +461,7 @@ static int xs_nospace(struct rpc_task *task)
 	int ret = -EAGAIN;
 
 	dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
-			task->tk_pid, req->rq_slen - req->rq_bytes_sent,
+			task->tk_pid, req->rq_slen - transport->xmit.offset,
 			req->rq_slen);
 
 	/* Protect against races with write_space */
@@ -528,19 +528,22 @@ static int xs_local_send_request(struct rpc_task *task)
 			req->rq_svec->iov_base, req->rq_svec->iov_len);
 
 	req->rq_xtime = ktime_get();
-	status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent,
+	status = xs_sendpages(transport->sock, NULL, 0, xdr,
+			      transport->xmit.offset,
 			      true, &sent);
 	dprintk("RPC:       %s(%u) = %d\n",
-			__func__, xdr->len - req->rq_bytes_sent, status);
+			__func__, xdr->len - transport->xmit.offset, status);
 
 	if (status == -EAGAIN && sock_writeable(transport->inet))
 		status = -ENOBUFS;
 
 	if (likely(sent > 0) || status == 0) {
-		req->rq_bytes_sent += sent;
-		req->rq_xmit_bytes_sent += sent;
+		transport->xmit.offset += sent;
+		req->rq_bytes_sent = transport->xmit.offset;
 		if (likely(req->rq_bytes_sent >= req->rq_slen)) {
+			req->rq_xmit_bytes_sent += transport->xmit.offset;
 			req->rq_bytes_sent = 0;
+			transport->xmit.offset = 0;
 			return 0;
 		}
 		status = -EAGAIN;
@@ -592,10 +595,10 @@ static int xs_udp_send_request(struct rpc_task *task)
 		return -ENOTCONN;
 	req->rq_xtime = ktime_get();
 	status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
-			      xdr, req->rq_bytes_sent, true, &sent);
+			      xdr, 0, true, &sent);
 
 	dprintk("RPC:       xs_udp_send_request(%u) = %d\n",
-			xdr->len - req->rq_bytes_sent, status);
+			xdr->len, status);
 
 	/* firewall is blocking us, don't return -EAGAIN or we end up looping */
 	if (status == -EPERM)
@@ -684,17 +687,20 @@ static int xs_tcp_send_request(struct rpc_task *task)
 	while (1) {
 		sent = 0;
 		status = xs_sendpages(transport->sock, NULL, 0, xdr,
-				      req->rq_bytes_sent, zerocopy, &sent);
+				      transport->xmit.offset,
+				      zerocopy, &sent);
 
 		dprintk("RPC:       xs_tcp_send_request(%u) = %d\n",
-				xdr->len - req->rq_bytes_sent, status);
+				xdr->len - transport->xmit.offset, status);
 
 		/* If we've sent the entire packet, immediately
 		 * reset the count of bytes sent. */
-		req->rq_bytes_sent += sent;
-		req->rq_xmit_bytes_sent += sent;
+		transport->xmit.offset += sent;
+		req->rq_bytes_sent = transport->xmit.offset;
 		if (likely(req->rq_bytes_sent >= req->rq_slen)) {
+			req->rq_xmit_bytes_sent += transport->xmit.offset;
 			req->rq_bytes_sent = 0;
+			transport->xmit.offset = 0;
 			return 0;
 		}
 
@@ -760,18 +766,13 @@ static int xs_tcp_send_request(struct rpc_task *task)
  */
 static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
 {
-	struct rpc_rqst *req;
+	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 
 	if (task != xprt->snd_task)
 		return;
 	if (task == NULL)
 		goto out_release;
-	req = task->tk_rqstp;
-	if (req == NULL)
-		goto out_release;
-	if (req->rq_bytes_sent == 0)
-		goto out_release;
-	if (req->rq_bytes_sent == req->rq_snd_buf.len)
+	if (transport->xmit.offset == 0 || !xprt_connected(xprt))
 		goto out_release;
 	set_bit(XPRT_CLOSE_WAIT, &xprt->state);
 out_release:
@@ -2021,6 +2022,8 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
 		write_unlock_bh(&sk->sk_callback_lock);
 	}
 
+	transport->xmit.offset = 0;
+
 	/* Tell the socket layer to start connecting... */
 	xprt->stat.connect_count++;
 	xprt->stat.connect_start = jiffies;
@@ -2384,6 +2387,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 	transport->recv.len = 0;
 	transport->recv.copied = 0;
 	transport->recv.flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
+	transport->xmit.offset = 0;
 
 	/* Tell the socket layer to start connecting... */
 	xprt->stat.connect_count++;
-- 
cgit v1.2.3


From cf9946cd6144410ced00d52586ff5a2cb4868fc5 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 6 Aug 2018 12:55:34 -0400
Subject: SUNRPC: Refactor the transport request pinning

We are going to need to pin for both send and receive.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/sched.h |  3 +--
 include/linux/sunrpc/xprt.h  |  1 +
 net/sunrpc/xprt.c            | 43 +++++++++++++++++++++++--------------------
 3 files changed, 25 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 9e655df70131..8062ce6b18e5 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -142,8 +142,7 @@ struct rpc_task_setup {
 #define RPC_TASK_ACTIVE		2
 #define RPC_TASK_NEED_XMIT	3
 #define RPC_TASK_NEED_RECV	4
-#define RPC_TASK_MSG_RECV	5
-#define RPC_TASK_MSG_RECV_WAIT	6
+#define RPC_TASK_MSG_PIN_WAIT	5
 
 #define RPC_IS_RUNNING(t)	test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
 #define rpc_set_running(t)	set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 3d80524e92d6..bd743c51a865 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -103,6 +103,7 @@ struct rpc_rqst {
 						/* A cookie used to track the
 						   state of the transport
 						   connection */
+	atomic_t		rq_pin;
 	
 	/*
 	 * Partial send handling
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 45d580cd93ac..649a40cfae6d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -847,16 +847,22 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
 }
 EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
 
+static bool
+xprt_is_pinned_rqst(struct rpc_rqst *req)
+{
+	return atomic_read(&req->rq_pin) != 0;
+}
+
 /**
  * xprt_pin_rqst - Pin a request on the transport receive list
  * @req: Request to pin
  *
  * Caller must ensure this is atomic with the call to xprt_lookup_rqst()
- * so should be holding the xprt transport lock.
+ * so should be holding the xprt receive lock.
  */
 void xprt_pin_rqst(struct rpc_rqst *req)
 {
-	set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate);
+	atomic_inc(&req->rq_pin);
 }
 EXPORT_SYMBOL_GPL(xprt_pin_rqst);
 
@@ -864,31 +870,22 @@ EXPORT_SYMBOL_GPL(xprt_pin_rqst);
  * xprt_unpin_rqst - Unpin a request on the transport receive list
  * @req: Request to pin
  *
- * Caller should be holding the xprt transport lock.
+ * Caller should be holding the xprt receive lock.
  */
 void xprt_unpin_rqst(struct rpc_rqst *req)
 {
-	struct rpc_task *task = req->rq_task;
-
-	clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate);
-	if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate))
-		wake_up_bit(&task->tk_runstate, RPC_TASK_MSG_RECV);
+	if (!test_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate)) {
+		atomic_dec(&req->rq_pin);
+		return;
+	}
+	if (atomic_dec_and_test(&req->rq_pin))
+		wake_up_var(&req->rq_pin);
 }
 EXPORT_SYMBOL_GPL(xprt_unpin_rqst);
 
 static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)
-__must_hold(&req->rq_xprt->recv_lock)
 {
-	struct rpc_task *task = req->rq_task;
-
-	if (task && test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) {
-		spin_unlock(&req->rq_xprt->recv_lock);
-		set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
-		wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV,
-				TASK_UNINTERRUPTIBLE);
-		clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
-		spin_lock(&req->rq_xprt->recv_lock);
-	}
+	wait_var_event(&req->rq_pin, !xprt_is_pinned_rqst(req));
 }
 
 /**
@@ -1388,7 +1385,13 @@ void xprt_release(struct rpc_task *task)
 	spin_lock(&xprt->recv_lock);
 	if (!list_empty(&req->rq_list)) {
 		list_del_init(&req->rq_list);
-		xprt_wait_on_pinned_rqst(req);
+		if (xprt_is_pinned_rqst(req)) {
+			set_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate);
+			spin_unlock(&xprt->recv_lock);
+			xprt_wait_on_pinned_rqst(req);
+			spin_lock(&xprt->recv_lock);
+			clear_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate);
+		}
 	}
 	spin_unlock(&xprt->recv_lock);
 	spin_lock_bh(&xprt->transport_lock);
-- 
cgit v1.2.3


From 359c48c04af25397ecefec1ccf200ddd199617ce Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Wed, 29 Aug 2018 09:22:28 -0400
Subject: SUNRPC: Add a helper to wake up a sleeping rpc_task and set its
 status

Add a helper that will wake up a task that is sleeping on a specific
queue, and will set the value of task->tk_status. This is mainly
intended for use by the transport layer to notify the task of an
error condition.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/sched.h |  3 ++
 net/sunrpc/sched.c           | 65 +++++++++++++++++++++++++++++++++++++-------
 2 files changed, 58 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 8062ce6b18e5..8840a420cf4c 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -235,6 +235,9 @@ void rpc_wake_up_queued_task_on_wq(struct workqueue_struct *wq,
 		struct rpc_task *task);
 void		rpc_wake_up_queued_task(struct rpc_wait_queue *,
 					struct rpc_task *);
+void		rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *,
+						   struct rpc_task *,
+						   int);
 void		rpc_wake_up(struct rpc_wait_queue *);
 struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
 struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq,
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 3fe5d60ab0e2..dec01bd1b71c 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -440,14 +440,28 @@ static void __rpc_do_wake_up_task_on_wq(struct workqueue_struct *wq,
 /*
  * Wake up a queued task while the queue lock is being held
  */
-static void rpc_wake_up_task_on_wq_queue_locked(struct workqueue_struct *wq,
-		struct rpc_wait_queue *queue, struct rpc_task *task)
+static struct rpc_task *
+rpc_wake_up_task_on_wq_queue_action_locked(struct workqueue_struct *wq,
+		struct rpc_wait_queue *queue, struct rpc_task *task,
+		bool (*action)(struct rpc_task *, void *), void *data)
 {
 	if (RPC_IS_QUEUED(task)) {
 		smp_rmb();
-		if (task->tk_waitqueue == queue)
-			__rpc_do_wake_up_task_on_wq(wq, queue, task);
+		if (task->tk_waitqueue == queue) {
+			if (action == NULL || action(task, data)) {
+				__rpc_do_wake_up_task_on_wq(wq, queue, task);
+				return task;
+			}
+		}
 	}
+	return NULL;
+}
+
+static void
+rpc_wake_up_task_on_wq_queue_locked(struct workqueue_struct *wq,
+		struct rpc_wait_queue *queue, struct rpc_task *task)
+{
+	rpc_wake_up_task_on_wq_queue_action_locked(wq, queue, task, NULL, NULL);
 }
 
 /*
@@ -481,6 +495,40 @@ void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task
 }
 EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
 
+static bool rpc_task_action_set_status(struct rpc_task *task, void *status)
+{
+	task->tk_status = *(int *)status;
+	return true;
+}
+
+static void
+rpc_wake_up_task_queue_set_status_locked(struct rpc_wait_queue *queue,
+		struct rpc_task *task, int status)
+{
+	rpc_wake_up_task_on_wq_queue_action_locked(rpciod_workqueue, queue,
+			task, rpc_task_action_set_status, &status);
+}
+
+/**
+ * rpc_wake_up_queued_task_set_status - wake up a task and set task->tk_status
+ * @queue: pointer to rpc_wait_queue
+ * @task: pointer to rpc_task
+ * @status: integer error value
+ *
+ * If @task is queued on @queue, then it is woken up, and @task->tk_status is
+ * set to the value of @status.
+ */
+void
+rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *queue,
+		struct rpc_task *task, int status)
+{
+	if (!RPC_IS_QUEUED(task))
+		return;
+	spin_lock_bh(&queue->lock);
+	rpc_wake_up_task_queue_set_status_locked(queue, task, status);
+	spin_unlock_bh(&queue->lock);
+}
+
 /*
  * Wake up the next task on a priority queue.
  */
@@ -553,12 +601,9 @@ struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq,
 			queue, rpc_qname(queue));
 	spin_lock_bh(&queue->lock);
 	task = __rpc_find_next_queued(queue);
-	if (task != NULL) {
-		if (func(task, data))
-			rpc_wake_up_task_on_wq_queue_locked(wq, queue, task);
-		else
-			task = NULL;
-	}
+	if (task != NULL)
+		task = rpc_wake_up_task_on_wq_queue_action_locked(wq, queue,
+				task, func, data);
 	spin_unlock_bh(&queue->lock);
 
 	return task;
-- 
cgit v1.2.3


From 75c84151a9dc7a755c607e6761d8f14a1690dbf0 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 31 Aug 2018 10:21:00 -0400
Subject: SUNRPC: Rename xprt->recv_lock to xprt->queue_lock

We will use the same lock to protect both the transmit and receive queues.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h                |  2 +-
 net/sunrpc/svcsock.c                       |  6 +++---
 net/sunrpc/xprt.c                          | 24 ++++++++++++------------
 net/sunrpc/xprtrdma/rpc_rdma.c             | 10 +++++-----
 net/sunrpc/xprtrdma/svc_rdma_backchannel.c |  4 ++--
 net/sunrpc/xprtsock.c                      | 30 +++++++++++++++---------------
 6 files changed, 38 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index bd743c51a865..c25d0a5fda69 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -235,7 +235,7 @@ struct rpc_xprt {
 	 */
 	spinlock_t		transport_lock;	/* lock transport info */
 	spinlock_t		reserve_lock;	/* lock slot table */
-	spinlock_t		recv_lock;	/* lock receive list */
+	spinlock_t		queue_lock;	/* send/receive queue lock */
 	u32			xid;		/* Next XID value to use */
 	struct rpc_task *	snd_task;	/* Task blocked in send */
 	struct svc_xprt		*bc_xprt;	/* NFSv4.1 backchannel */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 5445145e639c..db8bb6b3a2b0 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1004,7 +1004,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
 
 	if (!bc_xprt)
 		return -EAGAIN;
-	spin_lock(&bc_xprt->recv_lock);
+	spin_lock(&bc_xprt->queue_lock);
 	req = xprt_lookup_rqst(bc_xprt, xid);
 	if (!req)
 		goto unlock_notfound;
@@ -1022,7 +1022,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
 	memcpy(dst->iov_base, src->iov_base, src->iov_len);
 	xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len);
 	rqstp->rq_arg.len = 0;
-	spin_unlock(&bc_xprt->recv_lock);
+	spin_unlock(&bc_xprt->queue_lock);
 	return 0;
 unlock_notfound:
 	printk(KERN_NOTICE
@@ -1031,7 +1031,7 @@ unlock_notfound:
 		__func__, ntohl(calldir),
 		bc_xprt, ntohl(xid));
 unlock_eagain:
-	spin_unlock(&bc_xprt->recv_lock);
+	spin_unlock(&bc_xprt->queue_lock);
 	return -EAGAIN;
 }
 
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 3a3b3445a7c0..6e3d4b4ee79e 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -826,7 +826,7 @@ static void xprt_connect_status(struct rpc_task *task)
  * @xprt: transport on which the original request was transmitted
  * @xid: RPC XID of incoming reply
  *
- * Caller holds xprt->recv_lock.
+ * Caller holds xprt->queue_lock.
  */
 struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
 {
@@ -892,7 +892,7 @@ static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)
  * xprt_update_rtt - Update RPC RTT statistics
  * @task: RPC request that recently completed
  *
- * Caller holds xprt->recv_lock.
+ * Caller holds xprt->queue_lock.
  */
 void xprt_update_rtt(struct rpc_task *task)
 {
@@ -914,7 +914,7 @@ EXPORT_SYMBOL_GPL(xprt_update_rtt);
  * @task: RPC request that recently completed
  * @copied: actual number of bytes received from the transport
  *
- * Caller holds xprt->recv_lock.
+ * Caller holds xprt->queue_lock.
  */
 void xprt_complete_rqst(struct rpc_task *task, int copied)
 {
@@ -1034,10 +1034,10 @@ void xprt_transmit(struct rpc_task *task)
 			memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
 					sizeof(req->rq_private_buf));
 			/* Add request to the receive list */
-			spin_lock(&xprt->recv_lock);
+			spin_lock(&xprt->queue_lock);
 			list_add_tail(&req->rq_list, &xprt->recv);
 			set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate);
-			spin_unlock(&xprt->recv_lock);
+			spin_unlock(&xprt->queue_lock);
 			xprt_reset_majortimeo(req);
 			/* Turn off autodisconnect */
 			del_singleshot_timer_sync(&xprt->timer);
@@ -1076,7 +1076,7 @@ void xprt_transmit(struct rpc_task *task)
 		 * The spinlock ensures atomicity between the test of
 		 * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
 		 */
-		spin_lock(&xprt->recv_lock);
+		spin_lock(&xprt->queue_lock);
 		if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) {
 			rpc_sleep_on(&xprt->pending, task, xprt_timer);
 			/* Wake up immediately if the connection was dropped */
@@ -1084,7 +1084,7 @@ void xprt_transmit(struct rpc_task *task)
 				rpc_wake_up_queued_task_set_status(&xprt->pending,
 						task, -ENOTCONN);
 		}
-		spin_unlock(&xprt->recv_lock);
+		spin_unlock(&xprt->queue_lock);
 	}
 }
 
@@ -1379,18 +1379,18 @@ void xprt_release(struct rpc_task *task)
 		task->tk_ops->rpc_count_stats(task, task->tk_calldata);
 	else if (task->tk_client)
 		rpc_count_iostats(task, task->tk_client->cl_metrics);
-	spin_lock(&xprt->recv_lock);
+	spin_lock(&xprt->queue_lock);
 	if (!list_empty(&req->rq_list)) {
 		list_del_init(&req->rq_list);
 		if (xprt_is_pinned_rqst(req)) {
 			set_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate);
-			spin_unlock(&xprt->recv_lock);
+			spin_unlock(&xprt->queue_lock);
 			xprt_wait_on_pinned_rqst(req);
-			spin_lock(&xprt->recv_lock);
+			spin_lock(&xprt->queue_lock);
 			clear_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate);
 		}
 	}
-	spin_unlock(&xprt->recv_lock);
+	spin_unlock(&xprt->queue_lock);
 	spin_lock_bh(&xprt->transport_lock);
 	xprt->ops->release_xprt(xprt, task);
 	if (xprt->ops->release_request)
@@ -1420,7 +1420,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
 
 	spin_lock_init(&xprt->transport_lock);
 	spin_lock_init(&xprt->reserve_lock);
-	spin_lock_init(&xprt->recv_lock);
+	spin_lock_init(&xprt->queue_lock);
 
 	INIT_LIST_HEAD(&xprt->free);
 	INIT_LIST_HEAD(&xprt->recv);
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index c8ae983c6cc0..0020dc401215 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1238,7 +1238,7 @@ void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
 		goto out_badheader;
 
 out:
-	spin_lock(&xprt->recv_lock);
+	spin_lock(&xprt->queue_lock);
 	cwnd = xprt->cwnd;
 	xprt->cwnd = r_xprt->rx_buf.rb_credits << RPC_CWNDSHIFT;
 	if (xprt->cwnd > cwnd)
@@ -1246,7 +1246,7 @@ out:
 
 	xprt_complete_rqst(rqst->rq_task, status);
 	xprt_unpin_rqst(rqst);
-	spin_unlock(&xprt->recv_lock);
+	spin_unlock(&xprt->queue_lock);
 	return;
 
 /* If the incoming reply terminated a pending RPC, the next
@@ -1345,7 +1345,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 	/* Match incoming rpcrdma_rep to an rpcrdma_req to
 	 * get context for handling any incoming chunks.
 	 */
-	spin_lock(&xprt->recv_lock);
+	spin_lock(&xprt->queue_lock);
 	rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
 	if (!rqst)
 		goto out_norqst;
@@ -1357,7 +1357,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 		credits = buf->rb_max_requests;
 	buf->rb_credits = credits;
 
-	spin_unlock(&xprt->recv_lock);
+	spin_unlock(&xprt->queue_lock);
 
 	req = rpcr_to_rdmar(rqst);
 	req->rl_reply = rep;
@@ -1378,7 +1378,7 @@ out_badversion:
  * is corrupt.
  */
 out_norqst:
-	spin_unlock(&xprt->recv_lock);
+	spin_unlock(&xprt->queue_lock);
 	trace_xprtrdma_reply_rqst(rep);
 	goto repost;
 
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index a68180090554..09b12b7568fe 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -56,7 +56,7 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
 	if (src->iov_len < 24)
 		goto out_shortreply;
 
-	spin_lock(&xprt->recv_lock);
+	spin_lock(&xprt->queue_lock);
 	req = xprt_lookup_rqst(xprt, xid);
 	if (!req)
 		goto out_notfound;
@@ -86,7 +86,7 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp,
 	rcvbuf->len = 0;
 
 out_unlock:
-	spin_unlock(&xprt->recv_lock);
+	spin_unlock(&xprt->queue_lock);
 out:
 	return ret;
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 3fbccebd0b10..8d6404259ff9 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -966,12 +966,12 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt,
 		return;
 
 	/* Look up and lock the request corresponding to the given XID */
-	spin_lock(&xprt->recv_lock);
+	spin_lock(&xprt->queue_lock);
 	rovr = xprt_lookup_rqst(xprt, *xp);
 	if (!rovr)
 		goto out_unlock;
 	xprt_pin_rqst(rovr);
-	spin_unlock(&xprt->recv_lock);
+	spin_unlock(&xprt->queue_lock);
 	task = rovr->rq_task;
 
 	copied = rovr->rq_private_buf.buflen;
@@ -980,16 +980,16 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt,
 
 	if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) {
 		dprintk("RPC:       sk_buff copy failed\n");
-		spin_lock(&xprt->recv_lock);
+		spin_lock(&xprt->queue_lock);
 		goto out_unpin;
 	}
 
-	spin_lock(&xprt->recv_lock);
+	spin_lock(&xprt->queue_lock);
 	xprt_complete_rqst(task, copied);
 out_unpin:
 	xprt_unpin_rqst(rovr);
  out_unlock:
-	spin_unlock(&xprt->recv_lock);
+	spin_unlock(&xprt->queue_lock);
 }
 
 static void xs_local_data_receive(struct sock_xprt *transport)
@@ -1058,13 +1058,13 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
 		return;
 
 	/* Look up and lock the request corresponding to the given XID */
-	spin_lock(&xprt->recv_lock);
+	spin_lock(&xprt->queue_lock);
 	rovr = xprt_lookup_rqst(xprt, *xp);
 	if (!rovr)
 		goto out_unlock;
 	xprt_pin_rqst(rovr);
 	xprt_update_rtt(rovr->rq_task);
-	spin_unlock(&xprt->recv_lock);
+	spin_unlock(&xprt->queue_lock);
 	task = rovr->rq_task;
 
 	if ((copied = rovr->rq_private_buf.buflen) > repsize)
@@ -1072,7 +1072,7 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
 
 	/* Suck it into the iovec, verify checksum if not done by hw. */
 	if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
-		spin_lock(&xprt->recv_lock);
+		spin_lock(&xprt->queue_lock);
 		__UDPX_INC_STATS(sk, UDP_MIB_INERRORS);
 		goto out_unpin;
 	}
@@ -1081,13 +1081,13 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
 	spin_lock_bh(&xprt->transport_lock);
 	xprt_adjust_cwnd(xprt, task, copied);
 	spin_unlock_bh(&xprt->transport_lock);
-	spin_lock(&xprt->recv_lock);
+	spin_lock(&xprt->queue_lock);
 	xprt_complete_rqst(task, copied);
 	__UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
 out_unpin:
 	xprt_unpin_rqst(rovr);
  out_unlock:
-	spin_unlock(&xprt->recv_lock);
+	spin_unlock(&xprt->queue_lock);
 }
 
 static void xs_udp_data_receive(struct sock_xprt *transport)
@@ -1356,24 +1356,24 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
 	dprintk("RPC:       read reply XID %08x\n", ntohl(transport->recv.xid));
 
 	/* Find and lock the request corresponding to this xid */
-	spin_lock(&xprt->recv_lock);
+	spin_lock(&xprt->queue_lock);
 	req = xprt_lookup_rqst(xprt, transport->recv.xid);
 	if (!req) {
 		dprintk("RPC:       XID %08x request not found!\n",
 				ntohl(transport->recv.xid));
-		spin_unlock(&xprt->recv_lock);
+		spin_unlock(&xprt->queue_lock);
 		return -1;
 	}
 	xprt_pin_rqst(req);
-	spin_unlock(&xprt->recv_lock);
+	spin_unlock(&xprt->queue_lock);
 
 	xs_tcp_read_common(xprt, desc, req);
 
-	spin_lock(&xprt->recv_lock);
+	spin_lock(&xprt->queue_lock);
 	if (!(transport->recv.flags & TCP_RCV_COPY_DATA))
 		xprt_complete_rqst(req->rq_task, transport->recv.copied);
 	xprt_unpin_rqst(req);
-	spin_unlock(&xprt->recv_lock);
+	spin_unlock(&xprt->queue_lock);
 	return 0;
 }
 
-- 
cgit v1.2.3


From edc81dcd5b7f699c4049042b35c904396642032e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Wed, 22 Aug 2018 17:55:46 -0400
Subject: SUNRPC: Refactor xprt_transmit() to remove the reply queue code

Separate out the action of adding a request to the reply queue so that the
backchannel code can simply skip calling it altogether.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h       |   1 +
 net/sunrpc/backchannel_rqst.c     |   1 -
 net/sunrpc/clnt.c                 |   5 ++
 net/sunrpc/xprt.c                 | 127 +++++++++++++++++++++++++-------------
 net/sunrpc/xprtrdma/backchannel.c |   1 -
 5 files changed, 89 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index c25d0a5fda69..0250294c904a 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -334,6 +334,7 @@ void			xprt_free_slot(struct rpc_xprt *xprt,
 				       struct rpc_rqst *req);
 void			xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
 bool			xprt_prepare_transmit(struct rpc_task *task);
+void			xprt_request_enqueue_receive(struct rpc_task *task);
 void			xprt_transmit(struct rpc_task *task);
 void			xprt_end_transmit(struct rpc_task *task);
 int			xprt_adjust_timeout(struct rpc_rqst *req);
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 3c15a99b9700..fa5ba6ed3197 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -91,7 +91,6 @@ struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags)
 		return NULL;
 
 	req->rq_xprt = xprt;
-	INIT_LIST_HEAD(&req->rq_list);
 	INIT_LIST_HEAD(&req->rq_bc_list);
 
 	/* Preallocate one XDR receive buffer */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index a858366cd15d..414966273a3f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1962,6 +1962,11 @@ call_transmit(struct rpc_task *task)
 			return;
 		}
 	}
+
+	/* Add task to reply queue before transmission to avoid races */
+	if (rpc_reply_expected(task))
+		xprt_request_enqueue_receive(task);
+
 	if (!xprt_prepare_transmit(task))
 		return;
 	task->tk_action = call_transmit_status;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 6e3d4b4ee79e..2ae0a4c47d59 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -888,6 +888,62 @@ static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)
 	wait_var_event(&req->rq_pin, !xprt_is_pinned_rqst(req));
 }
 
+static bool
+xprt_request_data_received(struct rpc_task *task)
+{
+	return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) &&
+		READ_ONCE(task->tk_rqstp->rq_reply_bytes_recvd) != 0;
+}
+
+static bool
+xprt_request_need_enqueue_receive(struct rpc_task *task, struct rpc_rqst *req)
+{
+	return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) &&
+		READ_ONCE(task->tk_rqstp->rq_reply_bytes_recvd) == 0;
+}
+
+/**
+ * xprt_request_enqueue_receive - Add an request to the receive queue
+ * @task: RPC task
+ *
+ */
+void
+xprt_request_enqueue_receive(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	if (!xprt_request_need_enqueue_receive(task, req))
+		return;
+	spin_lock(&xprt->queue_lock);
+
+	/* Update the softirq receive buffer */
+	memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
+			sizeof(req->rq_private_buf));
+
+	/* Add request to the receive list */
+	list_add_tail(&req->rq_list, &xprt->recv);
+	set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate);
+	spin_unlock(&xprt->queue_lock);
+
+	xprt_reset_majortimeo(req);
+	/* Turn off autodisconnect */
+	del_singleshot_timer_sync(&xprt->timer);
+}
+
+/**
+ * xprt_request_dequeue_receive_locked - Remove a request from the receive queue
+ * @task: RPC task
+ *
+ * Caller must hold xprt->queue_lock.
+ */
+static void
+xprt_request_dequeue_receive_locked(struct rpc_task *task)
+{
+	if (test_and_clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate))
+		list_del(&task->tk_rqstp->rq_list);
+}
+
 /**
  * xprt_update_rtt - Update RPC RTT statistics
  * @task: RPC request that recently completed
@@ -927,24 +983,16 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
 
 	xprt->stat.recvs++;
 
-	list_del_init(&req->rq_list);
 	req->rq_private_buf.len = copied;
 	/* Ensure all writes are done before we update */
 	/* req->rq_reply_bytes_recvd */
 	smp_wmb();
 	req->rq_reply_bytes_recvd = copied;
-	clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate);
+	xprt_request_dequeue_receive_locked(task);
 	rpc_wake_up_queued_task(&xprt->pending, task);
 }
 EXPORT_SYMBOL_GPL(xprt_complete_rqst);
 
-static bool
-xprt_request_data_received(struct rpc_task *task)
-{
-	return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) &&
-		task->tk_rqstp->rq_reply_bytes_recvd != 0;
-}
-
 static void xprt_timer(struct rpc_task *task)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
@@ -1018,32 +1066,15 @@ void xprt_transmit(struct rpc_task *task)
 
 	dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
 
-	if (!req->rq_reply_bytes_recvd) {
-
+	if (!req->rq_bytes_sent) {
+		if (xprt_request_data_received(task))
+			return;
 		/* Verify that our message lies in the RPCSEC_GSS window */
-		if (!req->rq_bytes_sent && rpcauth_xmit_need_reencode(task)) {
+		if (rpcauth_xmit_need_reencode(task)) {
 			task->tk_status = -EBADMSG;
 			return;
 		}
-
-		if (list_empty(&req->rq_list) && rpc_reply_expected(task)) {
-			/*
-			 * Add to the list only if we're expecting a reply
-			 */
-			/* Update the softirq receive buffer */
-			memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
-					sizeof(req->rq_private_buf));
-			/* Add request to the receive list */
-			spin_lock(&xprt->queue_lock);
-			list_add_tail(&req->rq_list, &xprt->recv);
-			set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate);
-			spin_unlock(&xprt->queue_lock);
-			xprt_reset_majortimeo(req);
-			/* Turn off autodisconnect */
-			del_singleshot_timer_sync(&xprt->timer);
-		}
-	} else if (xprt_request_data_received(task) && !req->rq_bytes_sent)
-		return;
+	}
 
 	connect_cookie = xprt->connect_cookie;
 	status = xprt->ops->send_request(task);
@@ -1285,7 +1316,6 @@ xprt_request_init(struct rpc_task *task)
 	struct rpc_xprt *xprt = task->tk_xprt;
 	struct rpc_rqst	*req = task->tk_rqstp;
 
-	INIT_LIST_HEAD(&req->rq_list);
 	req->rq_timeout = task->tk_client->cl_timeout->to_initval;
 	req->rq_task	= task;
 	req->rq_xprt    = xprt;
@@ -1355,6 +1385,26 @@ void xprt_retry_reserve(struct rpc_task *task)
 	xprt_do_reserve(xprt, task);
 }
 
+static void
+xprt_request_dequeue_all(struct rpc_task *task, struct rpc_rqst *req)
+{
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) ||
+	    xprt_is_pinned_rqst(req)) {
+		spin_lock(&xprt->queue_lock);
+		xprt_request_dequeue_receive_locked(task);
+		while (xprt_is_pinned_rqst(req)) {
+			set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
+			spin_unlock(&xprt->queue_lock);
+			xprt_wait_on_pinned_rqst(req);
+			spin_lock(&xprt->queue_lock);
+			clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
+		}
+		spin_unlock(&xprt->queue_lock);
+	}
+}
+
 /**
  * xprt_release - release an RPC request slot
  * @task: task which is finished with the slot
@@ -1379,18 +1429,7 @@ void xprt_release(struct rpc_task *task)
 		task->tk_ops->rpc_count_stats(task, task->tk_calldata);
 	else if (task->tk_client)
 		rpc_count_iostats(task, task->tk_client->cl_metrics);
-	spin_lock(&xprt->queue_lock);
-	if (!list_empty(&req->rq_list)) {
-		list_del_init(&req->rq_list);
-		if (xprt_is_pinned_rqst(req)) {
-			set_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate);
-			spin_unlock(&xprt->queue_lock);
-			xprt_wait_on_pinned_rqst(req);
-			spin_lock(&xprt->queue_lock);
-			clear_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate);
-		}
-	}
-	spin_unlock(&xprt->queue_lock);
+	xprt_request_dequeue_all(task, req);
 	spin_lock_bh(&xprt->transport_lock);
 	xprt->ops->release_xprt(xprt, task);
 	if (xprt->ops->release_request)
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 90adeff4c06b..ed58761e6b23 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -51,7 +51,6 @@ static int rpcrdma_bc_setup_reqs(struct rpcrdma_xprt *r_xprt,
 		rqst = &req->rl_slot;
 
 		rqst->rq_xprt = xprt;
-		INIT_LIST_HEAD(&rqst->rq_list);
 		INIT_LIST_HEAD(&rqst->rq_bc_list);
 		__set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
 		spin_lock_bh(&xprt->bc_pa_lock);
-- 
cgit v1.2.3


From 7f3a1d1e1806a0eb9b200e3aed2a04431f2bcc6a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Thu, 23 Aug 2018 00:03:43 -0400
Subject: SUNRPC: Refactor xprt_transmit() to remove wait for reply code

Allow the caller in clnt.c to call into the code to wait for a reply
after calling xprt_transmit(). Again, the reason is that the backchannel
code does not need this functionality.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h |  1 +
 net/sunrpc/clnt.c           | 10 +-----
 net/sunrpc/xprt.c           | 74 +++++++++++++++++++++++++++++++--------------
 3 files changed, 54 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 0250294c904a..4fa2af087cff 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -335,6 +335,7 @@ void			xprt_free_slot(struct rpc_xprt *xprt,
 void			xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
 bool			xprt_prepare_transmit(struct rpc_task *task);
 void			xprt_request_enqueue_receive(struct rpc_task *task);
+void			xprt_request_wait_receive(struct rpc_task *task);
 void			xprt_transmit(struct rpc_task *task);
 void			xprt_end_transmit(struct rpc_task *task);
 int			xprt_adjust_timeout(struct rpc_rqst *req);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 414966273a3f..775d6e80b6e8 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1975,15 +1975,6 @@ call_transmit(struct rpc_task *task)
 		return;
 	if (is_retrans)
 		task->tk_client->cl_stats->rpcretrans++;
-	/*
-	 * On success, ensure that we call xprt_end_transmit() before sleeping
-	 * in order to allow access to the socket to other RPC requests.
-	 */
-	call_transmit_status(task);
-	if (rpc_reply_expected(task))
-		return;
-	task->tk_action = rpc_exit_task;
-	rpc_wake_up_queued_task(&task->tk_rqstp->rq_xprt->pending, task);
 }
 
 /*
@@ -2000,6 +1991,7 @@ call_transmit_status(struct rpc_task *task)
 	 */
 	if (task->tk_status == 0) {
 		xprt_end_transmit(task);
+		xprt_request_wait_receive(task);
 		return;
 	}
 
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 2ae0a4c47d59..a6a33c178870 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -654,6 +654,22 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
 }
 EXPORT_SYMBOL_GPL(xprt_force_disconnect);
 
+static unsigned int
+xprt_connect_cookie(struct rpc_xprt *xprt)
+{
+	return READ_ONCE(xprt->connect_cookie);
+}
+
+static bool
+xprt_request_retransmit_after_disconnect(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	return req->rq_connect_cookie != xprt_connect_cookie(xprt) ||
+		!xprt_connected(xprt);
+}
+
 /**
  * xprt_conditional_disconnect - force a transport to disconnect
  * @xprt: transport to disconnect
@@ -1009,6 +1025,39 @@ static void xprt_timer(struct rpc_task *task)
 		task->tk_status = 0;
 }
 
+/**
+ * xprt_request_wait_receive - wait for the reply to an RPC request
+ * @task: RPC task about to send a request
+ *
+ */
+void xprt_request_wait_receive(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	if (!test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate))
+		return;
+	/*
+	 * Sleep on the pending queue if we're expecting a reply.
+	 * The spinlock ensures atomicity between the test of
+	 * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
+	 */
+	spin_lock(&xprt->queue_lock);
+	if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) {
+		xprt->ops->set_retrans_timeout(task);
+		rpc_sleep_on(&xprt->pending, task, xprt_timer);
+		/*
+		 * Send an extra queue wakeup call if the
+		 * connection was dropped in case the call to
+		 * rpc_sleep_on() raced.
+		 */
+		if (xprt_request_retransmit_after_disconnect(task))
+			rpc_wake_up_queued_task_set_status(&xprt->pending,
+					task, -ENOTCONN);
+	}
+	spin_unlock(&xprt->queue_lock);
+}
+
 /**
  * xprt_prepare_transmit - reserve the transport before sending a request
  * @task: RPC task about to send a request
@@ -1028,9 +1077,8 @@ bool xprt_prepare_transmit(struct rpc_task *task)
 			task->tk_status = req->rq_reply_bytes_recvd;
 			goto out_unlock;
 		}
-		if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT)
-		    && xprt_connected(xprt)
-		    && req->rq_connect_cookie == xprt->connect_cookie) {
+		if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) &&
+		    !xprt_request_retransmit_after_disconnect(task)) {
 			xprt->ops->set_retrans_timeout(task);
 			rpc_sleep_on(&xprt->pending, task, xprt_timer);
 			goto out_unlock;
@@ -1091,8 +1139,6 @@ void xprt_transmit(struct rpc_task *task)
 	task->tk_flags |= RPC_TASK_SENT;
 	spin_lock_bh(&xprt->transport_lock);
 
-	xprt->ops->set_retrans_timeout(task);
-
 	xprt->stat.sends++;
 	xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
 	xprt->stat.bklog_u += xprt->backlog.qlen;
@@ -1101,22 +1147,6 @@ void xprt_transmit(struct rpc_task *task)
 	spin_unlock_bh(&xprt->transport_lock);
 
 	req->rq_connect_cookie = connect_cookie;
-	if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) {
-		/*
-		 * Sleep on the pending queue if we're expecting a reply.
-		 * The spinlock ensures atomicity between the test of
-		 * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
-		 */
-		spin_lock(&xprt->queue_lock);
-		if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) {
-			rpc_sleep_on(&xprt->pending, task, xprt_timer);
-			/* Wake up immediately if the connection was dropped */
-			if (!xprt_connected(xprt))
-				rpc_wake_up_queued_task_set_status(&xprt->pending,
-						task, -ENOTCONN);
-		}
-		spin_unlock(&xprt->queue_lock);
-	}
 }
 
 static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
@@ -1321,7 +1351,7 @@ xprt_request_init(struct rpc_task *task)
 	req->rq_xprt    = xprt;
 	req->rq_buffer  = NULL;
 	req->rq_xid	= xprt_alloc_xid(xprt);
-	req->rq_connect_cookie = xprt->connect_cookie - 1;
+	req->rq_connect_cookie = xprt_connect_cookie(xprt) - 1;
 	req->rq_bytes_sent = 0;
 	req->rq_snd_buf.len = 0;
 	req->rq_snd_buf.buflen = 0;
-- 
cgit v1.2.3


From ef3f54347f690d06649c0d7a1f63d3410b3d08d3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Wed, 8 Aug 2018 09:23:32 -0400
Subject: SUNRPC: Distinguish between the slot allocation list and receive
 queue

When storing a struct rpc_rqst on the slot allocation list, we currently
use the same field 'rq_list' as we use to store the request on the
receive queue. Since the structure is never on both lists at the same
time, this is OK.
However, for clarity, let's make that a union with different names for
the different lists so that we can more easily distinguish between
the two states.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h |  9 +++++++--
 net/sunrpc/xprt.c           | 12 ++++++------
 2 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 4fa2af087cff..9cec2d0811f2 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -82,7 +82,11 @@ struct rpc_rqst {
 	struct page		**rq_enc_pages;	/* scratch pages for use by
 						   gss privacy code */
 	void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */
-	struct list_head	rq_list;
+
+	union {
+		struct list_head	rq_list;	/* Slot allocation list */
+		struct list_head	rq_recv;	/* Receive queue */
+	};
 
 	void			*rq_buffer;	/* Call XDR encode buffer */
 	size_t			rq_callsize;
@@ -249,7 +253,8 @@ struct rpc_xprt {
 	struct list_head	bc_pa_list;	/* List of preallocated
 						 * backchannel rpc_rqst's */
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
-	struct list_head	recv;
+
+	struct list_head	recv_queue;	/* Receive queue */
 
 	struct {
 		unsigned long		bind_count,	/* total number of binds */
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index a6a33c178870..d527dc08540e 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -708,7 +708,7 @@ static void
 xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
 	__must_hold(&xprt->transport_lock)
 {
-	if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
+	if (list_empty(&xprt->recv_queue) && xprt_has_timer(xprt))
 		mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout);
 }
 
@@ -718,7 +718,7 @@ xprt_init_autodisconnect(struct timer_list *t)
 	struct rpc_xprt *xprt = from_timer(xprt, t, timer);
 
 	spin_lock(&xprt->transport_lock);
-	if (!list_empty(&xprt->recv))
+	if (!list_empty(&xprt->recv_queue))
 		goto out_abort;
 	/* Reset xprt->last_used to avoid connect/autodisconnect cycling */
 	xprt->last_used = jiffies;
@@ -848,7 +848,7 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
 {
 	struct rpc_rqst *entry;
 
-	list_for_each_entry(entry, &xprt->recv, rq_list)
+	list_for_each_entry(entry, &xprt->recv_queue, rq_recv)
 		if (entry->rq_xid == xid) {
 			trace_xprt_lookup_rqst(xprt, xid, 0);
 			entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime);
@@ -938,7 +938,7 @@ xprt_request_enqueue_receive(struct rpc_task *task)
 			sizeof(req->rq_private_buf));
 
 	/* Add request to the receive list */
-	list_add_tail(&req->rq_list, &xprt->recv);
+	list_add_tail(&req->rq_recv, &xprt->recv_queue);
 	set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate);
 	spin_unlock(&xprt->queue_lock);
 
@@ -957,7 +957,7 @@ static void
 xprt_request_dequeue_receive_locked(struct rpc_task *task)
 {
 	if (test_and_clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate))
-		list_del(&task->tk_rqstp->rq_list);
+		list_del(&task->tk_rqstp->rq_recv);
 }
 
 /**
@@ -1492,7 +1492,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
 	spin_lock_init(&xprt->queue_lock);
 
 	INIT_LIST_HEAD(&xprt->free);
-	INIT_LIST_HEAD(&xprt->recv);
+	INIT_LIST_HEAD(&xprt->recv_queue);
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	spin_lock_init(&xprt->bc_pa_lock);
 	INIT_LIST_HEAD(&xprt->bc_pa_list);
-- 
cgit v1.2.3


From 944b042921a17d1a4e51bb05f8edf2b93d26e36f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Thu, 9 Aug 2018 23:33:21 -0400
Subject: SUNRPC: Add a transmission queue for RPC requests

Add the queue that will enforce the ordering of RPC task transmission.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h |  6 ++++
 net/sunrpc/clnt.c           |  6 ++--
 net/sunrpc/xprt.c           | 84 ++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 83 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 9cec2d0811f2..81a6c2c8dfc7 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -88,6 +88,8 @@ struct rpc_rqst {
 		struct list_head	rq_recv;	/* Receive queue */
 	};
 
+	struct list_head	rq_xmit;	/* Send queue */
+
 	void			*rq_buffer;	/* Call XDR encode buffer */
 	size_t			rq_callsize;
 	void			*rq_rbuffer;	/* Reply XDR decode buffer */
@@ -242,6 +244,9 @@ struct rpc_xprt {
 	spinlock_t		queue_lock;	/* send/receive queue lock */
 	u32			xid;		/* Next XID value to use */
 	struct rpc_task *	snd_task;	/* Task blocked in send */
+
+	struct list_head	xmit_queue;	/* Send queue */
+
 	struct svc_xprt		*bc_xprt;	/* NFSv4.1 backchannel */
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	struct svc_serv		*bc_serv;       /* The RPC service which will */
@@ -339,6 +344,7 @@ void			xprt_free_slot(struct rpc_xprt *xprt,
 				       struct rpc_rqst *req);
 void			xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
 bool			xprt_prepare_transmit(struct rpc_task *task);
+void			xprt_request_enqueue_transmit(struct rpc_task *task);
 void			xprt_request_enqueue_receive(struct rpc_task *task);
 void			xprt_request_wait_receive(struct rpc_task *task);
 void			xprt_transmit(struct rpc_task *task);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index be0f06a8156b..c1a19a3e1356 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1156,11 +1156,11 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
 	 */
 	xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
 			xbufp->tail[0].iov_len;
-	set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
 
 	task->tk_action = call_bc_transmit;
 	atomic_inc(&task->tk_count);
 	WARN_ON_ONCE(atomic_read(&task->tk_count) != 2);
+	xprt_request_enqueue_transmit(task);
 	rpc_execute(task);
 
 	dprintk("RPC: rpc_run_bc_task: task= %p\n", task);
@@ -1759,8 +1759,6 @@ rpc_xdr_encode(struct rpc_task *task)
 
 	task->tk_status = rpcauth_wrap_req(task, encode, req, p,
 			task->tk_msg.rpc_argp);
-	if (task->tk_status == 0)
-		set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
 }
 
 /*
@@ -1964,6 +1962,7 @@ call_transmit(struct rpc_task *task)
 	/* Add task to reply queue before transmission to avoid races */
 	if (rpc_reply_expected(task))
 		xprt_request_enqueue_receive(task);
+	xprt_request_enqueue_transmit(task);
 
 	if (!xprt_prepare_transmit(task))
 		return;
@@ -1998,7 +1997,6 @@ call_transmit_status(struct rpc_task *task)
 		xprt_end_transmit(task);
 		break;
 	case -EBADMSG:
-		clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
 		task->tk_action = call_transmit;
 		task->tk_status = 0;
 		xprt_end_transmit(task);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index d527dc08540e..1f69d9f219af 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1058,6 +1058,72 @@ void xprt_request_wait_receive(struct rpc_task *task)
 	spin_unlock(&xprt->queue_lock);
 }
 
+static bool
+xprt_request_need_transmit(struct rpc_task *task)
+{
+	return !(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) ||
+		xprt_request_retransmit_after_disconnect(task);
+}
+
+static bool
+xprt_request_need_enqueue_transmit(struct rpc_task *task, struct rpc_rqst *req)
+{
+	return xprt_request_need_transmit(task) &&
+		!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
+}
+
+/**
+ * xprt_request_enqueue_transmit - queue a task for transmission
+ * @task: pointer to rpc_task
+ *
+ * Add a task to the transmission queue.
+ */
+void
+xprt_request_enqueue_transmit(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	if (xprt_request_need_enqueue_transmit(task, req)) {
+		spin_lock(&xprt->queue_lock);
+		list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
+		set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
+		spin_unlock(&xprt->queue_lock);
+	}
+}
+
+/**
+ * xprt_request_dequeue_transmit_locked - remove a task from the transmission queue
+ * @task: pointer to rpc_task
+ *
+ * Remove a task from the transmission queue
+ * Caller must hold xprt->queue_lock
+ */
+static void
+xprt_request_dequeue_transmit_locked(struct rpc_task *task)
+{
+	xprt_task_clear_bytes_sent(task);
+	if (test_and_clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+		list_del(&task->tk_rqstp->rq_xmit);
+}
+
+/**
+ * xprt_request_dequeue_transmit - remove a task from the transmission queue
+ * @task: pointer to rpc_task
+ *
+ * Remove a task from the transmission queue
+ */
+static void
+xprt_request_dequeue_transmit(struct rpc_task *task)
+{
+	struct rpc_rqst *req = task->tk_rqstp;
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	spin_lock(&xprt->queue_lock);
+	xprt_request_dequeue_transmit_locked(task);
+	spin_unlock(&xprt->queue_lock);
+}
+
 /**
  * xprt_prepare_transmit - reserve the transport before sending a request
  * @task: RPC task about to send a request
@@ -1077,12 +1143,8 @@ bool xprt_prepare_transmit(struct rpc_task *task)
 			task->tk_status = req->rq_reply_bytes_recvd;
 			goto out_unlock;
 		}
-		if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) &&
-		    !xprt_request_retransmit_after_disconnect(task)) {
-			xprt->ops->set_retrans_timeout(task);
-			rpc_sleep_on(&xprt->pending, task, xprt_timer);
+		if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
 			goto out_unlock;
-		}
 	}
 	if (!xprt->ops->reserve_xprt(xprt, task)) {
 		task->tk_status = -EAGAIN;
@@ -1116,11 +1178,11 @@ void xprt_transmit(struct rpc_task *task)
 
 	if (!req->rq_bytes_sent) {
 		if (xprt_request_data_received(task))
-			return;
+			goto out_dequeue;
 		/* Verify that our message lies in the RPCSEC_GSS window */
 		if (rpcauth_xmit_need_reencode(task)) {
 			task->tk_status = -EBADMSG;
-			return;
+			goto out_dequeue;
 		}
 	}
 
@@ -1135,7 +1197,6 @@ void xprt_transmit(struct rpc_task *task)
 	xprt_inject_disconnect(xprt);
 
 	dprintk("RPC: %5u xmit complete\n", task->tk_pid);
-	clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
 	task->tk_flags |= RPC_TASK_SENT;
 	spin_lock_bh(&xprt->transport_lock);
 
@@ -1147,6 +1208,8 @@ void xprt_transmit(struct rpc_task *task)
 	spin_unlock_bh(&xprt->transport_lock);
 
 	req->rq_connect_cookie = connect_cookie;
+out_dequeue:
+	xprt_request_dequeue_transmit(task);
 }
 
 static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
@@ -1420,9 +1483,11 @@ xprt_request_dequeue_all(struct rpc_task *task, struct rpc_rqst *req)
 {
 	struct rpc_xprt *xprt = req->rq_xprt;
 
-	if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) ||
+	if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) ||
+	    test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) ||
 	    xprt_is_pinned_rqst(req)) {
 		spin_lock(&xprt->queue_lock);
+		xprt_request_dequeue_transmit_locked(task);
 		xprt_request_dequeue_receive_locked(task);
 		while (xprt_is_pinned_rqst(req)) {
 			set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
@@ -1493,6 +1558,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
 
 	INIT_LIST_HEAD(&xprt->free);
 	INIT_LIST_HEAD(&xprt->recv_queue);
+	INIT_LIST_HEAD(&xprt->xmit_queue);
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	spin_lock_init(&xprt->bc_pa_lock);
 	INIT_LIST_HEAD(&xprt->bc_pa_list);
-- 
cgit v1.2.3


From 762e4e67b356ab7b8fbfc39bc07dc6110121505e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 24 Aug 2018 16:28:28 -0400
Subject: SUNRPC: Refactor RPC call encoding

Move the call encoding so that it occurs before the transport connection
etc.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h |  1 +
 net/sunrpc/clnt.c           | 81 +++++++++++++++++++++++++++------------------
 net/sunrpc/xprt.c           | 22 +++++++-----
 3 files changed, 63 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 81a6c2c8dfc7..b8a7de161f67 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -347,6 +347,7 @@ bool			xprt_prepare_transmit(struct rpc_task *task);
 void			xprt_request_enqueue_transmit(struct rpc_task *task);
 void			xprt_request_enqueue_receive(struct rpc_task *task);
 void			xprt_request_wait_receive(struct rpc_task *task);
+bool			xprt_request_need_retransmit(struct rpc_task *task);
 void			xprt_transmit(struct rpc_task *task);
 void			xprt_end_transmit(struct rpc_task *task);
 int			xprt_adjust_timeout(struct rpc_rqst *req);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index c1a19a3e1356..64159716be30 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -61,6 +61,7 @@ static void	call_start(struct rpc_task *task);
 static void	call_reserve(struct rpc_task *task);
 static void	call_reserveresult(struct rpc_task *task);
 static void	call_allocate(struct rpc_task *task);
+static void	call_encode(struct rpc_task *task);
 static void	call_decode(struct rpc_task *task);
 static void	call_bind(struct rpc_task *task);
 static void	call_bind_status(struct rpc_task *task);
@@ -1140,7 +1141,8 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
 	struct xdr_buf *xbufp = &req->rq_snd_buf;
 	struct rpc_task_setup task_setup_data = {
 		.callback_ops = &rpc_default_ops,
-		.flags = RPC_TASK_SOFTCONN,
+		.flags = RPC_TASK_SOFTCONN |
+			RPC_TASK_NO_RETRANS_TIMEOUT,
 	};
 
 	dprintk("RPC: rpc_run_bc_task req= %p\n", req);
@@ -1160,7 +1162,6 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
 	task->tk_action = call_bc_transmit;
 	atomic_inc(&task->tk_count);
 	WARN_ON_ONCE(atomic_read(&task->tk_count) != 2);
-	xprt_request_enqueue_transmit(task);
 	rpc_execute(task);
 
 	dprintk("RPC: rpc_run_bc_task: task= %p\n", task);
@@ -1680,7 +1681,7 @@ call_allocate(struct rpc_task *task)
 	dprint_status(task);
 
 	task->tk_status = 0;
-	task->tk_action = call_bind;
+	task->tk_action = call_encode;
 
 	if (req->rq_buffer)
 		return;
@@ -1724,12 +1725,12 @@ call_allocate(struct rpc_task *task)
 static int
 rpc_task_need_encode(struct rpc_task *task)
 {
-	return test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) == 0;
+	return test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) == 0 &&
+		(!(task->tk_flags & RPC_TASK_SENT) ||
+		 !(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) ||
+		 xprt_request_need_retransmit(task));
 }
 
-/*
- * 3.	Encode arguments of an RPC call
- */
 static void
 rpc_xdr_encode(struct rpc_task *task)
 {
@@ -1745,6 +1746,7 @@ rpc_xdr_encode(struct rpc_task *task)
 	xdr_buf_init(&req->rq_rcv_buf,
 		     req->rq_rbuffer,
 		     req->rq_rcvsize);
+	req->rq_bytes_sent = 0;
 
 	p = rpc_encode_header(task);
 	if (p == NULL) {
@@ -1761,6 +1763,34 @@ rpc_xdr_encode(struct rpc_task *task)
 			task->tk_msg.rpc_argp);
 }
 
+/*
+ * 3.	Encode arguments of an RPC call
+ */
+static void
+call_encode(struct rpc_task *task)
+{
+	if (!rpc_task_need_encode(task))
+		goto out;
+	/* Encode here so that rpcsec_gss can use correct sequence number. */
+	rpc_xdr_encode(task);
+	/* Did the encode result in an error condition? */
+	if (task->tk_status != 0) {
+		/* Was the error nonfatal? */
+		if (task->tk_status == -EAGAIN)
+			rpc_delay(task, HZ >> 4);
+		else
+			rpc_exit(task, task->tk_status);
+		return;
+	}
+
+	/* Add task to reply queue before transmission to avoid races */
+	if (rpc_reply_expected(task))
+		xprt_request_enqueue_receive(task);
+	xprt_request_enqueue_transmit(task);
+out:
+	task->tk_action = call_bind;
+}
+
 /*
  * 4.	Get the server port number if not yet set
  */
@@ -1945,24 +1975,8 @@ call_transmit(struct rpc_task *task)
 	dprint_status(task);
 
 	task->tk_action = call_transmit_status;
-	/* Encode here so that rpcsec_gss can use correct sequence number. */
-	if (rpc_task_need_encode(task)) {
-		rpc_xdr_encode(task);
-		/* Did the encode result in an error condition? */
-		if (task->tk_status != 0) {
-			/* Was the error nonfatal? */
-			if (task->tk_status == -EAGAIN)
-				rpc_delay(task, HZ >> 4);
-			else
-				rpc_exit(task, task->tk_status);
-			return;
-		}
-	}
-
-	/* Add task to reply queue before transmission to avoid races */
-	if (rpc_reply_expected(task))
-		xprt_request_enqueue_receive(task);
-	xprt_request_enqueue_transmit(task);
+	if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+		return;
 
 	if (!xprt_prepare_transmit(task))
 		return;
@@ -1997,9 +2011,9 @@ call_transmit_status(struct rpc_task *task)
 		xprt_end_transmit(task);
 		break;
 	case -EBADMSG:
-		task->tk_action = call_transmit;
-		task->tk_status = 0;
 		xprt_end_transmit(task);
+		task->tk_status = 0;
+		task->tk_action = call_encode;
 		break;
 		/*
 		 * Special cases: if we've been waiting on the
@@ -2048,6 +2062,9 @@ call_bc_transmit(struct rpc_task *task)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
 
+	if (rpc_task_need_encode(task))
+		xprt_request_enqueue_transmit(task);
+
 	if (!xprt_prepare_transmit(task))
 		goto out_retry;
 
@@ -2169,7 +2186,7 @@ call_status(struct rpc_task *task)
 	case -EPIPE:
 	case -ENOTCONN:
 	case -EAGAIN:
-		task->tk_action = call_bind;
+		task->tk_action = call_encode;
 		break;
 	case -EIO:
 		/* shutdown or soft timeout */
@@ -2234,7 +2251,7 @@ call_timeout(struct rpc_task *task)
 	rpcauth_invalcred(task);
 
 retry:
-	task->tk_action = call_bind;
+	task->tk_action = call_encode;
 	task->tk_status = 0;
 }
 
@@ -2278,7 +2295,7 @@ call_decode(struct rpc_task *task)
 
 	if (req->rq_rcv_buf.len < 12) {
 		if (!RPC_IS_SOFT(task)) {
-			task->tk_action = call_bind;
+			task->tk_action = call_encode;
 			goto out_retry;
 		}
 		dprintk("RPC:       %s: too small RPC reply size (%d bytes)\n",
@@ -2409,7 +2426,7 @@ rpc_verify_header(struct rpc_task *task)
 			task->tk_garb_retry--;
 			dprintk("RPC: %5u %s: retry garbled creds\n",
 					task->tk_pid, __func__);
-			task->tk_action = call_bind;
+			task->tk_action = call_encode;
 			goto out_retry;
 		case RPC_AUTH_TOOWEAK:
 			printk(KERN_NOTICE "RPC: server %s requires stronger "
@@ -2478,7 +2495,7 @@ out_garbage:
 		task->tk_garb_retry--;
 		dprintk("RPC: %5u %s: retrying\n",
 				task->tk_pid, __func__);
-		task->tk_action = call_bind;
+		task->tk_action = call_encode;
 out_retry:
 		return ERR_PTR(-EAGAIN);
 	}
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 1f69d9f219af..613f558a3791 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1058,18 +1058,10 @@ void xprt_request_wait_receive(struct rpc_task *task)
 	spin_unlock(&xprt->queue_lock);
 }
 
-static bool
-xprt_request_need_transmit(struct rpc_task *task)
-{
-	return !(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) ||
-		xprt_request_retransmit_after_disconnect(task);
-}
-
 static bool
 xprt_request_need_enqueue_transmit(struct rpc_task *task, struct rpc_rqst *req)
 {
-	return xprt_request_need_transmit(task) &&
-		!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
+	return !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
 }
 
 /**
@@ -1124,6 +1116,18 @@ xprt_request_dequeue_transmit(struct rpc_task *task)
 	spin_unlock(&xprt->queue_lock);
 }
 
+/**
+ * xprt_request_need_retransmit - Test if a task needs retransmission
+ * @task: pointer to rpc_task
+ *
+ * Test for whether a connection breakage requires the task to retransmit
+ */
+bool
+xprt_request_need_retransmit(struct rpc_task *task)
+{
+	return xprt_request_retransmit_after_disconnect(task);
+}
+
 /**
  * xprt_prepare_transmit - reserve the transport before sending a request
  * @task: RPC task about to send a request
-- 
cgit v1.2.3


From 902c58872e1e9a2c146a55b0701c0b26cc5a4b24 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Sat, 1 Sep 2018 17:21:01 -0400
Subject: SUNRPC: Fix up the back channel transmit

Fix up the back channel code to recognise that it has already been
transmitted, so does not need to be called again.
Also ensure that we set req->rq_task.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/bc_xprt.h |  1 +
 net/sunrpc/clnt.c              | 19 +++++--------------
 net/sunrpc/xprt.c              | 27 ++++++++++++++++++++++++++-
 3 files changed, 32 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index 4397a4824c81..28721cf73ec3 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -34,6 +34,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #ifdef CONFIG_SUNRPC_BACKCHANNEL
 struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid);
 void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied);
+void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task);
 void xprt_free_bc_request(struct rpc_rqst *req);
 int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
 void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 64159716be30..dcefbf406482 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1138,7 +1138,6 @@ EXPORT_SYMBOL_GPL(rpc_call_async);
 struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
 {
 	struct rpc_task *task;
-	struct xdr_buf *xbufp = &req->rq_snd_buf;
 	struct rpc_task_setup task_setup_data = {
 		.callback_ops = &rpc_default_ops,
 		.flags = RPC_TASK_SOFTCONN |
@@ -1150,14 +1149,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
 	 * Create an rpc_task to send the data
 	 */
 	task = rpc_new_task(&task_setup_data);
-	task->tk_rqstp = req;
-
-	/*
-	 * Set up the xdr_buf length.
-	 * This also indicates that the buffer is XDR encoded already.
-	 */
-	xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
-			xbufp->tail[0].iov_len;
+	xprt_init_bc_request(req, task);
 
 	task->tk_action = call_bc_transmit;
 	atomic_inc(&task->tk_count);
@@ -2064,6 +2056,8 @@ call_bc_transmit(struct rpc_task *task)
 
 	if (rpc_task_need_encode(task))
 		xprt_request_enqueue_transmit(task);
+	if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+		goto out_wakeup;
 
 	if (!xprt_prepare_transmit(task))
 		goto out_retry;
@@ -2073,13 +2067,11 @@ call_bc_transmit(struct rpc_task *task)
 			"error: %d\n", task->tk_status);
 		goto out_done;
 	}
-	if (req->rq_connect_cookie != req->rq_xprt->connect_cookie)
-		req->rq_bytes_sent = 0;
 
 	xprt_transmit(task);
 
 	if (task->tk_status == -EAGAIN)
-		goto out_nospace;
+		goto out_retry;
 
 	xprt_end_transmit(task);
 	dprint_status(task);
@@ -2119,12 +2111,11 @@ call_bc_transmit(struct rpc_task *task)
 			"error: %d\n", task->tk_status);
 		break;
 	}
+out_wakeup:
 	rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
 out_done:
 	task->tk_action = rpc_exit_task;
 	return;
-out_nospace:
-	req->rq_connect_cookie = req->rq_xprt->connect_cookie;
 out_retry:
 	task->tk_status = 0;
 }
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 613f558a3791..f5be739492d4 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1390,6 +1390,12 @@ void xprt_free(struct rpc_xprt *xprt)
 }
 EXPORT_SYMBOL_GPL(xprt_free);
 
+static void
+xprt_init_connect_cookie(struct rpc_rqst *req, struct rpc_xprt *xprt)
+{
+	req->rq_connect_cookie = xprt_connect_cookie(xprt) - 1;
+}
+
 static __be32
 xprt_alloc_xid(struct rpc_xprt *xprt)
 {
@@ -1418,7 +1424,7 @@ xprt_request_init(struct rpc_task *task)
 	req->rq_xprt    = xprt;
 	req->rq_buffer  = NULL;
 	req->rq_xid	= xprt_alloc_xid(xprt);
-	req->rq_connect_cookie = xprt_connect_cookie(xprt) - 1;
+	xprt_init_connect_cookie(req, xprt);
 	req->rq_bytes_sent = 0;
 	req->rq_snd_buf.len = 0;
 	req->rq_snd_buf.buflen = 0;
@@ -1552,6 +1558,25 @@ void xprt_release(struct rpc_task *task)
 		xprt_free_bc_request(req);
 }
 
+#ifdef CONFIG_SUNRPC_BACKCHANNEL
+void
+xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
+{
+	struct xdr_buf *xbufp = &req->rq_snd_buf;
+
+	task->tk_rqstp = req;
+	req->rq_task = task;
+	xprt_init_connect_cookie(req, req->rq_xprt);
+	/*
+	 * Set up the xdr_buf length.
+	 * This also indicates that the buffer is XDR encoded already.
+	 */
+	xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
+		xbufp->tail[0].iov_len;
+	req->rq_bytes_sent = 0;
+}
+#endif
+
 static void xprt_init(struct rpc_xprt *xprt, struct net *net)
 {
 	kref_init(&xprt->kref);
-- 
cgit v1.2.3


From 50f484e298218b7271fad8a23bd44c82fb3110e1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Thu, 30 Aug 2018 13:27:29 -0400
Subject: SUNRPC: Treat the task and request as separate in the
 xprt_ops->send_request()

When we shift to using the transmit queue, then the task that holds the
write lock will not necessarily be the same as the one being transmitted.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h                |  2 +-
 net/sunrpc/xprt.c                          |  2 +-
 net/sunrpc/xprtrdma/svc_rdma_backchannel.c |  3 +--
 net/sunrpc/xprtrdma/transport.c            |  5 ++---
 net/sunrpc/xprtsock.c                      | 27 +++++++++++++--------------
 5 files changed, 18 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index b8a7de161f67..8c2bb078f00c 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -140,7 +140,7 @@ struct rpc_xprt_ops {
 	void		(*connect)(struct rpc_xprt *xprt, struct rpc_task *task);
 	int		(*buf_alloc)(struct rpc_task *task);
 	void		(*buf_free)(struct rpc_task *task);
-	int		(*send_request)(struct rpc_task *task);
+	int		(*send_request)(struct rpc_rqst *req, struct rpc_task *task);
 	void		(*set_retrans_timeout)(struct rpc_task *task);
 	void		(*timer)(struct rpc_xprt *xprt, struct rpc_task *task);
 	void		(*release_request)(struct rpc_task *task);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index f5be739492d4..6e735dd1fde0 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1191,7 +1191,7 @@ void xprt_transmit(struct rpc_task *task)
 	}
 
 	connect_cookie = xprt->connect_cookie;
-	status = xprt->ops->send_request(task);
+	status = xprt->ops->send_request(req, task);
 	trace_xprt_transmit(xprt, req->rq_xid, status);
 	if (status != 0) {
 		task->tk_status = status;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 09b12b7568fe..d1618c70edb4 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -215,9 +215,8 @@ drop_connection:
  * connection.
  */
 static int
-xprt_rdma_bc_send_request(struct rpc_task *task)
+xprt_rdma_bc_send_request(struct rpc_rqst *rqst, struct rpc_task *task)
 {
-	struct rpc_rqst *rqst = task->tk_rqstp;
 	struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
 	struct svcxprt_rdma *rdma;
 	int ret;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 143ce2579ba9..fa684bf4d090 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -706,9 +706,8 @@ xprt_rdma_free(struct rpc_task *task)
  *		sent. Do not try to send this message again.
  */
 static int
-xprt_rdma_send_request(struct rpc_task *task)
+xprt_rdma_send_request(struct rpc_rqst *rqst, struct rpc_task *task)
 {
-	struct rpc_rqst *rqst = task->tk_rqstp;
 	struct rpc_xprt *xprt = rqst->rq_xprt;
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
@@ -741,7 +740,7 @@ xprt_rdma_send_request(struct rpc_task *task)
 	/* An RPC with no reply will throw off credit accounting,
 	 * so drop the connection to reset the credit grant.
 	 */
-	if (!rpc_reply_expected(task))
+	if (!rpc_reply_expected(rqst->rq_task))
 		goto drop_connection;
 	return 0;
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 8d6404259ff9..b8143eded4af 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -449,12 +449,12 @@ static void xs_nospace_callback(struct rpc_task *task)
 
 /**
  * xs_nospace - place task on wait queue if transmit was incomplete
+ * @req: pointer to RPC request
  * @task: task to put to sleep
  *
  */
-static int xs_nospace(struct rpc_task *task)
+static int xs_nospace(struct rpc_rqst *req, struct rpc_task *task)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 	struct sock *sk = transport->inet;
@@ -513,6 +513,7 @@ static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
 
 /**
  * xs_local_send_request - write an RPC request to an AF_LOCAL socket
+ * @req: pointer to RPC request
  * @task: RPC task that manages the state of an RPC request
  *
  * Return values:
@@ -522,9 +523,8 @@ static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
  * ENOTCONN:	Caller needs to invoke connect logic then call again
  *    other:	Some other error occured, the request was not sent
  */
-static int xs_local_send_request(struct rpc_task *task)
+static int xs_local_send_request(struct rpc_rqst *req, struct rpc_task *task)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport =
 				container_of(xprt, struct sock_xprt, xprt);
@@ -569,7 +569,7 @@ static int xs_local_send_request(struct rpc_task *task)
 	case -ENOBUFS:
 		break;
 	case -EAGAIN:
-		status = xs_nospace(task);
+		status = xs_nospace(req, task);
 		break;
 	default:
 		dprintk("RPC:       sendmsg returned unrecognized error %d\n",
@@ -585,6 +585,7 @@ static int xs_local_send_request(struct rpc_task *task)
 
 /**
  * xs_udp_send_request - write an RPC request to a UDP socket
+ * @req: pointer to RPC request
  * @task: address of RPC task that manages the state of an RPC request
  *
  * Return values:
@@ -594,9 +595,8 @@ static int xs_local_send_request(struct rpc_task *task)
  * ENOTCONN:	Caller needs to invoke connect logic then call again
  *    other:	Some other error occurred, the request was not sent
  */
-static int xs_udp_send_request(struct rpc_task *task)
+static int xs_udp_send_request(struct rpc_rqst *req, struct rpc_task *task)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 	struct xdr_buf *xdr = &req->rq_snd_buf;
@@ -638,7 +638,7 @@ process_status:
 		/* Should we call xs_close() here? */
 		break;
 	case -EAGAIN:
-		status = xs_nospace(task);
+		status = xs_nospace(req, task);
 		break;
 	case -ENETUNREACH:
 	case -ENOBUFS:
@@ -658,6 +658,7 @@ process_status:
 
 /**
  * xs_tcp_send_request - write an RPC request to a TCP socket
+ * @req: pointer to RPC request
  * @task: address of RPC task that manages the state of an RPC request
  *
  * Return values:
@@ -670,9 +671,8 @@ process_status:
  * XXX: In the case of soft timeouts, should we eventually give up
  *	if sendmsg is not able to make progress?
  */
-static int xs_tcp_send_request(struct rpc_task *task)
+static int xs_tcp_send_request(struct rpc_rqst *req, struct rpc_task *task)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 	struct xdr_buf *xdr = &req->rq_snd_buf;
@@ -697,7 +697,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
 	 * completes while the socket holds a reference to the pages,
 	 * then we may end up resending corrupted data.
 	 */
-	if (task->tk_flags & RPC_TASK_SENT)
+	if (req->rq_task->tk_flags & RPC_TASK_SENT)
 		zerocopy = false;
 
 	if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
@@ -761,7 +761,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
 		/* Should we call xs_close() here? */
 		break;
 	case -EAGAIN:
-		status = xs_nospace(task);
+		status = xs_nospace(req, task);
 		break;
 	case -ECONNRESET:
 	case -ECONNREFUSED:
@@ -2706,9 +2706,8 @@ static int bc_sendto(struct rpc_rqst *req)
 /*
  * The send routine. Borrows from svc_send
  */
-static int bc_send_request(struct rpc_task *task)
+static int bc_send_request(struct rpc_rqst *req, struct rpc_task *task)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
 	struct svc_xprt	*xprt;
 	int len;
 
-- 
cgit v1.2.3


From 918f3c1fe83c5baa4892b943d3f5ac7191d8fb74 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Sun, 9 Sep 2018 11:37:22 -0400
Subject: SUNRPC: Improve latency for interactive tasks

One of the intentions with the priority queues was to ensure that no
single process can hog the transport. The field task->tk_owner therefore
identifies the RPC call's origin, and is intended to allow the RPC layer
to organise queues for fairness.
This commit therefore modifies the transmit queue to group requests
by task->tk_owner, and ensures that we round robin among those groups.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h |  1 +
 net/sunrpc/xprt.c           | 27 ++++++++++++++++++++++++---
 2 files changed, 25 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 8c2bb078f00c..e377620b9744 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -89,6 +89,7 @@ struct rpc_rqst {
 	};
 
 	struct list_head	rq_xmit;	/* Send queue */
+	struct list_head	rq_xmit2;	/* Send queue */
 
 	void			*rq_buffer;	/* Call XDR encode buffer */
 	size_t			rq_callsize;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 9c5a8514d264..44d0eeaddaac 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1053,12 +1053,21 @@ xprt_request_need_enqueue_transmit(struct rpc_task *task, struct rpc_rqst *req)
 void
 xprt_request_enqueue_transmit(struct rpc_task *task)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
+	struct rpc_rqst *pos, *req = task->tk_rqstp;
 	struct rpc_xprt *xprt = req->rq_xprt;
 
 	if (xprt_request_need_enqueue_transmit(task, req)) {
 		spin_lock(&xprt->queue_lock);
+		list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+			if (pos->rq_task->tk_owner != task->tk_owner)
+				continue;
+			list_add_tail(&req->rq_xmit2, &pos->rq_xmit2);
+			INIT_LIST_HEAD(&req->rq_xmit);
+			goto out;
+		}
 		list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
+		INIT_LIST_HEAD(&req->rq_xmit2);
+out:
 		set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
 		spin_unlock(&xprt->queue_lock);
 	}
@@ -1074,8 +1083,20 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
 static void
 xprt_request_dequeue_transmit_locked(struct rpc_task *task)
 {
-	if (test_and_clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
-		list_del(&task->tk_rqstp->rq_xmit);
+	struct rpc_rqst *req = task->tk_rqstp;
+
+	if (!test_and_clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+		return;
+	if (!list_empty(&req->rq_xmit)) {
+		list_del(&req->rq_xmit);
+		if (!list_empty(&req->rq_xmit2)) {
+			struct rpc_rqst *next = list_first_entry(&req->rq_xmit2,
+					struct rpc_rqst, rq_xmit2);
+			list_del(&req->rq_xmit2);
+			list_add_tail(&next->rq_xmit, &next->rq_xprt->xmit_queue);
+		}
+	} else
+		list_del(&req->rq_xmit2);
 }
 
 /**
-- 
cgit v1.2.3


From 75891f502f5fc70f52a01af5b924384ed4866907 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 3 Sep 2018 17:37:36 -0400
Subject: SUNRPC: Support for congestion control when queuing is enabled

Both RDMA and UDP transports require the request to get a "congestion control"
credit before they can be transmitted. Right now, this is done when
the request locks the socket. We'd like it to happen when a request attempts
to be transmitted for the first time.
In order to support retransmission of requests that already hold such
credits, we also want to ensure that they get queued first, so that we
don't deadlock with requests that have yet to obtain a credit.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h       |   2 +
 net/sunrpc/clnt.c                 |   5 ++
 net/sunrpc/xprt.c                 | 128 +++++++++++++++++++++++++++-----------
 net/sunrpc/xprtrdma/backchannel.c |   3 +
 net/sunrpc/xprtrdma/transport.c   |   3 +
 net/sunrpc/xprtsock.c             |   4 ++
 6 files changed, 109 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index e377620b9744..0d0cc127615e 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -397,6 +397,7 @@ void			xprt_complete_rqst(struct rpc_task *task, int copied);
 void			xprt_pin_rqst(struct rpc_rqst *req);
 void			xprt_unpin_rqst(struct rpc_rqst *req);
 void			xprt_release_rqst_cong(struct rpc_task *task);
+bool			xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req);
 void			xprt_disconnect_done(struct rpc_xprt *xprt);
 void			xprt_force_disconnect(struct rpc_xprt *xprt);
 void			xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
@@ -415,6 +416,7 @@ void			xprt_unlock_connect(struct rpc_xprt *, void *);
 #define XPRT_BINDING		(5)
 #define XPRT_CLOSING		(6)
 #define XPRT_CONGESTED		(9)
+#define XPRT_CWND_WAIT		(10)
 
 static inline void xprt_set_connected(struct rpc_xprt *xprt)
 {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8dc3d33827c4..f03911f84953 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1996,6 +1996,11 @@ call_transmit_status(struct rpc_task *task)
 		dprint_status(task);
 		xprt_end_transmit(task);
 		break;
+	case -EBADSLT:
+		xprt_end_transmit(task);
+		task->tk_action = call_transmit;
+		task->tk_status = 0;
+		break;
 	case -EBADMSG:
 		xprt_end_transmit(task);
 		task->tk_status = 0;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 44d0eeaddaac..b03355ae7b16 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -68,8 +68,6 @@
 static void	 xprt_init(struct rpc_xprt *xprt, struct net *net);
 static __be32	xprt_alloc_xid(struct rpc_xprt *xprt);
 static void	xprt_connect_status(struct rpc_task *task);
-static int      __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
-static void     __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *);
 static void	 xprt_destroy(struct rpc_xprt *xprt);
 
 static DEFINE_SPINLOCK(xprt_list_lock);
@@ -221,6 +219,31 @@ static void xprt_clear_locked(struct rpc_xprt *xprt)
 		queue_work(xprtiod_workqueue, &xprt->task_cleanup);
 }
 
+static bool
+xprt_need_congestion_window_wait(struct rpc_xprt *xprt)
+{
+	return test_bit(XPRT_CWND_WAIT, &xprt->state);
+}
+
+static void
+xprt_set_congestion_window_wait(struct rpc_xprt *xprt)
+{
+	if (!list_empty(&xprt->xmit_queue)) {
+		/* Peek at head of queue to see if it can make progress */
+		if (list_first_entry(&xprt->xmit_queue, struct rpc_rqst,
+					rq_xmit)->rq_cong)
+			return;
+	}
+	set_bit(XPRT_CWND_WAIT, &xprt->state);
+}
+
+static void
+xprt_test_and_clear_congestion_window_wait(struct rpc_xprt *xprt)
+{
+	if (!RPCXPRT_CONGESTED(xprt))
+		clear_bit(XPRT_CWND_WAIT, &xprt->state);
+}
+
 /*
  * xprt_reserve_xprt_cong - serialize write access to transports
  * @task: task that is requesting access to the transport
@@ -228,6 +251,7 @@ static void xprt_clear_locked(struct rpc_xprt *xprt)
  * Same as xprt_reserve_xprt, but Van Jacobson congestion control is
  * integrated into the decision of whether a request is allowed to be
  * woken up and given access to the transport.
+ * Note that the lock is only granted if we know there are free slots.
  */
 int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
 {
@@ -243,14 +267,12 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
 		xprt->snd_task = task;
 		return 1;
 	}
-	if (__xprt_get_cong(xprt, task)) {
+	if (!xprt_need_congestion_window_wait(xprt)) {
 		xprt->snd_task = task;
 		return 1;
 	}
 	xprt_clear_locked(xprt);
 out_sleep:
-	if (req)
-		__xprt_put_cong(xprt, req);
 	dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
 	task->tk_timeout = 0;
 	task->tk_status = -EAGAIN;
@@ -294,32 +316,14 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt)
 	xprt_clear_locked(xprt);
 }
 
-static bool __xprt_lock_write_cong_func(struct rpc_task *task, void *data)
-{
-	struct rpc_xprt *xprt = data;
-	struct rpc_rqst *req;
-
-	req = task->tk_rqstp;
-	if (req == NULL) {
-		xprt->snd_task = task;
-		return true;
-	}
-	if (__xprt_get_cong(xprt, task)) {
-		xprt->snd_task = task;
-		req->rq_ntrans++;
-		return true;
-	}
-	return false;
-}
-
 static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
 {
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
 		return;
-	if (RPCXPRT_CONGESTED(xprt))
+	if (xprt_need_congestion_window_wait(xprt))
 		goto out_unlock;
 	if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending,
-				__xprt_lock_write_cong_func, xprt))
+				__xprt_lock_write_func, xprt))
 		return;
 out_unlock:
 	xprt_clear_locked(xprt);
@@ -370,16 +374,16 @@ static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *ta
  * overflowed. Put the task to sleep if this is the case.
  */
 static int
-__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task)
+__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
-
 	if (req->rq_cong)
 		return 1;
 	dprintk("RPC: %5u xprt_cwnd_limited cong = %lu cwnd = %lu\n",
-			task->tk_pid, xprt->cong, xprt->cwnd);
-	if (RPCXPRT_CONGESTED(xprt))
+			req->rq_task->tk_pid, xprt->cong, xprt->cwnd);
+	if (RPCXPRT_CONGESTED(xprt)) {
+		xprt_set_congestion_window_wait(xprt);
 		return 0;
+	}
 	req->rq_cong = 1;
 	xprt->cong += RPC_CWNDSCALE;
 	return 1;
@@ -396,9 +400,31 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
 		return;
 	req->rq_cong = 0;
 	xprt->cong -= RPC_CWNDSCALE;
+	xprt_test_and_clear_congestion_window_wait(xprt);
 	__xprt_lock_write_next_cong(xprt);
 }
 
+/**
+ * xprt_request_get_cong - Request congestion control credits
+ * @xprt: pointer to transport
+ * @req: pointer to RPC request
+ *
+ * Useful for transports that require congestion control.
+ */
+bool
+xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+	bool ret = false;
+
+	if (req->rq_cong)
+		return true;
+	spin_lock_bh(&xprt->transport_lock);
+	ret = __xprt_get_cong(xprt, req) != 0;
+	spin_unlock_bh(&xprt->transport_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(xprt_request_get_cong);
+
 /**
  * xprt_release_rqst_cong - housekeeping when request is complete
  * @task: RPC request that recently completed
@@ -413,6 +439,20 @@ void xprt_release_rqst_cong(struct rpc_task *task)
 }
 EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
 
+/*
+ * Clear the congestion window wait flag and wake up the next
+ * entry on xprt->sending
+ */
+static void
+xprt_clear_congestion_window_wait(struct rpc_xprt *xprt)
+{
+	if (test_and_clear_bit(XPRT_CWND_WAIT, &xprt->state)) {
+		spin_lock_bh(&xprt->transport_lock);
+		__xprt_lock_write_next_cong(xprt);
+		spin_unlock_bh(&xprt->transport_lock);
+	}
+}
+
 /**
  * xprt_adjust_cwnd - adjust transport congestion window
  * @xprt: pointer to xprt
@@ -1058,12 +1098,28 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
 
 	if (xprt_request_need_enqueue_transmit(task, req)) {
 		spin_lock(&xprt->queue_lock);
-		list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
-			if (pos->rq_task->tk_owner != task->tk_owner)
-				continue;
-			list_add_tail(&req->rq_xmit2, &pos->rq_xmit2);
-			INIT_LIST_HEAD(&req->rq_xmit);
-			goto out;
+		/*
+		 * Requests that carry congestion control credits are added
+		 * to the head of the list to avoid starvation issues.
+		 */
+		if (req->rq_cong) {
+			xprt_clear_congestion_window_wait(xprt);
+			list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+				if (pos->rq_cong)
+					continue;
+				/* Note: req is added _before_ pos */
+				list_add_tail(&req->rq_xmit, &pos->rq_xmit);
+				INIT_LIST_HEAD(&req->rq_xmit2);
+				goto out;
+			}
+		} else {
+			list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+				if (pos->rq_task->tk_owner != task->tk_owner)
+					continue;
+				list_add_tail(&req->rq_xmit2, &pos->rq_xmit2);
+				INIT_LIST_HEAD(&req->rq_xmit);
+				goto out;
+			}
 		}
 		list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
 		INIT_LIST_HEAD(&req->rq_xmit2);
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index ed58761e6b23..e7c445cee16f 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -200,6 +200,9 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
 	if (!xprt_connected(rqst->rq_xprt))
 		goto drop_connection;
 
+	if (!xprt_request_get_cong(rqst->rq_xprt, rqst))
+		return -EBADSLT;
+
 	rc = rpcrdma_bc_marshal_reply(rqst);
 	if (rc < 0)
 		goto failed_marshal;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index fa684bf4d090..9ff322e53f37 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -721,6 +721,9 @@ xprt_rdma_send_request(struct rpc_rqst *rqst, struct rpc_task *task)
 	if (!xprt_connected(xprt))
 		goto drop_connection;
 
+	if (!xprt_request_get_cong(xprt, rqst))
+		return -EBADSLT;
+
 	rc = rpcrdma_marshal_req(r_xprt, rqst);
 	if (rc < 0)
 		goto failed_marshal;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index b8143eded4af..8831e84a058a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -609,6 +609,10 @@ static int xs_udp_send_request(struct rpc_rqst *req, struct rpc_task *task)
 
 	if (!xprt_bound(xprt))
 		return -ENOTCONN;
+
+	if (!xprt_request_get_cong(xprt, req))
+		return -EBADSLT;
+
 	req->rq_xtime = ktime_get();
 	status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
 			      xdr, 0, true, &sent);
-- 
cgit v1.2.3


From 36bd7de949f41d586ef7794169af75462b67acbc Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 3 Sep 2018 18:41:32 -0400
Subject: SUNRPC: Turn off throttling of RPC slots for TCP sockets

The theory was that we would need to grab the socket lock anyway, so we
might as well use it to gate the allocation of RPC slots for a TCP
socket.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h |  1 -
 net/sunrpc/xprt.c           | 14 --------------
 net/sunrpc/xprtsock.c       |  2 +-
 3 files changed, 1 insertion(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 0d0cc127615e..14c9b4d49fb4 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -343,7 +343,6 @@ int			xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
 void			xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
 void			xprt_free_slot(struct rpc_xprt *xprt,
 				       struct rpc_rqst *req);
-void			xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
 bool			xprt_prepare_transmit(struct rpc_task *task);
 void			xprt_request_enqueue_transmit(struct rpc_task *task);
 void			xprt_request_enqueue_receive(struct rpc_task *task);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 533df198a0e9..849e102e3c5a 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1429,20 +1429,6 @@ out_init_req:
 }
 EXPORT_SYMBOL_GPL(xprt_alloc_slot);
 
-void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
-{
-	/* Note: grabbing the xprt_lock_write() ensures that we throttle
-	 * new slot allocation if the transport is congested (i.e. when
-	 * reconnecting a stream transport or when out of socket write
-	 * buffer space).
-	 */
-	if (xprt_lock_write(xprt, task)) {
-		xprt_alloc_slot(xprt, task);
-		xprt_release_write(xprt, task);
-	}
-}
-EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot);
-
 void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
 {
 	spin_lock(&xprt->reserve_lock);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 8831e84a058a..f54e8110f4c6 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2809,7 +2809,7 @@ static const struct rpc_xprt_ops xs_udp_ops = {
 static const struct rpc_xprt_ops xs_tcp_ops = {
 	.reserve_xprt		= xprt_reserve_xprt,
 	.release_xprt		= xprt_release_xprt,
-	.alloc_slot		= xprt_lock_and_alloc_slot,
+	.alloc_slot		= xprt_alloc_slot,
 	.free_slot		= xprt_free_slot,
 	.rpcbind		= rpcb_getport_async,
 	.set_port		= xs_set_port,
-- 
cgit v1.2.3


From c544577daddb618c7dd5fa7fb98d6a41782f020e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 3 Sep 2018 23:39:27 -0400
Subject: SUNRPC: Clean up transport write space handling

Treat socket write space handling in the same way we now treat transport
congestion: by denying the XPRT_LOCK until the transport signals that it
has free buffer space.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/svc_xprt.h            |  1 -
 include/linux/sunrpc/xprt.h                |  5 +-
 net/sunrpc/clnt.c                          | 28 ++++-------
 net/sunrpc/svc_xprt.c                      |  2 -
 net/sunrpc/xprt.c                          | 77 ++++++++++++++++++------------
 net/sunrpc/xprtrdma/rpc_rdma.c             |  2 +-
 net/sunrpc/xprtrdma/svc_rdma_backchannel.c |  7 +--
 net/sunrpc/xprtsock.c                      | 33 +++++--------
 8 files changed, 73 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index c3d72066d4b1..6b7a86c4d6e6 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -84,7 +84,6 @@ struct svc_xprt {
 	struct sockaddr_storage	xpt_remote;	/* remote peer's address */
 	size_t			xpt_remotelen;	/* length of address */
 	char			xpt_remotebuf[INET6_ADDRSTRLEN + 10];
-	struct rpc_wait_queue	xpt_bc_pending;	/* backchannel wait queue */
 	struct list_head	xpt_users;	/* callbacks on free */
 
 	struct net		*xpt_net;
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 14c9b4d49fb4..5600242ccbf9 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -387,8 +387,8 @@ int			xprt_load_transport(const char *);
 void			xprt_set_retrans_timeout_def(struct rpc_task *task);
 void			xprt_set_retrans_timeout_rtt(struct rpc_task *task);
 void			xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
-void			xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action);
-void			xprt_write_space(struct rpc_xprt *xprt);
+void			xprt_wait_for_buffer_space(struct rpc_xprt *xprt);
+bool			xprt_write_space(struct rpc_xprt *xprt);
 void			xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result);
 struct rpc_rqst *	xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid);
 void			xprt_update_rtt(struct rpc_task *task);
@@ -416,6 +416,7 @@ void			xprt_unlock_connect(struct rpc_xprt *, void *);
 #define XPRT_CLOSING		(6)
 #define XPRT_CONGESTED		(9)
 #define XPRT_CWND_WAIT		(10)
+#define XPRT_WRITE_SPACE	(11)
 
 static inline void xprt_set_connected(struct rpc_xprt *xprt)
 {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index f03911f84953..0c4b2e7d791f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1964,13 +1964,14 @@ call_transmit(struct rpc_task *task)
 {
 	dprint_status(task);
 
+	task->tk_status = 0;
+	if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
+		if (!xprt_prepare_transmit(task))
+			return;
+		xprt_transmit(task);
+	}
 	task->tk_action = call_transmit_status;
-	if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
-		return;
-
-	if (!xprt_prepare_transmit(task))
-		return;
-	xprt_transmit(task);
+	xprt_end_transmit(task);
 }
 
 /*
@@ -1986,7 +1987,6 @@ call_transmit_status(struct rpc_task *task)
 	 * test first.
 	 */
 	if (task->tk_status == 0) {
-		xprt_end_transmit(task);
 		xprt_request_wait_receive(task);
 		return;
 	}
@@ -1994,15 +1994,8 @@ call_transmit_status(struct rpc_task *task)
 	switch (task->tk_status) {
 	default:
 		dprint_status(task);
-		xprt_end_transmit(task);
-		break;
-	case -EBADSLT:
-		xprt_end_transmit(task);
-		task->tk_action = call_transmit;
-		task->tk_status = 0;
 		break;
 	case -EBADMSG:
-		xprt_end_transmit(task);
 		task->tk_status = 0;
 		task->tk_action = call_encode;
 		break;
@@ -2015,6 +2008,7 @@ call_transmit_status(struct rpc_task *task)
 	case -ENOBUFS:
 		rpc_delay(task, HZ>>2);
 		/* fall through */
+	case -EBADSLT:
 	case -EAGAIN:
 		task->tk_action = call_transmit;
 		task->tk_status = 0;
@@ -2026,7 +2020,6 @@ call_transmit_status(struct rpc_task *task)
 	case -ENETUNREACH:
 	case -EPERM:
 		if (RPC_IS_SOFTCONN(task)) {
-			xprt_end_transmit(task);
 			if (!task->tk_msg.rpc_proc->p_proc)
 				trace_xprt_ping(task->tk_xprt,
 						task->tk_status);
@@ -2069,9 +2062,6 @@ call_bc_transmit(struct rpc_task *task)
 
 	xprt_transmit(task);
 
-	if (task->tk_status == -EAGAIN)
-		goto out_retry;
-
 	xprt_end_transmit(task);
 	dprint_status(task);
 	switch (task->tk_status) {
@@ -2087,6 +2077,8 @@ call_bc_transmit(struct rpc_task *task)
 	case -ENOTCONN:
 	case -EPIPE:
 		break;
+	case -EAGAIN:
+		goto out_retry;
 	case -ETIMEDOUT:
 		/*
 		 * Problem reaching the server.  Disconnect and let the
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 5185efb9027b..87533fbb96cf 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -171,7 +171,6 @@ void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
 	mutex_init(&xprt->xpt_mutex);
 	spin_lock_init(&xprt->xpt_lock);
 	set_bit(XPT_BUSY, &xprt->xpt_flags);
-	rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
 	xprt->xpt_net = get_net(net);
 	strcpy(xprt->xpt_remotebuf, "uninitialized");
 }
@@ -895,7 +894,6 @@ int svc_send(struct svc_rqst *rqstp)
 	else
 		len = xprt->xpt_ops->xpo_sendto(rqstp);
 	mutex_unlock(&xprt->xpt_mutex);
-	rpc_wake_up(&xprt->xpt_bc_pending);
 	trace_svc_send(rqstp, len);
 	svc_xprt_release(rqstp);
 
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 849e102e3c5a..55dc5c7069b9 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -169,6 +169,17 @@ out:
 }
 EXPORT_SYMBOL_GPL(xprt_load_transport);
 
+static void xprt_clear_locked(struct rpc_xprt *xprt)
+{
+	xprt->snd_task = NULL;
+	if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
+		smp_mb__before_atomic();
+		clear_bit(XPRT_LOCKED, &xprt->state);
+		smp_mb__after_atomic();
+	} else
+		queue_work(xprtiod_workqueue, &xprt->task_cleanup);
+}
+
 /**
  * xprt_reserve_xprt - serialize write access to transports
  * @task: task that is requesting access to the transport
@@ -188,10 +199,14 @@ int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
 			return 1;
 		goto out_sleep;
 	}
+	if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+		goto out_unlock;
 	xprt->snd_task = task;
 
 	return 1;
 
+out_unlock:
+	xprt_clear_locked(xprt);
 out_sleep:
 	dprintk("RPC: %5u failed to lock transport %p\n",
 			task->tk_pid, xprt);
@@ -208,17 +223,6 @@ out_sleep:
 }
 EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
 
-static void xprt_clear_locked(struct rpc_xprt *xprt)
-{
-	xprt->snd_task = NULL;
-	if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
-		smp_mb__before_atomic();
-		clear_bit(XPRT_LOCKED, &xprt->state);
-		smp_mb__after_atomic();
-	} else
-		queue_work(xprtiod_workqueue, &xprt->task_cleanup);
-}
-
 static bool
 xprt_need_congestion_window_wait(struct rpc_xprt *xprt)
 {
@@ -267,10 +271,13 @@ int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
 		xprt->snd_task = task;
 		return 1;
 	}
+	if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+		goto out_unlock;
 	if (!xprt_need_congestion_window_wait(xprt)) {
 		xprt->snd_task = task;
 		return 1;
 	}
+out_unlock:
 	xprt_clear_locked(xprt);
 out_sleep:
 	dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
@@ -309,10 +316,12 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt)
 {
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
 		return;
-
+	if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+		goto out_unlock;
 	if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending,
 				__xprt_lock_write_func, xprt))
 		return;
+out_unlock:
 	xprt_clear_locked(xprt);
 }
 
@@ -320,6 +329,8 @@ static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
 {
 	if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
 		return;
+	if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+		goto out_unlock;
 	if (xprt_need_congestion_window_wait(xprt))
 		goto out_unlock;
 	if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending,
@@ -510,39 +521,46 @@ EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks);
 
 /**
  * xprt_wait_for_buffer_space - wait for transport output buffer to clear
- * @task: task to be put to sleep
- * @action: function pointer to be executed after wait
+ * @xprt: transport
  *
  * Note that we only set the timer for the case of RPC_IS_SOFT(), since
  * we don't in general want to force a socket disconnection due to
  * an incomplete RPC call transmission.
  */
-void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action)
+void xprt_wait_for_buffer_space(struct rpc_xprt *xprt)
 {
-	struct rpc_rqst *req = task->tk_rqstp;
-	struct rpc_xprt *xprt = req->rq_xprt;
-
-	task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
-	rpc_sleep_on(&xprt->pending, task, action);
+	set_bit(XPRT_WRITE_SPACE, &xprt->state);
 }
 EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space);
 
+static bool
+xprt_clear_write_space_locked(struct rpc_xprt *xprt)
+{
+	if (test_and_clear_bit(XPRT_WRITE_SPACE, &xprt->state)) {
+		__xprt_lock_write_next(xprt);
+		dprintk("RPC:       write space: waking waiting task on "
+				"xprt %p\n", xprt);
+		return true;
+	}
+	return false;
+}
+
 /**
  * xprt_write_space - wake the task waiting for transport output buffer space
  * @xprt: transport with waiting tasks
  *
  * Can be called in a soft IRQ context, so xprt_write_space never sleeps.
  */
-void xprt_write_space(struct rpc_xprt *xprt)
+bool xprt_write_space(struct rpc_xprt *xprt)
 {
+	bool ret;
+
+	if (!test_bit(XPRT_WRITE_SPACE, &xprt->state))
+		return false;
 	spin_lock_bh(&xprt->transport_lock);
-	if (xprt->snd_task) {
-		dprintk("RPC:       write space: waking waiting task on "
-				"xprt %p\n", xprt);
-		rpc_wake_up_queued_task_on_wq(xprtiod_workqueue,
-				&xprt->pending, xprt->snd_task);
-	}
+	ret = xprt_clear_write_space_locked(xprt);
 	spin_unlock_bh(&xprt->transport_lock);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(xprt_write_space);
 
@@ -653,6 +671,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt)
 	dprintk("RPC:       disconnected transport %p\n", xprt);
 	spin_lock_bh(&xprt->transport_lock);
 	xprt_clear_connected(xprt);
+	xprt_clear_write_space_locked(xprt);
 	xprt_wake_pending_tasks(xprt, -EAGAIN);
 	spin_unlock_bh(&xprt->transport_lock);
 }
@@ -1326,9 +1345,7 @@ xprt_transmit(struct rpc_task *task)
 			if (!xprt_request_data_received(task) ||
 			    test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
 				continue;
-		} else if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
-			rpc_wake_up_queued_task(&xprt->pending, task);
-		else
+		} else if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
 			task->tk_status = status;
 		break;
 	}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 0020dc401215..53fa95d60015 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -866,7 +866,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
 out_err:
 	switch (ret) {
 	case -EAGAIN:
-		xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+		xprt_wait_for_buffer_space(rqst->rq_xprt);
 		break;
 	case -ENOBUFS:
 		break;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index d1618c70edb4..35a8c3aab302 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -224,12 +224,7 @@ xprt_rdma_bc_send_request(struct rpc_rqst *rqst, struct rpc_task *task)
 	dprintk("svcrdma: sending bc call with xid: %08x\n",
 		be32_to_cpu(rqst->rq_xid));
 
-	if (!mutex_trylock(&sxprt->xpt_mutex)) {
-		rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL);
-		if (!mutex_trylock(&sxprt->xpt_mutex))
-			return -EAGAIN;
-		rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task);
-	}
+	mutex_lock(&sxprt->xpt_mutex);
 
 	ret = -ENOTCONN;
 	rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index f54e8110f4c6..ef8d0e81cbda 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -440,20 +440,12 @@ out:
 	return err;
 }
 
-static void xs_nospace_callback(struct rpc_task *task)
-{
-	struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
-
-	transport->inet->sk_write_pending--;
-}
-
 /**
- * xs_nospace - place task on wait queue if transmit was incomplete
+ * xs_nospace - handle transmit was incomplete
  * @req: pointer to RPC request
- * @task: task to put to sleep
  *
  */
-static int xs_nospace(struct rpc_rqst *req, struct rpc_task *task)
+static int xs_nospace(struct rpc_rqst *req)
 {
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -461,7 +453,8 @@ static int xs_nospace(struct rpc_rqst *req, struct rpc_task *task)
 	int ret = -EAGAIN;
 
 	dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
-			task->tk_pid, req->rq_slen - transport->xmit.offset,
+			req->rq_task->tk_pid,
+			req->rq_slen - transport->xmit.offset,
 			req->rq_slen);
 
 	/* Protect against races with write_space */
@@ -471,7 +464,7 @@ static int xs_nospace(struct rpc_rqst *req, struct rpc_task *task)
 	if (xprt_connected(xprt)) {
 		/* wait for more buffer space */
 		sk->sk_write_pending++;
-		xprt_wait_for_buffer_space(task, xs_nospace_callback);
+		xprt_wait_for_buffer_space(xprt);
 	} else
 		ret = -ENOTCONN;
 
@@ -569,7 +562,7 @@ static int xs_local_send_request(struct rpc_rqst *req, struct rpc_task *task)
 	case -ENOBUFS:
 		break;
 	case -EAGAIN:
-		status = xs_nospace(req, task);
+		status = xs_nospace(req);
 		break;
 	default:
 		dprintk("RPC:       sendmsg returned unrecognized error %d\n",
@@ -642,7 +635,7 @@ process_status:
 		/* Should we call xs_close() here? */
 		break;
 	case -EAGAIN:
-		status = xs_nospace(req, task);
+		status = xs_nospace(req);
 		break;
 	case -ENETUNREACH:
 	case -ENOBUFS:
@@ -765,7 +758,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req, struct rpc_task *task)
 		/* Should we call xs_close() here? */
 		break;
 	case -EAGAIN:
-		status = xs_nospace(req, task);
+		status = xs_nospace(req);
 		break;
 	case -ECONNRESET:
 	case -ECONNREFUSED:
@@ -1672,7 +1665,8 @@ static void xs_write_space(struct sock *sk)
 	if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0)
 		goto out;
 
-	xprt_write_space(xprt);
+	if (xprt_write_space(xprt))
+		sk->sk_write_pending--;
 out:
 	rcu_read_unlock();
 }
@@ -2725,12 +2719,7 @@ static int bc_send_request(struct rpc_rqst *req, struct rpc_task *task)
 	 * Grab the mutex to serialize data as the connection is shared
 	 * with the fore channel
 	 */
-	if (!mutex_trylock(&xprt->xpt_mutex)) {
-		rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL);
-		if (!mutex_trylock(&xprt->xpt_mutex))
-			return -EAGAIN;
-		rpc_wake_up_queued_task(&xprt->xpt_bc_pending, task);
-	}
+	mutex_lock(&xprt->xpt_mutex);
 	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
 		len = -ENOTCONN;
 	else
-- 
cgit v1.2.3


From adfa71446dd0943ba376eff3e05c7c89582f8038 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 3 Sep 2018 23:58:59 -0400
Subject: SUNRPC: Cleanup: remove the unused 'task' argument from the
 request_send()

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h                |  2 +-
 net/sunrpc/xprt.c                          |  2 +-
 net/sunrpc/xprtrdma/svc_rdma_backchannel.c |  2 +-
 net/sunrpc/xprtrdma/transport.c            |  4 ++--
 net/sunrpc/xprtsock.c                      | 11 ++++-------
 5 files changed, 9 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 5600242ccbf9..823860cce0bc 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -141,7 +141,7 @@ struct rpc_xprt_ops {
 	void		(*connect)(struct rpc_xprt *xprt, struct rpc_task *task);
 	int		(*buf_alloc)(struct rpc_task *task);
 	void		(*buf_free)(struct rpc_task *task);
-	int		(*send_request)(struct rpc_rqst *req, struct rpc_task *task);
+	int		(*send_request)(struct rpc_rqst *req);
 	void		(*set_retrans_timeout)(struct rpc_task *task);
 	void		(*timer)(struct rpc_xprt *xprt, struct rpc_task *task);
 	void		(*release_request)(struct rpc_task *task);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 55dc5c7069b9..c86a5df6c338 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1283,7 +1283,7 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
 	req->rq_ntrans++;
 
 	connect_cookie = xprt->connect_cookie;
-	status = xprt->ops->send_request(req, snd_task);
+	status = xprt->ops->send_request(req);
 	trace_xprt_transmit(xprt, req->rq_xid, status);
 	if (status != 0) {
 		req->rq_ntrans--;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 35a8c3aab302..992312504cfd 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -215,7 +215,7 @@ drop_connection:
  * connection.
  */
 static int
-xprt_rdma_bc_send_request(struct rpc_rqst *rqst, struct rpc_task *task)
+xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
 {
 	struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
 	struct svcxprt_rdma *rdma;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 9ff322e53f37..a5a6a4a353f2 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -693,7 +693,7 @@ xprt_rdma_free(struct rpc_task *task)
 
 /**
  * xprt_rdma_send_request - marshal and send an RPC request
- * @task: RPC task with an RPC message in rq_snd_buf
+ * @rqst: RPC message in rq_snd_buf
  *
  * Caller holds the transport's write lock.
  *
@@ -706,7 +706,7 @@ xprt_rdma_free(struct rpc_task *task)
  *		sent. Do not try to send this message again.
  */
 static int
-xprt_rdma_send_request(struct rpc_rqst *rqst, struct rpc_task *task)
+xprt_rdma_send_request(struct rpc_rqst *rqst)
 {
 	struct rpc_xprt *xprt = rqst->rq_xprt;
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ef8d0e81cbda..f16406228ead 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -507,7 +507,6 @@ static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
 /**
  * xs_local_send_request - write an RPC request to an AF_LOCAL socket
  * @req: pointer to RPC request
- * @task: RPC task that manages the state of an RPC request
  *
  * Return values:
  *        0:	The request has been sent
@@ -516,7 +515,7 @@ static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
  * ENOTCONN:	Caller needs to invoke connect logic then call again
  *    other:	Some other error occured, the request was not sent
  */
-static int xs_local_send_request(struct rpc_rqst *req, struct rpc_task *task)
+static int xs_local_send_request(struct rpc_rqst *req)
 {
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport =
@@ -579,7 +578,6 @@ static int xs_local_send_request(struct rpc_rqst *req, struct rpc_task *task)
 /**
  * xs_udp_send_request - write an RPC request to a UDP socket
  * @req: pointer to RPC request
- * @task: address of RPC task that manages the state of an RPC request
  *
  * Return values:
  *        0:	The request has been sent
@@ -588,7 +586,7 @@ static int xs_local_send_request(struct rpc_rqst *req, struct rpc_task *task)
  * ENOTCONN:	Caller needs to invoke connect logic then call again
  *    other:	Some other error occurred, the request was not sent
  */
-static int xs_udp_send_request(struct rpc_rqst *req, struct rpc_task *task)
+static int xs_udp_send_request(struct rpc_rqst *req)
 {
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -656,7 +654,6 @@ process_status:
 /**
  * xs_tcp_send_request - write an RPC request to a TCP socket
  * @req: pointer to RPC request
- * @task: address of RPC task that manages the state of an RPC request
  *
  * Return values:
  *        0:	The request has been sent
@@ -668,7 +665,7 @@ process_status:
  * XXX: In the case of soft timeouts, should we eventually give up
  *	if sendmsg is not able to make progress?
  */
-static int xs_tcp_send_request(struct rpc_rqst *req, struct rpc_task *task)
+static int xs_tcp_send_request(struct rpc_rqst *req)
 {
 	struct rpc_xprt *xprt = req->rq_xprt;
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2704,7 +2701,7 @@ static int bc_sendto(struct rpc_rqst *req)
 /*
  * The send routine. Borrows from svc_send
  */
-static int bc_send_request(struct rpc_rqst *req, struct rpc_task *task)
+static int bc_send_request(struct rpc_rqst *req)
 {
 	struct svc_xprt	*xprt;
 	int len;
-- 
cgit v1.2.3


From 95f7691daa57bbd68caac2bdad79e0b08f4d46c1 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 7 Sep 2018 08:35:22 -0400
Subject: SUNRPC: Convert xprt receive queue to use an rbtree

If the server is slow, we can find ourselves with quite a lot of entries
on the receive queue. Converting the search from an O(n) to O(log(n))
can make a significant difference, particularly since we have to hold
a number of locks while searching.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprt.h |  4 +-
 net/sunrpc/xprt.c           | 93 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 84 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 823860cce0bc..9be399020dab 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -85,7 +85,7 @@ struct rpc_rqst {
 
 	union {
 		struct list_head	rq_list;	/* Slot allocation list */
-		struct list_head	rq_recv;	/* Receive queue */
+		struct rb_node		rq_recv;	/* Receive queue */
 	};
 
 	struct list_head	rq_xmit;	/* Send queue */
@@ -260,7 +260,7 @@ struct rpc_xprt {
 						 * backchannel rpc_rqst's */
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
-	struct list_head	recv_queue;	/* Receive queue */
+	struct rb_root		recv_queue;	/* Receive queue */
 
 	struct {
 		unsigned long		bind_count,	/* total number of binds */
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 11133ba716b9..480461ad0c86 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -753,7 +753,7 @@ static void
 xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
 	__must_hold(&xprt->transport_lock)
 {
-	if (list_empty(&xprt->recv_queue) && xprt_has_timer(xprt))
+	if (RB_EMPTY_ROOT(&xprt->recv_queue) && xprt_has_timer(xprt))
 		mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout);
 }
 
@@ -763,7 +763,7 @@ xprt_init_autodisconnect(struct timer_list *t)
 	struct rpc_xprt *xprt = from_timer(xprt, t, timer);
 
 	spin_lock(&xprt->transport_lock);
-	if (!list_empty(&xprt->recv_queue))
+	if (!RB_EMPTY_ROOT(&xprt->recv_queue))
 		goto out_abort;
 	/* Reset xprt->last_used to avoid connect/autodisconnect cycling */
 	xprt->last_used = jiffies;
@@ -880,6 +880,75 @@ static void xprt_connect_status(struct rpc_task *task)
 	}
 }
 
+enum xprt_xid_rb_cmp {
+	XID_RB_EQUAL,
+	XID_RB_LEFT,
+	XID_RB_RIGHT,
+};
+static enum xprt_xid_rb_cmp
+xprt_xid_cmp(__be32 xid1, __be32 xid2)
+{
+	if (xid1 == xid2)
+		return XID_RB_EQUAL;
+	if ((__force u32)xid1 < (__force u32)xid2)
+		return XID_RB_LEFT;
+	return XID_RB_RIGHT;
+}
+
+static struct rpc_rqst *
+xprt_request_rb_find(struct rpc_xprt *xprt, __be32 xid)
+{
+	struct rb_node *n = xprt->recv_queue.rb_node;
+	struct rpc_rqst *req;
+
+	while (n != NULL) {
+		req = rb_entry(n, struct rpc_rqst, rq_recv);
+		switch (xprt_xid_cmp(xid, req->rq_xid)) {
+		case XID_RB_LEFT:
+			n = n->rb_left;
+			break;
+		case XID_RB_RIGHT:
+			n = n->rb_right;
+			break;
+		case XID_RB_EQUAL:
+			return req;
+		}
+	}
+	return NULL;
+}
+
+static void
+xprt_request_rb_insert(struct rpc_xprt *xprt, struct rpc_rqst *new)
+{
+	struct rb_node **p = &xprt->recv_queue.rb_node;
+	struct rb_node *n = NULL;
+	struct rpc_rqst *req;
+
+	while (*p != NULL) {
+		n = *p;
+		req = rb_entry(n, struct rpc_rqst, rq_recv);
+		switch(xprt_xid_cmp(new->rq_xid, req->rq_xid)) {
+		case XID_RB_LEFT:
+			p = &n->rb_left;
+			break;
+		case XID_RB_RIGHT:
+			p = &n->rb_right;
+			break;
+		case XID_RB_EQUAL:
+			WARN_ON_ONCE(new != req);
+			return;
+		}
+	}
+	rb_link_node(&new->rq_recv, n, p);
+	rb_insert_color(&new->rq_recv, &xprt->recv_queue);
+}
+
+static void
+xprt_request_rb_remove(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+	rb_erase(&req->rq_recv, &xprt->recv_queue);
+}
+
 /**
  * xprt_lookup_rqst - find an RPC request corresponding to an XID
  * @xprt: transport on which the original request was transmitted
@@ -891,12 +960,12 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
 {
 	struct rpc_rqst *entry;
 
-	list_for_each_entry(entry, &xprt->recv_queue, rq_recv)
-		if (entry->rq_xid == xid) {
-			trace_xprt_lookup_rqst(xprt, xid, 0);
-			entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime);
-			return entry;
-		}
+	entry = xprt_request_rb_find(xprt, xid);
+	if (entry != NULL) {
+		trace_xprt_lookup_rqst(xprt, xid, 0);
+		entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime);
+		return entry;
+	}
 
 	dprintk("RPC:       xprt_lookup_rqst did not find xid %08x\n",
 			ntohl(xid));
@@ -981,7 +1050,7 @@ xprt_request_enqueue_receive(struct rpc_task *task)
 			sizeof(req->rq_private_buf));
 
 	/* Add request to the receive list */
-	list_add_tail(&req->rq_recv, &xprt->recv_queue);
+	xprt_request_rb_insert(xprt, req);
 	set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate);
 	spin_unlock(&xprt->queue_lock);
 
@@ -999,8 +1068,10 @@ xprt_request_enqueue_receive(struct rpc_task *task)
 static void
 xprt_request_dequeue_receive_locked(struct rpc_task *task)
 {
+	struct rpc_rqst *req = task->tk_rqstp;
+
 	if (test_and_clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate))
-		list_del(&task->tk_rqstp->rq_recv);
+		xprt_request_rb_remove(req->rq_xprt, req);
 }
 
 /**
@@ -1711,7 +1782,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
 	spin_lock_init(&xprt->queue_lock);
 
 	INIT_LIST_HEAD(&xprt->free);
-	INIT_LIST_HEAD(&xprt->recv_queue);
+	xprt->recv_queue = RB_ROOT;
 	INIT_LIST_HEAD(&xprt->xmit_queue);
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
 	spin_lock_init(&xprt->bc_pa_lock);
-- 
cgit v1.2.3


From f42f7c283078ce3c1e8368b140e270755b1ae313 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Sat, 8 Sep 2018 22:09:48 -0400
Subject: SUNRPC: Fix priority queue fairness

Fix up the priority queue to not batch by owner, but by queue, so that
we allow '1 << priority' elements to be dequeued before switching to
the next priority queue.
The owner field is still used to wake up requests in round robin order
by owner to avoid single processes hogging the RPC layer by loading the
queues.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/sched.h |   2 -
 net/sunrpc/sched.c           | 109 +++++++++++++++++++++----------------------
 2 files changed, 54 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 8840a420cf4c..7b540c066594 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -189,7 +189,6 @@ struct rpc_timer {
 struct rpc_wait_queue {
 	spinlock_t		lock;
 	struct list_head	tasks[RPC_NR_PRIORITY];	/* task queue for each priority level */
-	pid_t			owner;			/* process id of last task serviced */
 	unsigned char		maxpriority;		/* maximum priority (0 if queue is not a priority queue) */
 	unsigned char		priority;		/* current priority */
 	unsigned char		nr;			/* # tasks remaining for cookie */
@@ -205,7 +204,6 @@ struct rpc_wait_queue {
  * from a single cookie.  The aim is to improve
  * performance of NFS operations such as read/write.
  */
-#define RPC_BATCH_COUNT			16
 #define RPC_IS_PRIORITY(q)		((q)->maxpriority > 0)
 
 /*
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 9a8ec012b449..57ca5bead1cb 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -99,64 +99,78 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
 	list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
 }
 
-static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue)
-{
-	struct list_head *q = &queue->tasks[queue->priority];
-	struct rpc_task *task;
-
-	if (!list_empty(q)) {
-		task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
-		if (task->tk_owner == queue->owner)
-			list_move_tail(&task->u.tk_wait.list, q);
-	}
-}
-
 static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
 {
 	if (queue->priority != priority) {
-		/* Fairness: rotate the list when changing priority */
-		rpc_rotate_queue_owner(queue);
 		queue->priority = priority;
+		queue->nr = 1U << priority;
 	}
 }
 
-static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
-{
-	queue->owner = pid;
-	queue->nr = RPC_BATCH_COUNT;
-}
-
 static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
 {
 	rpc_set_waitqueue_priority(queue, queue->maxpriority);
-	rpc_set_waitqueue_owner(queue, 0);
 }
 
 /*
- * Add new request to a priority queue.
+ * Add a request to a queue list
  */
-static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
-		struct rpc_task *task,
-		unsigned char queue_priority)
+static void
+__rpc_list_enqueue_task(struct list_head *q, struct rpc_task *task)
 {
-	struct list_head *q;
 	struct rpc_task *t;
 
-	INIT_LIST_HEAD(&task->u.tk_wait.links);
-	if (unlikely(queue_priority > queue->maxpriority))
-		queue_priority = queue->maxpriority;
-	if (queue_priority > queue->priority)
-		rpc_set_waitqueue_priority(queue, queue_priority);
-	q = &queue->tasks[queue_priority];
 	list_for_each_entry(t, q, u.tk_wait.list) {
 		if (t->tk_owner == task->tk_owner) {
-			list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
+			list_add_tail(&task->u.tk_wait.links,
+					&t->u.tk_wait.links);
+			/* Cache the queue head in task->u.tk_wait.list */
+			task->u.tk_wait.list.next = q;
+			task->u.tk_wait.list.prev = NULL;
 			return;
 		}
 	}
+	INIT_LIST_HEAD(&task->u.tk_wait.links);
 	list_add_tail(&task->u.tk_wait.list, q);
 }
 
+/*
+ * Remove request from a queue list
+ */
+static void
+__rpc_list_dequeue_task(struct rpc_task *task)
+{
+	struct list_head *q;
+	struct rpc_task *t;
+
+	if (task->u.tk_wait.list.prev == NULL) {
+		list_del(&task->u.tk_wait.links);
+		return;
+	}
+	if (!list_empty(&task->u.tk_wait.links)) {
+		t = list_first_entry(&task->u.tk_wait.links,
+				struct rpc_task,
+				u.tk_wait.links);
+		/* Assume __rpc_list_enqueue_task() cached the queue head */
+		q = t->u.tk_wait.list.next;
+		list_add_tail(&t->u.tk_wait.list, q);
+		list_del(&task->u.tk_wait.links);
+	}
+	list_del(&task->u.tk_wait.list);
+}
+
+/*
+ * Add new request to a priority queue.
+ */
+static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
+		struct rpc_task *task,
+		unsigned char queue_priority)
+{
+	if (unlikely(queue_priority > queue->maxpriority))
+		queue_priority = queue->maxpriority;
+	__rpc_list_enqueue_task(&queue->tasks[queue_priority], task);
+}
+
 /*
  * Add new request to wait queue.
  *
@@ -194,13 +208,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
  */
 static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
 {
-	struct rpc_task *t;
-
-	if (!list_empty(&task->u.tk_wait.links)) {
-		t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
-		list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
-		list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
-	}
+	__rpc_list_dequeue_task(task);
 }
 
 /*
@@ -212,7 +220,8 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas
 	__rpc_disable_timer(queue, task);
 	if (RPC_IS_PRIORITY(queue))
 		__rpc_remove_wait_queue_priority(task);
-	list_del(&task->u.tk_wait.list);
+	else
+		list_del(&task->u.tk_wait.list);
 	queue->qlen--;
 	dprintk("RPC: %5u removed from queue %p \"%s\"\n",
 			task->tk_pid, queue, rpc_qname(queue));
@@ -545,17 +554,9 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
 	 * Service a batch of tasks from a single owner.
 	 */
 	q = &queue->tasks[queue->priority];
-	if (!list_empty(q)) {
-		task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
-		if (queue->owner == task->tk_owner) {
-			if (--queue->nr)
-				goto out;
-			list_move_tail(&task->u.tk_wait.list, q);
-		}
-		/*
-		 * Check if we need to switch queues.
-		 */
-		goto new_owner;
+	if (!list_empty(q) && --queue->nr) {
+		task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
+		goto out;
 	}
 
 	/*
@@ -567,7 +568,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
 		else
 			q = q - 1;
 		if (!list_empty(q)) {
-			task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
+			task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
 			goto new_queue;
 		}
 	} while (q != &queue->tasks[queue->priority]);
@@ -577,8 +578,6 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
 
 new_queue:
 	rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
-new_owner:
-	rpc_set_waitqueue_owner(queue, task->tk_owner);
 out:
 	return task;
 }
-- 
cgit v1.2.3


From 431f6eb3570f286036bc8718a908a283f5d99473 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Sun, 16 Sep 2018 00:08:20 -0400
Subject: SUNRPC: Add a label for RPC calls that require allocation on receive

If the RPC call relies on the receive call allocating pages as buffers,
then let's label it so that we
a) Don't leak memory by allocating pages for requests that do not expect
   this behaviour
b) Can optimise for the common case where calls do not require allocation.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs3xdr.c                  | 4 +++-
 include/linux/sunrpc/xdr.h        | 1 +
 net/sunrpc/auth_gss/gss_rpc_xdr.c | 1 +
 net/sunrpc/socklib.c              | 2 +-
 4 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 64e4fa33d89f..d8c4c10b15f7 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -1364,10 +1364,12 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
 
 	encode_nfs_fh3(xdr, args->fh);
 	encode_uint32(xdr, args->mask);
-	if (args->mask & (NFS_ACL | NFS_DFACL))
+	if (args->mask & (NFS_ACL | NFS_DFACL)) {
 		prepare_reply_buffer(req, args->pages, 0,
 					NFSACL_MAXPAGES << PAGE_SHIFT,
 					ACL3_getaclres_sz);
+		req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
+	}
 }
 
 static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 2bd68177a442..431829233392 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -58,6 +58,7 @@ struct xdr_buf {
 			flags;		/* Flags for data disposition */
 #define XDRBUF_READ		0x01		/* target of file read */
 #define XDRBUF_WRITE		0x02		/* source of file write */
+#define XDRBUF_SPARSE_PAGES	0x04		/* Page array is sparse */
 
 	unsigned int	buflen,		/* Total length of storage buffer */
 			len;		/* Length of XDR encoded message */
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 444380f968f1..006062ad5f58 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -784,6 +784,7 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req,
 	xdr_inline_pages(&req->rq_rcv_buf,
 		PAGE_SIZE/2 /* pretty arbitrary */,
 		arg->pages, 0 /* page base */, arg->npages * PAGE_SIZE);
+	req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
 done:
 	if (err)
 		dprintk("RPC:       gssx_enc_accept_sec_context: %d\n", err);
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index f217c348b341..08f00a98151f 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -104,7 +104,7 @@ ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct
 
 		/* ACL likes to be lazy in allocating pages - ACLs
 		 * are small by default but can get huge. */
-		if (unlikely(*ppage == NULL)) {
+		if ((xdr->flags & XDRBUF_SPARSE_PAGES) && *ppage == NULL) {
 			*ppage = alloc_page(GFP_ATOMIC);
 			if (unlikely(*ppage == NULL)) {
 				if (copied == 0)
-- 
cgit v1.2.3


From 9d96acbc7f376dc1ffcedca0c349dd3389187a38 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Thu, 13 Sep 2018 12:22:04 -0400
Subject: SUNRPC: Add a bvec array to struct xdr_buf for use with iovec_iter()

Add a bvec array to struct xdr_buf, and have the client allocate it
when we need to receive data into pages.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xdr.h  |  7 +++++++
 include/linux/sunrpc/xprt.h |  2 ++
 net/sunrpc/clnt.c           |  4 +++-
 net/sunrpc/xdr.c            | 34 ++++++++++++++++++++++++++++++++++
 net/sunrpc/xprt.c           | 17 +++++++++++++++++
 5 files changed, 63 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 431829233392..745587132a87 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -18,6 +18,7 @@
 #include <asm/unaligned.h>
 #include <linux/scatterlist.h>
 
+struct bio_vec;
 struct rpc_rqst;
 
 /*
@@ -52,6 +53,7 @@ struct xdr_buf {
 	struct kvec	head[1],	/* RPC header + non-page data */
 			tail[1];	/* Appended after page data */
 
+	struct bio_vec	*bvec;
 	struct page **	pages;		/* Array of pages */
 	unsigned int	page_base,	/* Start of page data */
 			page_len,	/* Length of page data */
@@ -70,6 +72,8 @@ xdr_buf_init(struct xdr_buf *buf, void *start, size_t len)
 	buf->head[0].iov_base = start;
 	buf->head[0].iov_len = len;
 	buf->tail[0].iov_len = 0;
+	buf->bvec = NULL;
+	buf->pages = NULL;
 	buf->page_len = 0;
 	buf->flags = 0;
 	buf->len = 0;
@@ -116,6 +120,9 @@ __be32 *xdr_decode_netobj(__be32 *p, struct xdr_netobj *);
 void	xdr_inline_pages(struct xdr_buf *, unsigned int,
 			 struct page **, unsigned int, unsigned int);
 void	xdr_terminate_string(struct xdr_buf *, const u32);
+size_t	xdr_buf_pagecount(struct xdr_buf *buf);
+int	xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp);
+void	xdr_free_bvec(struct xdr_buf *buf);
 
 static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len)
 {
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 9be399020dab..a4ab4f8d9140 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -141,6 +141,7 @@ struct rpc_xprt_ops {
 	void		(*connect)(struct rpc_xprt *xprt, struct rpc_task *task);
 	int		(*buf_alloc)(struct rpc_task *task);
 	void		(*buf_free)(struct rpc_task *task);
+	void		(*prepare_request)(struct rpc_rqst *req);
 	int		(*send_request)(struct rpc_rqst *req);
 	void		(*set_retrans_timeout)(struct rpc_task *task);
 	void		(*timer)(struct rpc_xprt *xprt, struct rpc_task *task);
@@ -343,6 +344,7 @@ int			xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
 void			xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
 void			xprt_free_slot(struct rpc_xprt *xprt,
 				       struct rpc_rqst *req);
+void			xprt_request_prepare(struct rpc_rqst *req);
 bool			xprt_prepare_transmit(struct rpc_task *task);
 void			xprt_request_enqueue_transmit(struct rpc_task *task);
 void			xprt_request_enqueue_receive(struct rpc_task *task);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 0c4b2e7d791f..ae3b8145da35 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1753,6 +1753,8 @@ rpc_xdr_encode(struct rpc_task *task)
 
 	task->tk_status = rpcauth_wrap_req(task, encode, req, p,
 			task->tk_msg.rpc_argp);
+	if (task->tk_status == 0)
+		xprt_request_prepare(req);
 }
 
 /*
@@ -1768,7 +1770,7 @@ call_encode(struct rpc_task *task)
 	/* Did the encode result in an error condition? */
 	if (task->tk_status != 0) {
 		/* Was the error nonfatal? */
-		if (task->tk_status == -EAGAIN)
+		if (task->tk_status == -EAGAIN || task->tk_status == -ENOMEM)
 			rpc_delay(task, HZ >> 4);
 		else
 			rpc_exit(task, task->tk_status);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 30afbd236656..2bbb8d38d2bf 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -15,6 +15,7 @@
 #include <linux/errno.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/msg_prot.h>
+#include <linux/bvec.h>
 
 /*
  * XDR functions for basic NFS types
@@ -128,6 +129,39 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len)
 }
 EXPORT_SYMBOL_GPL(xdr_terminate_string);
 
+size_t
+xdr_buf_pagecount(struct xdr_buf *buf)
+{
+	if (!buf->page_len)
+		return 0;
+	return (buf->page_base + buf->page_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+}
+
+int
+xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp)
+{
+	size_t i, n = xdr_buf_pagecount(buf);
+
+	if (n != 0 && buf->bvec == NULL) {
+		buf->bvec = kmalloc_array(n, sizeof(buf->bvec[0]), gfp);
+		if (!buf->bvec)
+			return -ENOMEM;
+		for (i = 0; i < n; i++) {
+			buf->bvec[i].bv_page = buf->pages[i];
+			buf->bvec[i].bv_len = PAGE_SIZE;
+			buf->bvec[i].bv_offset = 0;
+		}
+	}
+	return 0;
+}
+
+void
+xdr_free_bvec(struct xdr_buf *buf)
+{
+	kfree(buf->bvec);
+	buf->bvec = NULL;
+}
+
 void
 xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
 		 struct page **pages, unsigned int base, unsigned int len)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 7333874c6595..7ee9f1e996db 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1263,6 +1263,22 @@ xprt_request_dequeue_transmit(struct rpc_task *task)
 	spin_unlock(&xprt->queue_lock);
 }
 
+/**
+ * xprt_request_prepare - prepare an encoded request for transport
+ * @req: pointer to rpc_rqst
+ *
+ * Calls into the transport layer to do whatever is needed to prepare
+ * the request for transmission or receive.
+ */
+void
+xprt_request_prepare(struct rpc_rqst *req)
+{
+	struct rpc_xprt *xprt = req->rq_xprt;
+
+	if (xprt->ops->prepare_request)
+		xprt->ops->prepare_request(req);
+}
+
 /**
  * xprt_request_need_retransmit - Test if a task needs retransmission
  * @task: pointer to rpc_task
@@ -1727,6 +1743,7 @@ void xprt_release(struct rpc_task *task)
 	if (req->rq_buffer)
 		xprt->ops->buf_free(task);
 	xprt_inject_disconnect(xprt);
+	xdr_free_bvec(&req->rq_rcv_buf);
 	if (req->rq_cred != NULL)
 		put_rpccred(req->rq_cred);
 	task->tk_rqstp = NULL;
-- 
cgit v1.2.3


From 277e4ab7d530bf287e02b65cfcd3ea8f489784f6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 14 Sep 2018 09:49:06 -0400
Subject: SUNRPC: Simplify TCP receive code by switching to using iterators

Most of this code should also be reusable with other socket types.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xprtsock.h |  19 +-
 include/trace/events/sunrpc.h   |  15 +-
 net/sunrpc/xprtsock.c           | 697 +++++++++++++++++++---------------------
 3 files changed, 338 insertions(+), 393 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h
index 005cfb6e7238..458bfe0137f5 100644
--- a/include/linux/sunrpc/xprtsock.h
+++ b/include/linux/sunrpc/xprtsock.h
@@ -31,15 +31,16 @@ struct sock_xprt {
 	 * State of TCP reply receive
 	 */
 	struct {
-		__be32		fraghdr,
+		struct {
+			__be32	fraghdr,
 				xid,
 				calldir;
+		} __attribute__((packed));
 
 		u32		offset,
 				len;
 
-		unsigned long	copied,
-				flags;
+		unsigned long	copied;
 	} recv;
 
 	/*
@@ -76,21 +77,9 @@ struct sock_xprt {
 	void			(*old_error_report)(struct sock *);
 };
 
-/*
- * TCP receive state flags
- */
-#define TCP_RCV_LAST_FRAG	(1UL << 0)
-#define TCP_RCV_COPY_FRAGHDR	(1UL << 1)
-#define TCP_RCV_COPY_XID	(1UL << 2)
-#define TCP_RCV_COPY_DATA	(1UL << 3)
-#define TCP_RCV_READ_CALLDIR	(1UL << 4)
-#define TCP_RCV_COPY_CALLDIR	(1UL << 5)
-
 /*
  * TCP RPC flags
  */
-#define TCP_RPC_REPLY		(1UL << 6)
-
 #define XPRT_SOCK_CONNECTING	1U
 #define XPRT_SOCK_DATA_READY	(2)
 #define XPRT_SOCK_UPD_TIMEOUT	(3)
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 0aa347194e0f..19e08d12696c 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -497,16 +497,6 @@ TRACE_EVENT(xs_tcp_data_ready,
 			__get_str(port), __entry->err, __entry->total)
 );
 
-#define rpc_show_sock_xprt_flags(flags) \
-	__print_flags(flags, "|", \
-		{ TCP_RCV_LAST_FRAG, "TCP_RCV_LAST_FRAG" }, \
-		{ TCP_RCV_COPY_FRAGHDR, "TCP_RCV_COPY_FRAGHDR" }, \
-		{ TCP_RCV_COPY_XID, "TCP_RCV_COPY_XID" }, \
-		{ TCP_RCV_COPY_DATA, "TCP_RCV_COPY_DATA" }, \
-		{ TCP_RCV_READ_CALLDIR, "TCP_RCV_READ_CALLDIR" }, \
-		{ TCP_RCV_COPY_CALLDIR, "TCP_RCV_COPY_CALLDIR" }, \
-		{ TCP_RPC_REPLY, "TCP_RPC_REPLY" })
-
 TRACE_EVENT(xs_tcp_data_recv,
 	TP_PROTO(struct sock_xprt *xs),
 
@@ -516,7 +506,6 @@ TRACE_EVENT(xs_tcp_data_recv,
 		__string(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR])
 		__string(port, xs->xprt.address_strings[RPC_DISPLAY_PORT])
 		__field(u32, xid)
-		__field(unsigned long, flags)
 		__field(unsigned long, copied)
 		__field(unsigned int, reclen)
 		__field(unsigned long, offset)
@@ -526,15 +515,13 @@ TRACE_EVENT(xs_tcp_data_recv,
 		__assign_str(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]);
 		__assign_str(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]);
 		__entry->xid = be32_to_cpu(xs->recv.xid);
-		__entry->flags = xs->recv.flags;
 		__entry->copied = xs->recv.copied;
 		__entry->reclen = xs->recv.len;
 		__entry->offset = xs->recv.offset;
 	),
 
-	TP_printk("peer=[%s]:%s xid=0x%08x flags=%s copied=%lu reclen=%u offset=%lu",
+	TP_printk("peer=[%s]:%s xid=0x%08x copied=%lu reclen=%u offset=%lu",
 			__get_str(addr), __get_str(port), __entry->xid,
-			rpc_show_sock_xprt_flags(__entry->flags),
 			__entry->copied, __entry->reclen, __entry->offset)
 );
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index f16406228ead..06aa75008708 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -47,13 +47,13 @@
 #include <net/checksum.h>
 #include <net/udp.h>
 #include <net/tcp.h>
+#include <linux/bvec.h>
+#include <linux/uio.h>
 
 #include <trace/events/sunrpc.h>
 
 #include "sunrpc.h"
 
-#define RPC_TCP_READ_CHUNK_SZ	(3*512*1024)
-
 static void xs_close(struct rpc_xprt *xprt);
 static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
 		struct socket *sock);
@@ -325,6 +325,323 @@ static void xs_free_peer_addresses(struct rpc_xprt *xprt)
 		}
 }
 
+static size_t
+xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
+{
+	size_t i,n;
+
+	if (!(buf->flags & XDRBUF_SPARSE_PAGES))
+		return want;
+	if (want > buf->page_len)
+		want = buf->page_len;
+	n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	for (i = 0; i < n; i++) {
+		if (buf->pages[i])
+			continue;
+		buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp);
+		if (!buf->pages[i]) {
+			buf->page_len = (i * PAGE_SIZE) - buf->page_base;
+			return buf->page_len;
+		}
+	}
+	return want;
+}
+
+static ssize_t
+xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek)
+{
+	ssize_t ret;
+	if (seek != 0)
+		iov_iter_advance(&msg->msg_iter, seek);
+	ret = sock_recvmsg(sock, msg, flags);
+	return ret > 0 ? ret + seek : ret;
+}
+
+static ssize_t
+xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags,
+		struct kvec *kvec, size_t count, size_t seek)
+{
+	iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, kvec, 1, count);
+	return xs_sock_recvmsg(sock, msg, flags, seek);
+}
+
+static ssize_t
+xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags,
+		struct bio_vec *bvec, unsigned long nr, size_t count,
+		size_t seek)
+{
+	iov_iter_bvec(&msg->msg_iter, READ | ITER_BVEC, bvec, nr, count);
+	return xs_sock_recvmsg(sock, msg, flags, seek);
+}
+
+static ssize_t
+xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
+		size_t count)
+{
+	struct kvec kvec = { 0 };
+	return xs_read_kvec(sock, msg, flags | MSG_TRUNC, &kvec, count, 0);
+}
+
+static ssize_t
+xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
+		struct xdr_buf *buf, size_t count, size_t seek, size_t *read)
+{
+	size_t want, seek_init = seek, offset = 0;
+	ssize_t ret;
+
+	if (seek < buf->head[0].iov_len) {
+		want = min_t(size_t, count, buf->head[0].iov_len);
+		ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek);
+		if (ret <= 0)
+			goto sock_err;
+		offset += ret;
+		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+			goto out;
+		if (ret != want)
+			goto eagain;
+		seek = 0;
+	} else {
+		seek -= buf->head[0].iov_len;
+		offset += buf->head[0].iov_len;
+	}
+	if (seek < buf->page_len) {
+		want = xs_alloc_sparse_pages(buf,
+				min_t(size_t, count - offset, buf->page_len),
+				GFP_NOWAIT);
+		ret = xs_read_bvec(sock, msg, flags, buf->bvec,
+				xdr_buf_pagecount(buf),
+				want + buf->page_base,
+				seek + buf->page_base);
+		if (ret <= 0)
+			goto sock_err;
+		offset += ret - buf->page_base;
+		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+			goto out;
+		if (ret != want)
+			goto eagain;
+		seek = 0;
+	} else {
+		seek -= buf->page_len;
+		offset += buf->page_len;
+	}
+	if (seek < buf->tail[0].iov_len) {
+		want = min_t(size_t, count - offset, buf->tail[0].iov_len);
+		ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
+		if (ret <= 0)
+			goto sock_err;
+		offset += ret;
+		if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+			goto out;
+		if (ret != want)
+			goto eagain;
+	} else
+		offset += buf->tail[0].iov_len;
+	ret = -EMSGSIZE;
+	msg->msg_flags |= MSG_TRUNC;
+out:
+	*read = offset - seek_init;
+	return ret;
+eagain:
+	ret = -EAGAIN;
+	goto out;
+sock_err:
+	offset += seek;
+	goto out;
+}
+
+static void
+xs_read_header(struct sock_xprt *transport, struct xdr_buf *buf)
+{
+	if (!transport->recv.copied) {
+		if (buf->head[0].iov_len >= transport->recv.offset)
+			memcpy(buf->head[0].iov_base,
+					&transport->recv.xid,
+					transport->recv.offset);
+		transport->recv.copied = transport->recv.offset;
+	}
+}
+
+static bool
+xs_read_stream_request_done(struct sock_xprt *transport)
+{
+	return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT);
+}
+
+static ssize_t
+xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
+		int flags, struct rpc_rqst *req)
+{
+	struct xdr_buf *buf = &req->rq_private_buf;
+	size_t want, read;
+	ssize_t ret;
+
+	xs_read_header(transport, buf);
+
+	want = transport->recv.len - transport->recv.offset;
+	ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
+			transport->recv.copied + want, transport->recv.copied,
+			&read);
+	transport->recv.offset += read;
+	transport->recv.copied += read;
+	if (transport->recv.offset == transport->recv.len) {
+		if (xs_read_stream_request_done(transport))
+			msg->msg_flags |= MSG_EOR;
+		return transport->recv.copied;
+	}
+
+	switch (ret) {
+	case -EMSGSIZE:
+		return transport->recv.copied;
+	case 0:
+		return -ESHUTDOWN;
+	default:
+		if (ret < 0)
+			return ret;
+	}
+	return -EAGAIN;
+}
+
+static size_t
+xs_read_stream_headersize(bool isfrag)
+{
+	if (isfrag)
+		return sizeof(__be32);
+	return 3 * sizeof(__be32);
+}
+
+static ssize_t
+xs_read_stream_header(struct sock_xprt *transport, struct msghdr *msg,
+		int flags, size_t want, size_t seek)
+{
+	struct kvec kvec = {
+		.iov_base = &transport->recv.fraghdr,
+		.iov_len = want,
+	};
+	return xs_read_kvec(transport->sock, msg, flags, &kvec, want, seek);
+}
+
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static ssize_t
+xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+	struct rpc_xprt *xprt = &transport->xprt;
+	struct rpc_rqst *req;
+	ssize_t ret;
+
+	/* Look up and lock the request corresponding to the given XID */
+	req = xprt_lookup_bc_request(xprt, transport->recv.xid);
+	if (!req) {
+		printk(KERN_WARNING "Callback slot table overflowed\n");
+		return -ESHUTDOWN;
+	}
+
+	ret = xs_read_stream_request(transport, msg, flags, req);
+	if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+		xprt_complete_bc_request(req, ret);
+
+	return ret;
+}
+#else /* CONFIG_SUNRPC_BACKCHANNEL */
+static ssize_t
+xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+	return -ESHUTDOWN;
+}
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
+static ssize_t
+xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+	struct rpc_xprt *xprt = &transport->xprt;
+	struct rpc_rqst *req;
+	ssize_t ret = 0;
+
+	/* Look up and lock the request corresponding to the given XID */
+	spin_lock(&xprt->queue_lock);
+	req = xprt_lookup_rqst(xprt, transport->recv.xid);
+	if (!req) {
+		msg->msg_flags |= MSG_TRUNC;
+		goto out;
+	}
+	xprt_pin_rqst(req);
+	spin_unlock(&xprt->queue_lock);
+
+	ret = xs_read_stream_request(transport, msg, flags, req);
+
+	spin_lock(&xprt->queue_lock);
+	if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+		xprt_complete_rqst(req->rq_task, ret);
+	xprt_unpin_rqst(req);
+out:
+	spin_unlock(&xprt->queue_lock);
+	return ret;
+}
+
+static ssize_t
+xs_read_stream(struct sock_xprt *transport, int flags)
+{
+	struct msghdr msg = { 0 };
+	size_t want, read = 0;
+	ssize_t ret = 0;
+
+	if (transport->recv.len == 0) {
+		want = xs_read_stream_headersize(transport->recv.copied != 0);
+		ret = xs_read_stream_header(transport, &msg, flags, want,
+				transport->recv.offset);
+		if (ret <= 0)
+			goto out_err;
+		transport->recv.offset = ret;
+		if (ret != want) {
+			ret = -EAGAIN;
+			goto out_err;
+		}
+		transport->recv.len = be32_to_cpu(transport->recv.fraghdr) &
+			RPC_FRAGMENT_SIZE_MASK;
+		transport->recv.offset -= sizeof(transport->recv.fraghdr);
+		read = ret;
+	}
+
+	switch (be32_to_cpu(transport->recv.calldir)) {
+	case RPC_CALL:
+		ret = xs_read_stream_call(transport, &msg, flags);
+		break;
+	case RPC_REPLY:
+		ret = xs_read_stream_reply(transport, &msg, flags);
+	}
+	if (msg.msg_flags & MSG_TRUNC) {
+		transport->recv.calldir = cpu_to_be32(-1);
+		transport->recv.copied = -1;
+	}
+	if (ret < 0)
+		goto out_err;
+	read += ret;
+	if (transport->recv.offset < transport->recv.len) {
+		ret = xs_read_discard(transport->sock, &msg, flags,
+				transport->recv.len - transport->recv.offset);
+		if (ret <= 0)
+			goto out_err;
+		transport->recv.offset += ret;
+		read += ret;
+		if (transport->recv.offset != transport->recv.len)
+			return -EAGAIN;
+	}
+	if (xs_read_stream_request_done(transport)) {
+		trace_xs_tcp_data_recv(transport);
+		transport->recv.copied = 0;
+	}
+	transport->recv.offset = 0;
+	transport->recv.len = 0;
+	return read;
+out_err:
+	switch (ret) {
+	case 0:
+	case -ESHUTDOWN:
+		xprt_force_disconnect(&transport->xprt);
+		return -ESHUTDOWN;
+	}
+	return ret;
+}
+
 #define XS_SENDMSG_FLAGS	(MSG_DONTWAIT | MSG_NOSIGNAL)
 
 static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
@@ -484,6 +801,12 @@ static int xs_nospace(struct rpc_rqst *req)
 	return ret;
 }
 
+static void
+xs_stream_prepare_request(struct rpc_rqst *req)
+{
+	req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_NOIO);
+}
+
 /*
  * Determine if the previous message in the stream was aborted before it
  * could complete transmission.
@@ -1157,263 +1480,7 @@ static void xs_tcp_force_close(struct rpc_xprt *xprt)
 	xprt_force_disconnect(xprt);
 }
 
-static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
-{
-	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
-	size_t len, used;
-	char *p;
-
-	p = ((char *) &transport->recv.fraghdr) + transport->recv.offset;
-	len = sizeof(transport->recv.fraghdr) - transport->recv.offset;
-	used = xdr_skb_read_bits(desc, p, len);
-	transport->recv.offset += used;
-	if (used != len)
-		return;
-
-	transport->recv.len = ntohl(transport->recv.fraghdr);
-	if (transport->recv.len & RPC_LAST_STREAM_FRAGMENT)
-		transport->recv.flags |= TCP_RCV_LAST_FRAG;
-	else
-		transport->recv.flags &= ~TCP_RCV_LAST_FRAG;
-	transport->recv.len &= RPC_FRAGMENT_SIZE_MASK;
-
-	transport->recv.flags &= ~TCP_RCV_COPY_FRAGHDR;
-	transport->recv.offset = 0;
-
-	/* Sanity check of the record length */
-	if (unlikely(transport->recv.len < 8)) {
-		dprintk("RPC:       invalid TCP record fragment length\n");
-		xs_tcp_force_close(xprt);
-		return;
-	}
-	dprintk("RPC:       reading TCP record fragment of length %d\n",
-			transport->recv.len);
-}
-
-static void xs_tcp_check_fraghdr(struct sock_xprt *transport)
-{
-	if (transport->recv.offset == transport->recv.len) {
-		transport->recv.flags |= TCP_RCV_COPY_FRAGHDR;
-		transport->recv.offset = 0;
-		if (transport->recv.flags & TCP_RCV_LAST_FRAG) {
-			transport->recv.flags &= ~TCP_RCV_COPY_DATA;
-			transport->recv.flags |= TCP_RCV_COPY_XID;
-			transport->recv.copied = 0;
-		}
-	}
-}
-
-static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_reader *desc)
-{
-	size_t len, used;
-	char *p;
-
-	len = sizeof(transport->recv.xid) - transport->recv.offset;
-	dprintk("RPC:       reading XID (%zu bytes)\n", len);
-	p = ((char *) &transport->recv.xid) + transport->recv.offset;
-	used = xdr_skb_read_bits(desc, p, len);
-	transport->recv.offset += used;
-	if (used != len)
-		return;
-	transport->recv.flags &= ~TCP_RCV_COPY_XID;
-	transport->recv.flags |= TCP_RCV_READ_CALLDIR;
-	transport->recv.copied = 4;
-	dprintk("RPC:       reading %s XID %08x\n",
-			(transport->recv.flags & TCP_RPC_REPLY) ? "reply for"
-							      : "request with",
-			ntohl(transport->recv.xid));
-	xs_tcp_check_fraghdr(transport);
-}
-
-static inline void xs_tcp_read_calldir(struct sock_xprt *transport,
-				       struct xdr_skb_reader *desc)
-{
-	size_t len, used;
-	u32 offset;
-	char *p;
-
-	/*
-	 * We want transport->recv.offset to be 8 at the end of this routine
-	 * (4 bytes for the xid and 4 bytes for the call/reply flag).
-	 * When this function is called for the first time,
-	 * transport->recv.offset is 4 (after having already read the xid).
-	 */
-	offset = transport->recv.offset - sizeof(transport->recv.xid);
-	len = sizeof(transport->recv.calldir) - offset;
-	dprintk("RPC:       reading CALL/REPLY flag (%zu bytes)\n", len);
-	p = ((char *) &transport->recv.calldir) + offset;
-	used = xdr_skb_read_bits(desc, p, len);
-	transport->recv.offset += used;
-	if (used != len)
-		return;
-	transport->recv.flags &= ~TCP_RCV_READ_CALLDIR;
-	/*
-	 * We don't yet have the XDR buffer, so we will write the calldir
-	 * out after we get the buffer from the 'struct rpc_rqst'
-	 */
-	switch (ntohl(transport->recv.calldir)) {
-	case RPC_REPLY:
-		transport->recv.flags |= TCP_RCV_COPY_CALLDIR;
-		transport->recv.flags |= TCP_RCV_COPY_DATA;
-		transport->recv.flags |= TCP_RPC_REPLY;
-		break;
-	case RPC_CALL:
-		transport->recv.flags |= TCP_RCV_COPY_CALLDIR;
-		transport->recv.flags |= TCP_RCV_COPY_DATA;
-		transport->recv.flags &= ~TCP_RPC_REPLY;
-		break;
-	default:
-		dprintk("RPC:       invalid request message type\n");
-		xs_tcp_force_close(&transport->xprt);
-	}
-	xs_tcp_check_fraghdr(transport);
-}
-
-static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
-				     struct xdr_skb_reader *desc,
-				     struct rpc_rqst *req)
-{
-	struct sock_xprt *transport =
-				container_of(xprt, struct sock_xprt, xprt);
-	struct xdr_buf *rcvbuf;
-	size_t len;
-	ssize_t r;
-
-	rcvbuf = &req->rq_private_buf;
-
-	if (transport->recv.flags & TCP_RCV_COPY_CALLDIR) {
-		/*
-		 * Save the RPC direction in the XDR buffer
-		 */
-		memcpy(rcvbuf->head[0].iov_base + transport->recv.copied,
-			&transport->recv.calldir,
-			sizeof(transport->recv.calldir));
-		transport->recv.copied += sizeof(transport->recv.calldir);
-		transport->recv.flags &= ~TCP_RCV_COPY_CALLDIR;
-	}
-
-	len = desc->count;
-	if (len > transport->recv.len - transport->recv.offset)
-		desc->count = transport->recv.len - transport->recv.offset;
-	r = xdr_partial_copy_from_skb(rcvbuf, transport->recv.copied,
-					  desc, xdr_skb_read_bits);
-
-	if (desc->count) {
-		/* Error when copying to the receive buffer,
-		 * usually because we weren't able to allocate
-		 * additional buffer pages. All we can do now
-		 * is turn off TCP_RCV_COPY_DATA, so the request
-		 * will not receive any additional updates,
-		 * and time out.
-		 * Any remaining data from this record will
-		 * be discarded.
-		 */
-		transport->recv.flags &= ~TCP_RCV_COPY_DATA;
-		dprintk("RPC:       XID %08x truncated request\n",
-				ntohl(transport->recv.xid));
-		dprintk("RPC:       xprt = %p, recv.copied = %lu, "
-				"recv.offset = %u, recv.len = %u\n",
-				xprt, transport->recv.copied,
-				transport->recv.offset, transport->recv.len);
-		return;
-	}
-
-	transport->recv.copied += r;
-	transport->recv.offset += r;
-	desc->count = len - r;
-
-	dprintk("RPC:       XID %08x read %zd bytes\n",
-			ntohl(transport->recv.xid), r);
-	dprintk("RPC:       xprt = %p, recv.copied = %lu, recv.offset = %u, "
-			"recv.len = %u\n", xprt, transport->recv.copied,
-			transport->recv.offset, transport->recv.len);
-
-	if (transport->recv.copied == req->rq_private_buf.buflen)
-		transport->recv.flags &= ~TCP_RCV_COPY_DATA;
-	else if (transport->recv.offset == transport->recv.len) {
-		if (transport->recv.flags & TCP_RCV_LAST_FRAG)
-			transport->recv.flags &= ~TCP_RCV_COPY_DATA;
-	}
-}
-
-/*
- * Finds the request corresponding to the RPC xid and invokes the common
- * tcp read code to read the data.
- */
-static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
-				    struct xdr_skb_reader *desc)
-{
-	struct sock_xprt *transport =
-				container_of(xprt, struct sock_xprt, xprt);
-	struct rpc_rqst *req;
-
-	dprintk("RPC:       read reply XID %08x\n", ntohl(transport->recv.xid));
-
-	/* Find and lock the request corresponding to this xid */
-	spin_lock(&xprt->queue_lock);
-	req = xprt_lookup_rqst(xprt, transport->recv.xid);
-	if (!req) {
-		dprintk("RPC:       XID %08x request not found!\n",
-				ntohl(transport->recv.xid));
-		spin_unlock(&xprt->queue_lock);
-		return -1;
-	}
-	xprt_pin_rqst(req);
-	spin_unlock(&xprt->queue_lock);
-
-	xs_tcp_read_common(xprt, desc, req);
-
-	spin_lock(&xprt->queue_lock);
-	if (!(transport->recv.flags & TCP_RCV_COPY_DATA))
-		xprt_complete_rqst(req->rq_task, transport->recv.copied);
-	xprt_unpin_rqst(req);
-	spin_unlock(&xprt->queue_lock);
-	return 0;
-}
-
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)
-/*
- * Obtains an rpc_rqst previously allocated and invokes the common
- * tcp read code to read the data.  The result is placed in the callback
- * queue.
- * If we're unable to obtain the rpc_rqst we schedule the closing of the
- * connection and return -1.
- */
-static int xs_tcp_read_callback(struct rpc_xprt *xprt,
-				       struct xdr_skb_reader *desc)
-{
-	struct sock_xprt *transport =
-				container_of(xprt, struct sock_xprt, xprt);
-	struct rpc_rqst *req;
-
-	/* Look up the request corresponding to the given XID */
-	req = xprt_lookup_bc_request(xprt, transport->recv.xid);
-	if (req == NULL) {
-		printk(KERN_WARNING "Callback slot table overflowed\n");
-		xprt_force_disconnect(xprt);
-		return -1;
-	}
-
-	dprintk("RPC:       read callback  XID %08x\n", ntohl(req->rq_xid));
-	xs_tcp_read_common(xprt, desc, req);
-
-	if (!(transport->recv.flags & TCP_RCV_COPY_DATA))
-		xprt_complete_bc_request(req, transport->recv.copied);
-
-	return 0;
-}
-
-static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
-					struct xdr_skb_reader *desc)
-{
-	struct sock_xprt *transport =
-				container_of(xprt, struct sock_xprt, xprt);
-
-	return (transport->recv.flags & TCP_RPC_REPLY) ?
-		xs_tcp_read_reply(xprt, desc) :
-		xs_tcp_read_callback(xprt, desc);
-}
-
 static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net)
 {
 	int ret;
@@ -1429,106 +1496,14 @@ static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
 {
 	return PAGE_SIZE;
 }
-#else
-static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
-					struct xdr_skb_reader *desc)
-{
-	return xs_tcp_read_reply(xprt, desc);
-}
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
-/*
- * Read data off the transport.  This can be either an RPC_CALL or an
- * RPC_REPLY.  Relay the processing to helper functions.
- */
-static void xs_tcp_read_data(struct rpc_xprt *xprt,
-				    struct xdr_skb_reader *desc)
-{
-	struct sock_xprt *transport =
-				container_of(xprt, struct sock_xprt, xprt);
-
-	if (_xs_tcp_read_data(xprt, desc) == 0)
-		xs_tcp_check_fraghdr(transport);
-	else {
-		/*
-		 * The transport_lock protects the request handling.
-		 * There's no need to hold it to update the recv.flags.
-		 */
-		transport->recv.flags &= ~TCP_RCV_COPY_DATA;
-	}
-}
-
-static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc)
-{
-	size_t len;
-
-	len = transport->recv.len - transport->recv.offset;
-	if (len > desc->count)
-		len = desc->count;
-	desc->count -= len;
-	desc->offset += len;
-	transport->recv.offset += len;
-	dprintk("RPC:       discarded %zu bytes\n", len);
-	xs_tcp_check_fraghdr(transport);
-}
-
-static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len)
-{
-	struct rpc_xprt *xprt = rd_desc->arg.data;
-	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
-	struct xdr_skb_reader desc = {
-		.skb	= skb,
-		.offset	= offset,
-		.count	= len,
-	};
-	size_t ret;
-
-	dprintk("RPC:       xs_tcp_data_recv started\n");
-	do {
-		trace_xs_tcp_data_recv(transport);
-		/* Read in a new fragment marker if necessary */
-		/* Can we ever really expect to get completely empty fragments? */
-		if (transport->recv.flags & TCP_RCV_COPY_FRAGHDR) {
-			xs_tcp_read_fraghdr(xprt, &desc);
-			continue;
-		}
-		/* Read in the xid if necessary */
-		if (transport->recv.flags & TCP_RCV_COPY_XID) {
-			xs_tcp_read_xid(transport, &desc);
-			continue;
-		}
-		/* Read in the call/reply flag */
-		if (transport->recv.flags & TCP_RCV_READ_CALLDIR) {
-			xs_tcp_read_calldir(transport, &desc);
-			continue;
-		}
-		/* Read in the request data */
-		if (transport->recv.flags & TCP_RCV_COPY_DATA) {
-			xs_tcp_read_data(xprt, &desc);
-			continue;
-		}
-		/* Skip over any trailing bytes on short reads */
-		xs_tcp_read_discard(transport, &desc);
-	} while (desc.count);
-	ret = len - desc.count;
-	if (ret < rd_desc->count)
-		rd_desc->count -= ret;
-	else
-		rd_desc->count = 0;
-	trace_xs_tcp_data_recv(transport);
-	dprintk("RPC:       xs_tcp_data_recv done\n");
-	return ret;
-}
-
 static void xs_tcp_data_receive(struct sock_xprt *transport)
 {
 	struct rpc_xprt *xprt = &transport->xprt;
 	struct sock *sk;
-	read_descriptor_t rd_desc = {
-		.arg.data = xprt,
-	};
-	unsigned long total = 0;
-	int read = 0;
+	size_t read = 0;
+	ssize_t ret = 0;
 
 restart:
 	mutex_lock(&transport->recv_mutex);
@@ -1536,18 +1511,12 @@ restart:
 	if (sk == NULL)
 		goto out;
 
-	/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
 	for (;;) {
-		rd_desc.count = RPC_TCP_READ_CHUNK_SZ;
-		lock_sock(sk);
-		read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
-		if (rd_desc.count != 0 || read < 0) {
-			clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
-			release_sock(sk);
+		clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+		ret = xs_read_stream(transport, MSG_DONTWAIT | MSG_NOSIGNAL);
+		if (ret < 0)
 			break;
-		}
-		release_sock(sk);
-		total += read;
+		read += ret;
 		if (need_resched()) {
 			mutex_unlock(&transport->recv_mutex);
 			cond_resched();
@@ -1558,7 +1527,7 @@ restart:
 		queue_work(xprtiod_workqueue, &transport->recv_worker);
 out:
 	mutex_unlock(&transport->recv_mutex);
-	trace_xs_tcp_data_ready(xprt, read, total);
+	trace_xs_tcp_data_ready(xprt, ret, read);
 }
 
 static void xs_tcp_data_receive_workfn(struct work_struct *work)
@@ -2380,7 +2349,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 	transport->recv.offset = 0;
 	transport->recv.len = 0;
 	transport->recv.copied = 0;
-	transport->recv.flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
 	transport->xmit.offset = 0;
 
 	/* Tell the socket layer to start connecting... */
@@ -2802,6 +2770,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
 	.connect		= xs_connect,
 	.buf_alloc		= rpc_malloc,
 	.buf_free		= rpc_free,
+	.prepare_request	= xs_stream_prepare_request,
 	.send_request		= xs_tcp_send_request,
 	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
 	.close			= xs_tcp_shutdown,
-- 
cgit v1.2.3


From 550aebfe1c573518c35ae85d6ffbdc2d44c92703 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 14 Sep 2018 14:32:45 -0400
Subject: SUNRPC: Allow AF_LOCAL sockets to use the generic stream receive

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xdr.h |   1 -
 net/sunrpc/socklib.c       |   4 +-
 net/sunrpc/xprtsock.c      | 137 ++++++---------------------------------------
 3 files changed, 18 insertions(+), 124 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 745587132a87..8815be7cae72 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -185,7 +185,6 @@ struct xdr_skb_reader {
 
 typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len);
 
-size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len);
 extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *);
 extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
 		struct xdr_skb_reader *, xdr_skb_read_actor);
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 08f00a98151f..0e7c0dee7578 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -26,7 +26,8 @@
  * Possibly called several times to iterate over an sk_buff and copy
  * data out of it.
  */
-size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
+static size_t
+xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
 {
 	if (len > desc->count)
 		len = desc->count;
@@ -36,7 +37,6 @@ size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
 	desc->offset += len;
 	return len;
 }
-EXPORT_SYMBOL_GPL(xdr_skb_read_bits);
 
 /**
  * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 55df1fadab27..90d4c92177b7 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -670,6 +670,17 @@ static void xs_stream_data_receive_workfn(struct work_struct *work)
 	xs_stream_data_receive(transport);
 }
 
+static void
+xs_stream_reset_connect(struct sock_xprt *transport)
+{
+	transport->recv.offset = 0;
+	transport->recv.len = 0;
+	transport->recv.copied = 0;
+	transport->xmit.offset = 0;
+	transport->xprt.stat.connect_count++;
+	transport->xprt.stat.connect_start = jiffies;
+}
+
 #define XS_SENDMSG_FLAGS	(MSG_DONTWAIT | MSG_NOSIGNAL)
 
 static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
@@ -1266,114 +1277,6 @@ static void xs_destroy(struct rpc_xprt *xprt)
 	module_put(THIS_MODULE);
 }
 
-static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
-{
-	struct xdr_skb_reader desc = {
-		.skb		= skb,
-		.offset		= sizeof(rpc_fraghdr),
-		.count		= skb->len - sizeof(rpc_fraghdr),
-	};
-
-	if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
-		return -1;
-	if (desc.count)
-		return -1;
-	return 0;
-}
-
-/**
- * xs_local_data_read_skb
- * @xprt: transport
- * @sk: socket
- * @skb: skbuff
- *
- * Currently this assumes we can read the whole reply in a single gulp.
- */
-static void xs_local_data_read_skb(struct rpc_xprt *xprt,
-		struct sock *sk,
-		struct sk_buff *skb)
-{
-	struct rpc_task *task;
-	struct rpc_rqst *rovr;
-	int repsize, copied;
-	u32 _xid;
-	__be32 *xp;
-
-	repsize = skb->len - sizeof(rpc_fraghdr);
-	if (repsize < 4) {
-		dprintk("RPC:       impossible RPC reply size %d\n", repsize);
-		return;
-	}
-
-	/* Copy the XID from the skb... */
-	xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid);
-	if (xp == NULL)
-		return;
-
-	/* Look up and lock the request corresponding to the given XID */
-	spin_lock(&xprt->queue_lock);
-	rovr = xprt_lookup_rqst(xprt, *xp);
-	if (!rovr)
-		goto out_unlock;
-	xprt_pin_rqst(rovr);
-	spin_unlock(&xprt->queue_lock);
-	task = rovr->rq_task;
-
-	copied = rovr->rq_private_buf.buflen;
-	if (copied > repsize)
-		copied = repsize;
-
-	if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) {
-		dprintk("RPC:       sk_buff copy failed\n");
-		spin_lock(&xprt->queue_lock);
-		goto out_unpin;
-	}
-
-	spin_lock(&xprt->queue_lock);
-	xprt_complete_rqst(task, copied);
-out_unpin:
-	xprt_unpin_rqst(rovr);
- out_unlock:
-	spin_unlock(&xprt->queue_lock);
-}
-
-static void xs_local_data_receive(struct sock_xprt *transport)
-{
-	struct sk_buff *skb;
-	struct sock *sk;
-	int err;
-
-restart:
-	mutex_lock(&transport->recv_mutex);
-	sk = transport->inet;
-	if (sk == NULL)
-		goto out;
-	for (;;) {
-		skb = skb_recv_datagram(sk, 0, 1, &err);
-		if (skb != NULL) {
-			xs_local_data_read_skb(&transport->xprt, sk, skb);
-			skb_free_datagram(sk, skb);
-			continue;
-		}
-		if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
-			break;
-		if (need_resched()) {
-			mutex_unlock(&transport->recv_mutex);
-			cond_resched();
-			goto restart;
-		}
-	}
-out:
-	mutex_unlock(&transport->recv_mutex);
-}
-
-static void xs_local_data_receive_workfn(struct work_struct *work)
-{
-	struct sock_xprt *transport =
-		container_of(work, struct sock_xprt, recv_worker);
-	xs_local_data_receive(transport);
-}
-
 /**
  * xs_udp_data_read_skb - receive callback for UDP sockets
  * @xprt: transport
@@ -1974,11 +1877,8 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
 		write_unlock_bh(&sk->sk_callback_lock);
 	}
 
-	transport->xmit.offset = 0;
+	xs_stream_reset_connect(transport);
 
-	/* Tell the socket layer to start connecting... */
-	xprt->stat.connect_count++;
-	xprt->stat.connect_start = jiffies;
 	return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0);
 }
 
@@ -2335,14 +2235,9 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 	xs_set_memalloc(xprt);
 
 	/* Reset TCP record info */
-	transport->recv.offset = 0;
-	transport->recv.len = 0;
-	transport->recv.copied = 0;
-	transport->xmit.offset = 0;
+	xs_stream_reset_connect(transport);
 
 	/* Tell the socket layer to start connecting... */
-	xprt->stat.connect_count++;
-	xprt->stat.connect_start = jiffies;
 	set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
 	ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
 	switch (ret) {
@@ -2717,6 +2612,7 @@ static const struct rpc_xprt_ops xs_local_ops = {
 	.connect		= xs_local_connect,
 	.buf_alloc		= rpc_malloc,
 	.buf_free		= rpc_free,
+	.prepare_request	= xs_stream_prepare_request,
 	.send_request		= xs_local_send_request,
 	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
 	.close			= xs_close,
@@ -2901,9 +2797,8 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
 	xprt->ops = &xs_local_ops;
 	xprt->timeout = &xs_local_default_timeout;
 
-	INIT_WORK(&transport->recv_worker, xs_local_data_receive_workfn);
-	INIT_DELAYED_WORK(&transport->connect_worker,
-			xs_dummy_setup_socket);
+	INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
+	INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket);
 
 	switch (sun->sun_family) {
 	case AF_LOCAL:
-- 
cgit v1.2.3


From ec846469ba7bdb81e42c04e4e15d8fbf19e426e2 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 14 Sep 2018 14:38:05 -0400
Subject: SUNRPC: Unexport xdr_partial_copy_from_skb()

It is no longer used outside of net/sunrpc/socklib.c

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/xdr.h | 2 --
 net/sunrpc/socklib.c       | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 8815be7cae72..43106ffa6788 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -186,8 +186,6 @@ struct xdr_skb_reader {
 typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len);
 
 extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *);
-extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
-		struct xdr_skb_reader *, xdr_skb_read_actor);
 
 extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32);
 extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *);
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 0e7c0dee7578..9062967575c4 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -69,7 +69,8 @@ static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to,
  * @copy_actor: virtual method for copying data
  *
  */
-ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
+static ssize_t
+xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
 {
 	struct page	**ppage = xdr->pages;
 	unsigned int	len, pglen = xdr->page_len;
@@ -140,7 +141,6 @@ copy_tail:
 out:
 	return copied;
 }
-EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb);
 
 /**
  * csum_partial_copy_to_xdr - checksum and copy data
-- 
cgit v1.2.3


From 1db97eaa0b482a738c715da6edb023d6f99e50b0 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Sun, 2 Sep 2018 15:11:57 -0400
Subject: NFS: Convert lookups of the lock context to RCU

Speed up lookups of an existing lock context by avoiding the inode->i_lock,
and using RCU instead.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/inode.c         | 25 ++++++++++++-------------
 include/linux/nfs_fs.h |  1 +
 2 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b65aee481d13..09b3b7146ff4 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -857,15 +857,14 @@ static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
 
 static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
 {
-	struct nfs_lock_context *head = &ctx->lock_context;
-	struct nfs_lock_context *pos = head;
+	struct nfs_lock_context *pos;
 
-	do {
+	list_for_each_entry_rcu(pos, &ctx->lock_context.list, list) {
 		if (pos->lockowner != current->files)
 			continue;
-		refcount_inc(&pos->count);
-		return pos;
-	} while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head);
+		if (refcount_inc_not_zero(&pos->count))
+			return pos;
+	}
 	return NULL;
 }
 
@@ -874,10 +873,10 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
 	struct nfs_lock_context *res, *new = NULL;
 	struct inode *inode = d_inode(ctx->dentry);
 
-	spin_lock(&inode->i_lock);
+	rcu_read_lock();
 	res = __nfs_find_lock_context(ctx);
+	rcu_read_unlock();
 	if (res == NULL) {
-		spin_unlock(&inode->i_lock);
 		new = kmalloc(sizeof(*new), GFP_KERNEL);
 		if (new == NULL)
 			return ERR_PTR(-ENOMEM);
@@ -885,14 +884,14 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
 		spin_lock(&inode->i_lock);
 		res = __nfs_find_lock_context(ctx);
 		if (res == NULL) {
-			list_add_tail(&new->list, &ctx->lock_context.list);
+			list_add_tail_rcu(&new->list, &ctx->lock_context.list);
 			new->open_context = ctx;
 			res = new;
 			new = NULL;
 		}
+		spin_unlock(&inode->i_lock);
+		kfree(new);
 	}
-	spin_unlock(&inode->i_lock);
-	kfree(new);
 	return res;
 }
 EXPORT_SYMBOL_GPL(nfs_get_lock_context);
@@ -904,9 +903,9 @@ void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
 
 	if (!refcount_dec_and_lock(&l_ctx->count, &inode->i_lock))
 		return;
-	list_del(&l_ctx->list);
+	list_del_rcu(&l_ctx->list);
 	spin_unlock(&inode->i_lock);
-	kfree(l_ctx);
+	kfree_rcu(l_ctx, rcu_head);
 }
 EXPORT_SYMBOL_GPL(nfs_put_lock_context);
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index a0831e9d19c9..d2f4f88a0e66 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -62,6 +62,7 @@ struct nfs_lock_context {
 	struct nfs_open_context *open_context;
 	fl_owner_t lockowner;
 	atomic_t io_count;
+	struct rcu_head	rcu_head;
 };
 
 struct nfs4_state;
-- 
cgit v1.2.3


From 0de43976fbe716379084f954b1e370c35aa87bf0 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Sun, 2 Sep 2018 15:57:01 -0400
Subject: NFS: Convert lookups of the open context to RCU

Reduce contention on the inode->i_lock by ensuring that we use RCU
when looking up the NFS open context.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/delegation.c    | 11 ++++++-----
 fs/nfs/inode.c         | 35 +++++++++++++++--------------------
 fs/nfs/nfs4proc.c      | 30 ++++++++++++++++++++++++------
 fs/nfs/nfs4state.c     | 12 ++++++------
 fs/nfs/pnfs.c          |  5 ++++-
 include/linux/nfs_fs.h |  1 +
 6 files changed, 56 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index f033f3a69a3b..76d205d1c7bc 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -136,8 +136,8 @@ static int nfs_delegation_claim_opens(struct inode *inode,
 	int err;
 
 again:
-	spin_lock(&inode->i_lock);
-	list_for_each_entry(ctx, &nfsi->open_files, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
 		state = ctx->state;
 		if (state == NULL)
 			continue;
@@ -147,8 +147,9 @@ again:
 			continue;
 		if (!nfs4_stateid_match(&state->stateid, stateid))
 			continue;
-		get_nfs_open_context(ctx);
-		spin_unlock(&inode->i_lock);
+		if (!get_nfs_open_context(ctx))
+			continue;
+		rcu_read_unlock();
 		sp = state->owner;
 		/* Block nfs4_proc_unlck */
 		mutex_lock(&sp->so_delegreturn_mutex);
@@ -164,7 +165,7 @@ again:
 			return err;
 		goto again;
 	}
-	spin_unlock(&inode->i_lock);
+	rcu_read_unlock();
 	return 0;
 }
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 052db41a7f80..5b1eee4952b7 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -977,9 +977,9 @@ EXPORT_SYMBOL_GPL(alloc_nfs_open_context);
 
 struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
 {
-	if (ctx != NULL)
-		refcount_inc(&ctx->lock_context.count);
-	return ctx;
+	if (ctx != NULL && refcount_inc_not_zero(&ctx->lock_context.count))
+		return ctx;
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(get_nfs_open_context);
 
@@ -988,13 +988,13 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
 	struct inode *inode = d_inode(ctx->dentry);
 	struct super_block *sb = ctx->dentry->d_sb;
 
+	if (!refcount_dec_and_test(&ctx->lock_context.count))
+		return;
 	if (!list_empty(&ctx->list)) {
-		if (!refcount_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
-			return;
-		list_del(&ctx->list);
+		spin_lock(&inode->i_lock);
+		list_del_rcu(&ctx->list);
 		spin_unlock(&inode->i_lock);
-	} else if (!refcount_dec_and_test(&ctx->lock_context.count))
-		return;
+	}
 	if (inode != NULL)
 		NFS_PROTO(inode)->close_context(ctx, is_sync);
 	if (ctx->cred != NULL)
@@ -1002,7 +1002,7 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
 	dput(ctx->dentry);
 	nfs_sb_deactive(sb);
 	kfree(ctx->mdsthreshold);
-	kfree(ctx);
+	kfree_rcu(ctx, rcu_head);
 }
 
 void put_nfs_open_context(struct nfs_open_context *ctx)
@@ -1026,10 +1026,7 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	spin_lock(&inode->i_lock);
-	if (ctx->mode & FMODE_WRITE)
-		list_add(&ctx->list, &nfsi->open_files);
-	else
-		list_add_tail(&ctx->list, &nfsi->open_files);
+	list_add_tail_rcu(&ctx->list, &nfsi->open_files);
 	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context);
@@ -1050,16 +1047,17 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_open_context *pos, *ctx = NULL;
 
-	spin_lock(&inode->i_lock);
-	list_for_each_entry(pos, &nfsi->open_files, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(pos, &nfsi->open_files, list) {
 		if (cred != NULL && pos->cred != cred)
 			continue;
 		if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode)
 			continue;
 		ctx = get_nfs_open_context(pos);
-		break;
+		if (ctx)
+			break;
 	}
-	spin_unlock(&inode->i_lock);
+	rcu_read_unlock();
 	return ctx;
 }
 
@@ -1077,9 +1075,6 @@ void nfs_file_clear_open_context(struct file *filp)
 		if (ctx->error < 0)
 			invalidate_inode_pages2(inode->i_mapping);
 		filp->private_data = NULL;
-		spin_lock(&inode->i_lock);
-		list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
-		spin_unlock(&inode->i_lock);
 		put_nfs_open_context_sync(ctx);
 	}
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8220a168282e..10c20a5b075d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1933,23 +1933,41 @@ nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
 	return ret;
 }
 
-static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
+static struct nfs_open_context *
+nfs4_state_find_open_context_mode(struct nfs4_state *state, fmode_t mode)
 {
 	struct nfs_inode *nfsi = NFS_I(state->inode);
 	struct nfs_open_context *ctx;
 
-	spin_lock(&state->inode->i_lock);
-	list_for_each_entry(ctx, &nfsi->open_files, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
 		if (ctx->state != state)
 			continue;
-		get_nfs_open_context(ctx);
-		spin_unlock(&state->inode->i_lock);
+		if ((ctx->mode & mode) != mode)
+			continue;
+		if (!get_nfs_open_context(ctx))
+			continue;
+		rcu_read_unlock();
 		return ctx;
 	}
-	spin_unlock(&state->inode->i_lock);
+	rcu_read_unlock();
 	return ERR_PTR(-ENOENT);
 }
 
+static struct nfs_open_context *
+nfs4_state_find_open_context(struct nfs4_state *state)
+{
+	struct nfs_open_context *ctx;
+
+	ctx = nfs4_state_find_open_context_mode(state, FMODE_READ|FMODE_WRITE);
+	if (!IS_ERR(ctx))
+		return ctx;
+	ctx = nfs4_state_find_open_context_mode(state, FMODE_WRITE);
+	if (!IS_ERR(ctx))
+		return ctx;
+	return nfs4_state_find_open_context_mode(state, FMODE_READ);
+}
+
 static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx,
 		struct nfs4_state *state, enum open_claim_type4 claim)
 {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 40a08cd483f0..be92ce4259e9 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1437,8 +1437,8 @@ void nfs_inode_find_state_and_recover(struct inode *inode,
 	struct nfs4_state *state;
 	bool found = false;
 
-	spin_lock(&inode->i_lock);
-	list_for_each_entry(ctx, &nfsi->open_files, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
 		state = ctx->state;
 		if (state == NULL)
 			continue;
@@ -1456,7 +1456,7 @@ void nfs_inode_find_state_and_recover(struct inode *inode,
 		    nfs4_state_mark_reclaim_nograce(clp, state))
 			found = true;
 	}
-	spin_unlock(&inode->i_lock);
+	rcu_read_unlock();
 
 	nfs_inode_find_delegation_state_and_recover(inode, stateid);
 	if (found)
@@ -1469,13 +1469,13 @@ static void nfs4_state_mark_open_context_bad(struct nfs4_state *state)
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_open_context *ctx;
 
-	spin_lock(&inode->i_lock);
-	list_for_each_entry(ctx, &nfsi->open_files, list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
 		if (ctx->state != state)
 			continue;
 		set_bit(NFS_CONTEXT_BAD, &ctx->flags);
 	}
-	spin_unlock(&inode->i_lock);
+	rcu_read_unlock();
 }
 
 static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c5672c02afd6..06cb90e9bc6e 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1339,6 +1339,7 @@ bool pnfs_roc(struct inode *ino,
 	if (!nfs_have_layout(ino))
 		return false;
 retry:
+	rcu_read_lock();
 	spin_lock(&ino->i_lock);
 	lo = nfsi->layout;
 	if (!lo || !pnfs_layout_is_valid(lo) ||
@@ -1349,6 +1350,7 @@ retry:
 	pnfs_get_layout_hdr(lo);
 	if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
 		spin_unlock(&ino->i_lock);
+		rcu_read_unlock();
 		wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
 				TASK_UNINTERRUPTIBLE);
 		pnfs_put_layout_hdr(lo);
@@ -1362,7 +1364,7 @@ retry:
 		skip_read = true;
 	}
 
-	list_for_each_entry(ctx, &nfsi->open_files, list) {
+	list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
 		state = ctx->state;
 		if (state == NULL)
 			continue;
@@ -1410,6 +1412,7 @@ retry:
 
 out_noroc:
 	spin_unlock(&ino->i_lock);
+	rcu_read_unlock();
 	pnfs_layoutcommit_inode(ino, true);
 	if (roc) {
 		struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d2f4f88a0e66..6e0417c02279 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -83,6 +83,7 @@ struct nfs_open_context {
 
 	struct list_head list;
 	struct nfs4_threshold	*mdsthreshold;
+	struct rcu_head	rcu_head;
 };
 
 struct nfs_open_dir_context {
-- 
cgit v1.2.3


From 8d8928d87960d71f898767185b8c0e4ce3de3cbe Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@primarydata.com>
Date: Mon, 5 Mar 2018 12:03:00 -0500
Subject: NFSv3: Improve NFSv3 performance when server returns no post-op
 attributes

When the server fails to return post-op attributes, the client's
attempt to place read data directly in the page cache fails, and
so we have to do an extra copy in order to realign the data with
page borders.
This patch attempts to detect servers that don't return post-op
attributes on read (e.g. for pNFS) and adjusts the placement
calculation accordingly.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/nfs3proc.c         | 5 +++++
 fs/nfs/nfs3xdr.c          | 6 +++++-
 include/linux/nfs_fs_sb.h | 3 +++
 include/linux/nfs_xdr.h   | 3 ++-
 4 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index ec8a9efa268f..71bc16225b98 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -786,6 +786,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 {
 	struct inode *inode = hdr->inode;
+	struct nfs_server *server = NFS_SERVER(inode);
 
 	if (hdr->pgio_done_cb != NULL)
 		return hdr->pgio_done_cb(task, hdr);
@@ -793,6 +794,9 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 	if (nfs3_async_handle_jukebox(task, inode))
 		return -EAGAIN;
 
+	if (task->tk_status >= 0 && !server->read_hdrsize)
+		cmpxchg(&server->read_hdrsize, 0, hdr->res.replen);
+
 	nfs_invalidate_atime(inode);
 	nfs_refresh_inode(inode, &hdr->fattr);
 	return 0;
@@ -802,6 +806,7 @@ static void nfs3_proc_read_setup(struct nfs_pgio_header *hdr,
 				 struct rpc_message *msg)
 {
 	msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
+	hdr->args.replen = NFS_SERVER(hdr->inode)->read_hdrsize;
 }
 
 static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index d8c4c10b15f7..78df4eb60f85 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -983,10 +983,11 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
 				   const void *data)
 {
 	const struct nfs_pgio_args *args = data;
+	unsigned int replen = args->replen ? args->replen : NFS3_readres_sz;
 
 	encode_read3args(xdr, args);
 	prepare_reply_buffer(req, args->pages, args->pgbase,
-					args->count, NFS3_readres_sz);
+					args->count, replen);
 	req->rq_rcv_buf.flags |= XDRBUF_READ;
 }
 
@@ -1675,9 +1676,11 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 				 void *data)
 {
 	struct nfs_pgio_res *result = data;
+	unsigned int pos;
 	enum nfs_stat status;
 	int error;
 
+	pos = xdr_stream_pos(xdr);
 	error = decode_nfsstat3(xdr, &status);
 	if (unlikely(error))
 		goto out;
@@ -1687,6 +1690,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 	result->op_status = status;
 	if (status != NFS3_OK)
 		goto out_status;
+	result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
 	error = decode_read3resok(xdr, result);
 out:
 	return error;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index bf39d9c92201..0fc0b9135d46 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -228,6 +228,9 @@ struct nfs_server {
 	unsigned short		mountd_port;
 	unsigned short		mountd_protocol;
 	struct rpc_wait_queue	uoc_rpcwaitq;
+
+	/* XDR related information */
+	unsigned int		read_hdrsize;
 };
 
 /* Server capabilities */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index bd1c889a9ed9..7f5535e5e852 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -608,6 +608,7 @@ struct nfs_pgio_args {
 	__u32			count;
 	unsigned int		pgbase;
 	struct page **		pages;
+	unsigned int		replen;		/* used by read */
 	const u32 *		bitmask;	/* used by write */
 	enum nfs3_stable_how	stable;		/* used by write */
 };
@@ -618,9 +619,9 @@ struct nfs_pgio_res {
 	__u32			count;
 	__u32			op_status;
 	int			eof;		/* used by read */
+	unsigned int		replen;		/* used by read */
 	struct nfs_writeverf *	verf;		/* used by write */
 	const struct nfs_server *server;	/* used by write */
-
 };
 
 /*
-- 
cgit v1.2.3


From 28d52235ee25ba7d843242b4cb3c3f27a8828b5a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Mon, 24 Sep 2018 13:15:37 -0400
Subject: NFSv4: Save a few bytes in the nfs_pgio_args/res

Save a few bytes by allowing the read/write specific fields of the
structures to share storage.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/nfs_xdr.h | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 7f5535e5e852..343e44166346 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -608,9 +608,13 @@ struct nfs_pgio_args {
 	__u32			count;
 	unsigned int		pgbase;
 	struct page **		pages;
-	unsigned int		replen;		/* used by read */
-	const u32 *		bitmask;	/* used by write */
-	enum nfs3_stable_how	stable;		/* used by write */
+	union {
+		unsigned int		replen;			/* used by read */
+		struct {
+			const u32 *		bitmask;	/* used by write */
+			enum nfs3_stable_how	stable;		/* used by write */
+		};
+	};
 };
 
 struct nfs_pgio_res {
@@ -618,10 +622,16 @@ struct nfs_pgio_res {
 	struct nfs_fattr *	fattr;
 	__u32			count;
 	__u32			op_status;
-	int			eof;		/* used by read */
-	unsigned int		replen;		/* used by read */
-	struct nfs_writeverf *	verf;		/* used by write */
-	const struct nfs_server *server;	/* used by write */
+	union {
+		struct {
+			unsigned int		replen;		/* used by read */
+			int			eof;		/* used by read */
+		};
+		struct {
+			struct nfs_writeverf *	verf;		/* used by write */
+			const struct nfs_server *server;	/* used by write */
+		};
+	};
 };
 
 /*
-- 
cgit v1.2.3


From 1c6c4b740df12f2162ae5c3fac337137e2776236 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 25 Sep 2018 12:34:43 -0400
Subject: NFS: Remove private spinlock in struct nfs_pgio_header

Now that each struct nfs_pgio_header corresponds to one RPC call, we
only have one writer to the struct nfs_pgio_header.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 fs/nfs/pagelist.c       | 13 ++++++-------
 fs/nfs/read.c           | 10 ++++------
 include/linux/nfs_xdr.h |  5 ++---
 3 files changed, 12 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index bb5476a6d264..f97c455f5734 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -63,14 +63,14 @@ EXPORT_SYMBOL_GPL(nfs_pgheader_init);
 
 void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
 {
-	spin_lock(&hdr->lock);
-	if (!test_and_set_bit(NFS_IOHDR_ERROR, &hdr->flags)
-	    || pos < hdr->io_start + hdr->good_bytes) {
+	unsigned int new = pos - hdr->io_start;
+
+	if (hdr->good_bytes > new) {
+		hdr->good_bytes = new;
 		clear_bit(NFS_IOHDR_EOF, &hdr->flags);
-		hdr->good_bytes = pos - hdr->io_start;
-		hdr->error = error;
+		if (!test_and_set_bit(NFS_IOHDR_ERROR, &hdr->flags))
+			hdr->error = error;
 	}
-	spin_unlock(&hdr->lock);
 }
 
 static inline struct nfs_page *
@@ -494,7 +494,6 @@ struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops)
 
 	if (hdr) {
 		INIT_LIST_HEAD(&hdr->pages);
-		spin_lock_init(&hdr->lock);
 		hdr->rw_ops = ops;
 	}
 	return hdr;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 48d7277c60a9..f9f19784db82 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -276,16 +276,14 @@ static void nfs_readpage_result(struct rpc_task *task,
 				struct nfs_pgio_header *hdr)
 {
 	if (hdr->res.eof) {
-		loff_t bound;
+		loff_t pos = hdr->args.offset + hdr->res.count;
+		unsigned int new = pos - hdr->io_start;
 
-		bound = hdr->args.offset + hdr->res.count;
-		spin_lock(&hdr->lock);
-		if (bound < hdr->io_start + hdr->good_bytes) {
+		if (hdr->good_bytes > new) {
+			hdr->good_bytes = new;
 			set_bit(NFS_IOHDR_EOF, &hdr->flags);
 			clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
-			hdr->good_bytes = bound - hdr->io_start;
 		}
-		spin_unlock(&hdr->lock);
 	} else if (hdr->res.count < hdr->args.count)
 		nfs_readpage_retry(task, hdr);
 }
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 343e44166346..0e016252cfc6 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1482,11 +1482,10 @@ struct nfs_pgio_header {
 	const struct nfs_rw_ops	*rw_ops;
 	struct nfs_io_completion *io_completion;
 	struct nfs_direct_req	*dreq;
-	spinlock_t		lock;
-	/* fields protected by lock */
+
 	int			pnfs_error;
 	int			error;		/* merge with pnfs_error */
-	unsigned long		good_bytes;	/* boundary of good data */
+	unsigned int		good_bytes;	/* boundary of good data */
 	unsigned long		flags;
 
 	/*
-- 
cgit v1.2.3


From 571ed1fd2390f74e4c1f46994f753fb0d29285e4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Sat, 29 Sep 2018 16:00:43 -0400
Subject: SUNRPC: Replace krb5_seq_lock with a lockless scheme

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/gss_krb5.h     |  3 ++-
 net/sunrpc/auth_gss/gss_krb5_seal.c | 37 ++++++++++++++++++++++++++-----------
 net/sunrpc/auth_gss/gss_krb5_wrap.c |  8 ++------
 3 files changed, 30 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 7df625d41e35..69f749afa617 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -118,7 +118,8 @@ struct krb5_ctx {
 	u8			acceptor_integ[GSS_KRB5_MAX_KEYLEN];
 };
 
-extern spinlock_t krb5_seq_lock;
+extern u32 gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx);
+extern u64 gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx);
 
 /* The length of the Kerberos GSS token header */
 #define GSS_KRB5_TOK_HDR_LEN	(16)
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index eaad9bc7a0bd..0ffb797b92e5 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -68,8 +68,6 @@
 # define RPCDBG_FACILITY        RPCDBG_AUTH
 #endif
 
-DEFINE_SPINLOCK(krb5_seq_lock);
-
 static void *
 setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
 {
@@ -124,6 +122,30 @@ setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
 	return krb5_hdr;
 }
 
+u32
+gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx)
+{
+	u32 old, seq_send = READ_ONCE(ctx->seq_send);
+
+	do {
+		old = seq_send;
+		seq_send = cmpxchg(&ctx->seq_send, old, old + 1);
+	} while (old != seq_send);
+	return seq_send;
+}
+
+u64
+gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx)
+{
+	u64 old, seq_send = READ_ONCE(ctx->seq_send);
+
+	do {
+		old = seq_send;
+		seq_send = cmpxchg(&ctx->seq_send64, old, old + 1);
+	} while (old != seq_send);
+	return seq_send;
+}
+
 static u32
 gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
 		struct xdr_netobj *token)
@@ -154,9 +176,7 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
 
 	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
 
-	spin_lock(&krb5_seq_lock);
-	seq_send = ctx->seq_send++;
-	spin_unlock(&krb5_seq_lock);
+	seq_send = gss_seq_send_fetch_and_inc(ctx);
 
 	if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
 			      seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
@@ -174,7 +194,6 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
 				       .data = cksumdata};
 	void *krb5_hdr;
 	s32 now;
-	u64 seq_send;
 	u8 *cksumkey;
 	unsigned int cksum_usage;
 	__be64 seq_send_be64;
@@ -185,11 +204,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
 
 	/* Set up the sequence number. Now 64-bits in clear
 	 * text and w/o direction indicator */
-	spin_lock(&krb5_seq_lock);
-	seq_send = ctx->seq_send64++;
-	spin_unlock(&krb5_seq_lock);
-
-	seq_send_be64 = cpu_to_be64(seq_send);
+	seq_send_be64 = cpu_to_be64(gss_seq_send64_fetch_and_inc(ctx));
 	memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8);
 
 	if (ctx->initiate) {
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 39a2e672900b..41cb294cd071 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -228,9 +228,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 
 	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
 
-	spin_lock(&krb5_seq_lock);
-	seq_send = kctx->seq_send++;
-	spin_unlock(&krb5_seq_lock);
+	seq_send = gss_seq_send_fetch_and_inc(kctx);
 
 	/* XXX would probably be more efficient to compute checksum
 	 * and encrypt at the same time: */
@@ -477,9 +475,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
 	*be16ptr++ = 0;
 
 	be64ptr = (__be64 *)be16ptr;
-	spin_lock(&krb5_seq_lock);
-	*be64ptr = cpu_to_be64(kctx->seq_send64++);
-	spin_unlock(&krb5_seq_lock);
+	*be64ptr = cpu_to_be64(gss_seq_send64_fetch_and_inc(kctx));
 
 	err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages);
 	if (err)
-- 
cgit v1.2.3


From aef716fa5e6da3919cca22ac2097a90d73d8177f Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Thu, 27 Sep 2018 13:55:58 -0700
Subject: RDMA/qedr: Remove enumerated type qed_roce_ll2_tx_dest

Clang warns when one enumerated type is explicitly converted to another.

drivers/infiniband/hw/qedr/qedr_roce_cm.c:198:28: warning: implicit
conversion from enumeration type 'enum qed_roce_ll2_tx_dest' to
different enumeration type 'enum qed_ll2_tx_dest' [-Wenum-conversion]
        ll2_tx_pkt.tx_dest = pkt->tx_dest;
                           ~ ~~~~~^~~~~~~
1 warning generated.

Turns out that QED_ROCE_LL2_TX_DEST_NW and QED_ROCE_LL2_TX_DEST_LB are
only used once in the whole tree and QED_ROCE_LL2_TX_DEST_MAX is used
nowhere. Remove them and use the equivalent values from qed_ll2_tx_dest
in their place.

Reported-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Acked-by: Michal Kalderon <michal.kalderon@cavium.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 drivers/infiniband/hw/qedr/qedr_roce_cm.c |  4 ++--
 include/linux/qed/qed_rdma_if.h           | 11 +----------
 2 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
index 85578887421b..e1ac2fd60bb1 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
@@ -519,9 +519,9 @@ static inline int qedr_gsi_build_packet(struct qedr_dev *dev,
 	}
 
 	if (ether_addr_equal(udh.eth.smac_h, udh.eth.dmac_h))
-		packet->tx_dest = QED_ROCE_LL2_TX_DEST_LB;
+		packet->tx_dest = QED_LL2_TX_DEST_LB;
 	else
-		packet->tx_dest = QED_ROCE_LL2_TX_DEST_NW;
+		packet->tx_dest = QED_LL2_TX_DEST_NW;
 
 	packet->roce_mode = roce_mode;
 	memcpy(packet->header.vaddr, ud_header_buffer, header_size);
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index df4d13f7e191..d15f8e4815e3 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -39,15 +39,6 @@
 #include <linux/qed/qed_ll2_if.h>
 #include <linux/qed/rdma_common.h>
 
-enum qed_roce_ll2_tx_dest {
-	/* Light L2 TX Destination to the Network */
-	QED_ROCE_LL2_TX_DEST_NW,
-
-	/* Light L2 TX Destination to the Loopback */
-	QED_ROCE_LL2_TX_DEST_LB,
-	QED_ROCE_LL2_TX_DEST_MAX
-};
-
 #define QED_RDMA_MAX_CNQ_SIZE               (0xFFFF)
 
 /* rdma interface */
@@ -581,7 +572,7 @@ struct qed_roce_ll2_packet {
 	int n_seg;
 	struct qed_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE];
 	int roce_mode;
-	enum qed_roce_ll2_tx_dest tx_dest;
+	enum qed_ll2_tx_dest tx_dest;
 };
 
 enum qed_rdma_type {
-- 
cgit v1.2.3


From d440c52d3151a28358f4c2d52d8583a0aa54ab83 Mon Sep 17 00:00:00 2001
From: Junling Zheng <zhengjunling@huawei.com>
Date: Fri, 28 Sep 2018 20:25:56 +0800
Subject: f2fs: support superblock checksum

Now we support crc32 checksum for superblock.

Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Junling Zheng <zhengjunling@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h          |  2 ++
 fs/f2fs/super.c         | 28 ++++++++++++++++++++++++++++
 fs/f2fs/sysfs.c         |  7 +++++++
 include/linux/f2fs_fs.h |  3 ++-
 4 files changed, 39 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 19243678d5d9..668836c2d678 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -148,6 +148,7 @@ struct f2fs_mount_info {
 #define F2FS_FEATURE_INODE_CRTIME	0x0100
 #define F2FS_FEATURE_LOST_FOUND		0x0200
 #define F2FS_FEATURE_VERITY		0x0400	/* reserved */
+#define F2FS_FEATURE_SB_CHKSUM		0x0800
 
 #define F2FS_HAS_FEATURE(sb, mask)					\
 	((F2FS_SB(sb)->raw_super->feature & cpu_to_le32(mask)) != 0)
@@ -3431,6 +3432,7 @@ F2FS_FEATURE_FUNCS(flexible_inline_xattr, FLEXIBLE_INLINE_XATTR);
 F2FS_FEATURE_FUNCS(quota_ino, QUOTA_INO);
 F2FS_FEATURE_FUNCS(inode_crtime, INODE_CRTIME);
 F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND);
+F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM);
 
 #ifdef CONFIG_BLK_DEV_ZONED
 static inline int get_blkz_type(struct f2fs_sb_info *sbi,
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 218695e44bd4..a44913224e3b 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2178,6 +2178,26 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
 					(bh->b_data + F2FS_SUPER_OFFSET);
 	struct super_block *sb = sbi->sb;
 	unsigned int blocksize;
+	size_t crc_offset = 0;
+	__u32 crc = 0;
+
+	/* Check checksum_offset and crc in superblock */
+	if (le32_to_cpu(raw_super->feature) & F2FS_FEATURE_SB_CHKSUM) {
+		crc_offset = le32_to_cpu(raw_super->checksum_offset);
+		if (crc_offset !=
+			offsetof(struct f2fs_super_block, crc)) {
+			f2fs_msg(sb, KERN_INFO,
+				"Invalid SB checksum offset: %zu",
+				crc_offset);
+			return 1;
+		}
+		crc = le32_to_cpu(raw_super->crc);
+		if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) {
+			f2fs_msg(sb, KERN_INFO,
+				"Invalid SB checksum value: %u", crc);
+			return 1;
+		}
+	}
 
 	if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
 		f2fs_msg(sb, KERN_INFO,
@@ -2635,6 +2655,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
 int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
 {
 	struct buffer_head *bh;
+	__u32 crc = 0;
 	int err;
 
 	if ((recover && f2fs_readonly(sbi->sb)) ||
@@ -2643,6 +2664,13 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
 		return -EROFS;
 	}
 
+	/* we should update superblock crc here */
+	if (!recover && f2fs_sb_has_sb_chksum(sbi->sb)) {
+		crc = f2fs_crc32(sbi, F2FS_RAW_SUPER(sbi),
+				offsetof(struct f2fs_super_block, crc));
+		F2FS_RAW_SUPER(sbi)->crc = cpu_to_le32(crc);
+	}
+
 	/* write back-up superblock first */
 	bh = sb_bread(sbi->sb, sbi->valid_super_block ? 0 : 1);
 	if (!bh)
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index f5a545437b81..b777cbdd796b 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -117,6 +117,9 @@ static ssize_t features_show(struct f2fs_attr *a,
 	if (f2fs_sb_has_lost_found(sb))
 		len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
 				len ? ", " : "", "lost_found");
+	if (f2fs_sb_has_sb_chksum(sb))
+		len += snprintf(buf + len, PAGE_SIZE - len, "%s%s",
+				len ? ", " : "", "sb_checksum");
 	len += snprintf(buf + len, PAGE_SIZE - len, "\n");
 	return len;
 }
@@ -334,6 +337,7 @@ enum feat_id {
 	FEAT_QUOTA_INO,
 	FEAT_INODE_CRTIME,
 	FEAT_LOST_FOUND,
+	FEAT_SB_CHECKSUM,
 };
 
 static ssize_t f2fs_feature_show(struct f2fs_attr *a,
@@ -350,6 +354,7 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a,
 	case FEAT_QUOTA_INO:
 	case FEAT_INODE_CRTIME:
 	case FEAT_LOST_FOUND:
+	case FEAT_SB_CHECKSUM:
 		return snprintf(buf, PAGE_SIZE, "supported\n");
 	}
 	return 0;
@@ -434,6 +439,7 @@ F2FS_FEATURE_RO_ATTR(flexible_inline_xattr, FEAT_FLEXIBLE_INLINE_XATTR);
 F2FS_FEATURE_RO_ATTR(quota_ino, FEAT_QUOTA_INO);
 F2FS_FEATURE_RO_ATTR(inode_crtime, FEAT_INODE_CRTIME);
 F2FS_FEATURE_RO_ATTR(lost_found, FEAT_LOST_FOUND);
+F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM);
 
 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
 static struct attribute *f2fs_attrs[] = {
@@ -493,6 +499,7 @@ static struct attribute *f2fs_feat_attrs[] = {
 	ATTR_LIST(quota_ino),
 	ATTR_LIST(inode_crtime),
 	ATTR_LIST(lost_found),
+	ATTR_LIST(sb_checksum),
 	NULL,
 };
 
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 1d4b196291d6..1db13ff9a3f4 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -109,7 +109,8 @@ struct f2fs_super_block {
 	struct f2fs_device devs[MAX_DEVICES];	/* device list */
 	__le32 qf_ino[F2FS_MAX_QUOTAS];	/* quota inode numbers */
 	__u8 hot_ext_count;		/* # of hot file extension */
-	__u8 reserved[314];		/* valid reserved region */
+	__u8 reserved[310];		/* valid reserved region */
+	__le32 crc;			/* checksum of superblock */
 } __packed;
 
 /*
-- 
cgit v1.2.3


From cf9af0d5786c008971148f4e06567a98f6a7f9d0 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 27 Sep 2018 13:38:09 +0200
Subject: gpio: Propagate errors from gpiod_set_array_value_complex()

Internal helper function gpiod_set_array_value_complex() was changed to
return an error value, but not all gpiolib callers were updated to
propagate the new error up.

Fixes: 3027743f83f867d8 ("gpio: Remove VLA from gpiolib")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 Documentation/driver-api/gpio/consumer.rst | 16 ++++++++--------
 drivers/gpio/gpiolib.c                     | 30 ++++++++++++++++--------------
 include/linux/gpio/consumer.h              | 28 +++++++++++++++-------------
 3 files changed, 39 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/gpio/consumer.rst b/Documentation/driver-api/gpio/consumer.rst
index ba4973829fae..5e4d8aa68913 100644
--- a/Documentation/driver-api/gpio/consumer.rst
+++ b/Documentation/driver-api/gpio/consumer.rst
@@ -340,18 +340,18 @@ The following functions get or set the values of an array of GPIOs::
 					   struct gpio_array *array_info,
 					   unsigned long *value_bitmap);
 
-	void gpiod_set_array_value(unsigned int array_size,
-				   struct gpio_desc **desc_array,
-				   struct gpio_array *array_info,
-				   unsigned long *value_bitmap)
+	int gpiod_set_array_value(unsigned int array_size,
+				  struct gpio_desc **desc_array,
+				  struct gpio_array *array_info,
+				  unsigned long *value_bitmap)
 	int gpiod_set_raw_array_value(unsigned int array_size,
 				      struct gpio_desc **desc_array,
 				      struct gpio_array *array_info,
 				      unsigned long *value_bitmap)
-	void gpiod_set_array_value_cansleep(unsigned int array_size,
-					    struct gpio_desc **desc_array,
-					    struct gpio_array *array_info,
-					    unsigned long *value_bitmap)
+	int gpiod_set_array_value_cansleep(unsigned int array_size,
+					   struct gpio_desc **desc_array,
+					   struct gpio_array *array_info,
+					   unsigned long *value_bitmap)
 	int gpiod_set_raw_array_value_cansleep(unsigned int array_size,
 					       struct gpio_desc **desc_array,
 					       struct gpio_array *array_info,
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index a293f14ba75c..51dd9d601cac 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -3346,15 +3346,16 @@ EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value);
  * This function should be called from contexts where we cannot sleep, and will
  * complain if the GPIO chip functions potentially sleep.
  */
-void gpiod_set_array_value(unsigned int array_size,
-			   struct gpio_desc **desc_array,
-			   struct gpio_array *array_info,
-			   unsigned long *value_bitmap)
+int gpiod_set_array_value(unsigned int array_size,
+			  struct gpio_desc **desc_array,
+			  struct gpio_array *array_info,
+			  unsigned long *value_bitmap)
 {
 	if (!desc_array)
-		return;
-	gpiod_set_array_value_complex(false, false, array_size, desc_array,
-				      array_info, value_bitmap);
+		return -EINVAL;
+	return gpiod_set_array_value_complex(false, false, array_size,
+					     desc_array, array_info,
+					     value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_array_value);
 
@@ -3763,16 +3764,17 @@ void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n)
  *
  * This function is to be called from contexts that can sleep.
  */
-void gpiod_set_array_value_cansleep(unsigned int array_size,
-				    struct gpio_desc **desc_array,
-				    struct gpio_array *array_info,
-				    unsigned long *value_bitmap)
+int gpiod_set_array_value_cansleep(unsigned int array_size,
+				   struct gpio_desc **desc_array,
+				   struct gpio_array *array_info,
+				   unsigned long *value_bitmap)
 {
 	might_sleep_if(extra_checks);
 	if (!desc_array)
-		return;
-	gpiod_set_array_value_complex(false, true, array_size, desc_array,
-				      array_info, value_bitmap);
+		return -EINVAL;
+	return gpiod_set_array_value_complex(false, true, array_size,
+					     desc_array, array_info,
+					     value_bitmap);
 }
 EXPORT_SYMBOL_GPL(gpiod_set_array_value_cansleep);
 
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index d7fbe30ece84..136aeca37740 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -117,10 +117,10 @@ int gpiod_get_array_value(unsigned int array_size,
 			  struct gpio_array *array_info,
 			  unsigned long *value_bitmap);
 void gpiod_set_value(struct gpio_desc *desc, int value);
-void gpiod_set_array_value(unsigned int array_size,
-			   struct gpio_desc **desc_array,
-			   struct gpio_array *array_info,
-			   unsigned long *value_bitmap);
+int gpiod_set_array_value(unsigned int array_size,
+			  struct gpio_desc **desc_array,
+			  struct gpio_array *array_info,
+			  unsigned long *value_bitmap);
 int gpiod_get_raw_value(const struct gpio_desc *desc);
 int gpiod_get_raw_array_value(unsigned int array_size,
 			      struct gpio_desc **desc_array,
@@ -139,10 +139,10 @@ int gpiod_get_array_value_cansleep(unsigned int array_size,
 				   struct gpio_array *array_info,
 				   unsigned long *value_bitmap);
 void gpiod_set_value_cansleep(struct gpio_desc *desc, int value);
-void gpiod_set_array_value_cansleep(unsigned int array_size,
-				    struct gpio_desc **desc_array,
-				    struct gpio_array *array_info,
-				    unsigned long *value_bitmap);
+int gpiod_set_array_value_cansleep(unsigned int array_size,
+				   struct gpio_desc **desc_array,
+				   struct gpio_array *array_info,
+				   unsigned long *value_bitmap);
 int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc);
 int gpiod_get_raw_array_value_cansleep(unsigned int array_size,
 				       struct gpio_desc **desc_array,
@@ -361,13 +361,14 @@ static inline void gpiod_set_value(struct gpio_desc *desc, int value)
 	/* GPIO can never have been requested */
 	WARN_ON(1);
 }
-static inline void gpiod_set_array_value(unsigned int array_size,
-					 struct gpio_desc **desc_array,
-					 struct gpio_array *array_info,
-					 unsigned long *value_bitmap)
+static inline int gpiod_set_array_value(unsigned int array_size,
+					struct gpio_desc **desc_array,
+					struct gpio_array *array_info,
+					unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
 	WARN_ON(1);
+	return 0;
 }
 static inline int gpiod_get_raw_value(const struct gpio_desc *desc)
 {
@@ -419,13 +420,14 @@ static inline void gpiod_set_value_cansleep(struct gpio_desc *desc, int value)
 	/* GPIO can never have been requested */
 	WARN_ON(1);
 }
-static inline void gpiod_set_array_value_cansleep(unsigned int array_size,
+static inline int gpiod_set_array_value_cansleep(unsigned int array_size,
 					    struct gpio_desc **desc_array,
 					    struct gpio_array *array_info,
 					    unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
 	WARN_ON(1);
+	return 0;
 }
 static inline int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc)
 {
-- 
cgit v1.2.3


From 3c940660cb1ea6c952bb863040cdacafd5077448 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 27 Sep 2018 13:38:10 +0200
Subject: gpio: Restore indentation of continued lines

Fixes: 3027743f83f867d8 ("gpio: Remove VLA from gpiolib")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c        | 20 ++++++++++----------
 drivers/gpio/gpiolib.h        |  8 ++++----
 include/linux/gpio/consumer.h | 18 +++++++++---------
 3 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 51dd9d601cac..83c6ec7f7634 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -3145,10 +3145,10 @@ static void gpio_chip_set_multiple(struct gpio_chip *chip,
 }
 
 int gpiod_set_array_value_complex(bool raw, bool can_sleep,
-				   unsigned int array_size,
-				   struct gpio_desc **desc_array,
-				   struct gpio_array *array_info,
-				   unsigned long *value_bitmap)
+				  unsigned int array_size,
+				  struct gpio_desc **desc_array,
+				  struct gpio_array *array_info,
+				  unsigned long *value_bitmap)
 {
 	int i = 0;
 
@@ -3322,9 +3322,9 @@ EXPORT_SYMBOL_GPL(gpiod_set_value);
  * complain if the GPIO chip functions potentially sleep.
  */
 int gpiod_set_raw_array_value(unsigned int array_size,
-			 struct gpio_desc **desc_array,
-			 struct gpio_array *array_info,
-			 unsigned long *value_bitmap)
+			      struct gpio_desc **desc_array,
+			      struct gpio_array *array_info,
+			      unsigned long *value_bitmap)
 {
 	if (!desc_array)
 		return -EINVAL;
@@ -3723,9 +3723,9 @@ EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep);
  * This function is to be called from contexts that can sleep.
  */
 int gpiod_set_raw_array_value_cansleep(unsigned int array_size,
-					struct gpio_desc **desc_array,
-					struct gpio_array *array_info,
-					unsigned long *value_bitmap)
+				       struct gpio_desc **desc_array,
+				       struct gpio_array *array_info,
+				       unsigned long *value_bitmap)
 {
 	might_sleep_if(extra_checks);
 	if (!desc_array)
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index 7b5dcef1605b..087d865286a0 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -196,10 +196,10 @@ int gpiod_get_array_value_complex(bool raw, bool can_sleep,
 				  struct gpio_array *array_info,
 				  unsigned long *value_bitmap);
 int gpiod_set_array_value_complex(bool raw, bool can_sleep,
-				   unsigned int array_size,
-				   struct gpio_desc **desc_array,
-				   struct gpio_array *array_info,
-				   unsigned long *value_bitmap);
+				  unsigned int array_size,
+				  struct gpio_desc **desc_array,
+				  struct gpio_array *array_info,
+				  unsigned long *value_bitmap);
 
 /* This is just passed between gpiolib and devres */
 struct gpio_desc *gpiod_get_from_of_node(struct device_node *node,
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 136aeca37740..0f350616d372 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -128,9 +128,9 @@ int gpiod_get_raw_array_value(unsigned int array_size,
 			      unsigned long *value_bitmap);
 void gpiod_set_raw_value(struct gpio_desc *desc, int value);
 int gpiod_set_raw_array_value(unsigned int array_size,
-			       struct gpio_desc **desc_array,
-			       struct gpio_array *array_info,
-			       unsigned long *value_bitmap);
+			      struct gpio_desc **desc_array,
+			      struct gpio_array *array_info,
+			      unsigned long *value_bitmap);
 
 /* Value get/set from sleeping context */
 int gpiod_get_value_cansleep(const struct gpio_desc *desc);
@@ -150,9 +150,9 @@ int gpiod_get_raw_array_value_cansleep(unsigned int array_size,
 				       unsigned long *value_bitmap);
 void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value);
 int gpiod_set_raw_array_value_cansleep(unsigned int array_size,
-					struct gpio_desc **desc_array,
-					struct gpio_array *array_info,
-					unsigned long *value_bitmap);
+				       struct gpio_desc **desc_array,
+				       struct gpio_array *array_info,
+				       unsigned long *value_bitmap);
 
 int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce);
 int gpiod_set_transitory(struct gpio_desc *desc, bool transitory);
@@ -391,9 +391,9 @@ static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value)
 	WARN_ON(1);
 }
 static inline int gpiod_set_raw_array_value(unsigned int array_size,
-					     struct gpio_desc **desc_array,
-					     struct gpio_array *array_info,
-					     unsigned long *value_bitmap)
+					    struct gpio_desc **desc_array,
+					    struct gpio_array *array_info,
+					    unsigned long *value_bitmap)
 {
 	/* GPIO can never have been requested */
 	WARN_ON(1);
-- 
cgit v1.2.3


From 22839869f21ab3850fbbac9b425ccc4c0023926f Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Wed, 5 Sep 2018 15:34:42 +0100
Subject: signal: Introduce COMPAT_SIGMINSTKSZ for use in
 compat_sys_sigaltstack

The sigaltstack(2) system call fails with -ENOMEM if the new alternative
signal stack is found to be smaller than SIGMINSTKSZ. On architectures
such as arm64, where the native value for SIGMINSTKSZ is larger than
the compat value, this can result in an unexpected error being reported
to a compat task. See, for example:

  https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=904385

This patch fixes the problem by extending do_sigaltstack to take the
minimum signal stack size as an additional parameter, allowing the
native and compat system call entry code to pass in their respective
values. COMPAT_SIGMINSTKSZ is just defined as SIGMINSTKSZ if it has not
been defined by the architecture.

Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Oleg Nesterov <oleg@redhat.com>
Reported-by: Steve McIntyre <steve.mcintyre@arm.com>
Tested-by: Steve McIntyre <93sam@debian.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
 include/linux/compat.h |  3 +++
 kernel/signal.c        | 14 +++++++++-----
 2 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 1a3c4f37e908..de0c13bdcd2c 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -103,6 +103,9 @@ typedef struct compat_sigaltstack {
 	compat_size_t			ss_size;
 } compat_stack_t;
 #endif
+#ifndef COMPAT_MINSIGSTKSZ
+#define COMPAT_MINSIGSTKSZ	MINSIGSTKSZ
+#endif
 
 #define compat_jiffies_to_clock_t(x)	\
 		(((unsigned long)(x) * COMPAT_USER_HZ) / HZ)
diff --git a/kernel/signal.c b/kernel/signal.c
index 5843c541fda9..e4aad0e90882 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3460,7 +3460,8 @@ int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 }
 
 static int
-do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp)
+do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp,
+		size_t min_ss_size)
 {
 	struct task_struct *t = current;
 
@@ -3490,7 +3491,7 @@ do_sigaltstack (const stack_t *ss, stack_t *oss, unsigned long sp)
 			ss_size = 0;
 			ss_sp = NULL;
 		} else {
-			if (unlikely(ss_size < MINSIGSTKSZ))
+			if (unlikely(ss_size < min_ss_size))
 				return -ENOMEM;
 		}
 
@@ -3508,7 +3509,8 @@ SYSCALL_DEFINE2(sigaltstack,const stack_t __user *,uss, stack_t __user *,uoss)
 	if (uss && copy_from_user(&new, uss, sizeof(stack_t)))
 		return -EFAULT;
 	err = do_sigaltstack(uss ? &new : NULL, uoss ? &old : NULL,
-			      current_user_stack_pointer());
+			      current_user_stack_pointer(),
+			      MINSIGSTKSZ);
 	if (!err && uoss && copy_to_user(uoss, &old, sizeof(stack_t)))
 		err = -EFAULT;
 	return err;
@@ -3519,7 +3521,8 @@ int restore_altstack(const stack_t __user *uss)
 	stack_t new;
 	if (copy_from_user(&new, uss, sizeof(stack_t)))
 		return -EFAULT;
-	(void)do_sigaltstack(&new, NULL, current_user_stack_pointer());
+	(void)do_sigaltstack(&new, NULL, current_user_stack_pointer(),
+			     MINSIGSTKSZ);
 	/* squash all but EFAULT for now */
 	return 0;
 }
@@ -3553,7 +3556,8 @@ static int do_compat_sigaltstack(const compat_stack_t __user *uss_ptr,
 		uss.ss_size = uss32.ss_size;
 	}
 	ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss,
-			     compat_user_stack_pointer());
+			     compat_user_stack_pointer(),
+			     COMPAT_MINSIGSTKSZ);
 	if (ret >= 0 && uoss_ptr)  {
 		compat_stack_t old;
 		memset(&old, 0, sizeof(old));
-- 
cgit v1.2.3


From 2da274cdf998a1c12afa6b5975db2df1df01edf1 Mon Sep 17 00:00:00 2001
From: Zhen Lei <thunder.leizhen@huawei.com>
Date: Thu, 20 Sep 2018 17:10:22 +0100
Subject: iommu/dma: Add support for non-strict mode

With the flush queue infrastructure already abstracted into IOVA
domains, hooking it up in iommu-dma is pretty simple. Since there is a
degree of dependency on the IOMMU driver knowing what to do to play
along, we key the whole thing off a domain attribute which will be set
on default DMA ops domains to request non-strict invalidation. That way,
drivers can indicate the appropriate support by acknowledging the
attribute, and we can easily fall back to strict invalidation otherwise.

The flush queue callback needs a handle on the iommu_domain which owns
our cookie, so we have to add a pointer back to that, but neatly, that's
also sufficient to indicate whether we're using a flush queue or not,
and thus which way to release IOVAs. The only slight subtlety is
switching __iommu_dma_unmap() from calling iommu_unmap() to explicit
iommu_unmap_fast()/iommu_tlb_sync() so that we can elide the sync
entirely in non-strict mode.

Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
[rm: convert to domain attribute, tweak comments and commit message]
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/dma-iommu.c | 32 +++++++++++++++++++++++++++++++-
 include/linux/iommu.h     |  1 +
 2 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 511ff9a1d6d9..cc1bf786cfac 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -55,6 +55,9 @@ struct iommu_dma_cookie {
 	};
 	struct list_head		msi_page_list;
 	spinlock_t			msi_lock;
+
+	/* Domain for flush queue callback; NULL if flush queue not in use */
+	struct iommu_domain		*fq_domain;
 };
 
 static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
@@ -257,6 +260,20 @@ static int iova_reserve_iommu_regions(struct device *dev,
 	return ret;
 }
 
+static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad)
+{
+	struct iommu_dma_cookie *cookie;
+	struct iommu_domain *domain;
+
+	cookie = container_of(iovad, struct iommu_dma_cookie, iovad);
+	domain = cookie->fq_domain;
+	/*
+	 * The IOMMU driver supporting DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE
+	 * implies that ops->flush_iotlb_all must be non-NULL.
+	 */
+	domain->ops->flush_iotlb_all(domain);
+}
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -275,6 +292,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 	struct iommu_dma_cookie *cookie = domain->iova_cookie;
 	struct iova_domain *iovad = &cookie->iovad;
 	unsigned long order, base_pfn, end_pfn;
+	int attr;
 
 	if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
 		return -EINVAL;
@@ -308,6 +326,13 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 	}
 
 	init_iova_domain(iovad, 1UL << order, base_pfn);
+
+	if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
+			DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) {
+		cookie->fq_domain = domain;
+		init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, NULL);
+	}
+
 	if (!dev)
 		return 0;
 
@@ -393,6 +418,9 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie,
 	/* The MSI case is only ever cleaning up its most recent allocation */
 	if (cookie->type == IOMMU_DMA_MSI_COOKIE)
 		cookie->msi_iova -= size;
+	else if (cookie->fq_domain)	/* non-strict mode */
+		queue_iova(iovad, iova_pfn(iovad, iova),
+				size >> iova_shift(iovad), 0);
 	else
 		free_iova_fast(iovad, iova_pfn(iovad, iova),
 				size >> iova_shift(iovad));
@@ -408,7 +436,9 @@ static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr,
 	dma_addr -= iova_off;
 	size = iova_align(iovad, size + iova_off);
 
-	WARN_ON(iommu_unmap(domain, dma_addr, size) != size);
+	WARN_ON(iommu_unmap_fast(domain, dma_addr, size) != size);
+	if (!cookie->fq_domain)
+		iommu_tlb_sync(domain);
 	iommu_dma_free_iova(cookie, dma_addr, size);
 }
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 87994c265bf5..decabe8e8dbe 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -124,6 +124,7 @@ enum iommu_attr {
 	DOMAIN_ATTR_FSL_PAMU_ENABLE,
 	DOMAIN_ATTR_FSL_PAMUV1,
 	DOMAIN_ATTR_NESTING,	/* two stages of translation */
+	DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
 	DOMAIN_ATTR_MAX,
 };
 
-- 
cgit v1.2.3


From 8ad50c8985d805923f52a80698010a0a5123c07d Mon Sep 17 00:00:00 2001
From: Kristina Martsenko <kristina.martsenko@arm.com>
Date: Wed, 26 Sep 2018 17:32:50 +0100
Subject: vgic: Add support for 52bit guest physical address

Add support for handling 52bit guest physical address to the
VGIC layer. So far we have limited the guest physical address
to 48bits, by explicitly masking the upper bits. This patch
removes the restriction. We do not have to check if the host
supports 52bit as the gpa is always validated during an access.
(e.g, kvm_{read/write}_guest, kvm_is_visible_gfn()).
Also, the ITS table save-restore is also not affected with
the enhancement. The DTE entries already store the bits[51:8]
of the ITT_addr (with a 256byte alignment).

Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Christoffer Dall <cdall@kernel.org>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kristina Martsenko <kristina.martsenko@arm.com>
[ Macro clean ups, fix PROPBASER and PENDBASER accesses ]
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 include/linux/irqchip/arm-gic-v3.h |  5 +++++
 virt/kvm/arm/vgic/vgic-its.c       | 36 ++++++++++--------------------------
 virt/kvm/arm/vgic/vgic-mmio-v3.c   |  2 --
 3 files changed, 15 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 8bdbb5f29494..74b0aa9c7499 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -357,6 +357,8 @@
 #define GITS_CBASER_RaWaWt	GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt)
 #define GITS_CBASER_RaWaWb	GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb)
 
+#define GITS_CBASER_ADDRESS(cbaser)	((cbaser) & GENMASK_ULL(51, 12))
+
 #define GITS_BASER_NR_REGS		8
 
 #define GITS_BASER_VALID			(1ULL << 63)
@@ -388,6 +390,9 @@
 #define GITS_BASER_ENTRY_SIZE_MASK	GENMASK_ULL(52, 48)
 #define GITS_BASER_PHYS_52_to_48(phys)					\
 	(((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12)
+#define GITS_BASER_ADDR_48_to_52(baser)					\
+	(((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48)
+
 #define GITS_BASER_SHAREABILITY_SHIFT	(10)
 #define GITS_BASER_InnerShareable					\
 	GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 12502251727e..eb2a390a6c86 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -241,13 +241,6 @@ static struct its_ite *find_ite(struct vgic_its *its, u32 device_id,
 	list_for_each_entry(dev, &(its)->device_list, dev_list) \
 		list_for_each_entry(ite, &(dev)->itt_head, ite_list)
 
-/*
- * We only implement 48 bits of PA at the moment, although the ITS
- * supports more. Let's be restrictive here.
- */
-#define BASER_ADDRESS(x)	((x) & GENMASK_ULL(47, 16))
-#define CBASER_ADDRESS(x)	((x) & GENMASK_ULL(47, 12))
-
 #define GIC_LPI_OFFSET 8192
 
 #define VITS_TYPER_IDBITS 16
@@ -759,6 +752,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
 {
 	int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
 	u64 indirect_ptr, type = GITS_BASER_TYPE(baser);
+	phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser);
 	int esz = GITS_BASER_ENTRY_SIZE(baser);
 	int index;
 	gfn_t gfn;
@@ -783,7 +777,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
 		if (id >= (l1_tbl_size / esz))
 			return false;
 
-		addr = BASER_ADDRESS(baser) + id * esz;
+		addr = base + id * esz;
 		gfn = addr >> PAGE_SHIFT;
 
 		if (eaddr)
@@ -798,7 +792,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
 
 	/* Each 1st level entry is represented by a 64-bit value. */
 	if (kvm_read_guest_lock(its->dev->kvm,
-			   BASER_ADDRESS(baser) + index * sizeof(indirect_ptr),
+			   base + index * sizeof(indirect_ptr),
 			   &indirect_ptr, sizeof(indirect_ptr)))
 		return false;
 
@@ -808,11 +802,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
 	if (!(indirect_ptr & BIT_ULL(63)))
 		return false;
 
-	/*
-	 * Mask the guest physical address and calculate the frame number.
-	 * Any address beyond our supported 48 bits of PA will be caught
-	 * by the actual check in the final step.
-	 */
+	/* Mask the guest physical address and calculate the frame number. */
 	indirect_ptr &= GENMASK_ULL(51, 16);
 
 	/* Find the address of the actual entry */
@@ -1304,9 +1294,6 @@ static u64 vgic_sanitise_its_baser(u64 reg)
 				  GITS_BASER_OUTER_CACHEABILITY_SHIFT,
 				  vgic_sanitise_outer_cacheability);
 
-	/* Bits 15:12 contain bits 51:48 of the PA, which we don't support. */
-	reg &= ~GENMASK_ULL(15, 12);
-
 	/* We support only one (ITS) page size: 64K */
 	reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K;
 
@@ -1325,11 +1312,8 @@ static u64 vgic_sanitise_its_cbaser(u64 reg)
 				  GITS_CBASER_OUTER_CACHEABILITY_SHIFT,
 				  vgic_sanitise_outer_cacheability);
 
-	/*
-	 * Sanitise the physical address to be 64k aligned.
-	 * Also limit the physical addresses to 48 bits.
-	 */
-	reg &= ~(GENMASK_ULL(51, 48) | GENMASK_ULL(15, 12));
+	/* Sanitise the physical address to be 64k aligned. */
+	reg &= ~GENMASK_ULL(15, 12);
 
 	return reg;
 }
@@ -1375,7 +1359,7 @@ static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its)
 	if (!its->enabled)
 		return;
 
-	cbaser = CBASER_ADDRESS(its->cbaser);
+	cbaser = GITS_CBASER_ADDRESS(its->cbaser);
 
 	while (its->cwriter != its->creadr) {
 		int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr,
@@ -2233,7 +2217,7 @@ static int vgic_its_restore_device_tables(struct vgic_its *its)
 	if (!(baser & GITS_BASER_VALID))
 		return 0;
 
-	l1_gpa = BASER_ADDRESS(baser);
+	l1_gpa = GITS_BASER_ADDR_48_to_52(baser);
 
 	if (baser & GITS_BASER_INDIRECT) {
 		l1_esz = GITS_LVL1_ENTRY_SIZE;
@@ -2305,7 +2289,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its)
 {
 	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
 	u64 baser = its->baser_coll_table;
-	gpa_t gpa = BASER_ADDRESS(baser);
+	gpa_t gpa = GITS_BASER_ADDR_48_to_52(baser);
 	struct its_collection *collection;
 	u64 val;
 	size_t max_size, filled = 0;
@@ -2354,7 +2338,7 @@ static int vgic_its_restore_collection_table(struct vgic_its *its)
 	if (!(baser & GITS_BASER_VALID))
 		return 0;
 
-	gpa = BASER_ADDRESS(baser);
+	gpa = GITS_BASER_ADDR_48_to_52(baser);
 
 	max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K;
 
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index a2a175b08b17..b3d1f0985117 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -364,7 +364,6 @@ static u64 vgic_sanitise_pendbaser(u64 reg)
 				  vgic_sanitise_outer_cacheability);
 
 	reg &= ~PENDBASER_RES0_MASK;
-	reg &= ~GENMASK_ULL(51, 48);
 
 	return reg;
 }
@@ -382,7 +381,6 @@ static u64 vgic_sanitise_propbaser(u64 reg)
 				  vgic_sanitise_outer_cacheability);
 
 	reg &= ~PROPBASER_RES0_MASK;
-	reg &= ~GENMASK_ULL(51, 48);
 	return reg;
 }
 
-- 
cgit v1.2.3


From 8bad74f9840f87661f20ced3dc80c84ab4fd55a1 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 28 Sep 2018 14:45:36 +0000
Subject: bpf: extend cgroup bpf core to allow multiple cgroup storage types

In order to introduce per-cpu cgroup storage, let's generalize
bpf cgroup core to support multiple cgroup storage types.
Potentially, per-node cgroup storage can be added later.

This commit is mostly a formal change that replaces
cgroup_storage pointer with a array of cgroup_storage pointers.
It doesn't actually introduce a new storage type,
it will be done later.

Each bpf program is now able to have one cgroup storage of each type.

Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h | 38 +++++++++++++++++-------
 include/linux/bpf.h        | 11 +++++--
 kernel/bpf/cgroup.c        | 74 ++++++++++++++++++++++++++++++++--------------
 kernel/bpf/helpers.c       | 15 ++++++----
 kernel/bpf/local_storage.c | 18 ++++++-----
 kernel/bpf/syscall.c       |  9 ++++--
 kernel/bpf/verifier.c      |  8 +++--
 net/bpf/test_run.c         | 20 +++++++++----
 8 files changed, 136 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index f91b0f8ff3a9..e9871b012dac 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -2,6 +2,7 @@
 #ifndef _BPF_CGROUP_H
 #define _BPF_CGROUP_H
 
+#include <linux/bpf.h>
 #include <linux/errno.h>
 #include <linux/jump_label.h>
 #include <linux/percpu.h>
@@ -22,7 +23,10 @@ struct bpf_cgroup_storage;
 extern struct static_key_false cgroup_bpf_enabled_key;
 #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
 
-DECLARE_PER_CPU(void*, bpf_cgroup_storage);
+DECLARE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+
+#define for_each_cgroup_storage_type(stype) \
+	for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
 
 struct bpf_cgroup_storage_map;
 
@@ -43,7 +47,7 @@ struct bpf_cgroup_storage {
 struct bpf_prog_list {
 	struct list_head node;
 	struct bpf_prog *prog;
-	struct bpf_cgroup_storage *storage;
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 };
 
 struct bpf_prog_array;
@@ -101,18 +105,29 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 				      short access, enum bpf_attach_type type);
 
-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage)
+static inline enum bpf_cgroup_storage_type cgroup_storage_type(
+	struct bpf_map *map)
 {
+	return BPF_CGROUP_STORAGE_SHARED;
+}
+
+static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
+					  *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
+{
+	enum bpf_cgroup_storage_type stype;
 	struct bpf_storage_buffer *buf;
 
-	if (!storage)
-		return;
+	for_each_cgroup_storage_type(stype) {
+		if (!storage[stype])
+			continue;
 
-	buf = READ_ONCE(storage->buf);
-	this_cpu_write(bpf_cgroup_storage, &buf->data[0]);
+		buf = READ_ONCE(storage[stype]->buf);
+		this_cpu_write(bpf_cgroup_storage[stype], &buf->data[0]);
+	}
 }
 
-struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog);
+struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
+					enum bpf_cgroup_storage_type stype);
 void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
 void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
 			     struct cgroup *cgroup,
@@ -265,13 +280,14 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
 	return -EINVAL;
 }
 
-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {}
+static inline void bpf_cgroup_storage_set(
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
 static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
 					    struct bpf_map *map) { return 0; }
 static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
 					      struct bpf_map *map) {}
 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
-	struct bpf_prog *prog) { return 0; }
+	struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; }
 static inline void bpf_cgroup_storage_free(
 	struct bpf_cgroup_storage *storage) {}
 
@@ -293,6 +309,8 @@ static inline void bpf_cgroup_storage_free(
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
 
+#define for_each_cgroup_storage_type(stype) for (; false; )
+
 #endif /* CONFIG_CGROUP_BPF */
 
 #endif /* _BPF_CGROUP_H */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 988a00797bcd..b457fbe7b70b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -272,6 +272,13 @@ struct bpf_prog_offload {
 	u32			jited_len;
 };
 
+enum bpf_cgroup_storage_type {
+	BPF_CGROUP_STORAGE_SHARED,
+	__BPF_CGROUP_STORAGE_MAX
+};
+
+#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
+
 struct bpf_prog_aux {
 	atomic_t refcnt;
 	u32 used_map_cnt;
@@ -289,7 +296,7 @@ struct bpf_prog_aux {
 	struct bpf_prog *prog;
 	struct user_struct *user;
 	u64 load_time; /* ns since boottime */
-	struct bpf_map *cgroup_storage;
+	struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 	char name[BPF_OBJ_NAME_LEN];
 #ifdef CONFIG_SECURITY
 	void *security;
@@ -358,7 +365,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
  */
 struct bpf_prog_array_item {
 	struct bpf_prog *prog;
-	struct bpf_cgroup_storage *cgroup_storage;
+	struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 };
 
 struct bpf_prog_array {
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 549f6fbcc461..00f6ed2e4f9a 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -25,6 +25,7 @@ EXPORT_SYMBOL(cgroup_bpf_enabled_key);
  */
 void cgroup_bpf_put(struct cgroup *cgrp)
 {
+	enum bpf_cgroup_storage_type stype;
 	unsigned int type;
 
 	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
@@ -34,8 +35,10 @@ void cgroup_bpf_put(struct cgroup *cgrp)
 		list_for_each_entry_safe(pl, tmp, progs, node) {
 			list_del(&pl->node);
 			bpf_prog_put(pl->prog);
-			bpf_cgroup_storage_unlink(pl->storage);
-			bpf_cgroup_storage_free(pl->storage);
+			for_each_cgroup_storage_type(stype) {
+				bpf_cgroup_storage_unlink(pl->storage[stype]);
+				bpf_cgroup_storage_free(pl->storage[stype]);
+			}
 			kfree(pl);
 			static_branch_dec(&cgroup_bpf_enabled_key);
 		}
@@ -97,6 +100,7 @@ static int compute_effective_progs(struct cgroup *cgrp,
 				   enum bpf_attach_type type,
 				   struct bpf_prog_array __rcu **array)
 {
+	enum bpf_cgroup_storage_type stype;
 	struct bpf_prog_array *progs;
 	struct bpf_prog_list *pl;
 	struct cgroup *p = cgrp;
@@ -125,7 +129,9 @@ static int compute_effective_progs(struct cgroup *cgrp,
 				continue;
 
 			progs->items[cnt].prog = pl->prog;
-			progs->items[cnt].cgroup_storage = pl->storage;
+			for_each_cgroup_storage_type(stype)
+				progs->items[cnt].cgroup_storage[stype] =
+					pl->storage[stype];
 			cnt++;
 		}
 	} while ((p = cgroup_parent(p)));
@@ -232,7 +238,9 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 {
 	struct list_head *progs = &cgrp->bpf.progs[type];
 	struct bpf_prog *old_prog = NULL;
-	struct bpf_cgroup_storage *storage, *old_storage = NULL;
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
+		*old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
+	enum bpf_cgroup_storage_type stype;
 	struct bpf_prog_list *pl;
 	bool pl_was_allocated;
 	int err;
@@ -254,34 +262,44 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
 		return -E2BIG;
 
-	storage = bpf_cgroup_storage_alloc(prog);
-	if (IS_ERR(storage))
-		return -ENOMEM;
+	for_each_cgroup_storage_type(stype) {
+		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
+		if (IS_ERR(storage[stype])) {
+			storage[stype] = NULL;
+			for_each_cgroup_storage_type(stype)
+				bpf_cgroup_storage_free(storage[stype]);
+			return -ENOMEM;
+		}
+	}
 
 	if (flags & BPF_F_ALLOW_MULTI) {
 		list_for_each_entry(pl, progs, node) {
 			if (pl->prog == prog) {
 				/* disallow attaching the same prog twice */
-				bpf_cgroup_storage_free(storage);
+				for_each_cgroup_storage_type(stype)
+					bpf_cgroup_storage_free(storage[stype]);
 				return -EINVAL;
 			}
 		}
 
 		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
 		if (!pl) {
-			bpf_cgroup_storage_free(storage);
+			for_each_cgroup_storage_type(stype)
+				bpf_cgroup_storage_free(storage[stype]);
 			return -ENOMEM;
 		}
 
 		pl_was_allocated = true;
 		pl->prog = prog;
-		pl->storage = storage;
+		for_each_cgroup_storage_type(stype)
+			pl->storage[stype] = storage[stype];
 		list_add_tail(&pl->node, progs);
 	} else {
 		if (list_empty(progs)) {
 			pl = kmalloc(sizeof(*pl), GFP_KERNEL);
 			if (!pl) {
-				bpf_cgroup_storage_free(storage);
+				for_each_cgroup_storage_type(stype)
+					bpf_cgroup_storage_free(storage[stype]);
 				return -ENOMEM;
 			}
 			pl_was_allocated = true;
@@ -289,12 +307,15 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 		} else {
 			pl = list_first_entry(progs, typeof(*pl), node);
 			old_prog = pl->prog;
-			old_storage = pl->storage;
-			bpf_cgroup_storage_unlink(old_storage);
+			for_each_cgroup_storage_type(stype) {
+				old_storage[stype] = pl->storage[stype];
+				bpf_cgroup_storage_unlink(old_storage[stype]);
+			}
 			pl_was_allocated = false;
 		}
 		pl->prog = prog;
-		pl->storage = storage;
+		for_each_cgroup_storage_type(stype)
+			pl->storage[stype] = storage[stype];
 	}
 
 	cgrp->bpf.flags[type] = flags;
@@ -304,21 +325,27 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 		goto cleanup;
 
 	static_branch_inc(&cgroup_bpf_enabled_key);
-	if (old_storage)
-		bpf_cgroup_storage_free(old_storage);
+	for_each_cgroup_storage_type(stype) {
+		if (!old_storage[stype])
+			continue;
+		bpf_cgroup_storage_free(old_storage[stype]);
+	}
 	if (old_prog) {
 		bpf_prog_put(old_prog);
 		static_branch_dec(&cgroup_bpf_enabled_key);
 	}
-	bpf_cgroup_storage_link(storage, cgrp, type);
+	for_each_cgroup_storage_type(stype)
+		bpf_cgroup_storage_link(storage[stype], cgrp, type);
 	return 0;
 
 cleanup:
 	/* and cleanup the prog list */
 	pl->prog = old_prog;
-	bpf_cgroup_storage_free(pl->storage);
-	pl->storage = old_storage;
-	bpf_cgroup_storage_link(old_storage, cgrp, type);
+	for_each_cgroup_storage_type(stype) {
+		bpf_cgroup_storage_free(pl->storage[stype]);
+		pl->storage[stype] = old_storage[stype];
+		bpf_cgroup_storage_link(old_storage[stype], cgrp, type);
+	}
 	if (pl_was_allocated) {
 		list_del(&pl->node);
 		kfree(pl);
@@ -339,6 +366,7 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 			enum bpf_attach_type type, u32 unused_flags)
 {
 	struct list_head *progs = &cgrp->bpf.progs[type];
+	enum bpf_cgroup_storage_type stype;
 	u32 flags = cgrp->bpf.flags[type];
 	struct bpf_prog *old_prog = NULL;
 	struct bpf_prog_list *pl;
@@ -385,8 +413,10 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 
 	/* now can actually delete it from this cgroup list */
 	list_del(&pl->node);
-	bpf_cgroup_storage_unlink(pl->storage);
-	bpf_cgroup_storage_free(pl->storage);
+	for_each_cgroup_storage_type(stype) {
+		bpf_cgroup_storage_unlink(pl->storage[stype]);
+		bpf_cgroup_storage_free(pl->storage[stype]);
+	}
 	kfree(pl);
 	if (list_empty(progs))
 		/* last program was detached, reset flags to zero */
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1991466b8327..9070b2ace6aa 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -194,16 +194,18 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
-DECLARE_PER_CPU(void*, bpf_cgroup_storage);
+#ifdef CONFIG_CGROUP_BPF
+DECLARE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
 
 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 {
-	/* map and flags arguments are not used now,
-	 * but provide an ability to extend the API
-	 * for other types of local storages.
-	 * verifier checks that their values are correct.
+	/* flags argument is not used now,
+	 * but provides an ability to extend the API.
+	 * verifier checks that its value is correct.
 	 */
-	return (unsigned long) this_cpu_read(bpf_cgroup_storage);
+	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
+
+	return (unsigned long) this_cpu_read(bpf_cgroup_storage[stype]);
 }
 
 const struct bpf_func_proto bpf_get_local_storage_proto = {
@@ -214,3 +216,4 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 #endif
+#endif
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 22ad967d1e5f..0bd9f19fc557 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -7,7 +7,7 @@
 #include <linux/rbtree.h>
 #include <linux/slab.h>
 
-DEFINE_PER_CPU(void*, bpf_cgroup_storage);
+DEFINE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
 
 #ifdef CONFIG_CGROUP_BPF
 
@@ -251,6 +251,7 @@ const struct bpf_map_ops cgroup_storage_map_ops = {
 
 int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
 {
+	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
 	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
 	int ret = -EBUSY;
 
@@ -258,11 +259,12 @@ int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
 
 	if (map->prog && map->prog != prog)
 		goto unlock;
-	if (prog->aux->cgroup_storage && prog->aux->cgroup_storage != _map)
+	if (prog->aux->cgroup_storage[stype] &&
+	    prog->aux->cgroup_storage[stype] != _map)
 		goto unlock;
 
 	map->prog = prog;
-	prog->aux->cgroup_storage = _map;
+	prog->aux->cgroup_storage[stype] = _map;
 	ret = 0;
 unlock:
 	spin_unlock_bh(&map->lock);
@@ -272,24 +274,26 @@ unlock:
 
 void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
 {
+	enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
 	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
 
 	spin_lock_bh(&map->lock);
 	if (map->prog == prog) {
-		WARN_ON(prog->aux->cgroup_storage != _map);
+		WARN_ON(prog->aux->cgroup_storage[stype] != _map);
 		map->prog = NULL;
-		prog->aux->cgroup_storage = NULL;
+		prog->aux->cgroup_storage[stype] = NULL;
 	}
 	spin_unlock_bh(&map->lock);
 }
 
-struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog)
+struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
+					enum bpf_cgroup_storage_type stype)
 {
 	struct bpf_cgroup_storage *storage;
 	struct bpf_map *map;
 	u32 pages;
 
-	map = prog->aux->cgroup_storage;
+	map = prog->aux->cgroup_storage[stype];
 	if (!map)
 		return NULL;
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b3c2d09bcf7a..8c91d2b41b1e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -988,10 +988,15 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
 /* drop refcnt on maps used by eBPF program and free auxilary data */
 static void free_used_maps(struct bpf_prog_aux *aux)
 {
+	enum bpf_cgroup_storage_type stype;
 	int i;
 
-	if (aux->cgroup_storage)
-		bpf_cgroup_storage_release(aux->prog, aux->cgroup_storage);
+	for_each_cgroup_storage_type(stype) {
+		if (!aux->cgroup_storage[stype])
+			continue;
+		bpf_cgroup_storage_release(aux->prog,
+					   aux->cgroup_storage[stype]);
+	}
 
 	for (i = 0; i < aux->used_map_cnt; i++)
 		bpf_map_put(aux->used_maps[i]);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e986518d7bc3..e90899df585d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5171,11 +5171,15 @@ next_insn:
 /* drop refcnt of maps used by the rejected program */
 static void release_maps(struct bpf_verifier_env *env)
 {
+	enum bpf_cgroup_storage_type stype;
 	int i;
 
-	if (env->prog->aux->cgroup_storage)
+	for_each_cgroup_storage_type(stype) {
+		if (!env->prog->aux->cgroup_storage[stype])
+			continue;
 		bpf_cgroup_storage_release(env->prog,
-					   env->prog->aux->cgroup_storage);
+			env->prog->aux->cgroup_storage[stype]);
+	}
 
 	for (i = 0; i < env->used_map_cnt; i++)
 		bpf_map_put(env->used_maps[i]);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index f4078830ea50..0c423b8cd75c 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -12,7 +12,7 @@
 #include <linux/sched/signal.h>
 
 static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
-					    struct bpf_cgroup_storage *storage)
+		struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
 {
 	u32 ret;
 
@@ -28,13 +28,20 @@ static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
 
 static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
 {
-	struct bpf_cgroup_storage *storage = NULL;
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
+	enum bpf_cgroup_storage_type stype;
 	u64 time_start, time_spent = 0;
 	u32 ret = 0, i;
 
-	storage = bpf_cgroup_storage_alloc(prog);
-	if (IS_ERR(storage))
-		return PTR_ERR(storage);
+	for_each_cgroup_storage_type(stype) {
+		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
+		if (IS_ERR(storage[stype])) {
+			storage[stype] = NULL;
+			for_each_cgroup_storage_type(stype)
+				bpf_cgroup_storage_free(storage[stype]);
+			return -ENOMEM;
+		}
+	}
 
 	if (!repeat)
 		repeat = 1;
@@ -53,7 +60,8 @@ static u32 bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *time)
 	do_div(time_spent, repeat);
 	*time = time_spent > U32_MAX ? U32_MAX : (u32)time_spent;
 
-	bpf_cgroup_storage_free(storage);
+	for_each_cgroup_storage_type(stype)
+		bpf_cgroup_storage_free(storage[stype]);
 
 	return ret;
 }
-- 
cgit v1.2.3


From f294b37ec7b24a574884cd157497a3748081c0f0 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 28 Sep 2018 14:45:40 +0000
Subject: bpf: rework cgroup storage pointer passing

To simplify the following introduction of per-cpu cgroup storage,
let's rework a bit a mechanism of passing a pointer to a cgroup
storage into the bpf_get_local_storage(). Let's save a pointer
to the corresponding bpf_cgroup_storage structure, instead of
a pointer to the actual buffer.

It will help us to handle per-cpu storage later, which has
a different way of accessing to the actual data.

Signed-off-by: Roman Gushchin <guro@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h | 13 ++++---------
 kernel/bpf/helpers.c       |  8 ++++++--
 kernel/bpf/local_storage.c |  3 ++-
 3 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index e9871b012dac..7e0c9a1d48b7 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -23,7 +23,8 @@ struct bpf_cgroup_storage;
 extern struct static_key_false cgroup_bpf_enabled_key;
 #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
 
-DECLARE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+DECLARE_PER_CPU(struct bpf_cgroup_storage*,
+		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
 
 #define for_each_cgroup_storage_type(stype) \
 	for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
@@ -115,15 +116,9 @@ static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
 					  *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
 {
 	enum bpf_cgroup_storage_type stype;
-	struct bpf_storage_buffer *buf;
-
-	for_each_cgroup_storage_type(stype) {
-		if (!storage[stype])
-			continue;
 
-		buf = READ_ONCE(storage[stype]->buf);
-		this_cpu_write(bpf_cgroup_storage[stype], &buf->data[0]);
-	}
+	for_each_cgroup_storage_type(stype)
+		this_cpu_write(bpf_cgroup_storage[stype], storage[stype]);
 }
 
 struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 9070b2ace6aa..e42f8789b7ea 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -195,7 +195,8 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
 };
 
 #ifdef CONFIG_CGROUP_BPF
-DECLARE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+DECLARE_PER_CPU(struct bpf_cgroup_storage*,
+		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
 
 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 {
@@ -204,8 +205,11 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 	 * verifier checks that its value is correct.
 	 */
 	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
+	struct bpf_cgroup_storage *storage;
 
-	return (unsigned long) this_cpu_read(bpf_cgroup_storage[stype]);
+	storage = this_cpu_read(bpf_cgroup_storage[stype]);
+
+	return (unsigned long)&READ_ONCE(storage->buf)->data[0];
 }
 
 const struct bpf_func_proto bpf_get_local_storage_proto = {
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 0bd9f19fc557..6742292fb39e 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -7,7 +7,8 @@
 #include <linux/rbtree.h>
 #include <linux/slab.h>
 
-DEFINE_PER_CPU(void*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+DEFINE_PER_CPU(struct bpf_cgroup_storage*,
+	       bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
 
 #ifdef CONFIG_CGROUP_BPF
 
-- 
cgit v1.2.3


From b741f1630346defcbc8cc60f1a2bdae8b3b0036f Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 28 Sep 2018 14:45:43 +0000
Subject: bpf: introduce per-cpu cgroup local storage

This commit introduced per-cpu cgroup local storage.

Per-cpu cgroup local storage is very similar to simple cgroup storage
(let's call it shared), except all the data is per-cpu.

The main goal of per-cpu variant is to implement super fast
counters (e.g. packet counters), which don't require neither
lookups, neither atomic operations.

>From userspace's point of view, accessing a per-cpu cgroup storage
is similar to other per-cpu map types (e.g. per-cpu hashmaps and
arrays).

Writing to a per-cpu cgroup storage is not atomic, but is performed
by copying longs, so some minimal atomicity is here, exactly
as with other per-cpu maps.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf-cgroup.h |  20 +++++-
 include/linux/bpf.h        |   1 +
 include/linux/bpf_types.h  |   1 +
 include/uapi/linux/bpf.h   |   1 +
 kernel/bpf/helpers.c       |   8 ++-
 kernel/bpf/local_storage.c | 150 +++++++++++++++++++++++++++++++++++++++------
 kernel/bpf/syscall.c       |  11 +++-
 kernel/bpf/verifier.c      |  15 +++--
 8 files changed, 179 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 7e0c9a1d48b7..588dd5f0bd85 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -37,7 +37,10 @@ struct bpf_storage_buffer {
 };
 
 struct bpf_cgroup_storage {
-	struct bpf_storage_buffer *buf;
+	union {
+		struct bpf_storage_buffer *buf;
+		void __percpu *percpu_buf;
+	};
 	struct bpf_cgroup_storage_map *map;
 	struct bpf_cgroup_storage_key key;
 	struct list_head list;
@@ -109,6 +112,9 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 static inline enum bpf_cgroup_storage_type cgroup_storage_type(
 	struct bpf_map *map)
 {
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
+		return BPF_CGROUP_STORAGE_PERCPU;
+
 	return BPF_CGROUP_STORAGE_SHARED;
 }
 
@@ -131,6 +137,10 @@ void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
 int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map);
 void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map);
 
+int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
+				     void *value, u64 flags);
+
 /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
 ({									      \
@@ -285,6 +295,14 @@ static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
 	struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; }
 static inline void bpf_cgroup_storage_free(
 	struct bpf_cgroup_storage *storage) {}
+static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key,
+						 void *value) {
+	return 0;
+}
+static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
+					void *key, void *value, u64 flags) {
+	return 0;
+}
 
 #define cgroup_bpf_enabled (0)
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b457fbe7b70b..018299a595c8 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -274,6 +274,7 @@ struct bpf_prog_offload {
 
 enum bpf_cgroup_storage_type {
 	BPF_CGROUP_STORAGE_SHARED,
+	BPF_CGROUP_STORAGE_PERCPU,
 	__BPF_CGROUP_STORAGE_MAX
 };
 
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index c9bd6fb765b0..5432f4c9f50e 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -43,6 +43,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
 #endif
 #ifdef CONFIG_CGROUP_BPF
 BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index aa5ccd2385ed..e2070d819e04 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -127,6 +127,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_SOCKHASH,
 	BPF_MAP_TYPE_CGROUP_STORAGE,
 	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
 };
 
 enum bpf_prog_type {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index e42f8789b7ea..6502115e8f55 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -206,10 +206,16 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 	 */
 	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
 	struct bpf_cgroup_storage *storage;
+	void *ptr;
 
 	storage = this_cpu_read(bpf_cgroup_storage[stype]);
 
-	return (unsigned long)&READ_ONCE(storage->buf)->data[0];
+	if (stype == BPF_CGROUP_STORAGE_SHARED)
+		ptr = &READ_ONCE(storage->buf)->data[0];
+	else
+		ptr = this_cpu_ptr(storage->percpu_buf);
+
+	return (unsigned long)ptr;
 }
 
 const struct bpf_func_proto bpf_get_local_storage_proto = {
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 6742292fb39e..944eb297465f 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -152,6 +152,71 @@ static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
 	return 0;
 }
 
+int bpf_percpu_cgroup_storage_copy(struct bpf_map *_map, void *_key,
+				   void *value)
+{
+	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+	struct bpf_cgroup_storage_key *key = _key;
+	struct bpf_cgroup_storage *storage;
+	int cpu, off = 0;
+	u32 size;
+
+	rcu_read_lock();
+	storage = cgroup_storage_lookup(map, key, false);
+	if (!storage) {
+		rcu_read_unlock();
+		return -ENOENT;
+	}
+
+	/* per_cpu areas are zero-filled and bpf programs can only
+	 * access 'value_size' of them, so copying rounded areas
+	 * will not leak any kernel data
+	 */
+	size = round_up(_map->value_size, 8);
+	for_each_possible_cpu(cpu) {
+		bpf_long_memcpy(value + off,
+				per_cpu_ptr(storage->percpu_buf, cpu), size);
+		off += size;
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
+int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
+				     void *value, u64 map_flags)
+{
+	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
+	struct bpf_cgroup_storage_key *key = _key;
+	struct bpf_cgroup_storage *storage;
+	int cpu, off = 0;
+	u32 size;
+
+	if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
+		return -EINVAL;
+
+	rcu_read_lock();
+	storage = cgroup_storage_lookup(map, key, false);
+	if (!storage) {
+		rcu_read_unlock();
+		return -ENOENT;
+	}
+
+	/* the user space will provide round_up(value_size, 8) bytes that
+	 * will be copied into per-cpu area. bpf programs can only access
+	 * value_size of it. During lookup the same extra bytes will be
+	 * returned or zeros which were zero-filled by percpu_alloc,
+	 * so no kernel data leaks possible
+	 */
+	size = round_up(_map->value_size, 8);
+	for_each_possible_cpu(cpu) {
+		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
+				value + off, size);
+		off += size;
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
 static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
 				       void *_next_key)
 {
@@ -287,60 +352,105 @@ void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
 	spin_unlock_bh(&map->lock);
 }
 
+static size_t bpf_cgroup_storage_calculate_size(struct bpf_map *map, u32 *pages)
+{
+	size_t size;
+
+	if (cgroup_storage_type(map) == BPF_CGROUP_STORAGE_SHARED) {
+		size = sizeof(struct bpf_storage_buffer) + map->value_size;
+		*pages = round_up(sizeof(struct bpf_cgroup_storage) + size,
+				  PAGE_SIZE) >> PAGE_SHIFT;
+	} else {
+		size = map->value_size;
+		*pages = round_up(round_up(size, 8) * num_possible_cpus(),
+				  PAGE_SIZE) >> PAGE_SHIFT;
+	}
+
+	return size;
+}
+
 struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
 					enum bpf_cgroup_storage_type stype)
 {
 	struct bpf_cgroup_storage *storage;
 	struct bpf_map *map;
+	gfp_t flags;
+	size_t size;
 	u32 pages;
 
 	map = prog->aux->cgroup_storage[stype];
 	if (!map)
 		return NULL;
 
-	pages = round_up(sizeof(struct bpf_cgroup_storage) +
-			 sizeof(struct bpf_storage_buffer) +
-			 map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
+	size = bpf_cgroup_storage_calculate_size(map, &pages);
+
 	if (bpf_map_charge_memlock(map, pages))
 		return ERR_PTR(-EPERM);
 
 	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
 			       __GFP_ZERO | GFP_USER, map->numa_node);
-	if (!storage) {
-		bpf_map_uncharge_memlock(map, pages);
-		return ERR_PTR(-ENOMEM);
-	}
+	if (!storage)
+		goto enomem;
 
-	storage->buf = kmalloc_node(sizeof(struct bpf_storage_buffer) +
-				    map->value_size, __GFP_ZERO | GFP_USER,
-				    map->numa_node);
-	if (!storage->buf) {
-		bpf_map_uncharge_memlock(map, pages);
-		kfree(storage);
-		return ERR_PTR(-ENOMEM);
+	flags = __GFP_ZERO | GFP_USER;
+
+	if (stype == BPF_CGROUP_STORAGE_SHARED) {
+		storage->buf = kmalloc_node(size, flags, map->numa_node);
+		if (!storage->buf)
+			goto enomem;
+	} else {
+		storage->percpu_buf = __alloc_percpu_gfp(size, 8, flags);
+		if (!storage->percpu_buf)
+			goto enomem;
 	}
 
 	storage->map = (struct bpf_cgroup_storage_map *)map;
 
 	return storage;
+
+enomem:
+	bpf_map_uncharge_memlock(map, pages);
+	kfree(storage);
+	return ERR_PTR(-ENOMEM);
+}
+
+static void free_shared_cgroup_storage_rcu(struct rcu_head *rcu)
+{
+	struct bpf_cgroup_storage *storage =
+		container_of(rcu, struct bpf_cgroup_storage, rcu);
+
+	kfree(storage->buf);
+	kfree(storage);
+}
+
+static void free_percpu_cgroup_storage_rcu(struct rcu_head *rcu)
+{
+	struct bpf_cgroup_storage *storage =
+		container_of(rcu, struct bpf_cgroup_storage, rcu);
+
+	free_percpu(storage->percpu_buf);
+	kfree(storage);
 }
 
 void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
 {
-	u32 pages;
+	enum bpf_cgroup_storage_type stype;
 	struct bpf_map *map;
+	u32 pages;
 
 	if (!storage)
 		return;
 
 	map = &storage->map->map;
-	pages = round_up(sizeof(struct bpf_cgroup_storage) +
-			 sizeof(struct bpf_storage_buffer) +
-			 map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
+
+	bpf_cgroup_storage_calculate_size(map, &pages);
 	bpf_map_uncharge_memlock(map, pages);
 
-	kfree_rcu(storage->buf, rcu);
-	kfree_rcu(storage, rcu);
+	stype = cgroup_storage_type(map);
+	if (stype == BPF_CGROUP_STORAGE_SHARED)
+		call_rcu(&storage->rcu, free_shared_cgroup_storage_rcu);
+	else
+		call_rcu(&storage->rcu, free_percpu_cgroup_storage_rcu);
 }
 
 void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8c91d2b41b1e..5742df21598c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -686,7 +686,8 @@ static int map_lookup_elem(union bpf_attr *attr)
 
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
-	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
 		value_size = round_up(map->value_size, 8) * num_possible_cpus();
 	else if (IS_FD_MAP(map))
 		value_size = sizeof(u32);
@@ -705,6 +706,8 @@ static int map_lookup_elem(union bpf_attr *attr)
 		err = bpf_percpu_hash_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 		err = bpf_percpu_array_copy(map, key, value);
+	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
+		err = bpf_percpu_cgroup_storage_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
 		err = bpf_stackmap_copy(map, key, value);
 	} else if (IS_FD_ARRAY(map)) {
@@ -774,7 +777,8 @@ static int map_update_elem(union bpf_attr *attr)
 
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
-	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
+	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
 		value_size = round_up(map->value_size, 8) * num_possible_cpus();
 	else
 		value_size = map->value_size;
@@ -809,6 +813,9 @@ static int map_update_elem(union bpf_attr *attr)
 		err = bpf_percpu_hash_update(map, key, value, attr->flags);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 		err = bpf_percpu_array_update(map, key, value, attr->flags);
+	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
+		err = bpf_percpu_cgroup_storage_update(map, key, value,
+						       attr->flags);
 	} else if (IS_FD_ARRAY(map)) {
 		rcu_read_lock();
 		err = bpf_fd_array_map_update_elem(map, f.file, key, value,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e90899df585d..a8cc83a970d1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2074,6 +2074,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 			goto error;
 		break;
 	case BPF_MAP_TYPE_CGROUP_STORAGE:
+	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
 		if (func_id != BPF_FUNC_get_local_storage)
 			goto error;
 		break;
@@ -2164,7 +2165,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 			goto error;
 		break;
 	case BPF_FUNC_get_local_storage:
-		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE)
+		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
+		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
 			goto error;
 		break;
 	case BPF_FUNC_sk_select_reuseport:
@@ -5049,6 +5051,12 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 	return 0;
 }
 
+static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
+{
+	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
+		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
+}
+
 /* look for pseudo eBPF instructions that access map FDs and
  * replace them with actual map pointers
  */
@@ -5139,10 +5147,9 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 			}
 			env->used_maps[env->used_map_cnt++] = map;
 
-			if (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE &&
+			if (bpf_map_is_cgroup_storage(map) &&
 			    bpf_cgroup_storage_assign(env->prog, map)) {
-				verbose(env,
-					"only one cgroup storage is allowed\n");
+				verbose(env, "only one cgroup storage of each type is allowed\n");
 				fdput(f);
 				return -EBUSY;
 			}
-- 
cgit v1.2.3


From c6d4381220a0087ce19dbf6984d92c451bd6b364 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 6 Sep 2018 19:27:24 -0400
Subject: dma-mapping: make the get_required_mask method available
 unconditionally

This save some duplication for ia64, and makes the interface more
general.  In the long run we want each dma_map_ops instance to fill this
out, but this will take a little more prep work.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 arch/ia64/include/asm/dma-mapping.h  |  2 --
 arch/ia64/include/asm/machvec.h      |  7 -------
 arch/ia64/include/asm/machvec_init.h |  1 -
 arch/ia64/include/asm/machvec_sn2.h  |  2 --
 arch/ia64/pci/pci.c                  | 26 --------------------------
 arch/ia64/sn/pci/pci_dma.c           |  4 ++--
 drivers/base/platform.c              | 13 +++++++++++--
 drivers/pci/controller/vmd.c         |  4 ----
 include/linux/dma-mapping.h          |  2 --
 9 files changed, 13 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h
index 76e4d6632d68..522745ae67bb 100644
--- a/arch/ia64/include/asm/dma-mapping.h
+++ b/arch/ia64/include/asm/dma-mapping.h
@@ -10,8 +10,6 @@
 #include <linux/scatterlist.h>
 #include <linux/dma-debug.h>
 
-#define ARCH_HAS_DMA_GET_REQUIRED_MASK
-
 extern const struct dma_map_ops *dma_ops;
 extern struct ia64_machine_vector ia64_mv;
 extern void set_iommu_machvec(void);
diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h
index 267f4f170191..5133739966bc 100644
--- a/arch/ia64/include/asm/machvec.h
+++ b/arch/ia64/include/asm/machvec.h
@@ -44,7 +44,6 @@ typedef void ia64_mv_kernel_launch_event_t(void);
 
 /* DMA-mapping interface: */
 typedef void ia64_mv_dma_init (void);
-typedef u64 ia64_mv_dma_get_required_mask (struct device *);
 typedef const struct dma_map_ops *ia64_mv_dma_get_ops(struct device *);
 
 /*
@@ -127,7 +126,6 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *);
 #  define platform_global_tlb_purge	ia64_mv.global_tlb_purge
 #  define platform_tlb_migrate_finish	ia64_mv.tlb_migrate_finish
 #  define platform_dma_init		ia64_mv.dma_init
-#  define platform_dma_get_required_mask ia64_mv.dma_get_required_mask
 #  define platform_dma_get_ops		ia64_mv.dma_get_ops
 #  define platform_irq_to_vector	ia64_mv.irq_to_vector
 #  define platform_local_vector_to_irq	ia64_mv.local_vector_to_irq
@@ -171,7 +169,6 @@ struct ia64_machine_vector {
 	ia64_mv_global_tlb_purge_t *global_tlb_purge;
 	ia64_mv_tlb_migrate_finish_t *tlb_migrate_finish;
 	ia64_mv_dma_init *dma_init;
-	ia64_mv_dma_get_required_mask *dma_get_required_mask;
 	ia64_mv_dma_get_ops *dma_get_ops;
 	ia64_mv_irq_to_vector *irq_to_vector;
 	ia64_mv_local_vector_to_irq *local_vector_to_irq;
@@ -211,7 +208,6 @@ struct ia64_machine_vector {
 	platform_global_tlb_purge,		\
 	platform_tlb_migrate_finish,		\
 	platform_dma_init,			\
-	platform_dma_get_required_mask,		\
 	platform_dma_get_ops,			\
 	platform_irq_to_vector,			\
 	platform_local_vector_to_irq,		\
@@ -286,9 +282,6 @@ extern const struct dma_map_ops *dma_get_ops(struct device *);
 #ifndef platform_dma_get_ops
 # define platform_dma_get_ops		dma_get_ops
 #endif
-#ifndef platform_dma_get_required_mask
-# define  platform_dma_get_required_mask	ia64_dma_get_required_mask
-#endif
 #ifndef platform_irq_to_vector
 # define platform_irq_to_vector		__ia64_irq_to_vector
 #endif
diff --git a/arch/ia64/include/asm/machvec_init.h b/arch/ia64/include/asm/machvec_init.h
index 2b32fd06b7c6..2aafb69a3787 100644
--- a/arch/ia64/include/asm/machvec_init.h
+++ b/arch/ia64/include/asm/machvec_init.h
@@ -4,7 +4,6 @@
 
 extern ia64_mv_send_ipi_t ia64_send_ipi;
 extern ia64_mv_global_tlb_purge_t ia64_global_tlb_purge;
-extern ia64_mv_dma_get_required_mask ia64_dma_get_required_mask;
 extern ia64_mv_irq_to_vector __ia64_irq_to_vector;
 extern ia64_mv_local_vector_to_irq __ia64_local_vector_to_irq;
 extern ia64_mv_pci_get_legacy_mem_t ia64_pci_get_legacy_mem;
diff --git a/arch/ia64/include/asm/machvec_sn2.h b/arch/ia64/include/asm/machvec_sn2.h
index ece9fa85be88..b5153d300289 100644
--- a/arch/ia64/include/asm/machvec_sn2.h
+++ b/arch/ia64/include/asm/machvec_sn2.h
@@ -55,7 +55,6 @@ extern ia64_mv_readb_t __sn_readb_relaxed;
 extern ia64_mv_readw_t __sn_readw_relaxed;
 extern ia64_mv_readl_t __sn_readl_relaxed;
 extern ia64_mv_readq_t __sn_readq_relaxed;
-extern ia64_mv_dma_get_required_mask	sn_dma_get_required_mask;
 extern ia64_mv_dma_init			sn_dma_init;
 extern ia64_mv_migrate_t		sn_migrate;
 extern ia64_mv_kernel_launch_event_t	sn_kernel_launch_event;
@@ -100,7 +99,6 @@ extern ia64_mv_pci_fixup_bus_t		sn_pci_fixup_bus;
 #define platform_pci_get_legacy_mem	sn_pci_get_legacy_mem
 #define platform_pci_legacy_read	sn_pci_legacy_read
 #define platform_pci_legacy_write	sn_pci_legacy_write
-#define platform_dma_get_required_mask	sn_dma_get_required_mask
 #define platform_dma_init		sn_dma_init
 #define platform_migrate		sn_migrate
 #define platform_kernel_launch_event    sn_kernel_launch_event
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 7ccc64d5fe3e..5d71800df431 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -568,32 +568,6 @@ static void __init set_pci_dfl_cacheline_size(void)
 	pci_dfl_cache_line_size = (1 << cci.pcci_line_size) / 4;
 }
 
-u64 ia64_dma_get_required_mask(struct device *dev)
-{
-	u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT);
-	u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT));
-	u64 mask;
-
-	if (!high_totalram) {
-		/* convert to mask just covering totalram */
-		low_totalram = (1 << (fls(low_totalram) - 1));
-		low_totalram += low_totalram - 1;
-		mask = low_totalram;
-	} else {
-		high_totalram = (1 << (fls(high_totalram) - 1));
-		high_totalram += high_totalram - 1;
-		mask = (((u64)high_totalram) << 32) + 0xffffffff;
-	}
-	return mask;
-}
-EXPORT_SYMBOL_GPL(ia64_dma_get_required_mask);
-
-u64 dma_get_required_mask(struct device *dev)
-{
-	return platform_dma_get_required_mask(dev);
-}
-EXPORT_SYMBOL_GPL(dma_get_required_mask);
-
 static int __init pcibios_init(void)
 {
 	set_pci_dfl_cacheline_size();
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
index 74c934a997bb..96eb2567718a 100644
--- a/arch/ia64/sn/pci/pci_dma.c
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -344,11 +344,10 @@ static int sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 	return 0;
 }
 
-u64 sn_dma_get_required_mask(struct device *dev)
+static u64 sn_dma_get_required_mask(struct device *dev)
 {
 	return DMA_BIT_MASK(64);
 }
-EXPORT_SYMBOL_GPL(sn_dma_get_required_mask);
 
 char *sn_pci_get_legacy_mem(struct pci_bus *bus)
 {
@@ -473,6 +472,7 @@ static struct dma_map_ops sn_dma_ops = {
 	.sync_sg_for_device	= sn_dma_sync_sg_for_device,
 	.mapping_error		= sn_dma_mapping_error,
 	.dma_supported		= sn_dma_supported,
+	.get_required_mask	= sn_dma_get_required_mask,
 };
 
 void sn_dma_init(void)
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index dff82a3c2caa..cfe22fded980 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -1179,8 +1179,7 @@ int __init platform_bus_init(void)
 	return error;
 }
 
-#ifndef ARCH_HAS_DMA_GET_REQUIRED_MASK
-u64 dma_get_required_mask(struct device *dev)
+static u64 dma_default_get_required_mask(struct device *dev)
 {
 	u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT);
 	u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT));
@@ -1198,6 +1197,16 @@ u64 dma_get_required_mask(struct device *dev)
 	}
 	return mask;
 }
+
+#ifndef ARCH_HAS_DMA_GET_REQUIRED_MASK
+u64 dma_get_required_mask(struct device *dev)
+{
+	const struct dma_map_ops *ops = get_dma_ops(dev);
+
+	if (ops->get_required_mask)
+		return ops->get_required_mask(dev);
+	return dma_default_get_required_mask(dev);
+}
 EXPORT_SYMBOL_GPL(dma_get_required_mask);
 #endif
 
diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index fd2dbd7eed7b..f31ed62d518c 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -404,12 +404,10 @@ static int vmd_dma_supported(struct device *dev, u64 mask)
 	return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask);
 }
 
-#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
 static u64 vmd_get_required_mask(struct device *dev)
 {
 	return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev));
 }
-#endif
 
 static void vmd_teardown_dma_ops(struct vmd_dev *vmd)
 {
@@ -450,9 +448,7 @@ static void vmd_setup_dma_ops(struct vmd_dev *vmd)
 	ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device);
 	ASSIGN_VMD_DMA_OPS(source, dest, mapping_error);
 	ASSIGN_VMD_DMA_OPS(source, dest, dma_supported);
-#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
 	ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask);
-#endif
 	add_dma_domain(domain);
 }
 #undef ASSIGN_VMD_DMA_OPS
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index d23fc45c8208..562af6b45f23 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -130,9 +130,7 @@ struct dma_map_ops {
 			enum dma_data_direction direction);
 	int (*mapping_error)(struct device *dev, dma_addr_t dma_addr);
 	int (*dma_supported)(struct device *dev, u64 mask);
-#ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
 	u64 (*get_required_mask)(struct device *dev);
-#endif
 };
 
 extern const struct dma_map_ops dma_direct_ops;
-- 
cgit v1.2.3


From a20bb058375147cb639c7aa17ef86ad68b32d847 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 20 Sep 2018 13:26:13 +0200
Subject: dma-direct: add an explicit dma_direct_get_required_mask

This is somewhat modelled after the powerpc version, and differs from
the legacy fallback in use fls64 instead of pointlessly splitting up the
address into low and high dwords and in that it takes (__)phys_to_dma
into account.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
---
 include/linux/dma-direct.h |  1 +
 kernel/dma/direct.c        | 22 +++++++++++++++++++---
 2 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index 86a59ba5a7f3..b79496d8c75b 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -55,6 +55,7 @@ static inline void dma_mark_clean(void *addr, size_t size)
 }
 #endif /* CONFIG_ARCH_HAS_DMA_MARK_CLEAN */
 
+u64 dma_direct_get_required_mask(struct device *dev);
 void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		gfp_t gfp, unsigned long attrs);
 void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index c954f0a6dc62..f32b33cfa331 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -4,6 +4,7 @@
  *
  * DMA operations that map physical memory directly without using an IOMMU.
  */
+#include <linux/bootmem.h> /* for max_pfn */
 #include <linux/export.h>
 #include <linux/mm.h>
 #include <linux/dma-direct.h>
@@ -53,11 +54,25 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
 	return true;
 }
 
+static inline dma_addr_t phys_to_dma_direct(struct device *dev,
+		phys_addr_t phys)
+{
+	if (force_dma_unencrypted())
+		return __phys_to_dma(dev, phys);
+	return phys_to_dma(dev, phys);
+}
+
+u64 dma_direct_get_required_mask(struct device *dev)
+{
+	u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT);
+
+	return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
+}
+
 static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
 {
-	dma_addr_t addr = force_dma_unencrypted() ?
-		__phys_to_dma(dev, phys) : phys_to_dma(dev, phys);
-	return addr + size - 1 <= dev->coherent_dma_mask;
+	return phys_to_dma_direct(dev, phys) + size - 1 <=
+			dev->coherent_dma_mask;
 }
 
 void *dma_direct_alloc_pages(struct device *dev, size_t size,
@@ -296,6 +311,7 @@ const struct dma_map_ops dma_direct_ops = {
 	.unmap_page		= dma_direct_unmap_page,
 	.unmap_sg		= dma_direct_unmap_sg,
 #endif
+	.get_required_mask	= dma_direct_get_required_mask,
 	.dma_supported		= dma_direct_supported,
 	.mapping_error		= dma_direct_mapping_error,
 	.cache_sync		= arch_dma_cache_sync,
-- 
cgit v1.2.3


From b4ebe6063204da58e48600b810a97c29ae9e5d12 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 20 Sep 2018 14:04:08 +0200
Subject: dma-direct: implement complete bus_dma_mask handling

Instead of rejecting devices with a too small bus_dma_mask we can handle
by taking the bus dma_mask into account for allocations and bounce
buffering decisions.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/dma-direct.h |  3 ++-
 kernel/dma/direct.c        | 21 +++++++++++----------
 2 files changed, 13 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index b79496d8c75b..fbca184ff5a0 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -27,7 +27,8 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size)
 	if (!dev->dma_mask)
 		return false;
 
-	return addr + size - 1 <= *dev->dma_mask;
+	return addr + size - 1 <=
+		min_not_zero(*dev->dma_mask, dev->bus_dma_mask);
 }
 #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */
 
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index e78548397a92..60c433b880e0 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -44,10 +44,11 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
 			return false;
 		}
 
-		if (*dev->dma_mask >= DMA_BIT_MASK(32)) {
+		if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) {
 			dev_err(dev,
-				"%s: overflow %pad+%zu of device mask %llx\n",
-				caller, &dma_addr, size, *dev->dma_mask);
+				"%s: overflow %pad+%zu of device mask %llx bus mask %llx\n",
+				caller, &dma_addr, size,
+				*dev->dma_mask, dev->bus_dma_mask);
 		}
 		return false;
 	}
@@ -66,12 +67,18 @@ u64 dma_direct_get_required_mask(struct device *dev)
 {
 	u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT);
 
+	if (dev->bus_dma_mask && dev->bus_dma_mask < max_dma)
+		max_dma = dev->bus_dma_mask;
+
 	return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
 }
 
 static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
 		u64 *phys_mask)
 {
+	if (dev->bus_dma_mask && dev->bus_dma_mask < dma_mask)
+		dma_mask = dev->bus_dma_mask;
+
 	if (force_dma_unencrypted())
 		*phys_mask = __dma_to_phys(dev, dma_mask);
 	else
@@ -88,7 +95,7 @@ static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
 static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
 {
 	return phys_to_dma_direct(dev, phys) + size - 1 <=
-			dev->coherent_dma_mask;
+			min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask);
 }
 
 void *dma_direct_alloc_pages(struct device *dev, size_t size,
@@ -292,12 +299,6 @@ int dma_direct_supported(struct device *dev, u64 mask)
 	if (mask < phys_to_dma(dev, DMA_BIT_MASK(32)))
 		return 0;
 #endif
-	/*
-	 * Upstream PCI/PCIe bridges or SoC interconnects may not carry
-	 * as many DMA address bits as the device itself supports.
-	 */
-	if (dev->bus_dma_mask && mask > dev->bus_dma_mask)
-		return 0;
 	return 1;
 }
 
-- 
cgit v1.2.3


From 4d8fcf216c90bc25e34ae2200aa8985ee3158898 Mon Sep 17 00:00:00 2001
From: Alaa Hleihel <alaa@mellanox.com>
Date: Wed, 5 Sep 2018 11:43:23 +0300
Subject: net/mlx5e: Avoid unbounded peer devices when unpairing TC hairpin
 rules

If the peer device was already unbound, then do not attempt to modify
it's resources, otherwise we will crash on dereferencing non-existing
device.

Fixes: 5c65c564c962 ("net/mlx5e: Support offloading TC NIC hairpin flows")
Signed-off-by: Alaa Hleihel <alaa@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |  1 +
 drivers/net/ethernet/mellanox/mlx5/core/en/fs.h    |  2 +
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    | 62 +++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/transobj.c |  5 +-
 include/linux/mlx5/transobj.h                      |  2 +
 6 files changed, 71 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index db2cfcd21d43..0f189f873859 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -54,6 +54,7 @@
 #include "en_stats.h"
 #include "en/fs.h"
 
+extern const struct net_device_ops mlx5e_netdev_ops;
 struct page_pool;
 
 #define MLX5E_METADATA_ETHER_TYPE (0x8CE4)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index bbf69e859b78..1431232c9a09 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -16,6 +16,8 @@ struct mlx5e_tc_table {
 
 	DECLARE_HASHTABLE(mod_hdr_tbl, 8);
 	DECLARE_HASHTABLE(hairpin_tbl, 8);
+
+	struct notifier_block     netdevice_nb;
 };
 
 struct mlx5e_flow_table {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 54118b77dc1f..f291d1bf1558 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4315,7 +4315,7 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	}
 }
 
-static const struct net_device_ops mlx5e_netdev_ops = {
+const struct net_device_ops mlx5e_netdev_ops = {
 	.ndo_open                = mlx5e_open,
 	.ndo_stop                = mlx5e_close,
 	.ndo_start_xmit          = mlx5e_xmit,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 9fed54017659..52e05f3ece50 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2946,14 +2946,71 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv,
 	return 0;
 }
 
+static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
+					      struct mlx5e_priv *peer_priv)
+{
+	struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
+	struct mlx5e_hairpin_entry *hpe;
+	u16 peer_vhca_id;
+	int bkt;
+
+	if (!same_hw_devs(priv, peer_priv))
+		return;
+
+	peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
+
+	hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) {
+		if (hpe->peer_vhca_id == peer_vhca_id)
+			hpe->hp->pair->peer_gone = true;
+	}
+}
+
+static int mlx5e_tc_netdev_event(struct notifier_block *this,
+				 unsigned long event, void *ptr)
+{
+	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
+	struct mlx5e_flow_steering *fs;
+	struct mlx5e_priv *peer_priv;
+	struct mlx5e_tc_table *tc;
+	struct mlx5e_priv *priv;
+
+	if (ndev->netdev_ops != &mlx5e_netdev_ops ||
+	    event != NETDEV_UNREGISTER ||
+	    ndev->reg_state == NETREG_REGISTERED)
+		return NOTIFY_DONE;
+
+	tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
+	fs = container_of(tc, struct mlx5e_flow_steering, tc);
+	priv = container_of(fs, struct mlx5e_priv, fs);
+	peer_priv = netdev_priv(ndev);
+	if (priv == peer_priv ||
+	    !(priv->netdev->features & NETIF_F_HW_TC))
+		return NOTIFY_DONE;
+
+	mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
+
+	return NOTIFY_DONE;
+}
+
 int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 {
 	struct mlx5e_tc_table *tc = &priv->fs.tc;
+	int err;
 
 	hash_init(tc->mod_hdr_tbl);
 	hash_init(tc->hairpin_tbl);
 
-	return rhashtable_init(&tc->ht, &tc_ht_params);
+	err = rhashtable_init(&tc->ht, &tc_ht_params);
+	if (err)
+		return err;
+
+	tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
+	if (register_netdevice_notifier(&tc->netdevice_nb)) {
+		tc->netdevice_nb.notifier_call = NULL;
+		mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
+	}
+
+	return err;
 }
 
 static void _mlx5e_tc_del_flow(void *ptr, void *arg)
@@ -2969,6 +3026,9 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
 {
 	struct mlx5e_tc_table *tc = &priv->fs.tc;
 
+	if (tc->netdevice_nb.notifier_call)
+		unregister_netdevice_notifier(&tc->netdevice_nb);
+
 	rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
 
 	if (!IS_ERR_OR_NULL(tc->t)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
index d2f76070ea7c..a1ee9a8a769e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -475,7 +475,8 @@ static void mlx5_hairpin_destroy_queues(struct mlx5_hairpin *hp)
 
 	for (i = 0; i < hp->num_channels; i++) {
 		mlx5_core_destroy_rq(hp->func_mdev, hp->rqn[i]);
-		mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]);
+		if (!hp->peer_gone)
+			mlx5_core_destroy_sq(hp->peer_mdev, hp->sqn[i]);
 	}
 }
 
@@ -567,6 +568,8 @@ static void mlx5_hairpin_unpair_queues(struct mlx5_hairpin *hp)
 				       MLX5_RQC_STATE_RST, 0, 0);
 
 	/* unset peer SQs */
+	if (hp->peer_gone)
+		return;
 	for (i = 0; i < hp->num_channels; i++)
 		mlx5_hairpin_modify_sq(hp->peer_mdev, hp->sqn[i], MLX5_SQC_STATE_RDY,
 				       MLX5_SQC_STATE_RST, 0, 0);
diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h
index 83a33a1873a6..7f5ca2cd3a32 100644
--- a/include/linux/mlx5/transobj.h
+++ b/include/linux/mlx5/transobj.h
@@ -90,6 +90,8 @@ struct mlx5_hairpin {
 
 	u32 *rqn;
 	u32 *sqn;
+
+	bool peer_gone;
 };
 
 struct mlx5_hairpin *
-- 
cgit v1.2.3


From 59c9d35ea9cd73c3a55642ec9a0097770baccb93 Mon Sep 17 00:00:00 2001
From: Alaa Hleihel <alaa@mellanox.com>
Date: Wed, 5 Sep 2018 17:06:37 +0300
Subject: net/mlx5: Cache the system image guid

The system image guid is a read-only field which is used by the TC
offloads code to determine if two mlx5 devices belong to the same
ASIC while adding flows.

Read this once and save it on the core device rather than querying each
time an offloaded flow is added.

Signed-off-by: Alaa Hleihel <alaa@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/vport.c | 9 +++++++++
 include/linux/mlx5/driver.h                     | 1 +
 include/linux/mlx5/vport.h                      | 2 ++
 4 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 9fed54017659..82723a0e509a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2040,8 +2040,8 @@ static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
 	fmdev = priv->mdev;
 	pmdev = peer_priv->mdev;
 
-	mlx5_query_nic_vport_system_image_guid(fmdev, &fsystem_guid);
-	mlx5_query_nic_vport_system_image_guid(pmdev, &psystem_guid);
+	fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
+	psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
 
 	return (fsystem_guid == psystem_guid);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index b02af317c125..cfbea66b4879 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -1201,3 +1201,12 @@ int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev)
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport);
+
+u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
+{
+	if (!mdev->sys_image_guid)
+		mlx5_query_nic_vport_system_image_guid(mdev, &mdev->sys_image_guid);
+
+	return mdev->sys_image_guid;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index ed73b51f6697..26a92462f4ce 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -838,6 +838,7 @@ struct mlx5_core_dev {
 		u32 fpga[MLX5_ST_SZ_DW(fpga_cap)];
 		u32 qcam[MLX5_ST_SZ_DW(qcam_reg)];
 	} caps;
+	u64			sys_image_guid;
 	phys_addr_t		iseg_base;
 	struct mlx5_init_seg __iomem *iseg;
 	enum mlx5_device_state	state;
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 7e7c6dfcfb09..9c694808c212 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -121,4 +121,6 @@ int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status);
 int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev,
 				       struct mlx5_core_dev *port_mdev);
 int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev);
+
+u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev);
 #endif /* __MLX5_VPORT_H__ */
-- 
cgit v1.2.3


From 783f4a4408e1251d17f333ad56abac24dde988b9 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 27 Sep 2018 16:58:54 -0700
Subject: nvme: call nvme_complete_rq when nvmf_check_ready fails for mpath I/O

When an io is rejected by nvmf_check_ready() due to validation of the
controller state, the nvmf_fail_nonready_command() will normally return
BLK_STS_RESOURCE to requeue and retry.  However, if the controller is
dying or the I/O is marked for NVMe multipath, the I/O is failed so that
the controller can terminate or so that the io can be issued on a
different path.  Unfortunately, as this reject point is before the
transport has accepted the command, blk-mq ends up completing the I/O
and never calls nvme_complete_rq(), which is where multipath may preserve
or re-route the I/O. The end result is, the device user ends up seeing an
EIO error.

Example: single path connectivity, controller is under load, and a reset
is induced.  An I/O is received:

  a) while the reset state has been set but the queues have yet to be
     stopped; or
  b) after queues are started (at end of reset) but before the reconnect
     has completed.

The I/O finishes with an EIO status.

This patch makes the following changes:

  - Adds the HOST_PATH_ERROR pathing status from TP4028
  - Modifies the reject point such that it appears to queue successfully,
    but actually completes the io with the new pathing status and calls
    nvme_complete_rq().
  - nvme_complete_rq() recognizes the new status, avoids resetting the
    controller (likely was already done in order to get this new status),
    and calls the multipather to clear the current path that errored.
    This allows the next command (retry or new command) to select a new
    path if there is one.

Signed-off-by: James Smart <jsmart2021@gmail.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fabrics.c   | 7 +++++--
 drivers/nvme/host/multipath.c | 7 +++++++
 include/linux/nvme.h          | 1 +
 3 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 206d63cb1afc..bcd09d3a44da 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -552,8 +552,11 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
 	    ctrl->state != NVME_CTRL_DEAD &&
 	    !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
 		return BLK_STS_RESOURCE;
-	nvme_req(rq)->status = NVME_SC_ABORT_REQ;
-	return BLK_STS_IOERR;
+
+	nvme_req(rq)->status = NVME_SC_HOST_PATH_ERROR;
+	blk_mq_start_request(rq);
+	nvme_complete_rq(rq);
+	return BLK_STS_OK;
 }
 EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command);
 
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index bfbc6d5b1d93..ac16093a7928 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -77,6 +77,13 @@ void nvme_failover_req(struct request *req)
 			queue_work(nvme_wq, &ns->ctrl->ana_work);
 		}
 		break;
+	case NVME_SC_HOST_PATH_ERROR:
+		/*
+		 * Temporary transport disruption in talking to the controller.
+		 * Try to send on a new path.
+		 */
+		nvme_mpath_clear_current_path(ns);
+		break;
 	default:
 		/*
 		 * Reset the controller for any non-ANA error as we don't know
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 68e91ef5494c..818dbe9331be 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -1241,6 +1241,7 @@ enum {
 	NVME_SC_ANA_PERSISTENT_LOSS	= 0x301,
 	NVME_SC_ANA_INACCESSIBLE	= 0x302,
 	NVME_SC_ANA_TRANSITION		= 0x303,
+	NVME_SC_HOST_PATH_ERROR		= 0x370,
 
 	NVME_SC_DNR			= 0x4000,
 };
-- 
cgit v1.2.3


From 684bec1092b6991ff2a7751e8a763898576eb5c2 Mon Sep 17 00:00:00 2001
From: Daniel Drake <drake@endlessm.com>
Date: Mon, 1 Oct 2018 15:55:22 -0700
Subject: Input: i8042 - enable keyboard wakeups by default when s2idle is used

Previously, on typical consumer laptops, pressing a key on the keyboard
when the system is in suspend would cause it to wake up (default or
unconditional behaviour). This happens because the EC generates a SCI
interrupt in this scenario.

That is no longer true on modern laptops based on Intel WhiskeyLake,
including Acer Swift SF314-55G, Asus UX333FA, Asus UX433FN and Asus
UX533FD. We confirmed with Asus EC engineers that the "Modern Standby"
design has been modified so that the EC no longer generates a SCI
in this case; the keyboard controller itself should be used for wakeup.

In order to retain the standard behaviour of being able to use the
keyboard to wake up the system, enable serio wakeups by default on
platforms that are using s2idle.

Link: https://lkml.kernel.org/r/CAB4CAwfQ0mPMqCLp95TVjw4J0r5zKPWkSvvkK4cpZUGE--w8bQ@mail.gmail.com
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Daniel Drake <drake@endlessm.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/serio/i8042.c | 29 ++++++++++++++++++++---------
 include/linux/suspend.h     |  2 ++
 kernel/power/suspend.c      |  6 ++++++
 3 files changed, 28 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
index b8bc71569349..95a78ccbd847 100644
--- a/drivers/input/serio/i8042.c
+++ b/drivers/input/serio/i8042.c
@@ -1395,15 +1395,26 @@ static void __init i8042_register_ports(void)
 	for (i = 0; i < I8042_NUM_PORTS; i++) {
 		struct serio *serio = i8042_ports[i].serio;
 
-		if (serio) {
-			printk(KERN_INFO "serio: %s at %#lx,%#lx irq %d\n",
-				serio->name,
-				(unsigned long) I8042_DATA_REG,
-				(unsigned long) I8042_COMMAND_REG,
-				i8042_ports[i].irq);
-			serio_register_port(serio);
-			device_set_wakeup_capable(&serio->dev, true);
-		}
+		if (!serio)
+			continue;
+
+		printk(KERN_INFO "serio: %s at %#lx,%#lx irq %d\n",
+			serio->name,
+			(unsigned long) I8042_DATA_REG,
+			(unsigned long) I8042_COMMAND_REG,
+			i8042_ports[i].irq);
+		serio_register_port(serio);
+		device_set_wakeup_capable(&serio->dev, true);
+
+		/*
+		 * On platforms using suspend-to-idle, allow the keyboard to
+		 * wake up the system from sleep by enabling keyboard wakeups
+		 * by default.  This is consistent with keyboard wakeup
+		 * behavior on many platforms using suspend-to-RAM (ACPI S3)
+		 * by default.
+		 */
+		if (pm_suspend_via_s2idle() && i == I8042_KBD_PORT_NO)
+			device_set_wakeup_enable(&serio->dev, true);
 	}
 }
 
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 440b62f7502e..206b735f383f 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -251,6 +251,7 @@ static inline bool idle_should_enter_s2idle(void)
 	return unlikely(s2idle_state == S2IDLE_STATE_ENTER);
 }
 
+extern bool pm_suspend_via_s2idle(void);
 extern void __init pm_states_init(void);
 extern void s2idle_set_ops(const struct platform_s2idle_ops *ops);
 extern void s2idle_wake(void);
@@ -282,6 +283,7 @@ static inline void pm_set_suspend_via_firmware(void) {}
 static inline void pm_set_resume_via_firmware(void) {}
 static inline bool pm_suspend_via_firmware(void) { return false; }
 static inline bool pm_resume_via_firmware(void) { return false; }
+static inline bool pm_suspend_via_s2idle(void) { return false; }
 
 static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {}
 static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; }
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 4c10be0f4843..be3d0d477661 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -62,6 +62,12 @@ static DECLARE_WAIT_QUEUE_HEAD(s2idle_wait_head);
 enum s2idle_states __read_mostly s2idle_state;
 static DEFINE_SPINLOCK(s2idle_lock);
 
+bool pm_suspend_via_s2idle(void)
+{
+	return mem_sleep_current == PM_SUSPEND_TO_IDLE;
+}
+EXPORT_SYMBOL_GPL(pm_suspend_via_s2idle);
+
 void s2idle_set_ops(const struct platform_s2idle_ops *ops)
 {
 	lock_system_sleep();
-- 
cgit v1.2.3


From b596d895fa2957e136a6b398b97b06bd42b51291 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Fri, 3 Aug 2018 13:05:11 -0700
Subject: PM / devfreq: Make update_devfreq() public

Currently update_devfreq() is only visible to devfreq governors outside
of devfreq.c. Make it public to allow drivers that adjust devfreq policies
to cause a re-evaluation of the frequency after a policy change.

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/governor.h | 3 ---
 include/linux/devfreq.h    | 8 ++++++++
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h
index b81700244ce3..f53339ca610f 100644
--- a/drivers/devfreq/governor.h
+++ b/drivers/devfreq/governor.h
@@ -57,9 +57,6 @@ struct devfreq_governor {
 				unsigned int event, void *data);
 };
 
-/* Caution: devfreq->lock must be locked before calling update_devfreq */
-extern int update_devfreq(struct devfreq *devfreq);
-
 extern void devfreq_monitor_start(struct devfreq *devfreq);
 extern void devfreq_monitor_stop(struct devfreq *devfreq);
 extern void devfreq_monitor_suspend(struct devfreq *devfreq);
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 3aae5b3af87c..e4963b0f45da 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -198,6 +198,14 @@ extern void devm_devfreq_remove_device(struct device *dev,
 extern int devfreq_suspend_device(struct devfreq *devfreq);
 extern int devfreq_resume_device(struct devfreq *devfreq);
 
+/**
+ * update_devfreq() - Reevaluate the device and configure frequency
+ * @devfreq:	the devfreq device
+ *
+ * Note: devfreq->lock must be held
+ */
+extern int update_devfreq(struct devfreq *devfreq);
+
 /* Helper functions for devfreq user device driver with OPP. */
 extern struct dev_pm_opp *devfreq_recommended_opp(struct device *dev,
 					   unsigned long *freq, u32 flags);
-- 
cgit v1.2.3


From b31cdffa2329fe330cd304ca26c250dd1520fb0a Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 29 Sep 2018 23:04:09 +0200
Subject: net: phy: Move linkmode helpers to somewhere public

phylink has some useful helpers to working with linkmode bitmaps.
Move them to there own header so other code can use them.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phylink.c | 27 -------------------
 include/linux/linkmode.h  | 67 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mii.h       |  1 +
 include/linux/phy.h       |  1 +
 4 files changed, 69 insertions(+), 27 deletions(-)
 create mode 100644 include/linux/linkmode.h

(limited to 'include/linux')

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 1d01e0c625a5..b6993af5c9e4 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -68,33 +68,6 @@ struct phylink {
 	struct sfp_bus *sfp_bus;
 };
 
-static inline void linkmode_zero(unsigned long *dst)
-{
-	bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS);
-}
-
-static inline void linkmode_copy(unsigned long *dst, const unsigned long *src)
-{
-	bitmap_copy(dst, src, __ETHTOOL_LINK_MODE_MASK_NBITS);
-}
-
-static inline void linkmode_and(unsigned long *dst, const unsigned long *a,
-				const unsigned long *b)
-{
-	bitmap_and(dst, a, b, __ETHTOOL_LINK_MODE_MASK_NBITS);
-}
-
-static inline void linkmode_or(unsigned long *dst, const unsigned long *a,
-				const unsigned long *b)
-{
-	bitmap_or(dst, a, b, __ETHTOOL_LINK_MODE_MASK_NBITS);
-}
-
-static inline bool linkmode_empty(const unsigned long *src)
-{
-	return bitmap_empty(src, __ETHTOOL_LINK_MODE_MASK_NBITS);
-}
-
 /**
  * phylink_set_port_modes() - set the port type modes in the ethtool mask
  * @mask: ethtool link mode mask
diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h
new file mode 100644
index 000000000000..014fb86c7114
--- /dev/null
+++ b/include/linux/linkmode.h
@@ -0,0 +1,67 @@
+#ifndef __LINKMODE_H
+#define __LINKMODE_H
+
+#include <linux/bitmap.h>
+#include <linux/ethtool.h>
+#include <uapi/linux/ethtool.h>
+
+static inline void linkmode_zero(unsigned long *dst)
+{
+	bitmap_zero(dst, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline void linkmode_copy(unsigned long *dst, const unsigned long *src)
+{
+	bitmap_copy(dst, src, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline void linkmode_and(unsigned long *dst, const unsigned long *a,
+				const unsigned long *b)
+{
+	bitmap_and(dst, a, b, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline void linkmode_or(unsigned long *dst, const unsigned long *a,
+				const unsigned long *b)
+{
+	bitmap_or(dst, a, b, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline bool linkmode_empty(const unsigned long *src)
+{
+	return bitmap_empty(src, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline int linkmode_andnot(unsigned long *dst, const unsigned long *src1,
+				  const unsigned long *src2)
+{
+	return bitmap_andnot(dst, src1, src2,  __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static inline void linkmode_set_bit(int nr, volatile unsigned long *addr)
+{
+	__set_bit(nr, addr);
+}
+
+static inline void linkmode_clear_bit(int nr, volatile unsigned long *addr)
+{
+	__clear_bit(nr, addr);
+}
+
+static inline void linkmode_change_bit(int nr, volatile unsigned long *addr)
+{
+	__change_bit(nr, addr);
+}
+
+static inline int linkmode_test_bit(int nr, volatile unsigned long *addr)
+{
+	return test_bit(nr, addr);
+}
+
+static inline int linkmode_equal(const unsigned long *src1,
+				 const unsigned long *src2)
+{
+	return bitmap_equal(src1, src2, __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+#endif /* __LINKMODE_H */
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 55000ee5c6ad..567047ef0309 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -10,6 +10,7 @@
 
 
 #include <linux/if.h>
+#include <linux/linkmode.h>
 #include <uapi/linux/mii.h>
 
 struct ethtool_cmd;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 192a1fa0c73b..d24cc46748e2 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -19,6 +19,7 @@
 #include <linux/compiler.h>
 #include <linux/spinlock.h>
 #include <linux/ethtool.h>
+#include <linux/linkmode.h>
 #include <linux/mdio.h>
 #include <linux/mii.h>
 #include <linux/module.h>
-- 
cgit v1.2.3


From ab2a605fa621ecf4ec26603a237822f7772cfa28 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 29 Sep 2018 23:04:10 +0200
Subject: net: phy: Add phydev_warn()

Not all new style LINK_MODE bits can be converted into old style
SUPPORTED bits. We need to warn when such a conversion is attempted.
Add a helper for this.

Convert all pr_warn() calls to phydev_warn() where possible.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/at803x.c     |  2 +-
 drivers/net/phy/dp83640.c    |  7 ++++---
 drivers/net/phy/marvell.c    |  2 +-
 drivers/net/phy/marvell10g.c |  6 +++---
 drivers/net/phy/microchip.c  | 33 +++++++++++++++++----------------
 include/linux/phy.h          |  3 +++
 6 files changed, 29 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 411cf1072bae..e74a047a846e 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -357,7 +357,7 @@ static int at803x_aneg_done(struct phy_device *phydev)
 
 	/* check if the SGMII link is OK. */
 	if (!(phy_read(phydev, AT803X_PSSR) & AT803X_PSSR_MR_AN_COMPLETE)) {
-		pr_warn("803x_aneg_done: SGMII link is not ok\n");
+		phydev_warn(phydev, "803x_aneg_done: SGMII link is not ok\n");
 		aneg_done = 0;
 	}
 	/* switch back to copper page */
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index 29aa8d772b0c..74cf356d8171 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -553,16 +553,17 @@ static void enable_status_frames(struct phy_device *phydev, bool on)
 	mutex_unlock(&clock->extreg_lock);
 
 	if (!phydev->attached_dev) {
-		pr_warn("expected to find an attached netdevice\n");
+		phydev_warn(phydev,
+			    "expected to find an attached netdevice\n");
 		return;
 	}
 
 	if (on) {
 		if (dev_mc_add(phydev->attached_dev, status_frame_dst))
-			pr_warn("failed to add mc address\n");
+			phydev_warn(phydev, "failed to add mc address\n");
 	} else {
 		if (dev_mc_del(phydev->attached_dev, status_frame_dst))
-			pr_warn("failed to delete mc address\n");
+			phydev_warn(phydev, "failed to delete mc address\n");
 	}
 }
 
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 24fc4a73c300..8872a430d74a 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -638,7 +638,7 @@ static void marvell_config_led(struct phy_device *phydev)
 	err = phy_write_paged(phydev, MII_MARVELL_LED_PAGE, MII_PHY_LED_CTRL,
 			      def_config);
 	if (err < 0)
-		pr_warn("Fail to config marvell phy LED.\n");
+		phydev_warn(phydev, "Fail to config marvell phy LED.\n");
 }
 
 static int marvell_config_init(struct phy_device *phydev)
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index f77a2d9e7f9d..f214834819dd 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -337,9 +337,9 @@ static int mv3310_config_init(struct phy_device *phydev)
 	}
 
 	if (!ethtool_convert_link_mode_to_legacy_u32(&mask, supported))
-		dev_warn(&phydev->mdio.dev,
-			 "PHY supports (%*pb) more modes than phylib supports, some modes not supported.\n",
-			 __ETHTOOL_LINK_MODE_MASK_NBITS, supported);
+		phydev_warn(phydev,
+			    "PHY supports (%*pb) more modes than phylib supports, some modes not supported.\n",
+			    __ETHTOOL_LINK_MODE_MASK_NBITS, supported);
 
 	phydev->supported &= mask;
 	phydev->advertising &= phydev->supported;
diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c
index 2d67937866a3..04b12e34da58 100644
--- a/drivers/net/phy/microchip.c
+++ b/drivers/net/phy/microchip.c
@@ -88,7 +88,7 @@ static int lan88xx_TR_reg_set(struct phy_device *phydev, u16 regaddr,
 	/* Save current page */
 	save_page = phy_save_page(phydev);
 	if (save_page < 0) {
-		pr_warn("Failed to get current page\n");
+		phydev_warn(phydev, "Failed to get current page\n");
 		goto err;
 	}
 
@@ -98,14 +98,14 @@ static int lan88xx_TR_reg_set(struct phy_device *phydev, u16 regaddr,
 	ret = __phy_write(phydev, LAN88XX_EXT_PAGE_TR_LOW_DATA,
 			  (data & 0xFFFF));
 	if (ret < 0) {
-		pr_warn("Failed to write TR low data\n");
+		phydev_warn(phydev, "Failed to write TR low data\n");
 		goto err;
 	}
 
 	ret = __phy_write(phydev, LAN88XX_EXT_PAGE_TR_HIGH_DATA,
 			  (data & 0x00FF0000) >> 16);
 	if (ret < 0) {
-		pr_warn("Failed to write TR high data\n");
+		phydev_warn(phydev, "Failed to write TR high data\n");
 		goto err;
 	}
 
@@ -115,14 +115,15 @@ static int lan88xx_TR_reg_set(struct phy_device *phydev, u16 regaddr,
 
 	ret = __phy_write(phydev, LAN88XX_EXT_PAGE_TR_CR, buf);
 	if (ret < 0) {
-		pr_warn("Failed to write data in reg\n");
+		phydev_warn(phydev, "Failed to write data in reg\n");
 		goto err;
 	}
 
 	usleep_range(1000, 2000);/* Wait for Data to be written */
 	val = __phy_read(phydev, LAN88XX_EXT_PAGE_TR_CR);
 	if (!(val & 0x8000))
-		pr_warn("TR Register[0x%X] configuration failed\n", regaddr);
+		phydev_warn(phydev, "TR Register[0x%X] configuration failed\n",
+			    regaddr);
 err:
 	return phy_restore_page(phydev, save_page, ret);
 }
@@ -137,7 +138,7 @@ static void lan88xx_config_TR_regs(struct phy_device *phydev)
 	 */
 	err = lan88xx_TR_reg_set(phydev, 0x0F82, 0x12B00A);
 	if (err < 0)
-		pr_warn("Failed to Set Register[0x0F82]\n");
+		phydev_warn(phydev, "Failed to Set Register[0x0F82]\n");
 
 	/* Get access to Channel b'10, Node b'1101, Register 0x06.
 	 * Write 24-bit value 0xD2C46F to register. Setting SSTrKf1000Slv,
@@ -145,7 +146,7 @@ static void lan88xx_config_TR_regs(struct phy_device *phydev)
 	 */
 	err = lan88xx_TR_reg_set(phydev, 0x168C, 0xD2C46F);
 	if (err < 0)
-		pr_warn("Failed to Set Register[0x168C]\n");
+		phydev_warn(phydev, "Failed to Set Register[0x168C]\n");
 
 	/* Get access to Channel b'10, Node b'1111, Register 0x11.
 	 * Write 24-bit value 0x620 to register. Setting rem_upd_done_thresh
@@ -153,7 +154,7 @@ static void lan88xx_config_TR_regs(struct phy_device *phydev)
 	 */
 	err = lan88xx_TR_reg_set(phydev, 0x17A2, 0x620);
 	if (err < 0)
-		pr_warn("Failed to Set Register[0x17A2]\n");
+		phydev_warn(phydev, "Failed to Set Register[0x17A2]\n");
 
 	/* Get access to Channel b'10, Node b'1101, Register 0x10.
 	 * Write 24-bit value 0xEEFFDD to register. Setting
@@ -162,7 +163,7 @@ static void lan88xx_config_TR_regs(struct phy_device *phydev)
 	 */
 	err = lan88xx_TR_reg_set(phydev, 0x16A0, 0xEEFFDD);
 	if (err < 0)
-		pr_warn("Failed to Set Register[0x16A0]\n");
+		phydev_warn(phydev, "Failed to Set Register[0x16A0]\n");
 
 	/* Get access to Channel b'10, Node b'1101, Register 0x13.
 	 * Write 24-bit value 0x071448 to register. Setting
@@ -170,7 +171,7 @@ static void lan88xx_config_TR_regs(struct phy_device *phydev)
 	 */
 	err = lan88xx_TR_reg_set(phydev, 0x16A6, 0x071448);
 	if (err < 0)
-		pr_warn("Failed to Set Register[0x16A6]\n");
+		phydev_warn(phydev, "Failed to Set Register[0x16A6]\n");
 
 	/* Get access to Channel b'10, Node b'1101, Register 0x12.
 	 * Write 24-bit value 0x13132F to register. Setting
@@ -178,7 +179,7 @@ static void lan88xx_config_TR_regs(struct phy_device *phydev)
 	 */
 	err = lan88xx_TR_reg_set(phydev, 0x16A4, 0x13132F);
 	if (err < 0)
-		pr_warn("Failed to Set Register[0x16A4]\n");
+		phydev_warn(phydev, "Failed to Set Register[0x16A4]\n");
 
 	/* Get access to Channel b'10, Node b'1101, Register 0x14.
 	 * Write 24-bit value 0x0 to register. Setting eee_3level_delay,
@@ -186,7 +187,7 @@ static void lan88xx_config_TR_regs(struct phy_device *phydev)
 	 */
 	err = lan88xx_TR_reg_set(phydev, 0x16A8, 0x0);
 	if (err < 0)
-		pr_warn("Failed to Set Register[0x16A8]\n");
+		phydev_warn(phydev, "Failed to Set Register[0x16A8]\n");
 
 	/* Get access to Channel b'01, Node b'1111, Register 0x34.
 	 * Write 24-bit value 0x91B06C to register. Setting
@@ -195,7 +196,7 @@ static void lan88xx_config_TR_regs(struct phy_device *phydev)
 	 */
 	err = lan88xx_TR_reg_set(phydev, 0x0FE8, 0x91B06C);
 	if (err < 0)
-		pr_warn("Failed to Set Register[0x0FE8]\n");
+		phydev_warn(phydev, "Failed to Set Register[0x0FE8]\n");
 
 	/* Get access to Channel b'01, Node b'1111, Register 0x3E.
 	 * Write 24-bit value 0xC0A028 to register. Setting
@@ -204,7 +205,7 @@ static void lan88xx_config_TR_regs(struct phy_device *phydev)
 	 */
 	err = lan88xx_TR_reg_set(phydev, 0x0FFC, 0xC0A028);
 	if (err < 0)
-		pr_warn("Failed to Set Register[0x0FFC]\n");
+		phydev_warn(phydev, "Failed to Set Register[0x0FFC]\n");
 
 	/* Get access to Channel b'01, Node b'1111, Register 0x35.
 	 * Write 24-bit value 0x041600 to register. Setting
@@ -213,14 +214,14 @@ static void lan88xx_config_TR_regs(struct phy_device *phydev)
 	 */
 	err = lan88xx_TR_reg_set(phydev, 0x0FEA, 0x041600);
 	if (err < 0)
-		pr_warn("Failed to Set Register[0x0FEA]\n");
+		phydev_warn(phydev, "Failed to Set Register[0x0FEA]\n");
 
 	/* Get access to Channel b'10, Node b'1101, Register 0x03.
 	 * Write 24-bit value 0x000004 to register. Setting TrFreeze bits.
 	 */
 	err = lan88xx_TR_reg_set(phydev, 0x1686, 0x000004);
 	if (err < 0)
-		pr_warn("Failed to Set Register[0x1686]\n");
+		phydev_warn(phydev, "Failed to Set Register[0x1686]\n");
 }
 
 static int lan88xx_probe(struct phy_device *phydev)
diff --git a/include/linux/phy.h b/include/linux/phy.h
index d24cc46748e2..0ab9f89773fd 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -968,6 +968,9 @@ static inline void phy_device_reset(struct phy_device *phydev, int value)
 #define phydev_err(_phydev, format, args...)	\
 	dev_err(&_phydev->mdio.dev, format, ##args)
 
+#define phydev_warn(_phydev, format, args...)	\
+	dev_warn(&_phydev->mdio.dev, format, ##args)
+
 #define phydev_dbg(_phydev, format, args...)	\
 	dev_dbg(&_phydev->mdio.dev, format, ##args)
 
-- 
cgit v1.2.3


From c4fabb8b3c0d724eb93dabaf346b0dd8a8be7118 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 29 Sep 2018 23:04:11 +0200
Subject: net: phy: Add phydev_info()

Add phydev_info() and make use of it within the phy drivers and core
code.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/dp83640.c    | 11 ++++++-----
 drivers/net/phy/phy_device.c |  4 ++--
 include/linux/phy.h          |  3 +++
 3 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index 74cf356d8171..edd4d44a386d 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -687,9 +687,9 @@ static void recalibrate(struct dp83640_clock *clock)
 	 * read out and correct offsets
 	 */
 	val = ext_read(master, PAGE4, PTP_STS);
-	pr_info("master PTP_STS  0x%04hx\n", val);
+	phydev_info(master, "master PTP_STS  0x%04hx\n", val);
 	val = ext_read(master, PAGE4, PTP_ESTS);
-	pr_info("master PTP_ESTS 0x%04hx\n", val);
+	phydev_info(master, "master PTP_ESTS 0x%04hx\n", val);
 	event_ts.ns_lo  = ext_read(master, PAGE4, PTP_EDATA);
 	event_ts.ns_hi  = ext_read(master, PAGE4, PTP_EDATA);
 	event_ts.sec_lo = ext_read(master, PAGE4, PTP_EDATA);
@@ -699,15 +699,16 @@ static void recalibrate(struct dp83640_clock *clock)
 	list_for_each(this, &clock->phylist) {
 		tmp = list_entry(this, struct dp83640_private, list);
 		val = ext_read(tmp->phydev, PAGE4, PTP_STS);
-		pr_info("slave  PTP_STS  0x%04hx\n", val);
+		phydev_info(tmp->phydev, "slave  PTP_STS  0x%04hx\n", val);
 		val = ext_read(tmp->phydev, PAGE4, PTP_ESTS);
-		pr_info("slave  PTP_ESTS 0x%04hx\n", val);
+		phydev_info(tmp->phydev, "slave  PTP_ESTS 0x%04hx\n", val);
 		event_ts.ns_lo  = ext_read(tmp->phydev, PAGE4, PTP_EDATA);
 		event_ts.ns_hi  = ext_read(tmp->phydev, PAGE4, PTP_EDATA);
 		event_ts.sec_lo = ext_read(tmp->phydev, PAGE4, PTP_EDATA);
 		event_ts.sec_hi = ext_read(tmp->phydev, PAGE4, PTP_EDATA);
 		diff = now - (s64) phy2txts(&event_ts);
-		pr_info("slave offset %lld nanoseconds\n", diff);
+		phydev_info(tmp->phydev, "slave offset %lld nanoseconds\n",
+			    diff);
 		diff += ADJTIME_FIX;
 		ts = ns_to_timespec64(diff);
 		tdr_write(0, tmp->phydev, &ts, PTP_STEP_CLK);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index ee676d75fe02..35102e17bbeb 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -920,13 +920,13 @@ void phy_attached_print(struct phy_device *phydev, const char *fmt, ...)
 
 
 	if (!fmt) {
-		dev_info(&phydev->mdio.dev, ATTACHED_FMT "\n",
+		phydev_info(phydev, ATTACHED_FMT "\n",
 			 drv_name, phydev_name(phydev),
 			 irq_str);
 	} else {
 		va_list ap;
 
-		dev_info(&phydev->mdio.dev, ATTACHED_FMT,
+		phydev_info(phydev, ATTACHED_FMT,
 			 drv_name, phydev_name(phydev),
 			 irq_str);
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 0ab9f89773fd..0f6e7bf5e9c5 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -968,6 +968,9 @@ static inline void phy_device_reset(struct phy_device *phydev, int value)
 #define phydev_err(_phydev, format, args...)	\
 	dev_err(&_phydev->mdio.dev, format, ##args)
 
+#define phydev_info(_phydev, format, args...)	\
+	dev_info(&_phydev->mdio.dev, format, ##args)
+
 #define phydev_warn(_phydev, format, args...)	\
 	dev_warn(&_phydev->mdio.dev, format, ##args)
 
-- 
cgit v1.2.3


From edc7ccbbcf32b97c7d26cd556f364eb4f22c4285 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 29 Sep 2018 23:04:12 +0200
Subject: net: phy: Add helper to convert MII ADV register to a linkmode

The phy_mii_ioctl can be used to write a value into the MII_ADVERTISE
register in the PHY. Since this changes the state of the PHY, we need
to make the same change to phydev->advertising. Add a helper which can
convert the register value to a linkmode.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mii.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mii.h b/include/linux/mii.h
index 567047ef0309..8c7da9473ad9 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -303,6 +303,37 @@ static inline u32 mii_lpa_to_ethtool_lpa_x(u32 lpa)
 	return result | mii_adv_to_ethtool_adv_x(lpa);
 }
 
+/**
+ * mii_adv_to_linkmode_adv_t
+ * @advertising:pointer to destination link mode.
+ * @adv: value of the MII_ADVERTISE register
+ *
+ * A small helper function that translates MII_ADVERTISE bits
+ * to linkmode advertisement settings.
+ */
+static inline void mii_adv_to_linkmode_adv_t(unsigned long *advertising,
+					     u32 adv)
+{
+	linkmode_zero(advertising);
+
+	if (adv & ADVERTISE_10HALF)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT,
+				 advertising);
+	if (adv & ADVERTISE_10FULL)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT,
+				 advertising);
+	if (adv & ADVERTISE_100HALF)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
+				 advertising);
+	if (adv & ADVERTISE_100FULL)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+				 advertising);
+	if (adv & ADVERTISE_PAUSE_CAP)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising);
+	if (adv & ADVERTISE_PAUSE_ASYM)
+		linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising);
+}
+
 /**
  * mii_advertise_flowctrl - get flow control advertisement flags
  * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both)
-- 
cgit v1.2.3


From 5f991f7bddc991ecc3c8a009ffd76fccff4661c7 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 29 Sep 2018 23:04:13 +0200
Subject: net: phy: Add helper for advertise to lcl value

Add a helper to convert the local advertising to an LCL capabilities,
which is then used to resolve pause flow control settings.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mt7530.c                              |  6 +-----
 drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c           |  5 +----
 drivers/net/ethernet/freescale/fman/mac.c             |  6 +-----
 drivers/net/ethernet/freescale/gianfar.c              |  7 +------
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c   |  6 +-----
 drivers/net/ethernet/mediatek/mtk_eth_soc.c           |  6 +-----
 drivers/net/ethernet/socionext/sni_ave.c              |  5 +----
 include/linux/mii.h                                   | 19 +++++++++++++++++++
 8 files changed, 26 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 62e486652e62..a5de9bffe5be 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -658,11 +658,7 @@ static void mt7530_adjust_link(struct dsa_switch *ds, int port,
 			if (phydev->asym_pause)
 				rmt_adv |= LPA_PAUSE_ASYM;
 
-			if (phydev->advertising & ADVERTISED_Pause)
-				lcl_adv |= ADVERTISE_PAUSE_CAP;
-			if (phydev->advertising & ADVERTISED_Asym_Pause)
-				lcl_adv |= ADVERTISE_PAUSE_ASYM;
-
+			lcl_adv = ethtool_adv_to_lcl_adv_t(phydev->advertising);
 			flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
 
 			if (flowctrl & FLOW_CTRL_TX)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 289129011b9f..a7e03e3ecc93 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -1495,10 +1495,7 @@ static void xgbe_phy_phydev_flowctrl(struct xgbe_prv_data *pdata)
 	if (!phy_data->phydev)
 		return;
 
-	if (phy_data->phydev->advertising & ADVERTISED_Pause)
-		lcl_adv |= ADVERTISE_PAUSE_CAP;
-	if (phy_data->phydev->advertising & ADVERTISED_Asym_Pause)
-		lcl_adv |= ADVERTISE_PAUSE_ASYM;
+	lcl_adv = ethtool_adv_to_lcl_adv_t(phy_data->phydev->advertising);
 
 	if (phy_data->phydev->pause) {
 		XGBE_SET_LP_ADV(lks, Pause);
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index a847b9c3b31a..d79e4e009d63 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -393,11 +393,7 @@ void fman_get_pause_cfg(struct mac_device *mac_dev, bool *rx_pause,
 	 */
 
 	/* get local capabilities */
-	lcl_adv = 0;
-	if (phy_dev->advertising & ADVERTISED_Pause)
-		lcl_adv |= ADVERTISE_PAUSE_CAP;
-	if (phy_dev->advertising & ADVERTISED_Asym_Pause)
-		lcl_adv |= ADVERTISE_PAUSE_ASYM;
+	lcl_adv = ethtool_adv_to_lcl_adv_t(phy_dev->advertising);
 
 	/* get link partner capabilities */
 	rmt_adv = 0;
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 0bd21a493016..3c8da1a18ba0 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -3656,12 +3656,7 @@ static u32 gfar_get_flowctrl_cfg(struct gfar_private *priv)
 		if (phydev->asym_pause)
 			rmt_adv |= LPA_PAUSE_ASYM;
 
-		lcl_adv = 0;
-		if (phydev->advertising & ADVERTISED_Pause)
-			lcl_adv |= ADVERTISE_PAUSE_CAP;
-		if (phydev->advertising & ADVERTISED_Asym_Pause)
-			lcl_adv |= ADVERTISE_PAUSE_ASYM;
-
+		lcl_adv = ethtool_adv_to_lcl_adv_t(phydev->advertising);
 		flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
 		if (flowctrl & FLOW_CTRL_TX)
 			val |= MACCFG1_TX_FLOW;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 7c8b686b1ce1..c17ceeefa453 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -5006,11 +5006,7 @@ int hclge_cfg_flowctrl(struct hclge_dev *hdev)
 	if (!phydev->link || !phydev->autoneg)
 		return 0;
 
-	if (phydev->advertising & ADVERTISED_Pause)
-		local_advertising = ADVERTISE_PAUSE_CAP;
-
-	if (phydev->advertising & ADVERTISED_Asym_Pause)
-		local_advertising |= ADVERTISE_PAUSE_ASYM;
+	local_advertising = ethtool_adv_to_lcl_adv_t(phydev->advertising);
 
 	if (phydev->pause)
 		remote_advertising = LPA_PAUSE_CAP;
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index cc1e9a96a43b..7dbfdac4067a 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -243,11 +243,7 @@ static void mtk_phy_link_adjust(struct net_device *dev)
 		if (dev->phydev->asym_pause)
 			rmt_adv |= LPA_PAUSE_ASYM;
 
-		if (dev->phydev->advertising & ADVERTISED_Pause)
-			lcl_adv |= ADVERTISE_PAUSE_CAP;
-		if (dev->phydev->advertising & ADVERTISED_Asym_Pause)
-			lcl_adv |= ADVERTISE_PAUSE_ASYM;
-
+		lcl_adv = ethtool_adv_to_lcl_adv_t(dev->phydev->advertising);
 		flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
 
 		if (flowctrl & FLOW_CTRL_TX)
diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index 2a156dcd4534..6732f5cbde08 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -1116,11 +1116,8 @@ static void ave_phy_adjust_link(struct net_device *ndev)
 			rmt_adv |= LPA_PAUSE_CAP;
 		if (phydev->asym_pause)
 			rmt_adv |= LPA_PAUSE_ASYM;
-		if (phydev->advertising & ADVERTISED_Pause)
-			lcl_adv |= ADVERTISE_PAUSE_CAP;
-		if (phydev->advertising & ADVERTISED_Asym_Pause)
-			lcl_adv |= ADVERTISE_PAUSE_ASYM;
 
+		lcl_adv = ethtool_adv_to_lcl_adv_t(phydev->advertising);
 		cap = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
 		if (cap & FLOW_CTRL_TX)
 			txcr |= AVE_TXCR_FLOCTR;
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 8c7da9473ad9..9ed49c8261d0 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -334,6 +334,25 @@ static inline void mii_adv_to_linkmode_adv_t(unsigned long *advertising,
 		linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising);
 }
 
+/**
+ * ethtool_adv_to_lcl_adv_t
+ * @advertising:pointer to ethtool advertising
+ *
+ * A small helper function that translates ethtool advertising to LVL
+ * pause capabilities.
+ */
+static inline u32 ethtool_adv_to_lcl_adv_t(u32 advertising)
+{
+	u32 lcl_adv = 0;
+
+	if (advertising & ADVERTISED_Pause)
+		lcl_adv |= ADVERTISE_PAUSE_CAP;
+	if (advertising & ADVERTISED_Asym_Pause)
+		lcl_adv |= ADVERTISE_PAUSE_ASYM;
+
+	return lcl_adv;
+}
+
 /**
  * mii_advertise_flowctrl - get flow control advertisement flags
  * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both)
-- 
cgit v1.2.3


From f954a04ea18ebfcba1cd2756eaee59eb4978a20e Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 29 Sep 2018 23:04:14 +0200
Subject: net: phy: Add limkmode equivalents to some of the MII ethtool helpers

Add helpers which take a linkmode rather than a u32 ethtool for
advertising settings.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mii.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mii.h b/include/linux/mii.h
index 9ed49c8261d0..2da85b02e1c0 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -132,6 +132,34 @@ static inline u32 ethtool_adv_to_mii_adv_t(u32 ethadv)
 	return result;
 }
 
+/**
+ * linkmode_adv_to_mii_adv_t
+ * @advertising: the linkmode advertisement settings
+ *
+ * A small helper function that translates linkmode advertisement
+ * settings to phy autonegotiation advertisements for the
+ * MII_ADVERTISE register.
+ */
+static inline u32 linkmode_adv_to_mii_adv_t(unsigned long *advertising)
+{
+	u32 result = 0;
+
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, advertising))
+		result |= ADVERTISE_10HALF;
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, advertising))
+		result |= ADVERTISE_10FULL;
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, advertising))
+		result |= ADVERTISE_100HALF;
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, advertising))
+		result |= ADVERTISE_100FULL;
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertising))
+		result |= ADVERTISE_PAUSE_CAP;
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertising))
+		result |= ADVERTISE_PAUSE_ASYM;
+
+	return result;
+}
+
 /**
  * mii_adv_to_ethtool_adv_t
  * @adv: value of the MII_ADVERTISE register
@@ -179,6 +207,28 @@ static inline u32 ethtool_adv_to_mii_ctrl1000_t(u32 ethadv)
 	return result;
 }
 
+/**
+ * linkmode_adv_to_mii_ctrl1000_t
+ * advertising: the linkmode advertisement settings
+ *
+ * A small helper function that translates linkmode advertisement
+ * settings to phy autonegotiation advertisements for the
+ * MII_CTRL1000 register when in 1000T mode.
+ */
+static inline u32 linkmode_adv_to_mii_ctrl1000_t(unsigned long *advertising)
+{
+	u32 result = 0;
+
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
+			      advertising))
+		result |= ADVERTISE_1000HALF;
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+			      advertising))
+		result |= ADVERTISE_1000FULL;
+
+	return result;
+}
+
 /**
  * mii_ctrl1000_to_ethtool_adv_t
  * @adv: value of the MII_CTRL1000 register
-- 
cgit v1.2.3


From 719655a149715f26fc4de904fe0aa83068bd5b9e Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 29 Sep 2018 23:04:16 +0200
Subject: net: phy: Replace phy driver features u32 with link_mode bitmap

This is one step in allowing phylib to make use of link_mode bitmaps,
instead of u32 for supported and advertised features. Convert the phy
drivers to use bitmaps to indicates the features they support.

Build bitmap equivalents of the u32 values at runtime, and have the
drivers point to the appropriate bitmap. These bitmaps are shared, and
we don't want a driver to modify them. So mark them __ro_after_init.

Within phylib, the features bitmap is currently turned back into a
u32. This will be removed once the whole of phylib, and the drivers
are converted to use bitmaps.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/pxa168_eth.c |   4 +-
 drivers/net/phy/aquantia.c                |  12 +--
 drivers/net/phy/bcm63xx.c                 |   9 +-
 drivers/net/phy/marvell.c                 |   2 +-
 drivers/net/phy/marvell10g.c              |  11 +-
 drivers/net/phy/microchip_t1.c            |   2 +-
 drivers/net/phy/phy_device.c              | 164 ++++++++++++++++++++++++++++--
 include/linux/linkmode.h                  |   9 ++
 include/linux/phy.h                       |  24 +++--
 9 files changed, 198 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index ff2fea0f8b75..0bd4351b2a49 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -988,8 +988,8 @@ static int pxa168_init_phy(struct net_device *dev)
 	cmd.base.phy_address = pep->phy_addr;
 	cmd.base.speed = pep->phy_speed;
 	cmd.base.duplex = pep->phy_duplex;
-	ethtool_convert_legacy_u32_to_link_mode(cmd.link_modes.advertising,
-						PHY_BASIC_FEATURES);
+	bitmap_copy(cmd.link_modes.advertising, PHY_BASIC_FEATURES,
+		    __ETHTOOL_LINK_MODE_MASK_NBITS);
 	cmd.base.autoneg = AUTONEG_ENABLE;
 
 	if (cmd.base.speed != 0)
diff --git a/drivers/net/phy/aquantia.c b/drivers/net/phy/aquantia.c
index 319edc9c8ec7..632472cab3bb 100644
--- a/drivers/net/phy/aquantia.c
+++ b/drivers/net/phy/aquantia.c
@@ -115,7 +115,7 @@ static struct phy_driver aquantia_driver[] = {
 	.phy_id		= PHY_ID_AQ1202,
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Aquantia AQ1202",
-	.features	= PHY_AQUANTIA_FEATURES,
+	.features	= PHY_10GBIT_FULL_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT,
 	.aneg_done	= genphy_c45_aneg_done,
 	.config_aneg    = aquantia_config_aneg,
@@ -127,7 +127,7 @@ static struct phy_driver aquantia_driver[] = {
 	.phy_id		= PHY_ID_AQ2104,
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Aquantia AQ2104",
-	.features	= PHY_AQUANTIA_FEATURES,
+	.features	= PHY_10GBIT_FULL_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT,
 	.aneg_done	= genphy_c45_aneg_done,
 	.config_aneg    = aquantia_config_aneg,
@@ -139,7 +139,7 @@ static struct phy_driver aquantia_driver[] = {
 	.phy_id		= PHY_ID_AQR105,
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Aquantia AQR105",
-	.features	= PHY_AQUANTIA_FEATURES,
+	.features	= PHY_10GBIT_FULL_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT,
 	.aneg_done	= genphy_c45_aneg_done,
 	.config_aneg    = aquantia_config_aneg,
@@ -151,7 +151,7 @@ static struct phy_driver aquantia_driver[] = {
 	.phy_id		= PHY_ID_AQR106,
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Aquantia AQR106",
-	.features	= PHY_AQUANTIA_FEATURES,
+	.features	= PHY_10GBIT_FULL_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT,
 	.aneg_done	= genphy_c45_aneg_done,
 	.config_aneg    = aquantia_config_aneg,
@@ -163,7 +163,7 @@ static struct phy_driver aquantia_driver[] = {
 	.phy_id		= PHY_ID_AQR107,
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Aquantia AQR107",
-	.features	= PHY_AQUANTIA_FEATURES,
+	.features	= PHY_10GBIT_FULL_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT,
 	.aneg_done	= genphy_c45_aneg_done,
 	.config_aneg    = aquantia_config_aneg,
@@ -175,7 +175,7 @@ static struct phy_driver aquantia_driver[] = {
 	.phy_id		= PHY_ID_AQR405,
 	.phy_id_mask	= 0xfffffff0,
 	.name		= "Aquantia AQR405",
-	.features	= PHY_AQUANTIA_FEATURES,
+	.features	= PHY_10GBIT_FULL_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT,
 	.aneg_done	= genphy_c45_aneg_done,
 	.config_aneg    = aquantia_config_aneg,
diff --git a/drivers/net/phy/bcm63xx.c b/drivers/net/phy/bcm63xx.c
index cf14613745c9..d95bffdec4c1 100644
--- a/drivers/net/phy/bcm63xx.c
+++ b/drivers/net/phy/bcm63xx.c
@@ -42,6 +42,9 @@ static int bcm63xx_config_init(struct phy_device *phydev)
 {
 	int reg, err;
 
+	/* ASYM_PAUSE bit is marked RO in datasheet, so don't cheat */
+	phydev->supported |= SUPPORTED_Pause;
+
 	reg = phy_read(phydev, MII_BCM63XX_IR);
 	if (reg < 0)
 		return reg;
@@ -65,8 +68,7 @@ static struct phy_driver bcm63xx_driver[] = {
 	.phy_id		= 0x00406000,
 	.phy_id_mask	= 0xfffffc00,
 	.name		= "Broadcom BCM63XX (1)",
-	/* ASYM_PAUSE bit is marked RO in datasheet, so don't cheat */
-	.features	= (PHY_BASIC_FEATURES | SUPPORTED_Pause),
+	.features	= PHY_BASIC_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT | PHY_IS_INTERNAL,
 	.config_init	= bcm63xx_config_init,
 	.ack_interrupt	= bcm_phy_ack_intr,
@@ -75,8 +77,7 @@ static struct phy_driver bcm63xx_driver[] = {
 	/* same phy as above, with just a different OUI */
 	.phy_id		= 0x002bdc00,
 	.phy_id_mask	= 0xfffffc00,
-	.name		= "Broadcom BCM63XX (2)",
-	.features	= (PHY_BASIC_FEATURES | SUPPORTED_Pause),
+	.features	= PHY_BASIC_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT | PHY_IS_INTERNAL,
 	.config_init	= bcm63xx_config_init,
 	.ack_interrupt	= bcm_phy_ack_intr,
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 8872a430d74a..cbec296107bd 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -2201,7 +2201,7 @@ static struct phy_driver marvell_drivers[] = {
 		.phy_id = MARVELL_PHY_ID_88E1510,
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1510",
-		.features = PHY_GBIT_FEATURES | SUPPORTED_FIBRE,
+		.features = PHY_GBIT_FIBRE_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.probe = &m88e1510_probe,
 		.config_init = &m88e1510_config_init,
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index f214834819dd..1c9d039eec63 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -535,16 +535,7 @@ static struct phy_driver mv3310_drivers[] = {
 		.phy_id		= 0x002b09aa,
 		.phy_id_mask	= MARVELL_PHY_ID_MASK,
 		.name		= "mv88x3310",
-		.features	= SUPPORTED_10baseT_Full |
-				  SUPPORTED_10baseT_Half |
-				  SUPPORTED_100baseT_Full |
-				  SUPPORTED_100baseT_Half |
-				  SUPPORTED_1000baseT_Full |
-				  SUPPORTED_Autoneg |
-				  SUPPORTED_TP |
-				  SUPPORTED_FIBRE |
-				  SUPPORTED_10000baseT_Full |
-				  SUPPORTED_Backplane,
+		.features	= PHY_10GBIT_FEATURES,
 		.soft_reset	= gen10g_no_soft_reset,
 		.config_init	= mv3310_config_init,
 		.probe		= mv3310_probe,
diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c
index b1917dd1978a..c600a8509d60 100644
--- a/drivers/net/phy/microchip_t1.c
+++ b/drivers/net/phy/microchip_t1.c
@@ -46,7 +46,7 @@ static struct phy_driver microchip_t1_phy_driver[] = {
 		.phy_id_mask    = 0xfffffff0,
 		.name           = "Microchip LAN87xx T1",
 
-		.features       = SUPPORTED_100baseT_Full,
+		.features       = PHY_BASIC_T1_FEATURES,
 		.flags          = PHY_HAS_INTERRUPT,
 
 		.config_init    = genphy_config_init,
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 35102e17bbeb..f53ce65f45c5 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -29,6 +29,7 @@
 #include <linux/module.h>
 #include <linux/mii.h>
 #include <linux/ethtool.h>
+#include <linux/bitmap.h>
 #include <linux/phy.h>
 #include <linux/phy_led_triggers.h>
 #include <linux/mdio.h>
@@ -42,6 +43,149 @@ MODULE_DESCRIPTION("PHY library");
 MODULE_AUTHOR("Andy Fleming");
 MODULE_LICENSE("GPL");
 
+__ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_features) __ro_after_init;
+EXPORT_SYMBOL_GPL(phy_basic_features);
+
+__ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1_features) __ro_after_init;
+EXPORT_SYMBOL_GPL(phy_basic_t1_features);
+
+__ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_features) __ro_after_init;
+EXPORT_SYMBOL_GPL(phy_gbit_features);
+
+__ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_fibre_features) __ro_after_init;
+EXPORT_SYMBOL_GPL(phy_gbit_fibre_features);
+
+__ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_all_ports_features) __ro_after_init;
+EXPORT_SYMBOL_GPL(phy_gbit_all_ports_features);
+
+__ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init;
+EXPORT_SYMBOL_GPL(phy_10gbit_features);
+
+static const int phy_basic_ports_array[] = {
+	ETHTOOL_LINK_MODE_Autoneg_BIT,
+	ETHTOOL_LINK_MODE_TP_BIT,
+	ETHTOOL_LINK_MODE_MII_BIT,
+};
+
+static const int phy_fibre_port_array[] = {
+	ETHTOOL_LINK_MODE_FIBRE_BIT,
+};
+
+static const int phy_all_ports_features_array[] = {
+	ETHTOOL_LINK_MODE_Autoneg_BIT,
+	ETHTOOL_LINK_MODE_TP_BIT,
+	ETHTOOL_LINK_MODE_MII_BIT,
+	ETHTOOL_LINK_MODE_FIBRE_BIT,
+	ETHTOOL_LINK_MODE_AUI_BIT,
+	ETHTOOL_LINK_MODE_BNC_BIT,
+	ETHTOOL_LINK_MODE_Backplane_BIT,
+};
+
+static const int phy_10_100_features_array[] = {
+	ETHTOOL_LINK_MODE_10baseT_Half_BIT,
+	ETHTOOL_LINK_MODE_10baseT_Full_BIT,
+	ETHTOOL_LINK_MODE_100baseT_Half_BIT,
+	ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+};
+
+static const int phy_basic_t1_features_array[] = {
+	ETHTOOL_LINK_MODE_TP_BIT,
+	ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+};
+
+static const int phy_gbit_features_array[] = {
+	ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
+	ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+};
+
+static const int phy_10gbit_features_array[] = {
+	ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+};
+
+__ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init;
+EXPORT_SYMBOL_GPL(phy_10gbit_full_features);
+
+static const int phy_10gbit_full_features_array[] = {
+	ETHTOOL_LINK_MODE_10baseT_Full_BIT,
+	ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+	ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
+	ETHTOOL_LINK_MODE_10000baseT_Full_BIT,
+};
+
+static void features_init(void)
+{
+	/* 10/100 half/full*/
+	linkmode_set_bit_array(phy_basic_ports_array,
+			       ARRAY_SIZE(phy_basic_ports_array),
+			       phy_basic_features);
+	linkmode_set_bit_array(phy_10_100_features_array,
+			       ARRAY_SIZE(phy_10_100_features_array),
+			       phy_basic_features);
+
+	/* 100 full, TP */
+	linkmode_set_bit_array(phy_basic_t1_features_array,
+			       ARRAY_SIZE(phy_basic_t1_features_array),
+			       phy_basic_t1_features);
+
+	/* 10/100 half/full + 1000 half/full */
+	linkmode_set_bit_array(phy_basic_ports_array,
+			       ARRAY_SIZE(phy_basic_ports_array),
+			       phy_gbit_features);
+	linkmode_set_bit_array(phy_10_100_features_array,
+			       ARRAY_SIZE(phy_10_100_features_array),
+			       phy_gbit_features);
+	linkmode_set_bit_array(phy_gbit_features_array,
+			       ARRAY_SIZE(phy_gbit_features_array),
+			       phy_gbit_features);
+
+	/* 10/100 half/full + 1000 half/full + fibre*/
+	linkmode_set_bit_array(phy_basic_ports_array,
+			       ARRAY_SIZE(phy_basic_ports_array),
+			       phy_gbit_fibre_features);
+	linkmode_set_bit_array(phy_10_100_features_array,
+			       ARRAY_SIZE(phy_10_100_features_array),
+			       phy_gbit_fibre_features);
+	linkmode_set_bit_array(phy_gbit_features_array,
+			       ARRAY_SIZE(phy_gbit_features_array),
+			       phy_gbit_fibre_features);
+	linkmode_set_bit_array(phy_fibre_port_array,
+			       ARRAY_SIZE(phy_fibre_port_array),
+			       phy_gbit_fibre_features);
+
+	/* 10/100 half/full + 1000 half/full + TP/MII/FIBRE/AUI/BNC/Backplane*/
+	linkmode_set_bit_array(phy_all_ports_features_array,
+			       ARRAY_SIZE(phy_all_ports_features_array),
+			       phy_gbit_all_ports_features);
+	linkmode_set_bit_array(phy_10_100_features_array,
+			       ARRAY_SIZE(phy_10_100_features_array),
+			       phy_gbit_all_ports_features);
+	linkmode_set_bit_array(phy_gbit_features_array,
+			       ARRAY_SIZE(phy_gbit_features_array),
+			       phy_gbit_all_ports_features);
+
+	/* 10/100 half/full + 1000 half/full + 10G full*/
+	linkmode_set_bit_array(phy_all_ports_features_array,
+			       ARRAY_SIZE(phy_all_ports_features_array),
+			       phy_10gbit_features);
+	linkmode_set_bit_array(phy_10_100_features_array,
+			       ARRAY_SIZE(phy_10_100_features_array),
+			       phy_10gbit_features);
+	linkmode_set_bit_array(phy_gbit_features_array,
+			       ARRAY_SIZE(phy_gbit_features_array),
+			       phy_10gbit_features);
+	linkmode_set_bit_array(phy_10gbit_features_array,
+			       ARRAY_SIZE(phy_10gbit_features_array),
+			       phy_10gbit_features);
+
+	/* 10/100/1000/10G full */
+	linkmode_set_bit_array(phy_all_ports_features_array,
+			       ARRAY_SIZE(phy_all_ports_features_array),
+			       phy_10gbit_full_features);
+	linkmode_set_bit_array(phy_10gbit_full_features_array,
+			       ARRAY_SIZE(phy_10gbit_full_features_array),
+			       phy_10gbit_full_features);
+}
+
 void phy_device_free(struct phy_device *phydev)
 {
 	put_device(&phydev->mdio.dev);
@@ -1936,6 +2080,7 @@ static int phy_probe(struct device *dev)
 	struct phy_device *phydev = to_phy_device(dev);
 	struct device_driver *drv = phydev->mdio.dev.driver;
 	struct phy_driver *phydrv = to_phy_driver(drv);
+	u32 features;
 	int err = 0;
 
 	phydev->drv = phydrv;
@@ -1956,7 +2101,8 @@ static int phy_probe(struct device *dev)
 	 * a controller will attach, and may modify one
 	 * or both of these values
 	 */
-	phydev->supported = phydrv->features;
+	ethtool_convert_link_mode_to_legacy_u32(&features, phydrv->features);
+	phydev->supported = features;
 	of_set_phy_supported(phydev);
 	phydev->advertising = phydev->supported;
 
@@ -1976,10 +2122,14 @@ static int phy_probe(struct device *dev)
 	 * (e.g. hardware erratum) where the driver wants to set only one
 	 * of these bits.
 	 */
-	if (phydrv->features & (SUPPORTED_Pause | SUPPORTED_Asym_Pause)) {
+	if (test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydrv->features) ||
+	    test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, phydrv->features)) {
 		phydev->supported &= ~(SUPPORTED_Pause | SUPPORTED_Asym_Pause);
-		phydev->supported |= phydrv->features &
-				     (SUPPORTED_Pause | SUPPORTED_Asym_Pause);
+		if (test_bit(ETHTOOL_LINK_MODE_Pause_BIT, phydrv->features))
+			phydev->supported |= SUPPORTED_Pause;
+		if (test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
+			     phydrv->features))
+			phydev->supported |= SUPPORTED_Asym_Pause;
 	} else {
 		phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
 	}
@@ -2092,9 +2242,7 @@ static struct phy_driver genphy_driver = {
 	.name		= "Generic PHY",
 	.soft_reset	= genphy_no_soft_reset,
 	.config_init	= genphy_config_init,
-	.features	= PHY_GBIT_FEATURES | SUPPORTED_MII |
-			  SUPPORTED_AUI | SUPPORTED_FIBRE |
-			  SUPPORTED_BNC,
+	.features	= PHY_GBIT_ALL_PORTS_FEATURES,
 	.aneg_done	= genphy_aneg_done,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
@@ -2109,6 +2257,8 @@ static int __init phy_init(void)
 	if (rc)
 		return rc;
 
+	features_init();
+
 	rc = phy_driver_register(&genphy_10g_driver, THIS_MODULE);
 	if (rc)
 		goto err_10g;
diff --git a/include/linux/linkmode.h b/include/linux/linkmode.h
index 014fb86c7114..22443d7fb5cd 100644
--- a/include/linux/linkmode.h
+++ b/include/linux/linkmode.h
@@ -43,6 +43,15 @@ static inline void linkmode_set_bit(int nr, volatile unsigned long *addr)
 	__set_bit(nr, addr);
 }
 
+static inline void linkmode_set_bit_array(const int *array, int array_size,
+					  unsigned long *addr)
+{
+	int i;
+
+	for (i = 0; i < array_size; i++)
+		linkmode_set_bit(array[i], addr);
+}
+
 static inline void linkmode_clear_bit(int nr, volatile unsigned long *addr)
 {
 	__clear_bit(nr, addr);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 0f6e7bf5e9c5..dff51dd36e52 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -42,13 +42,21 @@
 #define PHY_1000BT_FEATURES	(SUPPORTED_1000baseT_Half | \
 				 SUPPORTED_1000baseT_Full)
 
-#define PHY_BASIC_FEATURES	(PHY_10BT_FEATURES | \
-				 PHY_100BT_FEATURES | \
-				 PHY_DEFAULT_FEATURES)
-
-#define PHY_GBIT_FEATURES	(PHY_BASIC_FEATURES | \
-				 PHY_1000BT_FEATURES)
-
+extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_features) __ro_after_init;
+extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_basic_t1_features) __ro_after_init;
+extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_features) __ro_after_init;
+extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_fibre_features) __ro_after_init;
+extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_gbit_all_ports_features) __ro_after_init;
+extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_features) __ro_after_init;
+extern __ETHTOOL_DECLARE_LINK_MODE_MASK(phy_10gbit_full_features) __ro_after_init;
+
+#define PHY_BASIC_FEATURES ((unsigned long *)&phy_basic_features)
+#define PHY_BASIC_T1_FEATURES ((unsigned long *)&phy_basic_t1_features)
+#define PHY_GBIT_FEATURES ((unsigned long *)&phy_gbit_features)
+#define PHY_GBIT_FIBRE_FEATURES ((unsigned long *)&phy_gbit_fibre_features)
+#define PHY_GBIT_ALL_PORTS_FEATURES ((unsigned long *)&phy_gbit_all_ports_features)
+#define PHY_10GBIT_FEATURES ((unsigned long *)&phy_10gbit_features)
+#define PHY_10GBIT_FULL_FEATURES ((unsigned long *)&phy_10gbit_full_features)
 
 /*
  * Set phydev->irq to PHY_POLL if interrupts are not supported,
@@ -510,7 +518,7 @@ struct phy_driver {
 	u32 phy_id;
 	char *name;
 	u32 phy_id_mask;
-	u32 features;
+	const unsigned long * const features;
 	u32 flags;
 	const void *driver_data;
 
-- 
cgit v1.2.3


From 9f2959b6b52d43326b2f6a0e0d7ffe6f4fc3b5ca Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 28 Sep 2018 08:51:09 +0200
Subject: net: phy: improve handling delayed work

Using mod_delayed_work() allows to simplify handling delayed work and
removes the need for the sync parameter in phy_trigger_machine().
Also introduce a helper phy_queue_state_machine() to encapsulate the
low-level delayed work calls. No functional change intended.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c | 29 +++++++++++++++--------------
 include/linux/phy.h   |  2 +-
 2 files changed, 16 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index a1f8e4816f72..14509a8903c6 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -537,7 +537,7 @@ out_unlock:
 	mutex_unlock(&phydev->lock);
 
 	if (trigger)
-		phy_trigger_machine(phydev, sync);
+		phy_trigger_machine(phydev);
 
 	return err;
 }
@@ -635,6 +635,13 @@ int phy_speed_up(struct phy_device *phydev)
 }
 EXPORT_SYMBOL_GPL(phy_speed_up);
 
+static void phy_queue_state_machine(struct phy_device *phydev,
+				    unsigned int secs)
+{
+	mod_delayed_work(system_power_efficient_wq, &phydev->state_queue,
+			 secs * HZ);
+}
+
 /**
  * phy_start_machine - start PHY state machine tracking
  * @phydev: the phy_device struct
@@ -647,7 +654,7 @@ EXPORT_SYMBOL_GPL(phy_speed_up);
  */
 void phy_start_machine(struct phy_device *phydev)
 {
-	queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, HZ);
+	phy_queue_state_machine(phydev, 1);
 }
 EXPORT_SYMBOL_GPL(phy_start_machine);
 
@@ -655,19 +662,14 @@ EXPORT_SYMBOL_GPL(phy_start_machine);
  * phy_trigger_machine - trigger the state machine to run
  *
  * @phydev: the phy_device struct
- * @sync: indicate whether we should wait for the workqueue cancelation
  *
  * Description: There has been a change in state which requires that the
  *   state machine runs.
  */
 
-void phy_trigger_machine(struct phy_device *phydev, bool sync)
+void phy_trigger_machine(struct phy_device *phydev)
 {
-	if (sync)
-		cancel_delayed_work_sync(&phydev->state_queue);
-	else
-		cancel_delayed_work(&phydev->state_queue);
-	queue_delayed_work(system_power_efficient_wq, &phydev->state_queue, 0);
+	phy_queue_state_machine(phydev, 0);
 }
 
 /**
@@ -703,7 +705,7 @@ static void phy_error(struct phy_device *phydev)
 	phydev->state = PHY_HALTED;
 	mutex_unlock(&phydev->lock);
 
-	phy_trigger_machine(phydev, false);
+	phy_trigger_machine(phydev);
 }
 
 /**
@@ -745,7 +747,7 @@ static irqreturn_t phy_change(struct phy_device *phydev)
 	mutex_unlock(&phydev->lock);
 
 	/* reschedule state queue work to run as soon as possible */
-	phy_trigger_machine(phydev, true);
+	phy_trigger_machine(phydev);
 
 	if (phy_interrupt_is_valid(phydev) && phy_clear_interrupt(phydev))
 		goto phy_err;
@@ -911,7 +913,7 @@ void phy_start(struct phy_device *phydev)
 	}
 	mutex_unlock(&phydev->lock);
 
-	phy_trigger_machine(phydev, true);
+	phy_trigger_machine(phydev);
 }
 EXPORT_SYMBOL(phy_start);
 
@@ -1130,8 +1132,7 @@ void phy_state_machine(struct work_struct *work)
 	 * called from phy_disconnect() synchronously.
 	 */
 	if (phy_polling_mode(phydev) && old_state != PHY_HALTED)
-		queue_delayed_work(system_power_efficient_wq, &phydev->state_queue,
-				   PHY_STATE_TIME * HZ);
+		phy_queue_state_machine(phydev, PHY_STATE_TIME);
 }
 
 /**
diff --git a/include/linux/phy.h b/include/linux/phy.h
index dff51dd36e52..3ea87f774a76 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1054,7 +1054,7 @@ void phy_change_work(struct work_struct *work);
 void phy_mac_interrupt(struct phy_device *phydev);
 void phy_start_machine(struct phy_device *phydev);
 void phy_stop_machine(struct phy_device *phydev);
-void phy_trigger_machine(struct phy_device *phydev, bool sync);
+void phy_trigger_machine(struct phy_device *phydev);
 int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
 void phy_ethtool_ksettings_get(struct phy_device *phydev,
 			       struct ethtool_link_ksettings *cmd);
-- 
cgit v1.2.3


From efaffc5e40aeced0bcb497ed7a0a5b8c14abfcdf Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Mon, 1 Oct 2018 11:05:24 +0100
Subject: mm, sched/numa: Remove rate-limiting of automatic NUMA balancing
 migration

Rate limiting of page migrations due to automatic NUMA balancing was
introduced to mitigate the worst-case scenario of migrating at high
frequency due to false sharing or slowly ping-ponging between nodes.
Since then, a lot of effort was spent on correctly identifying these
pages and avoiding unnecessary migrations and the safety net may no longer
be required.

Jirka Hladky reported a regression in 4.17 due to a scheduler patch that
avoids spreading STREAM tasks wide prematurely. However, once the task
was properly placed, it delayed migrating the memory due to rate limiting.
Increasing the limit fixed the problem for him.

Currently, the limit is hard-coded and does not account for the real
capabilities of the hardware. Even if an estimate was attempted, it would
not properly account for the number of memory controllers and it could
not account for the amount of bandwidth used for normal accesses. Rather
than fudging, this patch simply eliminates the rate limiting.

However, Jirka reports that a STREAM configuration using multiple
processes achieved similar performance to 4.16. In local tests, this patch
improved performance of STREAM relative to the baseline but it is somewhat
machine-dependent. Most workloads show little or not performance difference
implying that there is not a heavily reliance on the throttling mechanism
and it is safe to remove.

STREAM on 2-socket machine
                         4.19.0-rc5             4.19.0-rc5
                         numab-v1r1       noratelimit-v1r1
MB/sec copy     43298.52 (   0.00%)    44673.38 (   3.18%)
MB/sec scale    30115.06 (   0.00%)    31293.06 (   3.91%)
MB/sec add      32825.12 (   0.00%)    34883.62 (   6.27%)
MB/sec triad    32549.52 (   0.00%)    34906.60 (   7.24%

Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Reviewed-by: Rik van Riel <riel@surriel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Jirka Hladky <jhladky@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Linux-MM <linux-mm@kvack.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20181001100525.29789-2-mgorman@techsingularity.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mmzone.h         |  6 ----
 include/trace/events/migrate.h | 27 ------------------
 mm/migrate.c                   | 65 ------------------------------------------
 mm/page_alloc.c                |  2 --
 4 files changed, 100 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1e22d96734e0..3f4c0b167333 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -671,12 +671,6 @@ typedef struct pglist_data {
 #ifdef CONFIG_NUMA_BALANCING
 	/* Lock serializing the migrate rate limiting window */
 	spinlock_t numabalancing_migrate_lock;
-
-	/* Rate limiting time interval */
-	unsigned long numabalancing_migrate_next_window;
-
-	/* Number of pages migrated during the rate limiting time interval */
-	unsigned long numabalancing_migrate_nr_pages;
 #endif
 	/*
 	 * This is a per-node reserve of pages that are not available
diff --git a/include/trace/events/migrate.h b/include/trace/events/migrate.h
index 711372845945..705b33d1e395 100644
--- a/include/trace/events/migrate.h
+++ b/include/trace/events/migrate.h
@@ -70,33 +70,6 @@ TRACE_EVENT(mm_migrate_pages,
 		__print_symbolic(__entry->mode, MIGRATE_MODE),
 		__print_symbolic(__entry->reason, MIGRATE_REASON))
 );
-
-TRACE_EVENT(mm_numa_migrate_ratelimit,
-
-	TP_PROTO(struct task_struct *p, int dst_nid, unsigned long nr_pages),
-
-	TP_ARGS(p, dst_nid, nr_pages),
-
-	TP_STRUCT__entry(
-		__array(	char,		comm,	TASK_COMM_LEN)
-		__field(	pid_t,		pid)
-		__field(	int,		dst_nid)
-		__field(	unsigned long,	nr_pages)
-	),
-
-	TP_fast_assign(
-		memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
-		__entry->pid		= p->pid;
-		__entry->dst_nid	= dst_nid;
-		__entry->nr_pages	= nr_pages;
-	),
-
-	TP_printk("comm=%s pid=%d dst_nid=%d nr_pages=%lu",
-		__entry->comm,
-		__entry->pid,
-		__entry->dst_nid,
-		__entry->nr_pages)
-);
 #endif /* _TRACE_MIGRATE_H */
 
 /* This part must be outside protection */
diff --git a/mm/migrate.c b/mm/migrate.c
index 4f1d894835b5..5e285c1249a0 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1855,54 +1855,6 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
 	return newpage;
 }
 
-/*
- * page migration rate limiting control.
- * Do not migrate more than @pages_to_migrate in a @migrate_interval_millisecs
- * window of time. Default here says do not migrate more than 1280M per second.
- */
-static unsigned int migrate_interval_millisecs __read_mostly = 100;
-static unsigned int ratelimit_pages __read_mostly = 128 << (20 - PAGE_SHIFT);
-
-/* Returns true if the node is migrate rate-limited after the update */
-static bool numamigrate_update_ratelimit(pg_data_t *pgdat,
-					unsigned long nr_pages)
-{
-	unsigned long next_window, interval;
-
-	next_window = READ_ONCE(pgdat->numabalancing_migrate_next_window);
-	interval = msecs_to_jiffies(migrate_interval_millisecs);
-
-	/*
-	 * Rate-limit the amount of data that is being migrated to a node.
-	 * Optimal placement is no good if the memory bus is saturated and
-	 * all the time is being spent migrating!
-	 */
-	if (time_after(jiffies, next_window) &&
-			spin_trylock(&pgdat->numabalancing_migrate_lock)) {
-		pgdat->numabalancing_migrate_nr_pages = 0;
-		do {
-			next_window += interval;
-		} while (unlikely(time_after(jiffies, next_window)));
-
-		WRITE_ONCE(pgdat->numabalancing_migrate_next_window, next_window);
-		spin_unlock(&pgdat->numabalancing_migrate_lock);
-	}
-	if (pgdat->numabalancing_migrate_nr_pages > ratelimit_pages) {
-		trace_mm_numa_migrate_ratelimit(current, pgdat->node_id,
-								nr_pages);
-		return true;
-	}
-
-	/*
-	 * This is an unlocked non-atomic update so errors are possible.
-	 * The consequences are failing to migrate when we potentiall should
-	 * have which is not severe enough to warrant locking. If it is ever
-	 * a problem, it can be converted to a per-cpu counter.
-	 */
-	pgdat->numabalancing_migrate_nr_pages += nr_pages;
-	return false;
-}
-
 static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
 {
 	int page_lru;
@@ -1975,14 +1927,6 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
 	if (page_is_file_cache(page) && PageDirty(page))
 		goto out;
 
-	/*
-	 * Rate-limit the amount of data that is being migrated to a node.
-	 * Optimal placement is no good if the memory bus is saturated and
-	 * all the time is being spent migrating!
-	 */
-	if (numamigrate_update_ratelimit(pgdat, 1))
-		goto out;
-
 	isolated = numamigrate_isolate_page(pgdat, page);
 	if (!isolated)
 		goto out;
@@ -2029,14 +1973,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	unsigned long mmun_start = address & HPAGE_PMD_MASK;
 	unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
 
-	/*
-	 * Rate-limit the amount of data that is being migrated to a node.
-	 * Optimal placement is no good if the memory bus is saturated and
-	 * all the time is being spent migrating!
-	 */
-	if (numamigrate_update_ratelimit(pgdat, HPAGE_PMD_NR))
-		goto out_dropref;
-
 	new_page = alloc_pages_node(node,
 		(GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
 		HPAGE_PMD_ORDER);
@@ -2133,7 +2069,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 
 out_fail:
 	count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
-out_dropref:
 	ptl = pmd_lock(mm, pmd);
 	if (pmd_same(*pmd, entry)) {
 		entry = pmd_modify(entry, vma->vm_page_prot);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 89d2a2ab3fe6..706a738c0aee 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6197,8 +6197,6 @@ static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
 static void pgdat_init_numabalancing(struct pglist_data *pgdat)
 {
 	spin_lock_init(&pgdat->numabalancing_migrate_lock);
-	pgdat->numabalancing_migrate_nr_pages = 0;
-	pgdat->numabalancing_migrate_next_window = jiffies;
 }
 #else
 static void pgdat_init_numabalancing(struct pglist_data *pgdat) {}
-- 
cgit v1.2.3


From 11e37d357f6ba7a9af850a872396082cc0a0001f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 27 Jul 2018 13:38:54 +0100
Subject: irqchip/gic-v3-its: Move pending table allocation to init time

Pending tables for the redistributors are currently allocated
one at a time as each CPU boots. This is causing some grief
for Linux/RT (allocation from within a CPU hotplug notifier is
frown upon).

Let's move this allocation to take place at init time, when we
only have a single CPU. It means we're allocating memory for CPUs
that are not online yet, but most system will boot all of their
CPUs anyway, so that's not completely wasted.

Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Tested-by: Bhupesh Sharma <bhsharma@redhat.com>
Tested-by: Lei Zhang <zhang.lei@jp.fujitsu.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/irqchip/irq-gic-v3-its.c   | 80 +++++++++++++++++++++++++-------------
 include/linux/irqchip/arm-gic-v3.h |  1 +
 2 files changed, 53 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index cb59a4d513c9..02196682821b 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -173,6 +173,7 @@ static DEFINE_RAW_SPINLOCK(vmovp_lock);
 static DEFINE_IDA(its_vpeid_ida);
 
 #define gic_data_rdist()		(raw_cpu_ptr(gic_rdists->rdist))
+#define gic_data_rdist_cpu(cpu)		(per_cpu_ptr(gic_rdists->rdist, cpu))
 #define gic_data_rdist_rd_base()	(gic_data_rdist()->rd_base)
 #define gic_data_rdist_vlpi_base()	(gic_data_rdist_rd_base() + SZ_128K)
 
@@ -1625,7 +1626,7 @@ static void its_free_prop_table(struct page *prop_page)
 		   get_order(LPI_PROPBASE_SZ));
 }
 
-static int __init its_alloc_lpi_tables(void)
+static int __init its_setup_lpi_prop_table(void)
 {
 	phys_addr_t paddr;
 
@@ -1944,30 +1945,47 @@ static void its_free_pending_table(struct page *pt)
 	free_pages((unsigned long)page_address(pt), get_order(LPI_PENDBASE_SZ));
 }
 
-static void its_cpu_init_lpis(void)
+static int __init allocate_lpi_tables(void)
 {
-	void __iomem *rbase = gic_data_rdist_rd_base();
-	struct page *pend_page;
-	u64 val, tmp;
+	int err, cpu;
 
-	/* If we didn't allocate the pending table yet, do it now */
-	pend_page = gic_data_rdist()->pend_page;
-	if (!pend_page) {
-		phys_addr_t paddr;
+	err = its_setup_lpi_prop_table();
+	if (err)
+		return err;
+
+	/*
+	 * We allocate all the pending tables anyway, as we may have a
+	 * mix of RDs that have had LPIs enabled, and some that
+	 * don't. We'll free the unused ones as each CPU comes online.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct page *pend_page;
 
 		pend_page = its_allocate_pending_table(GFP_NOWAIT);
 		if (!pend_page) {
-			pr_err("Failed to allocate PENDBASE for CPU%d\n",
-			       smp_processor_id());
-			return;
+			pr_err("Failed to allocate PENDBASE for CPU%d\n", cpu);
+			return -ENOMEM;
 		}
 
-		paddr = page_to_phys(pend_page);
-		pr_info("CPU%d: using LPI pending table @%pa\n",
-			smp_processor_id(), &paddr);
-		gic_data_rdist()->pend_page = pend_page;
+		gic_data_rdist_cpu(cpu)->pend_page = pend_page;
 	}
 
+	return 0;
+}
+
+static void its_cpu_init_lpis(void)
+{
+	void __iomem *rbase = gic_data_rdist_rd_base();
+	struct page *pend_page;
+	phys_addr_t paddr;
+	u64 val, tmp;
+
+	if (gic_data_rdist()->lpi_enabled)
+		return;
+
+	pend_page = gic_data_rdist()->pend_page;
+	paddr = page_to_phys(pend_page);
+
 	/* set PROPBASE */
 	val = (page_to_phys(gic_rdists->prop_page) |
 	       GICR_PROPBASER_InnerShareable |
@@ -2019,6 +2037,10 @@ static void its_cpu_init_lpis(void)
 
 	/* Make sure the GIC has seen the above */
 	dsb(sy);
+	gic_data_rdist()->lpi_enabled = true;
+	pr_info("GICv3: CPU%d: using LPI pending table @%pa\n",
+		smp_processor_id(),
+		&paddr);
 }
 
 static void its_cpu_init_collection(struct its_node *its)
@@ -3497,16 +3519,6 @@ static int redist_disable_lpis(void)
 	u64 timeout = USEC_PER_SEC;
 	u64 val;
 
-	/*
-	 * If coming via a CPU hotplug event, we don't need to disable
-	 * LPIs before trying to re-enable them. They are already
-	 * configured and all is well in the world. Detect this case
-	 * by checking the allocation of the pending table for the
-	 * current CPU.
-	 */
-	if (gic_data_rdist()->pend_page)
-		return 0;
-
 	if (!gic_rdists_supports_plpis()) {
 		pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
 		return -ENXIO;
@@ -3516,7 +3528,18 @@ static int redist_disable_lpis(void)
 	if (!(val & GICR_CTLR_ENABLE_LPIS))
 		return 0;
 
-	pr_warn("CPU%d: Booted with LPIs enabled, memory probably corrupted\n",
+	/*
+	 * If coming via a CPU hotplug event, we don't need to disable
+	 * LPIs before trying to re-enable them. They are already
+	 * configured and all is well in the world.
+	 */
+	if (gic_data_rdist()->lpi_enabled)
+		return 0;
+
+	/*
+	 * From that point on, we only try to do some damage control.
+	 */
+	pr_warn("GICv3: CPU%d: Booted with LPIs enabled, memory probably corrupted\n",
 		smp_processor_id());
 	add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
 
@@ -3772,7 +3795,8 @@ int __init its_init(struct fwnode_handle *handle, struct rdists *rdists,
 	}
 
 	gic_rdists = rdists;
-	err = its_alloc_lpi_tables();
+
+	err = allocate_lpi_tables();
 	if (err)
 		return err;
 
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 8bdbb5f29494..266093e845bb 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -585,6 +585,7 @@ struct rdists {
 		void __iomem	*rd_base;
 		struct page	*pend_page;
 		phys_addr_t	phys_base;
+		bool		lpi_enabled;
 	} __percpu		*rdist;
 	struct page		*prop_page;
 	u64			flags;
-- 
cgit v1.2.3


From e1a2e2010ba9d3c765b2e37a7ae8b332564716f1 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 27 Jul 2018 14:36:00 +0100
Subject: irqchip/gic-v3-its: Keep track of property table's PA and VA

We're currently only tracking the page allocated to contain the
property table by its struct page. In the future, it is going to
be convenient to track both PA and VA for that page instead. Let's
do that.

Tested-by: Jeremy Linton <jeremy.linton@arm.com>
Tested-by: Bhupesh Sharma <bhsharma@redhat.com>
Tested-by: Lei Zhang <zhang.lei@jp.fujitsu.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/irqchip/irq-gic-v3-its.c   | 23 +++++++++++++----------
 include/linux/irqchip/arm-gic-v3.h |  3 ++-
 2 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 02196682821b..9f26445eee4a 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1029,7 +1029,7 @@ static inline u32 its_get_event_id(struct irq_data *d)
 static void lpi_write_config(struct irq_data *d, u8 clr, u8 set)
 {
 	irq_hw_number_t hwirq;
-	struct page *prop_page;
+	void *va;
 	u8 *cfg;
 
 	if (irqd_is_forwarded_to_vcpu(d)) {
@@ -1037,7 +1037,7 @@ static void lpi_write_config(struct irq_data *d, u8 clr, u8 set)
 		u32 event = its_get_event_id(d);
 		struct its_vlpi_map *map;
 
-		prop_page = its_dev->event_map.vm->vprop_page;
+		va = page_address(its_dev->event_map.vm->vprop_page);
 		map = &its_dev->event_map.vlpi_maps[event];
 		hwirq = map->vintid;
 
@@ -1045,11 +1045,11 @@ static void lpi_write_config(struct irq_data *d, u8 clr, u8 set)
 		map->properties &= ~clr;
 		map->properties |= set | LPI_PROP_GROUP1;
 	} else {
-		prop_page = gic_rdists->prop_page;
+		va = gic_rdists->prop_table_va;
 		hwirq = d->hwirq;
 	}
 
-	cfg = page_address(prop_page) + hwirq - 8192;
+	cfg = va + hwirq - 8192;
 	*cfg &= ~clr;
 	*cfg |= set | LPI_PROP_GROUP1;
 
@@ -1628,18 +1628,21 @@ static void its_free_prop_table(struct page *prop_page)
 
 static int __init its_setup_lpi_prop_table(void)
 {
-	phys_addr_t paddr;
+	struct page *page;
 
 	lpi_id_bits = min_t(u32, GICD_TYPER_ID_BITS(gic_rdists->gicd_typer),
 				ITS_MAX_LPI_NRBITS);
-	gic_rdists->prop_page = its_allocate_prop_table(GFP_NOWAIT);
-	if (!gic_rdists->prop_page) {
+	page = its_allocate_prop_table(GFP_NOWAIT);
+	if (!page) {
 		pr_err("Failed to allocate PROPBASE\n");
 		return -ENOMEM;
 	}
 
-	paddr = page_to_phys(gic_rdists->prop_page);
-	pr_info("GIC: using LPI property table @%pa\n", &paddr);
+	gic_rdists->prop_table_pa = page_to_phys(page);
+	gic_rdists->prop_table_va = page_address(page);
+
+	pr_info("GICv3: using LPI property table @%pa\n",
+		&gic_rdists->prop_table_pa);
 
 	return its_lpi_init(lpi_id_bits);
 }
@@ -1987,7 +1990,7 @@ static void its_cpu_init_lpis(void)
 	paddr = page_to_phys(pend_page);
 
 	/* set PROPBASE */
-	val = (page_to_phys(gic_rdists->prop_page) |
+	val = (gic_rdists->prop_table_pa |
 	       GICR_PROPBASER_InnerShareable |
 	       GICR_PROPBASER_RaWaWb |
 	       ((LPI_NRBITS - 1) & GICR_PROPBASER_IDBITS_MASK));
diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h
index 266093e845bb..c2a7b863fc2e 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -587,7 +587,8 @@ struct rdists {
 		phys_addr_t	phys_base;
 		bool		lpi_enabled;
 	} __percpu		*rdist;
-	struct page		*prop_page;
+	phys_addr_t		prop_table_pa;
+	void			*prop_table_va;
 	u64			flags;
 	u32			gicd_typer;
 	bool			has_vlpis;
-- 
cgit v1.2.3


From 1f83515bebc236d2acda59976a8e852f1a6d50b7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 1 Oct 2018 16:13:45 +0200
Subject: genirq/msi: Allow creation of a tree-based irqdomain for platform-msi

platform_msi_create_device_domain() always creates a revmap-based
irqdomain, which has the drawback of requiring the number of MSIs
that can be allocated ahead of time. This is not always possible,
and we sometimes need to use a tree-based irqdomain instead.

Add a new platform_msi_create_device_tree_domain() helper to
that effect.

Reported-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/base/platform-msi.c | 14 ++++++++------
 include/linux/msi.h         | 17 ++++++++++++-----
 2 files changed, 20 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 60d6cc618f1c..f39a920496fb 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -321,11 +321,12 @@ void *platform_msi_get_host_data(struct irq_domain *domain)
  * Returns an irqdomain for @nvec interrupts
  */
 struct irq_domain *
-platform_msi_create_device_domain(struct device *dev,
-				  unsigned int nvec,
-				  irq_write_msi_msg_t write_msi_msg,
-				  const struct irq_domain_ops *ops,
-				  void *host_data)
+__platform_msi_create_device_domain(struct device *dev,
+				    unsigned int nvec,
+				    bool is_tree,
+				    irq_write_msi_msg_t write_msi_msg,
+				    const struct irq_domain_ops *ops,
+				    void *host_data)
 {
 	struct platform_msi_priv_data *data;
 	struct irq_domain *domain;
@@ -336,7 +337,8 @@ platform_msi_create_device_domain(struct device *dev,
 		return NULL;
 
 	data->host_data = host_data;
-	domain = irq_domain_create_hierarchy(dev->msi_domain, 0, nvec,
+	domain = irq_domain_create_hierarchy(dev->msi_domain, 0,
+					     is_tree ? 0 : nvec,
 					     dev->fwnode, ops, data);
 	if (!domain)
 		goto free_priv;
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 5839d8062dfc..0e9c50052ff3 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -317,11 +317,18 @@ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev,
 int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev,
 			     int virq, int nvec, msi_alloc_info_t *args);
 struct irq_domain *
-platform_msi_create_device_domain(struct device *dev,
-				  unsigned int nvec,
-				  irq_write_msi_msg_t write_msi_msg,
-				  const struct irq_domain_ops *ops,
-				  void *host_data);
+__platform_msi_create_device_domain(struct device *dev,
+				    unsigned int nvec,
+				    bool is_tree,
+				    irq_write_msi_msg_t write_msi_msg,
+				    const struct irq_domain_ops *ops,
+				    void *host_data);
+
+#define platform_msi_create_device_domain(dev, nvec, write, ops, data)	\
+	__platform_msi_create_device_domain(dev, nvec, false, write, ops, data)
+#define platform_msi_create_device_tree_domain(dev, nvec, write, ops, data) \
+	__platform_msi_create_device_domain(dev, nvec, true, write, ops, data)
+
 int platform_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
 			      unsigned int nr_irqs);
 void platform_msi_domain_free(struct irq_domain *domain, unsigned int virq,
-- 
cgit v1.2.3


From 61ce8d8d8a8168a714de80fc31b3d6ac160fc5b0 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Mon, 1 Oct 2018 16:13:51 +0200
Subject: irqchip/irq-mvebu-sei: Add new driver for Marvell SEI

This is a cascaded interrupt controller in the AP806 GIC that collapses
SEIs (System Error Interrupt) coming from the AP and the CPs (through
the ICU).

The SEI handles up to 64 interrupts. The first 21 interrupts are wired
from the AP. The next 43 interrupts are from the CPs and are triggered
through MSI messages. To handle this complexity, the driver has to
declare to the upper layer: one IRQ domain for the wired interrupts,
one IRQ domain for the MSIs; and acts as a MSI controller ('parent')
by declaring an MSI domain.

Suggested-by: Haim Boot <hayim@marvell.com>
Suggested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/irqchip/Kconfig         |   3 +
 drivers/irqchip/Makefile        |   1 +
 drivers/irqchip/irq-mvebu-sei.c | 507 ++++++++++++++++++++++++++++++++++++++++
 include/linux/irqdomain.h       |   1 +
 4 files changed, 512 insertions(+)
 create mode 100644 drivers/irqchip/irq-mvebu-sei.c

(limited to 'include/linux')

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 383e7b70221d..96451b581452 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -310,6 +310,9 @@ config MVEBU_ODMI
 config MVEBU_PIC
 	bool
 
+config MVEBU_SEI
+        bool
+
 config LS_SCFG_MSI
 	def_bool y if SOC_LS1021A || ARCH_LAYERSCAPE
 	depends on PCI && PCI_MSI
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index fbd1ec8070ef..b822199445ff 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -76,6 +76,7 @@ obj-$(CONFIG_MVEBU_GICP)		+= irq-mvebu-gicp.o
 obj-$(CONFIG_MVEBU_ICU)			+= irq-mvebu-icu.o
 obj-$(CONFIG_MVEBU_ODMI)		+= irq-mvebu-odmi.o
 obj-$(CONFIG_MVEBU_PIC)			+= irq-mvebu-pic.o
+obj-$(CONFIG_MVEBU_SEI)			+= irq-mvebu-sei.o
 obj-$(CONFIG_LS_SCFG_MSI)		+= irq-ls-scfg-msi.o
 obj-$(CONFIG_EZNPS_GIC)			+= irq-eznps.o
 obj-$(CONFIG_ARCH_ASPEED)		+= irq-aspeed-vic.o irq-aspeed-i2c-ic.o
diff --git a/drivers/irqchip/irq-mvebu-sei.c b/drivers/irqchip/irq-mvebu-sei.c
new file mode 100644
index 000000000000..566d69a2edbc
--- /dev/null
+++ b/drivers/irqchip/irq-mvebu-sei.c
@@ -0,0 +1,507 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define pr_fmt(fmt) "mvebu-sei: " fmt
+
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/platform_device.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+
+/* Cause register */
+#define GICP_SECR(idx)		(0x0  + ((idx) * 0x4))
+/* Mask register */
+#define GICP_SEMR(idx)		(0x20 + ((idx) * 0x4))
+#define GICP_SET_SEI_OFFSET	0x30
+
+#define SEI_IRQ_COUNT_PER_REG	32
+#define SEI_IRQ_REG_COUNT	2
+#define SEI_IRQ_COUNT		(SEI_IRQ_COUNT_PER_REG * SEI_IRQ_REG_COUNT)
+#define SEI_IRQ_REG_IDX(irq_id)	((irq_id) / SEI_IRQ_COUNT_PER_REG)
+#define SEI_IRQ_REG_BIT(irq_id)	((irq_id) % SEI_IRQ_COUNT_PER_REG)
+
+struct mvebu_sei_interrupt_range {
+	u32 first;
+	u32 size;
+};
+
+struct mvebu_sei_caps {
+	struct mvebu_sei_interrupt_range ap_range;
+	struct mvebu_sei_interrupt_range cp_range;
+};
+
+struct mvebu_sei {
+	struct device *dev;
+	void __iomem *base;
+	struct resource *res;
+	struct irq_domain *sei_domain;
+	struct irq_domain *ap_domain;
+	struct irq_domain *cp_domain;
+	const struct mvebu_sei_caps *caps;
+
+	/* Lock on MSI allocations/releases */
+	struct mutex cp_msi_lock;
+	DECLARE_BITMAP(cp_msi_bitmap, SEI_IRQ_COUNT);
+
+	/* Lock on IRQ masking register */
+	raw_spinlock_t mask_lock;
+};
+
+static void mvebu_sei_ack_irq(struct irq_data *d)
+{
+	struct mvebu_sei *sei = irq_data_get_irq_chip_data(d);
+	u32 reg_idx = SEI_IRQ_REG_IDX(d->hwirq);
+
+	writel_relaxed(BIT(SEI_IRQ_REG_BIT(d->hwirq)),
+		       sei->base + GICP_SECR(reg_idx));
+}
+
+static void mvebu_sei_mask_irq(struct irq_data *d)
+{
+	struct mvebu_sei *sei = irq_data_get_irq_chip_data(d);
+	u32 reg, reg_idx = SEI_IRQ_REG_IDX(d->hwirq);
+	unsigned long flags;
+
+	/* 1 disables the interrupt */
+	raw_spin_lock_irqsave(&sei->mask_lock, flags);
+	reg = readl_relaxed(sei->base + GICP_SEMR(reg_idx));
+	reg |= BIT(SEI_IRQ_REG_BIT(d->hwirq));
+	writel_relaxed(reg, sei->base + GICP_SEMR(reg_idx));
+	raw_spin_unlock_irqrestore(&sei->mask_lock, flags);
+}
+
+static void mvebu_sei_unmask_irq(struct irq_data *d)
+{
+	struct mvebu_sei *sei = irq_data_get_irq_chip_data(d);
+	u32 reg, reg_idx = SEI_IRQ_REG_IDX(d->hwirq);
+	unsigned long flags;
+
+	/* 0 enables the interrupt */
+	raw_spin_lock_irqsave(&sei->mask_lock, flags);
+	reg = readl_relaxed(sei->base + GICP_SEMR(reg_idx));
+	reg &= ~BIT(SEI_IRQ_REG_BIT(d->hwirq));
+	writel_relaxed(reg, sei->base + GICP_SEMR(reg_idx));
+	raw_spin_unlock_irqrestore(&sei->mask_lock, flags);
+}
+
+static int mvebu_sei_set_affinity(struct irq_data *d,
+				  const struct cpumask *mask_val,
+				  bool force)
+{
+	return -EINVAL;
+}
+
+static int mvebu_sei_set_irqchip_state(struct irq_data *d,
+				       enum irqchip_irq_state which,
+				       bool state)
+{
+	/* We can only clear the pending state by acking the interrupt */
+	if (which != IRQCHIP_STATE_PENDING || state)
+		return -EINVAL;
+
+	mvebu_sei_ack_irq(d);
+	return 0;
+}
+
+static struct irq_chip mvebu_sei_irq_chip = {
+	.name			= "SEI",
+	.irq_ack		= mvebu_sei_ack_irq,
+	.irq_mask		= mvebu_sei_mask_irq,
+	.irq_unmask		= mvebu_sei_unmask_irq,
+	.irq_set_affinity       = mvebu_sei_set_affinity,
+	.irq_set_irqchip_state	= mvebu_sei_set_irqchip_state,
+};
+
+static int mvebu_sei_ap_set_type(struct irq_data *data, unsigned int type)
+{
+	if ((type & IRQ_TYPE_SENSE_MASK) != IRQ_TYPE_LEVEL_HIGH)
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct irq_chip mvebu_sei_ap_irq_chip = {
+	.name			= "AP SEI",
+	.irq_ack		= irq_chip_ack_parent,
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_set_affinity       = irq_chip_set_affinity_parent,
+	.irq_set_type		= mvebu_sei_ap_set_type,
+};
+
+static void mvebu_sei_cp_compose_msi_msg(struct irq_data *data,
+					 struct msi_msg *msg)
+{
+	struct mvebu_sei *sei = data->chip_data;
+	phys_addr_t set = sei->res->start + GICP_SET_SEI_OFFSET;
+
+	msg->data = data->hwirq + sei->caps->cp_range.first;
+	msg->address_lo = lower_32_bits(set);
+	msg->address_hi = upper_32_bits(set);
+}
+
+static int mvebu_sei_cp_set_type(struct irq_data *data, unsigned int type)
+{
+	if ((type & IRQ_TYPE_SENSE_MASK) != IRQ_TYPE_EDGE_RISING)
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct irq_chip mvebu_sei_cp_irq_chip = {
+	.name			= "CP SEI",
+	.irq_ack		= irq_chip_ack_parent,
+	.irq_mask		= irq_chip_mask_parent,
+	.irq_unmask		= irq_chip_unmask_parent,
+	.irq_set_affinity       = irq_chip_set_affinity_parent,
+	.irq_set_type		= mvebu_sei_cp_set_type,
+	.irq_compose_msi_msg	= mvebu_sei_cp_compose_msi_msg,
+};
+
+static int mvebu_sei_domain_alloc(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs, void *arg)
+{
+	struct mvebu_sei *sei = domain->host_data;
+	struct irq_fwspec *fwspec = arg;
+
+	/* Not much to do, just setup the irqdata */
+	irq_domain_set_hwirq_and_chip(domain, virq, fwspec->param[0],
+				      &mvebu_sei_irq_chip, sei);
+
+	return 0;
+}
+
+static void mvebu_sei_domain_free(struct irq_domain *domain, unsigned int virq,
+				  unsigned int nr_irqs)
+{
+	int i;
+
+	for (i = 0; i < nr_irqs; i++) {
+		struct irq_data *d = irq_domain_get_irq_data(domain, virq + i);
+		irq_set_handler(virq + i, NULL);
+		irq_domain_reset_irq_data(d);
+	}
+}
+
+static const struct irq_domain_ops mvebu_sei_domain_ops = {
+	.alloc	= mvebu_sei_domain_alloc,
+	.free	= mvebu_sei_domain_free,
+};
+
+static int mvebu_sei_ap_translate(struct irq_domain *domain,
+				  struct irq_fwspec *fwspec,
+				  unsigned long *hwirq,
+				  unsigned int *type)
+{
+	*hwirq = fwspec->param[0];
+	*type  = IRQ_TYPE_LEVEL_HIGH;
+
+	return 0;
+}
+
+static int mvebu_sei_ap_alloc(struct irq_domain *domain, unsigned int virq,
+			      unsigned int nr_irqs, void *arg)
+{
+	struct mvebu_sei *sei = domain->host_data;
+	struct irq_fwspec fwspec;
+	unsigned long hwirq;
+	unsigned int type;
+	int err;
+
+	mvebu_sei_ap_translate(domain, arg, &hwirq, &type);
+
+	fwspec.fwnode = domain->parent->fwnode;
+	fwspec.param_count = 1;
+	fwspec.param[0] = hwirq + sei->caps->ap_range.first;
+
+	err = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
+	if (err)
+		return err;
+
+	irq_domain_set_info(domain, virq, hwirq,
+			    &mvebu_sei_ap_irq_chip, sei,
+			    handle_level_irq, NULL, NULL);
+	irq_set_probe(virq);
+
+	return 0;
+}
+
+static const struct irq_domain_ops mvebu_sei_ap_domain_ops = {
+	.translate	= mvebu_sei_ap_translate,
+	.alloc		= mvebu_sei_ap_alloc,
+	.free		= irq_domain_free_irqs_parent,
+};
+
+static void mvebu_sei_cp_release_irq(struct mvebu_sei *sei, unsigned long hwirq)
+{
+	mutex_lock(&sei->cp_msi_lock);
+	clear_bit(hwirq, sei->cp_msi_bitmap);
+	mutex_unlock(&sei->cp_msi_lock);
+}
+
+static int mvebu_sei_cp_domain_alloc(struct irq_domain *domain,
+				     unsigned int virq, unsigned int nr_irqs,
+				     void *args)
+{
+	struct mvebu_sei *sei = domain->host_data;
+	struct irq_fwspec fwspec;
+	unsigned long hwirq;
+	int ret;
+
+	/* The software only supports single allocations for now */
+	if (nr_irqs != 1)
+		return -ENOTSUPP;
+
+	mutex_lock(&sei->cp_msi_lock);
+	hwirq = find_first_zero_bit(sei->cp_msi_bitmap,
+				    sei->caps->cp_range.size);
+	if (hwirq < sei->caps->cp_range.size)
+		set_bit(hwirq, sei->cp_msi_bitmap);
+	mutex_unlock(&sei->cp_msi_lock);
+
+	if (hwirq == sei->caps->cp_range.size)
+		return -ENOSPC;
+
+	fwspec.fwnode = domain->parent->fwnode;
+	fwspec.param_count = 1;
+	fwspec.param[0] = hwirq + sei->caps->cp_range.first;
+
+	ret = irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec);
+	if (ret)
+		goto free_irq;
+
+	irq_domain_set_info(domain, virq, hwirq,
+			    &mvebu_sei_cp_irq_chip, sei,
+			    handle_edge_irq, NULL, NULL);
+
+	return 0;
+
+free_irq:
+	mvebu_sei_cp_release_irq(sei, hwirq);
+	return ret;
+}
+
+static void mvebu_sei_cp_domain_free(struct irq_domain *domain,
+				     unsigned int virq, unsigned int nr_irqs)
+{
+	struct mvebu_sei *sei = domain->host_data;
+	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+
+	if (nr_irqs != 1 || d->hwirq >= sei->caps->cp_range.size) {
+		dev_err(sei->dev, "Invalid hwirq %lu\n", d->hwirq);
+		return;
+	}
+
+	mvebu_sei_cp_release_irq(sei, d->hwirq);
+	irq_domain_free_irqs_parent(domain, virq, 1);
+}
+
+static const struct irq_domain_ops mvebu_sei_cp_domain_ops = {
+	.alloc	= mvebu_sei_cp_domain_alloc,
+	.free	= mvebu_sei_cp_domain_free,
+};
+
+static struct irq_chip mvebu_sei_msi_irq_chip = {
+	.name		= "SEI pMSI",
+	.irq_ack	= irq_chip_ack_parent,
+	.irq_set_type	= irq_chip_set_type_parent,
+};
+
+static struct msi_domain_ops mvebu_sei_msi_ops = {
+};
+
+static struct msi_domain_info mvebu_sei_msi_domain_info = {
+	.flags	= MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS,
+	.ops	= &mvebu_sei_msi_ops,
+	.chip	= &mvebu_sei_msi_irq_chip,
+};
+
+static void mvebu_sei_handle_cascade_irq(struct irq_desc *desc)
+{
+	struct mvebu_sei *sei = irq_desc_get_handler_data(desc);
+	struct irq_chip *chip = irq_desc_get_chip(desc);
+	u32 idx;
+
+	chained_irq_enter(chip, desc);
+
+	for (idx = 0; idx < SEI_IRQ_REG_COUNT; idx++) {
+		unsigned long irqmap;
+		int bit;
+
+		irqmap = readl_relaxed(sei->base + GICP_SECR(idx));
+		for_each_set_bit(bit, &irqmap, SEI_IRQ_COUNT_PER_REG) {
+			unsigned long hwirq;
+			unsigned int virq;
+
+			hwirq = idx * SEI_IRQ_COUNT_PER_REG + bit;
+			virq = irq_find_mapping(sei->sei_domain, hwirq);
+			if (likely(virq)) {
+				generic_handle_irq(virq);
+				continue;
+			}
+
+			dev_warn(sei->dev,
+				 "Spurious IRQ detected (hwirq %lu)\n", hwirq);
+		}
+	}
+
+	chained_irq_exit(chip, desc);
+}
+
+static void mvebu_sei_reset(struct mvebu_sei *sei)
+{
+	u32 reg_idx;
+
+	/* Clear IRQ cause registers, mask all interrupts */
+	for (reg_idx = 0; reg_idx < SEI_IRQ_REG_COUNT; reg_idx++) {
+		writel_relaxed(0xFFFFFFFF, sei->base + GICP_SECR(reg_idx));
+		writel_relaxed(0xFFFFFFFF, sei->base + GICP_SEMR(reg_idx));
+	}
+}
+
+static int mvebu_sei_probe(struct platform_device *pdev)
+{
+	struct device_node *node = pdev->dev.of_node;
+	struct irq_domain *plat_domain;
+	struct mvebu_sei *sei;
+	u32 parent_irq;
+	int ret;
+
+	sei = devm_kzalloc(&pdev->dev, sizeof(*sei), GFP_KERNEL);
+	if (!sei)
+		return -ENOMEM;
+
+	sei->dev = &pdev->dev;
+
+	mutex_init(&sei->cp_msi_lock);
+	raw_spin_lock_init(&sei->mask_lock);
+
+	sei->res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	sei->base = devm_ioremap_resource(sei->dev, sei->res);
+	if (!sei->base) {
+		dev_err(sei->dev, "Failed to remap SEI resource\n");
+		return -ENODEV;
+	}
+
+	/* Retrieve the SEI capabilities with the interrupt ranges */
+	sei->caps = of_device_get_match_data(&pdev->dev);
+	if (!sei->caps) {
+		dev_err(sei->dev,
+			"Could not retrieve controller capabilities\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Reserve the single (top-level) parent SPI IRQ from which all the
+	 * interrupts handled by this driver will be signaled.
+	 */
+	parent_irq = irq_of_parse_and_map(node, 0);
+	if (parent_irq <= 0) {
+		dev_err(sei->dev, "Failed to retrieve top-level SPI IRQ\n");
+		return -ENODEV;
+	}
+
+	/* Create the root SEI domain */
+	sei->sei_domain = irq_domain_create_linear(of_node_to_fwnode(node),
+						   (sei->caps->ap_range.size +
+						    sei->caps->cp_range.size),
+						   &mvebu_sei_domain_ops,
+						   sei);
+	if (!sei->sei_domain) {
+		dev_err(sei->dev, "Failed to create SEI IRQ domain\n");
+		ret = -ENOMEM;
+		goto dispose_irq;
+	}
+
+	irq_domain_update_bus_token(sei->sei_domain, DOMAIN_BUS_NEXUS);
+
+	/* Create the 'wired' domain */
+	sei->ap_domain = irq_domain_create_hierarchy(sei->sei_domain, 0,
+						     sei->caps->ap_range.size,
+						     of_node_to_fwnode(node),
+						     &mvebu_sei_ap_domain_ops,
+						     sei);
+	if (!sei->ap_domain) {
+		dev_err(sei->dev, "Failed to create AP IRQ domain\n");
+		ret = -ENOMEM;
+		goto remove_sei_domain;
+	}
+
+	irq_domain_update_bus_token(sei->ap_domain, DOMAIN_BUS_WIRED);
+
+	/* Create the 'MSI' domain */
+	sei->cp_domain = irq_domain_create_hierarchy(sei->sei_domain, 0,
+						     sei->caps->cp_range.size,
+						     of_node_to_fwnode(node),
+						     &mvebu_sei_cp_domain_ops,
+						     sei);
+	if (!sei->cp_domain) {
+		pr_err("Failed to create CPs IRQ domain\n");
+		ret = -ENOMEM;
+		goto remove_ap_domain;
+	}
+
+	irq_domain_update_bus_token(sei->cp_domain, DOMAIN_BUS_GENERIC_MSI);
+
+	plat_domain = platform_msi_create_irq_domain(of_node_to_fwnode(node),
+						     &mvebu_sei_msi_domain_info,
+						     sei->cp_domain);
+	if (!plat_domain) {
+		pr_err("Failed to create CPs MSI domain\n");
+		ret = -ENOMEM;
+		goto remove_cp_domain;
+	}
+
+	mvebu_sei_reset(sei);
+
+	irq_set_chained_handler_and_data(parent_irq,
+					 mvebu_sei_handle_cascade_irq,
+					 sei);
+
+	return 0;
+
+remove_cp_domain:
+	irq_domain_remove(sei->cp_domain);
+remove_ap_domain:
+	irq_domain_remove(sei->ap_domain);
+remove_sei_domain:
+	irq_domain_remove(sei->sei_domain);
+dispose_irq:
+	irq_dispose_mapping(parent_irq);
+
+	return ret;
+}
+
+struct mvebu_sei_caps mvebu_sei_ap806_caps = {
+	.ap_range = {
+		.first = 0,
+		.size = 21,
+	},
+	.cp_range = {
+		.first = 21,
+		.size = 43,
+	},
+};
+
+static const struct of_device_id mvebu_sei_of_match[] = {
+	{
+		.compatible = "marvell,ap806-sei",
+		.data = &mvebu_sei_ap806_caps,
+	},
+	{},
+};
+
+static struct platform_driver mvebu_sei_driver = {
+	.probe  = mvebu_sei_probe,
+	.driver = {
+		.name = "mvebu-sei",
+		.of_match_table = mvebu_sei_of_match,
+	},
+};
+builtin_platform_driver(mvebu_sei_driver);
diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
index dccfa65aee96..068aa46f0d55 100644
--- a/include/linux/irqdomain.h
+++ b/include/linux/irqdomain.h
@@ -75,6 +75,7 @@ struct irq_fwspec {
 enum irq_domain_bus_token {
 	DOMAIN_BUS_ANY		= 0,
 	DOMAIN_BUS_WIRED,
+	DOMAIN_BUS_GENERIC_MSI,
 	DOMAIN_BUS_PCI_MSI,
 	DOMAIN_BUS_PLATFORM_MSI,
 	DOMAIN_BUS_NEXUS,
-- 
cgit v1.2.3


From 9524d6b265f9b2b9a61fceb2ee2ce1c2a83e39ca Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Tue, 11 Sep 2018 16:40:20 -0700
Subject: dmaengine: ep93xx: Return proper enum in ep93xx_dma_chan_direction

Clang warns when implicitly converting from one enumerated type to
another. Avoid this by using the equivalent value from the expected
type.

In file included from drivers/dma/ep93xx_dma.c:30:
./include/linux/platform_data/dma-ep93xx.h:88:10: warning: implicit
conversion from enumeration type 'enum dma_data_direction' to different
enumeration type 'enum dma_transfer_direction' [-Wenum-conversion]
                return DMA_NONE;
                ~~~~~~ ^~~~~~~~
1 warning generated.

Reported-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/linux/platform_data/dma-ep93xx.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/platform_data/dma-ep93xx.h b/include/linux/platform_data/dma-ep93xx.h
index f8f1f6b952a6..eb9805bb3fe8 100644
--- a/include/linux/platform_data/dma-ep93xx.h
+++ b/include/linux/platform_data/dma-ep93xx.h
@@ -85,7 +85,7 @@ static inline enum dma_transfer_direction
 ep93xx_dma_chan_direction(struct dma_chan *chan)
 {
 	if (!ep93xx_dma_chan_is_m2p(chan))
-		return DMA_NONE;
+		return DMA_TRANS_NONE;
 
 	/* even channels are for TX, odd for RX */
 	return (chan->chan_id % 2 == 0) ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM;
-- 
cgit v1.2.3


From cc16567e5a8a7bb9439ef61ab80069acdd33f76f Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 2 Oct 2018 11:03:40 +0200
Subject: net: drop unused skb_append_datato_frags()

This helper is unused since commit 988cf74deb45 ("inet:
Stop generating UFO packets.")

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  5 -----
 net/core/skbuff.c      | 58 --------------------------------------------------
 2 files changed, 63 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 87e29710373f..119d092c6b13 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1082,11 +1082,6 @@ static inline int skb_pad(struct sk_buff *skb, int pad)
 }
 #define dev_kfree_skb(a)	consume_skb(a)
 
-int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
-			    int getfrag(void *from, char *to, int offset,
-					int len, int odd, struct sk_buff *skb),
-			    void *from, int length);
-
 int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
 			 int offset, size_t size);
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b2c807f67aba..0e937d3d85b5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3381,64 +3381,6 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
 }
 EXPORT_SYMBOL(skb_find_text);
 
-/**
- * skb_append_datato_frags - append the user data to a skb
- * @sk: sock  structure
- * @skb: skb structure to be appended with user data.
- * @getfrag: call back function to be used for getting the user data
- * @from: pointer to user message iov
- * @length: length of the iov message
- *
- * Description: This procedure append the user data in the fragment part
- * of the skb if any page alloc fails user this procedure returns  -ENOMEM
- */
-int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
-			int (*getfrag)(void *from, char *to, int offset,
-					int len, int odd, struct sk_buff *skb),
-			void *from, int length)
-{
-	int frg_cnt = skb_shinfo(skb)->nr_frags;
-	int copy;
-	int offset = 0;
-	int ret;
-	struct page_frag *pfrag = &current->task_frag;
-
-	do {
-		/* Return error if we don't have space for new frag */
-		if (frg_cnt >= MAX_SKB_FRAGS)
-			return -EMSGSIZE;
-
-		if (!sk_page_frag_refill(sk, pfrag))
-			return -ENOMEM;
-
-		/* copy the user data to page */
-		copy = min_t(int, length, pfrag->size - pfrag->offset);
-
-		ret = getfrag(from, page_address(pfrag->page) + pfrag->offset,
-			      offset, copy, 0, skb);
-		if (ret < 0)
-			return -EFAULT;
-
-		/* copy was successful so update the size parameters */
-		skb_fill_page_desc(skb, frg_cnt, pfrag->page, pfrag->offset,
-				   copy);
-		frg_cnt++;
-		pfrag->offset += copy;
-		get_page(pfrag->page);
-
-		skb->truesize += copy;
-		refcount_add(copy, &sk->sk_wmem_alloc);
-		skb->len += copy;
-		skb->data_len += copy;
-		offset += copy;
-		length -= copy;
-
-	} while (length > 0);
-
-	return 0;
-}
-EXPORT_SYMBOL(skb_append_datato_frags);
-
 int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
 			 int offset, size_t size)
 {
-- 
cgit v1.2.3


From 5bf0961cc6a180c077793f2615a8fd842c655876 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <sudarsana.kalluru@cavium.com>
Date: Tue, 2 Oct 2018 06:16:11 -0700
Subject: qed: Add driver support for 20G link speed.

Add driver support for configuring/reading the 20G link speed.

Signed-off-by: Sudarsana Reddy Kalluru <Sudarsana.Kalluru@cavium.com>
Signed-off-by: Michal Kalderon <Michal.Kalderon@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c  |  3 +++
 drivers/net/ethernet/qlogic/qed/qed_hsi.h  |  2 ++
 drivers/net/ethernet/qlogic/qed/qed_main.c | 11 +++++++++++
 include/linux/qed/qed_if.h                 |  9 +++++----
 4 files changed, 21 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 0fbeafeef7a0..7ceb2b97538d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -2679,6 +2679,9 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	case NVM_CFG1_PORT_DRV_LINK_SPEED_10G:
 		link->speed.forced_speed = 10000;
 		break;
+	case NVM_CFG1_PORT_DRV_LINK_SPEED_20G:
+		link->speed.forced_speed = 20000;
+		break;
 	case NVM_CFG1_PORT_DRV_LINK_SPEED_25G:
 		link->speed.forced_speed = 25000;
 		break;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index d4d08383c753..56578f888b70 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -13154,6 +13154,7 @@ struct nvm_cfg1_port {
 #define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_OFFSET		0
 #define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G		0x1
 #define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G		0x2
+#define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G             0x4
 #define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G		0x8
 #define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G		0x10
 #define NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G		0x20
@@ -13164,6 +13165,7 @@ struct nvm_cfg1_port {
 #define NVM_CFG1_PORT_DRV_LINK_SPEED_AUTONEG			0x0
 #define NVM_CFG1_PORT_DRV_LINK_SPEED_1G				0x1
 #define NVM_CFG1_PORT_DRV_LINK_SPEED_10G			0x2
+#define NVM_CFG1_PORT_DRV_LINK_SPEED_20G                        0x3
 #define NVM_CFG1_PORT_DRV_LINK_SPEED_25G			0x4
 #define NVM_CFG1_PORT_DRV_LINK_SPEED_40G			0x5
 #define NVM_CFG1_PORT_DRV_LINK_SPEED_50G			0x6
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 2094d86a7a08..75d217aaf8ce 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -1337,6 +1337,9 @@ static int qed_set_link(struct qed_dev *cdev, struct qed_link_params *params)
 		if (params->adv_speeds & QED_LM_10000baseKR_Full_BIT)
 			link_params->speed.advertised_speeds |=
 			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
+		if (params->adv_speeds & QED_LM_20000baseKR2_Full_BIT)
+			link_params->speed.advertised_speeds |=
+				NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G;
 		if (params->adv_speeds & QED_LM_25000baseKR_Full_BIT)
 			link_params->speed.advertised_speeds |=
 			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G;
@@ -1502,6 +1505,9 @@ static void qed_fill_link(struct qed_hwfn *hwfn,
 	if (params.speed.advertised_speeds &
 	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
 		if_link->advertised_caps |= QED_LM_10000baseKR_Full_BIT;
+	if (params.speed.advertised_speeds &
+	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G)
+		if_link->advertised_caps |= QED_LM_20000baseKR2_Full_BIT;
 	if (params.speed.advertised_speeds &
 	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
 		if_link->advertised_caps |= QED_LM_25000baseKR_Full_BIT;
@@ -1522,6 +1528,9 @@ static void qed_fill_link(struct qed_hwfn *hwfn,
 	if (link_caps.speed_capabilities &
 	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
 		if_link->supported_caps |= QED_LM_10000baseKR_Full_BIT;
+	if (link_caps.speed_capabilities &
+	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G)
+		if_link->supported_caps |= QED_LM_20000baseKR2_Full_BIT;
 	if (link_caps.speed_capabilities &
 	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
 		if_link->supported_caps |= QED_LM_25000baseKR_Full_BIT;
@@ -1559,6 +1568,8 @@ static void qed_fill_link(struct qed_hwfn *hwfn,
 		if_link->lp_caps |= QED_LM_1000baseT_Full_BIT;
 	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_10G)
 		if_link->lp_caps |= QED_LM_10000baseKR_Full_BIT;
+	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_20G)
+		if_link->lp_caps |= QED_LM_20000baseKR2_Full_BIT;
 	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_25G)
 		if_link->lp_caps |= QED_LM_25000baseKR_Full_BIT;
 	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_40G)
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 8cd34645e892..dee3c9c744f7 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -670,10 +670,11 @@ enum qed_link_mode_bits {
 	QED_LM_1000baseT_Half_BIT = BIT(4),
 	QED_LM_1000baseT_Full_BIT = BIT(5),
 	QED_LM_10000baseKR_Full_BIT = BIT(6),
-	QED_LM_25000baseKR_Full_BIT = BIT(7),
-	QED_LM_40000baseLR4_Full_BIT = BIT(8),
-	QED_LM_50000baseKR2_Full_BIT = BIT(9),
-	QED_LM_100000baseKR4_Full_BIT = BIT(10),
+	QED_LM_20000baseKR2_Full_BIT = BIT(7),
+	QED_LM_25000baseKR_Full_BIT = BIT(8),
+	QED_LM_40000baseLR4_Full_BIT = BIT(9),
+	QED_LM_50000baseKR2_Full_BIT = BIT(10),
+	QED_LM_100000baseKR4_Full_BIT = BIT(11),
 	QED_LM_COUNT = 11
 };
 
-- 
cgit v1.2.3


From c550f01c810f2197c98e6e3103f81797f5e063be Mon Sep 17 00:00:00 2001
From: Steve Sakoman <steve@sakoman.com>
Date: Thu, 20 Sep 2018 09:20:34 -1000
Subject: serial:serial_core: Allow use of CTS for PPS line discipline

Add a "pps_4wire" file to serial ports in sysfs in case the kernel is
configured with CONFIG_PPS_CLIENT_LDISC. Writing 1 to the file enables
the use of CTS instead of DCD for PPS signal input. This is necessary
in case a serial port is not completely wired.
Though this affects PPS processing the patch is against the serial core
as the source of the serial port PPS event dispatching has to be
modified. Furthermore it should be possible to modify the source of
serial port PPS event dispatching before changing the line discipline.

Signed-off-by: Andreas Steinmetz <ast@domdv.de>
Signed-off-by: Steve Sakoman <steve@sakoman.com>
Tested-by: Steve Sakoman <steve@sakoman.com>
Tested-by: Eric Gallimore <egallimore@ucsd.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-tty |  9 +++++
 drivers/tty/serial/serial_core.c    | 70 ++++++++++++++++++++++++++++++++++++-
 include/linux/serial_core.h         |  3 +-
 3 files changed, 80 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-tty b/Documentation/ABI/testing/sysfs-tty
index 9eb3c2b6b040..441105a75d1f 100644
--- a/Documentation/ABI/testing/sysfs-tty
+++ b/Documentation/ABI/testing/sysfs-tty
@@ -154,3 +154,12 @@ Description:
 		 device specification. For example, when user sets 7bytes on
 		 16550A, which has 1/4/8/14 bytes trigger, the RX trigger is
 		 automatically changed to 4 bytes.
+
+What:		/sys/class/tty/ttyS0/pps_4wire
+Date:		September 2018
+Contact:	Steve Sakoman <steve@sakoman.com>
+Description:
+		 Shows/sets "4 wire" mode for the PPS input to the serial driver.
+		 For fully implemented serial ports PPS is normally provided
+		 on the DCD line. For partial "4 wire" implementations CTS is
+		 used instead of DCD.
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 80bb56facfb6..ed0133395cc7 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -2664,6 +2664,57 @@ static ssize_t uart_get_attr_iomem_reg_shift(struct device *dev,
 	return snprintf(buf, PAGE_SIZE, "%d\n", tmp.iomem_reg_shift);
 }
 
+static ssize_t pps_4wire_show(struct device *dev,
+	struct device_attribute *attr, char *buf)
+{
+	struct tty_port *port = dev_get_drvdata(dev);
+	struct uart_state *state = container_of(port, struct uart_state, port);
+	struct uart_port *uport;
+	int mode = 0;
+
+	mutex_lock(&port->mutex);
+	uport = uart_port_check(state);
+	if (!uport)
+		goto out;
+
+	mode = uport->pps_4wire;
+
+out:
+	mutex_unlock(&port->mutex);
+	return sprintf(buf, mode ? "yes\n" : "no\n");
+}
+
+static ssize_t pps_4wire_store(struct device *dev,
+	struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct tty_port *port = dev_get_drvdata(dev);
+	struct uart_state *state = container_of(port, struct uart_state, port);
+	struct uart_port *uport;
+	bool mode;
+	int ret;
+
+	if (!count)
+		return -EINVAL;
+
+	ret = kstrtobool(buf, &mode);
+	if (ret < 0)
+		return ret;
+
+	mutex_lock(&port->mutex);
+	uport = uart_port_check(state);
+	if (!uport)
+		goto out;
+
+	spin_lock_irq(&uport->lock);
+	uport->pps_4wire = mode;
+	spin_unlock_irq(&uport->lock);
+
+out:
+	mutex_unlock(&port->mutex);
+	return count;
+}
+static DEVICE_ATTR_RW(pps_4wire);
+
 static DEVICE_ATTR(type, S_IRUSR | S_IRGRP, uart_get_attr_type, NULL);
 static DEVICE_ATTR(line, S_IRUSR | S_IRGRP, uart_get_attr_line, NULL);
 static DEVICE_ATTR(port, S_IRUSR | S_IRGRP, uart_get_attr_port, NULL);
@@ -2692,6 +2743,7 @@ static struct attribute *tty_dev_attrs[] = {
 	&dev_attr_io_type.attr,
 	&dev_attr_iomem_base.attr,
 	&dev_attr_iomem_reg_shift.attr,
+	&dev_attr_pps_4wire.attr,
 	NULL,
 	};
 
@@ -2748,6 +2800,9 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport)
 		goto out;
 	}
 
+	/* assert that pps handling is done via DCD as default */
+	uport->pps_4wire = 0;
+
 	/*
 	 * If this port is a console, then the spinlock is already
 	 * initialised.
@@ -2923,7 +2978,7 @@ void uart_handle_dcd_change(struct uart_port *uport, unsigned int status)
 
 	lockdep_assert_held_once(&uport->lock);
 
-	if (tty) {
+	if (tty && !uport->pps_4wire) {
 		ld = tty_ldisc_ref(tty);
 		if (ld) {
 			if (ld->ops->dcd_change)
@@ -2952,8 +3007,21 @@ EXPORT_SYMBOL_GPL(uart_handle_dcd_change);
  */
 void uart_handle_cts_change(struct uart_port *uport, unsigned int status)
 {
+	struct tty_port *port = &uport->state->port;
+	struct tty_struct *tty = port->tty;
+	struct tty_ldisc *ld;
+
 	lockdep_assert_held_once(&uport->lock);
 
+	if (tty && uport->pps_4wire) {
+		ld = tty_ldisc_ref(tty);
+		if (ld) {
+			if (ld->ops->dcd_change)
+				ld->ops->dcd_change(tty, status);
+			tty_ldisc_deref(ld);
+		}
+	}
+
 	uport->icount.cts++;
 
 	if (uart_softcts_mode(uport)) {
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 406edae44ca3..079793e5d3fa 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -255,7 +255,8 @@ struct uart_port {
 	struct device		*dev;			/* parent device */
 	unsigned char		hub6;			/* this should be in the 8250 driver */
 	unsigned char		suspended;
-	unsigned char		unused[2];
+	unsigned char		pps_4wire;		/* CTS instead of DCD */
+	unsigned char		unused;
 	const char		*name;			/* port name */
 	struct attribute_group	*attr_group;		/* port specific attributes */
 	const struct attribute_group **tty_groups;	/* all attributes (serial core use only) */
-- 
cgit v1.2.3


From ad8c0eaa0a418ae8ef3f9217638bb86439399eac Mon Sep 17 00:00:00 2001
From: Nicolas Ferre <nicolas.ferre@microchip.com>
Date: Wed, 26 Sep 2018 14:58:47 +0200
Subject: tty/serial_core: add ISO7816 infrastructure

Add the ISO7816 ioctl and associated accessors and data structure.
Drivers can then use this common implementation to handle ISO7816
(smart cards).

Signed-off-by: Nicolas Ferre <nicolas.ferre@microchip.com>
[ludovic.desroches@microchip.com: squash and rebase, removal of gpios, checkpatch fixes]
Signed-off-by: Ludovic Desroches <ludovic.desroches@microchip.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/serial/serial-iso7816.txt | 83 +++++++++++++++++++++++++++++++++
 arch/alpha/include/uapi/asm/ioctls.h    |  2 +
 arch/mips/include/uapi/asm/ioctls.h     |  2 +
 arch/parisc/include/uapi/asm/ioctls.h   |  2 +
 arch/powerpc/include/uapi/asm/ioctls.h  |  2 +
 arch/sh/include/uapi/asm/ioctls.h       |  2 +
 arch/sparc/include/uapi/asm/ioctls.h    |  2 +
 arch/xtensa/include/uapi/asm/ioctls.h   |  2 +
 drivers/tty/serial/serial_core.c        | 60 ++++++++++++++++++++++++
 include/linux/serial_core.h             |  3 ++
 include/uapi/asm-generic/ioctls.h       |  2 +
 include/uapi/linux/serial.h             | 17 +++++++
 12 files changed, 179 insertions(+)
 create mode 100644 Documentation/serial/serial-iso7816.txt

(limited to 'include/linux')

diff --git a/Documentation/serial/serial-iso7816.txt b/Documentation/serial/serial-iso7816.txt
new file mode 100644
index 000000000000..3193d24a2b0f
--- /dev/null
+++ b/Documentation/serial/serial-iso7816.txt
@@ -0,0 +1,83 @@
+                        ISO7816 SERIAL COMMUNICATIONS
+
+1. INTRODUCTION
+
+  ISO/IEC7816 is a series of standards specifying integrated circuit cards (ICC)
+  also known as smart cards.
+
+2. HARDWARE-RELATED CONSIDERATIONS
+
+  Some CPUs/UARTs (e.g., Microchip AT91) contain a built-in mode capable of
+  handling communication with a smart card.
+
+  For these microcontrollers, the Linux driver should be made capable of
+  working in both modes, and proper ioctls (see later) should be made
+  available at user-level to allow switching from one mode to the other, and
+  vice versa.
+
+3. DATA STRUCTURES ALREADY AVAILABLE IN THE KERNEL
+
+  The Linux kernel provides the serial_iso7816 structure (see [1]) to handle
+  ISO7816 communications. This data structure is used to set and configure
+  ISO7816 parameters in ioctls.
+
+  Any driver for devices capable of working both as RS232 and ISO7816 should
+  implement the iso7816_config callback in the uart_port structure. The
+  serial_core calls iso7816_config to do the device specific part in response
+  to TIOCGISO7816 and TIOCSISO7816 ioctls (see below). The iso7816_config
+  callback receives a pointer to struct serial_iso7816.
+
+4. USAGE FROM USER-LEVEL
+
+  From user-level, ISO7816 configuration can be get/set using the previous
+  ioctls. For instance, to set ISO7816 you can use the following code:
+
+	#include <linux/serial.h>
+
+	/* Include definition for ISO7816 ioctls: TIOCSISO7816 and TIOCGISO7816 */
+	#include <sys/ioctl.h>
+
+	/* Open your specific device (e.g., /dev/mydevice): */
+	int fd = open ("/dev/mydevice", O_RDWR);
+	if (fd < 0) {
+		/* Error handling. See errno. */
+	}
+
+	struct serial_iso7816 iso7816conf;
+
+	/* Reserved fields as to be zeroed */
+	memset(&iso7816conf, 0, sizeof(iso7816conf));
+
+	/* Enable ISO7816 mode: */
+	iso7816conf.flags |= SER_ISO7816_ENABLED;
+
+	/* Select the protocol: */
+	/* T=0 */
+	iso7816conf.flags |= SER_ISO7816_T(0);
+	/* or T=1 */
+	iso7816conf.flags |= SER_ISO7816_T(1);
+
+	/* Set the guard time: */
+	iso7816conf.tg = 2;
+
+	/* Set the clock frequency*/
+	iso7816conf.clk = 3571200;
+
+	/* Set transmission factors: */
+	iso7816conf.sc_fi = 372;
+	iso7816conf.sc_di = 1;
+
+	if (ioctl(fd_usart, TIOCSISO7816, &iso7816conf) < 0) {
+		/* Error handling. See errno. */
+	}
+
+	/* Use read() and write() syscalls here... */
+
+	/* Close the device when finished: */
+	if (close (fd) < 0) {
+		/* Error handling. See errno. */
+	}
+
+5. REFERENCES
+
+ [1]    include/uapi/linux/serial.h
diff --git a/arch/alpha/include/uapi/asm/ioctls.h b/arch/alpha/include/uapi/asm/ioctls.h
index 3729d92d3fa8..1e9121c9b3c7 100644
--- a/arch/alpha/include/uapi/asm/ioctls.h
+++ b/arch/alpha/include/uapi/asm/ioctls.h
@@ -102,6 +102,8 @@
 #define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
 #define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
+#define TIOCGISO7816	_IOR('T', 0x42, struct serial_iso7816)
+#define TIOCSISO7816	_IOWR('T', 0x43, struct serial_iso7816)
 
 #define TIOCSERCONFIG	0x5453
 #define TIOCSERGWILD	0x5454
diff --git a/arch/mips/include/uapi/asm/ioctls.h b/arch/mips/include/uapi/asm/ioctls.h
index 890245a9f0c4..16aa8a766aec 100644
--- a/arch/mips/include/uapi/asm/ioctls.h
+++ b/arch/mips/include/uapi/asm/ioctls.h
@@ -93,6 +93,8 @@
 #define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
 #define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
+#define TIOCGISO7816	_IOR('T', 0x42, struct serial_iso7816)
+#define TIOCSISO7816	_IOWR('T', 0x43, struct serial_iso7816)
 
 /* I hope the range from 0x5480 on is free ... */
 #define TIOCSCTTY	0x5480		/* become controlling tty */
diff --git a/arch/parisc/include/uapi/asm/ioctls.h b/arch/parisc/include/uapi/asm/ioctls.h
index aafb1c0ca0af..82d1148c6379 100644
--- a/arch/parisc/include/uapi/asm/ioctls.h
+++ b/arch/parisc/include/uapi/asm/ioctls.h
@@ -62,6 +62,8 @@
 #define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
 #define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
+#define TIOCGISO7816	_IOR('T', 0x42, struct serial_iso7816)
+#define TIOCSISO7816	_IOWR('T', 0x43, struct serial_iso7816)
 
 #define FIONCLEX	0x5450  /* these numbers need to be adjusted. */
 #define FIOCLEX		0x5451
diff --git a/arch/powerpc/include/uapi/asm/ioctls.h b/arch/powerpc/include/uapi/asm/ioctls.h
index 41b1a5c15734..2c145da3b774 100644
--- a/arch/powerpc/include/uapi/asm/ioctls.h
+++ b/arch/powerpc/include/uapi/asm/ioctls.h
@@ -102,6 +102,8 @@
 #define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
 #define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
+#define TIOCGISO7816	_IOR('T', 0x42, struct serial_iso7816)
+#define TIOCSISO7816	_IOWR('T', 0x43, struct serial_iso7816)
 
 #define TIOCSERCONFIG	0x5453
 #define TIOCSERGWILD	0x5454
diff --git a/arch/sh/include/uapi/asm/ioctls.h b/arch/sh/include/uapi/asm/ioctls.h
index cc62f6f98103..11866d4f60e1 100644
--- a/arch/sh/include/uapi/asm/ioctls.h
+++ b/arch/sh/include/uapi/asm/ioctls.h
@@ -95,6 +95,8 @@
 #define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
 #define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
+#define TIOCGISO7816	_IOR('T', 0x42, struct serial_iso7816)
+#define TIOCSISO7816	_IOWR('T', 0x43, struct serial_iso7816)
 
 #define TIOCSERCONFIG	_IO('T', 83) /* 0x5453 */
 #define TIOCSERGWILD	_IOR('T', 84,  int) /* 0x5454 */
diff --git a/arch/sparc/include/uapi/asm/ioctls.h b/arch/sparc/include/uapi/asm/ioctls.h
index 2df52711e170..7fd2f5873c9e 100644
--- a/arch/sparc/include/uapi/asm/ioctls.h
+++ b/arch/sparc/include/uapi/asm/ioctls.h
@@ -27,6 +27,8 @@
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
 #define TIOCGRS485	_IOR('T', 0x41, struct serial_rs485)
 #define TIOCSRS485	_IOWR('T', 0x42, struct serial_rs485)
+#define TIOCGISO7816	_IOR('T', 0x43, struct serial_iso7816)
+#define TIOCSISO7816	_IOWR('T', 0x44, struct serial_iso7816)
 
 /* Note that all the ioctls that are not available in Linux have a
  * double underscore on the front to: a) avoid some programs to
diff --git a/arch/xtensa/include/uapi/asm/ioctls.h b/arch/xtensa/include/uapi/asm/ioctls.h
index ec43609cbfc5..6d4a87296c95 100644
--- a/arch/xtensa/include/uapi/asm/ioctls.h
+++ b/arch/xtensa/include/uapi/asm/ioctls.h
@@ -107,6 +107,8 @@
 #define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
 #define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
+#define TIOCGISO7816	_IOR('T', 0x42, struct serial_iso7816)
+#define TIOCSISO7816	_IOWR('T', 0x43, struct serial_iso7816)
 
 #define TIOCSERCONFIG	_IO('T', 83)
 #define TIOCSERGWILD	_IOR('T', 84,  int)
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index ed0133395cc7..0a4e6eeb5ff3 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -1308,6 +1308,58 @@ static int uart_set_rs485_config(struct uart_port *port,
 	return 0;
 }
 
+static int uart_get_iso7816_config(struct uart_port *port,
+				   struct serial_iso7816 __user *iso7816)
+{
+	unsigned long flags;
+	struct serial_iso7816 aux;
+
+	if (!port->iso7816_config)
+		return -ENOIOCTLCMD;
+
+	spin_lock_irqsave(&port->lock, flags);
+	aux = port->iso7816;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	if (copy_to_user(iso7816, &aux, sizeof(aux)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int uart_set_iso7816_config(struct uart_port *port,
+				   struct serial_iso7816 __user *iso7816_user)
+{
+	struct serial_iso7816 iso7816;
+	int i, ret;
+	unsigned long flags;
+
+	if (!port->iso7816_config)
+		return -ENOIOCTLCMD;
+
+	if (copy_from_user(&iso7816, iso7816_user, sizeof(*iso7816_user)))
+		return -EFAULT;
+
+	/*
+	 * There are 5 words reserved for future use. Check that userspace
+	 * doesn't put stuff in there to prevent breakages in the future.
+	 */
+	for (i = 0; i < 5; i++)
+		if (iso7816.reserved[i])
+			return -EINVAL;
+
+	spin_lock_irqsave(&port->lock, flags);
+	ret = port->iso7816_config(port, &iso7816);
+	spin_unlock_irqrestore(&port->lock, flags);
+	if (ret)
+		return ret;
+
+	if (copy_to_user(iso7816_user, &port->iso7816, sizeof(port->iso7816)))
+		return -EFAULT;
+
+	return 0;
+}
+
 /*
  * Called via sys_ioctl.  We can use spin_lock_irq() here.
  */
@@ -1392,6 +1444,14 @@ uart_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg)
 	case TIOCSRS485:
 		ret = uart_set_rs485_config(uport, uarg);
 		break;
+
+	case TIOCSISO7816:
+		ret = uart_set_iso7816_config(state->uart_port, uarg);
+		break;
+
+	case TIOCGISO7816:
+		ret = uart_get_iso7816_config(state->uart_port, uarg);
+		break;
 	default:
 		if (uport->ops->ioctl)
 			ret = uport->ops->ioctl(uport, cmd, arg);
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 079793e5d3fa..4e2ba4894dcc 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -144,6 +144,8 @@ struct uart_port {
 	void			(*handle_break)(struct uart_port *);
 	int			(*rs485_config)(struct uart_port *,
 						struct serial_rs485 *rs485);
+	int			(*iso7816_config)(struct uart_port *,
+						  struct serial_iso7816 *iso7816);
 	unsigned int		irq;			/* irq number */
 	unsigned long		irqflags;		/* irq flags  */
 	unsigned int		uartclk;		/* base uart clock */
@@ -261,6 +263,7 @@ struct uart_port {
 	struct attribute_group	*attr_group;		/* port specific attributes */
 	const struct attribute_group **tty_groups;	/* all attributes (serial core use only) */
 	struct serial_rs485     rs485;
+	struct serial_iso7816   iso7816;
 	void			*private_data;		/* generic platform data pointer */
 };
 
diff --git a/include/uapi/asm-generic/ioctls.h b/include/uapi/asm-generic/ioctls.h
index 040651735662..cdc9f4ca8c27 100644
--- a/include/uapi/asm-generic/ioctls.h
+++ b/include/uapi/asm-generic/ioctls.h
@@ -79,6 +79,8 @@
 #define TIOCGPTLCK	_IOR('T', 0x39, int) /* Get Pty lock state */
 #define TIOCGEXCL	_IOR('T', 0x40, int) /* Get exclusive mode state */
 #define TIOCGPTPEER	_IO('T', 0x41) /* Safely open the slave */
+#define TIOCGISO7816	_IOR('T', 0x42, struct serial_iso7816)
+#define TIOCSISO7816	_IOWR('T', 0x43, struct serial_iso7816)
 
 #define FIONCLEX	0x5450
 #define FIOCLEX		0x5451
diff --git a/include/uapi/linux/serial.h b/include/uapi/linux/serial.h
index 3fdd0dee8b41..93eb3c496ff1 100644
--- a/include/uapi/linux/serial.h
+++ b/include/uapi/linux/serial.h
@@ -132,4 +132,21 @@ struct serial_rs485 {
 					   are a royal PITA .. */
 };
 
+/*
+ * Serial interface for controlling ISO7816 settings on chips with suitable
+ * support. Set with TIOCSISO7816 and get with TIOCGISO7816 if supported by
+ * your platform.
+ */
+struct serial_iso7816 {
+	__u32	flags;			/* ISO7816 feature flags */
+#define SER_ISO7816_ENABLED		(1 << 0)
+#define SER_ISO7816_T_PARAM		(0x0f << 4)
+#define SER_ISO7816_T(t)		(((t) & 0x0f) << 4)
+	__u32	tg;
+	__u32	sc_fi;
+	__u32	sc_di;
+	__u32	clk;
+	__u32	reserved[5];
+};
+
 #endif /* _UAPI_LINUX_SERIAL_H */
-- 
cgit v1.2.3


From f0157160b359b1d263ee9d4e0a435a7ad85bbcea Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Thu, 20 Sep 2018 10:27:17 -0600
Subject: PCI: Make link active reporting detection generic

The spec has timing requirements when waiting for a link to become active
after a conventional reset.  Implement those hard delays when waiting for
an active link so pciehp and dpc drivers don't need to duplicate this.

For devices that don't support data link layer active reporting, wait the
fixed time recommended by the PCIe spec.

Signed-off-by: Keith Busch <keith.busch@intel.com>
[bhelgaas: changelog]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Sinan Kaya <okaya@kernel.org>
---
 drivers/pci/hotplug/pciehp.h     |  6 ------
 drivers/pci/hotplug/pciehp_hpc.c | 22 ++--------------------
 drivers/pci/pci.c                | 33 +++++++++++++++++++++++++++------
 drivers/pci/pcie/dpc.c           |  4 +++-
 drivers/pci/probe.c              |  1 +
 include/linux/pci.h              |  1 +
 6 files changed, 34 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h
index 3740f1a759c5..75fd52571107 100644
--- a/drivers/pci/hotplug/pciehp.h
+++ b/drivers/pci/hotplug/pciehp.h
@@ -62,11 +62,6 @@ do {									\
  * struct controller - PCIe hotplug controller
  * @pcie: pointer to the controller's PCIe port service device
  * @slot_cap: cached copy of the Slot Capabilities register
- * @link_active_reporting: cached copy of Data Link Layer Link Active Reporting
- *	Capable bit in Link Capabilities register; if this bit is zero, the
- *	Data Link Layer Link Active bit in the Link Status register will never
- *	be set and the driver is thus confined to wait 1 second before assuming
- *	the link to a hotplugged device is up and accessing it
  * @slot_ctrl: cached copy of the Slot Control register
  * @ctrl_lock: serializes writes to the Slot Control register
  * @cmd_started: jiffies when the Slot Control register was last written;
@@ -103,7 +98,6 @@ struct controller {
 	struct pcie_device *pcie;
 
 	u32 slot_cap;				/* capabilities and quirks */
-	unsigned int link_active_reporting:1;
 
 	u16 slot_ctrl;				/* control register access */
 	struct mutex ctrl_lock;
diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 7b5f9db60d9a..f0f3f4a3dac4 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -214,13 +214,6 @@ bool pciehp_check_link_active(struct controller *ctrl)
 	return ret;
 }
 
-static void pcie_wait_link_active(struct controller *ctrl)
-{
-	struct pci_dev *pdev = ctrl_dev(ctrl);
-
-	pcie_wait_for_link(pdev, true);
-}
-
 static bool pci_bus_check_dev(struct pci_bus *bus, int devfn)
 {
 	u32 l;
@@ -253,18 +246,9 @@ int pciehp_check_link_status(struct controller *ctrl)
 	bool found;
 	u16 lnk_status;
 
-	/*
-	 * Data Link Layer Link Active Reporting must be capable for
-	 * hot-plug capable downstream port. But old controller might
-	 * not implement it. In this case, we wait for 1000 ms.
-	*/
-	if (ctrl->link_active_reporting)
-		pcie_wait_link_active(ctrl);
-	else
-		msleep(1000);
+	if (!pcie_wait_for_link(pdev, true))
+		return -1;
 
-	/* wait 100ms before read pci conf, and try in 1s */
-	msleep(100);
 	found = pci_bus_check_dev(ctrl->pcie->port->subordinate,
 					PCI_DEVFN(0, 0));
 
@@ -865,8 +849,6 @@ struct controller *pcie_init(struct pcie_device *dev)
 
 	/* Check if Data Link Layer Link Active Reporting is implemented */
 	pcie_capability_read_dword(pdev, PCI_EXP_LNKCAP, &link_cap);
-	if (link_cap & PCI_EXP_LNKCAP_DLLLARC)
-		ctrl->link_active_reporting = 1;
 
 	/* Clear all remaining event bits in Slot Status register. */
 	pcie_capability_write_word(pdev, PCI_EXP_SLTSTA,
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 6916af269b19..4b0b1d0548f0 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -4489,21 +4489,42 @@ bool pcie_wait_for_link(struct pci_dev *pdev, bool active)
 	bool ret;
 	u16 lnk_status;
 
+	/*
+	 * Some controllers might not implement link active reporting. In this
+	 * case, we wait for 1000 + 100 ms.
+	 */
+	if (!pdev->link_active_reporting) {
+		msleep(1100);
+		return true;
+	}
+
+	/*
+	 * PCIe r4.0 sec 6.6.1, a component must enter LTSSM Detect within 20ms,
+	 * after which we should expect an link active if the reset was
+	 * successful. If so, software must wait a minimum 100ms before sending
+	 * configuration requests to devices downstream this port.
+	 *
+	 * If the link fails to activate, either the device was physically
+	 * removed or the link is permanently failed.
+	 */
+	if (active)
+		msleep(20);
 	for (;;) {
 		pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
 		ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);
 		if (ret == active)
-			return true;
+			break;
 		if (timeout <= 0)
 			break;
 		msleep(10);
 		timeout -= 10;
 	}
-
-	pci_info(pdev, "Data Link Layer Link Active not %s in 1000 msec\n",
-		 active ? "set" : "cleared");
-
-	return false;
+	if (active && ret)
+		msleep(100);
+	else if (ret != active)
+		pci_info(pdev, "Data Link Layer Link Active not %s in 1000 msec\n",
+			active ? "set" : "cleared");
+	return ret == active;
 }
 
 void pci_reset_secondary_bus(struct pci_dev *dev)
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index 23e063aefddf..e435d12e61a0 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -140,10 +140,12 @@ static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
 	pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
 			      PCI_EXP_DPC_STATUS_TRIGGER);
 
+	if (!pcie_wait_for_link(pdev, true))
+		return PCI_ERS_RESULT_DISCONNECT;
+
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
-
 static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
 {
 	struct device *dev = &dpc->dev->device;
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 201f9e5ff55c..bb2999d1b199 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -713,6 +713,7 @@ static void pci_set_bus_speed(struct pci_bus *bus)
 
 		pcie_capability_read_dword(bridge, PCI_EXP_LNKCAP, &linkcap);
 		bus->max_bus_speed = pcie_link_speed[linkcap & PCI_EXP_LNKCAP_SLS];
+		bridge->link_active_reporting = !!(linkcap & PCI_EXP_LNKCAP_DLLLARC);
 
 		pcie_capability_read_word(bridge, PCI_EXP_LNKSTA, &linksta);
 		pcie_update_link_speed(bus, linksta);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 6925828f9f25..896b42032ec5 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -402,6 +402,7 @@ struct pci_dev {
 	unsigned int	has_secondary_link:1;
 	unsigned int	non_compliant_bars:1;	/* Broken BARs; ignore them */
 	unsigned int	is_probed:1;		/* Device probing in progress */
+	unsigned int	link_active_reporting:1;/* Device capable of reporting link active */
 	pci_dev_flags_t dev_flags;
 	atomic_t	enable_cnt;	/* pci_enable_device has been called */
 
-- 
cgit v1.2.3


From 5f5e4890d57a8af5da72c9d73a4efa9bad43a7a3 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Thu, 27 Sep 2018 16:57:05 -0500
Subject: ACPI / property: Allow multiple property compatible _DSD entries

It is possible to have _DSD entries where the data is compatible with
device properties format but are using different GUID for various reasons.
In addition to that there can be many such _DSD entries for a single device
such as for PCIe root port used to host a Thunderbolt hierarchy:

    Scope (\_SB.PCI0.RP21)
    {
        Name (_DSD, Package () {
            ToUUID ("6211e2c0-58a3-4af3-90e1-927a4e0c55a4"),
            Package () {
                Package () {"HotPlugSupportInD3", 1}
            },

            ToUUID ("efcc06cc-73ac-4bc3-bff0-76143807c389"),
            Package () {
                Package () {"ExternalFacingPort", 1},
                Package () {"UID", 0 }
            }
        })
    }

More information about these new _DSD entries can be found in:

  https://docs.microsoft.com/en-us/windows-hardware/drivers/pci/dsd-for-pcie-root-ports

To make these available for drivers via unified device property APIs,
modify ACPI property core so that it supports multiple _DSD entries
organized in a linked list. We also store GUID of each _DSD entry in struct
acpi_device_properties in case there is need to differentiate between
entries. The supported GUIDs are then listed in prp_guids array.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
---
 drivers/acpi/property.c     | 94 ++++++++++++++++++++++++++++++++-------------
 drivers/acpi/x86/apple.c    |  2 +-
 drivers/gpio/gpiolib-acpi.c |  2 +-
 include/acpi/acpi_bus.h     |  8 +++-
 include/linux/acpi.h        |  9 +++++
 5 files changed, 86 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index 693cf05b0cc4..90ba9371bae6 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -24,11 +24,12 @@ static int acpi_data_get_property_array(const struct acpi_device_data *data,
 					acpi_object_type type,
 					const union acpi_object **obj);
 
-/* ACPI _DSD device properties GUID: daffd814-6eba-4d8c-8a91-bc9bbf4aa301 */
-static const guid_t prp_guid =
+static const guid_t prp_guids[] = {
+	/* ACPI _DSD device properties GUID: daffd814-6eba-4d8c-8a91-bc9bbf4aa301 */
 	GUID_INIT(0xdaffd814, 0x6eba, 0x4d8c,
-		  0x8a, 0x91, 0xbc, 0x9b, 0xbf, 0x4a, 0xa3, 0x01);
-/* ACPI _DSD data subnodes GUID: dbb8e3e6-5886-4ba6-8795-1319f52a966b */
+		  0x8a, 0x91, 0xbc, 0x9b, 0xbf, 0x4a, 0xa3, 0x01),
+};
+
 static const guid_t ads_guid =
 	GUID_INIT(0xdbb8e3e6, 0x5886, 0x4ba6,
 		  0x87, 0x95, 0x13, 0x19, 0xf5, 0x2a, 0x96, 0x6b);
@@ -56,6 +57,7 @@ static bool acpi_nondev_subnode_extract(const union acpi_object *desc,
 	dn->name = link->package.elements[0].string.pointer;
 	dn->fwnode.ops = &acpi_data_fwnode_ops;
 	dn->parent = parent;
+	INIT_LIST_HEAD(&dn->data.properties);
 	INIT_LIST_HEAD(&dn->data.subnodes);
 
 	result = acpi_extract_properties(desc, &dn->data);
@@ -288,6 +290,35 @@ static void acpi_init_of_compatible(struct acpi_device *adev)
 	adev->flags.of_compatible_ok = 1;
 }
 
+static bool acpi_is_property_guid(const guid_t *guid)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(prp_guids); i++) {
+		if (guid_equal(guid, &prp_guids[i]))
+			return true;
+	}
+
+	return false;
+}
+
+struct acpi_device_properties *
+acpi_data_add_props(struct acpi_device_data *data, const guid_t *guid,
+		    const union acpi_object *properties)
+{
+	struct acpi_device_properties *props;
+
+	props = kzalloc(sizeof(*props), GFP_KERNEL);
+	if (props) {
+		INIT_LIST_HEAD(&props->list);
+		props->guid = guid;
+		props->properties = properties;
+		list_add_tail(&props->list, &data->properties);
+	}
+
+	return props;
+}
+
 static bool acpi_extract_properties(const union acpi_object *desc,
 				    struct acpi_device_data *data)
 {
@@ -312,7 +343,7 @@ static bool acpi_extract_properties(const union acpi_object *desc,
 		    properties->type != ACPI_TYPE_PACKAGE)
 			break;
 
-		if (!guid_equal((guid_t *)guid->buffer.pointer, &prp_guid))
+		if (!acpi_is_property_guid((guid_t *)guid->buffer.pointer))
 			continue;
 
 		/*
@@ -320,13 +351,13 @@ static bool acpi_extract_properties(const union acpi_object *desc,
 		 * package immediately following it.
 		 */
 		if (!acpi_properties_format_valid(properties))
-			break;
+			continue;
 
-		data->properties = properties;
-		return true;
+		acpi_data_add_props(data, (const guid_t *)guid->buffer.pointer,
+				    properties);
 	}
 
-	return false;
+	return !list_empty(&data->properties);
 }
 
 void acpi_init_properties(struct acpi_device *adev)
@@ -336,6 +367,7 @@ void acpi_init_properties(struct acpi_device *adev)
 	acpi_status status;
 	bool acpi_of = false;
 
+	INIT_LIST_HEAD(&adev->data.properties);
 	INIT_LIST_HEAD(&adev->data.subnodes);
 
 	if (!adev->handle)
@@ -398,11 +430,16 @@ static void acpi_destroy_nondev_subnodes(struct list_head *list)
 
 void acpi_free_properties(struct acpi_device *adev)
 {
+	struct acpi_device_properties *props, *tmp;
+
 	acpi_destroy_nondev_subnodes(&adev->data.subnodes);
 	ACPI_FREE((void *)adev->data.pointer);
 	adev->data.of_compatible = NULL;
 	adev->data.pointer = NULL;
-	adev->data.properties = NULL;
+	list_for_each_entry_safe(props, tmp, &adev->data.properties, list) {
+		list_del(&props->list);
+		kfree(props);
+	}
 }
 
 /**
@@ -427,32 +464,37 @@ static int acpi_data_get_property(const struct acpi_device_data *data,
 				  const char *name, acpi_object_type type,
 				  const union acpi_object **obj)
 {
-	const union acpi_object *properties;
-	int i;
+	const struct acpi_device_properties *props;
 
 	if (!data || !name)
 		return -EINVAL;
 
-	if (!data->pointer || !data->properties)
+	if (!data->pointer || list_empty(&data->properties))
 		return -EINVAL;
 
-	properties = data->properties;
-	for (i = 0; i < properties->package.count; i++) {
-		const union acpi_object *propname, *propvalue;
-		const union acpi_object *property;
+	list_for_each_entry(props, &data->properties, list) {
+		const union acpi_object *properties;
+		unsigned int i;
 
-		property = &properties->package.elements[i];
+		properties = props->properties;
+		for (i = 0; i < properties->package.count; i++) {
+			const union acpi_object *propname, *propvalue;
+			const union acpi_object *property;
 
-		propname = &property->package.elements[0];
-		propvalue = &property->package.elements[1];
+			property = &properties->package.elements[i];
 
-		if (!strcmp(name, propname->string.pointer)) {
-			if (type != ACPI_TYPE_ANY && propvalue->type != type)
-				return -EPROTO;
-			if (obj)
-				*obj = propvalue;
+			propname = &property->package.elements[0];
+			propvalue = &property->package.elements[1];
 
-			return 0;
+			if (!strcmp(name, propname->string.pointer)) {
+				if (type != ACPI_TYPE_ANY &&
+				    propvalue->type != type)
+					return -EPROTO;
+				if (obj)
+					*obj = propvalue;
+
+				return 0;
+			}
 		}
 	}
 	return -EINVAL;
diff --git a/drivers/acpi/x86/apple.c b/drivers/acpi/x86/apple.c
index 51b4cf9f25da..130df1c8ed7d 100644
--- a/drivers/acpi/x86/apple.c
+++ b/drivers/acpi/x86/apple.c
@@ -132,8 +132,8 @@ void acpi_extract_apple_properties(struct acpi_device *adev)
 	}
 	WARN_ON(free_space != (void *)newprops + newsize);
 
-	adev->data.properties = newprops;
 	adev->data.pointer = newprops;
+	acpi_data_add_props(&adev->data, &apple_prp_guid, newprops);
 
 out_free:
 	ACPI_FREE(props);
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 8b9d7e42c600..f74aa0e60300 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -1198,7 +1198,7 @@ int acpi_gpio_count(struct device *dev, const char *con_id)
 bool acpi_can_fallback_to_crs(struct acpi_device *adev, const char *con_id)
 {
 	/* Never allow fallback if the device has properties */
-	if (adev->data.properties || adev->driver_gpios)
+	if (acpi_dev_has_props(adev) || adev->driver_gpios)
 		return false;
 
 	return con_id == NULL;
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index ba4dd54f2c82..cd35e3ce9a8b 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -346,10 +346,16 @@ struct acpi_device_physical_node {
 	bool put_online:1;
 };
 
+struct acpi_device_properties {
+	const guid_t *guid;
+	const union acpi_object *properties;
+	struct list_head list;
+};
+
 /* ACPI Device Specific Data (_DSD) */
 struct acpi_device_data {
 	const union acpi_object *pointer;
-	const union acpi_object *properties;
+	struct list_head properties;
 	const union acpi_object *of_compatible;
 	struct list_head subnodes;
 };
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index de8d3d3fa651..51e3c29663fe 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -1074,6 +1074,15 @@ static inline int acpi_node_get_property_reference(
 		NR_FWNODE_REFERENCE_ARGS, args);
 }
 
+static inline bool acpi_dev_has_props(const struct acpi_device *adev)
+{
+	return !list_empty(&adev->data.properties);
+}
+
+struct acpi_device_properties *
+acpi_data_add_props(struct acpi_device_data *data, const guid_t *guid,
+		    const union acpi_object *properties);
+
 int acpi_node_prop_get(const struct fwnode_handle *fwnode, const char *propname,
 		       void **valptr);
 int acpi_dev_prop_read_single(struct acpi_device *adev,
-- 
cgit v1.2.3


From 10653022456dc77b398777fd8e95126c77954b49 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 30 Aug 2018 14:54:03 +0200
Subject: Revert "serial: sh-sci: Remove SCIx_RZ_SCIFA_REGTYPE"

This reverts commit 7acece71a517cad83a0842a94d94c13f271b680c.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Chris Brandt <chris.brandt@renesas.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sh-sci.c | 31 +++++++++++++++++++++++++++++++
 include/linux/serial_sci.h  |  1 +
 2 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index ac4424bf6b13..5d42c9a63001 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -291,6 +291,33 @@ static const struct sci_port_params sci_port_params[SCIx_NR_REGTYPES] = {
 		.error_clear = SCIF_ERROR_CLEAR,
 	},
 
+	/*
+	 * The "SCIFA" that is in RZ/T and RZ/A2.
+	 * It looks like a normal SCIF with FIFO data, but with a
+	 * compressed address space. Also, the break out of interrupts
+	 * are different: ERI/BRI, RXI, TXI, TEI, DRI.
+	 */
+	[SCIx_RZ_SCIFA_REGTYPE] = {
+		.regs = {
+			[SCSMR]		= { 0x00, 16 },
+			[SCBRR]		= { 0x02,  8 },
+			[SCSCR]		= { 0x04, 16 },
+			[SCxTDR]	= { 0x06,  8 },
+			[SCxSR]		= { 0x08, 16 },
+			[SCxRDR]	= { 0x0A,  8 },
+			[SCFCR]		= { 0x0C, 16 },
+			[SCFDR]		= { 0x0E, 16 },
+			[SCSPTR]	= { 0x10, 16 },
+			[SCLSR]		= { 0x12, 16 },
+		},
+		.fifosize = 16,
+		.overrun_reg = SCLSR,
+		.overrun_mask = SCLSR_ORER,
+		.sampling_rate_mask = SCI_SR(32),
+		.error_mask = SCIF_DEFAULT_ERROR_MASK,
+		.error_clear = SCIF_ERROR_CLEAR,
+	},
+
 	/*
 	 * Common SH-3 SCIF definitions.
 	 */
@@ -3110,6 +3137,10 @@ static const struct of_device_id of_sci_match[] = {
 		.compatible = "renesas,scif-r7s72100",
 		.data = SCI_OF_DATA(PORT_SCIF, SCIx_SH2_SCIF_FIFODATA_REGTYPE),
 	},
+	{
+		.compatible = "renesas,scif-r7s9210",
+		.data = SCI_OF_DATA(PORT_SCIF, SCIx_RZ_SCIFA_REGTYPE),
+	},
 	/* Family-specific types */
 	{
 		.compatible = "renesas,rcar-gen1-scif",
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index c0e795d95477..1c89611e0e06 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -36,6 +36,7 @@ enum {
 	SCIx_SH4_SCIF_FIFODATA_REGTYPE,
 	SCIx_SH7705_SCIF_REGTYPE,
 	SCIx_HSCIF_REGTYPE,
+	SCIx_RZ_SCIFA_REGTYPE,
 
 	SCIx_NR_REGTYPES,
 };
-- 
cgit v1.2.3


From fd3b339cbb6047dacfa7ccc81c846efcb61032a9 Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Mon, 1 Oct 2018 12:31:21 +0300
Subject: thunderbolt: Convert rest of the driver files to use SPDX identifier

This gets rid of the licence boilerplate duplicated in each file. While
there fix doubled space in domain.c author line.

No functional changes intended.

Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Acked-by: Yehezkel Bernat <yehezkelshb@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/thunderbolt/dma_port.c | 5 +----
 drivers/thunderbolt/dma_port.h | 5 +----
 drivers/thunderbolt/domain.c   | 7 ++-----
 drivers/thunderbolt/icm.c      | 5 +----
 drivers/thunderbolt/property.c | 5 +----
 drivers/thunderbolt/tb_msgs.h  | 5 +----
 drivers/thunderbolt/xdomain.c  | 5 +----
 include/linux/thunderbolt.h    | 5 +----
 8 files changed, 9 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/thunderbolt/dma_port.c b/drivers/thunderbolt/dma_port.c
index f2701194f810..847dd07a7b17 100644
--- a/drivers/thunderbolt/dma_port.c
+++ b/drivers/thunderbolt/dma_port.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Thunderbolt DMA configuration based mailbox support
  *
  * Copyright (C) 2017, Intel Corporation
  * Authors: Michael Jamet <michael.jamet@intel.com>
  *          Mika Westerberg <mika.westerberg@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/delay.h>
diff --git a/drivers/thunderbolt/dma_port.h b/drivers/thunderbolt/dma_port.h
index c4a69e0fbff7..7deadd97ce31 100644
--- a/drivers/thunderbolt/dma_port.h
+++ b/drivers/thunderbolt/dma_port.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Thunderbolt DMA configuration based mailbox support
  *
  * Copyright (C) 2017, Intel Corporation
  * Authors: Michael Jamet <michael.jamet@intel.com>
  *          Mika Westerberg <mika.westerberg@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef DMA_PORT_H_
diff --git a/drivers/thunderbolt/domain.c b/drivers/thunderbolt/domain.c
index 092381e2accf..93e562f18d40 100644
--- a/drivers/thunderbolt/domain.c
+++ b/drivers/thunderbolt/domain.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Thunderbolt bus support
  *
  * Copyright (C) 2017, Intel Corporation
- * Author:  Mika Westerberg <mika.westerberg@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
  */
 
 #include <linux/device.h>
diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c
index e1e264a9a4c7..533068651f90 100644
--- a/drivers/thunderbolt/icm.c
+++ b/drivers/thunderbolt/icm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Internal Thunderbolt Connection Manager. This is a firmware running on
  * the Thunderbolt host controller performing most of the low-level
@@ -6,10 +7,6 @@
  * Copyright (C) 2017, Intel Corporation
  * Authors: Michael Jamet <michael.jamet@intel.com>
  *          Mika Westerberg <mika.westerberg@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/delay.h>
diff --git a/drivers/thunderbolt/property.c b/drivers/thunderbolt/property.c
index 8fe913a95b4a..b2f0d6386cee 100644
--- a/drivers/thunderbolt/property.c
+++ b/drivers/thunderbolt/property.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Thunderbolt XDomain property support
  *
  * Copyright (C) 2017, Intel Corporation
  * Authors: Michael Jamet <michael.jamet@intel.com>
  *          Mika Westerberg <mika.westerberg@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/err.h>
diff --git a/drivers/thunderbolt/tb_msgs.h b/drivers/thunderbolt/tb_msgs.h
index 2487e162c885..02c84aa3d018 100644
--- a/drivers/thunderbolt/tb_msgs.h
+++ b/drivers/thunderbolt/tb_msgs.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Thunderbolt control channel messages
  *
  * Copyright (C) 2014 Andreas Noever <andreas.noever@gmail.com>
  * Copyright (C) 2017, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef _TB_MSGS
diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c
index db8bece63327..e27dd8beb94b 100644
--- a/drivers/thunderbolt/xdomain.c
+++ b/drivers/thunderbolt/xdomain.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Thunderbolt XDomain discovery protocol support
  *
  * Copyright (C) 2017, Intel Corporation
  * Authors: Michael Jamet <michael.jamet@intel.com>
  *          Mika Westerberg <mika.westerberg@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/device.h>
diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h
index a3ed26082bc1..bf6ec83e60ee 100644
--- a/include/linux/thunderbolt.h
+++ b/include/linux/thunderbolt.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Thunderbolt service API
  *
@@ -5,10 +6,6 @@
  * Copyright (C) 2017, Intel Corporation
  * Authors: Michael Jamet <michael.jamet@intel.com>
  *          Mika Westerberg <mika.westerberg@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef THUNDERBOLT_H_
-- 
cgit v1.2.3


From f3709f69b7c5cba6323cc03c29b64293b93be817 Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@wand.net.nz>
Date: Tue, 2 Oct 2018 13:35:29 -0700
Subject: bpf: Add iterator for spilled registers

Add this iterator for spilled registers, it concentrates the details of
how to get the current frame's spilled registers into a single macro
while clarifying the intention of the code which is calling the macro.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf_verifier.h | 11 +++++++++++
 kernel/bpf/verifier.c        | 16 +++++++---------
 2 files changed, 18 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index b42b60a83e19..d0e7f97e8b60 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -131,6 +131,17 @@ struct bpf_verifier_state {
 	u32 curframe;
 };
 
+#define bpf_get_spilled_reg(slot, frame)				\
+	(((slot < frame->allocated_stack / BPF_REG_SIZE) &&		\
+	  (frame->stack[slot].slot_type[0] == STACK_SPILL))		\
+	 ? &frame->stack[slot].spilled_ptr : NULL)
+
+/* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */
+#define bpf_for_each_spilled_reg(iter, frame, reg)			\
+	for (iter = 0, reg = bpf_get_spilled_reg(iter, frame);		\
+	     iter < frame->allocated_stack / BPF_REG_SIZE;		\
+	     iter++, reg = bpf_get_spilled_reg(iter, frame))
+
 /* linked list of verifier states used to prune search */
 struct bpf_verifier_state_list {
 	struct bpf_verifier_state state;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a8cc83a970d1..9c82d8f58085 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2252,10 +2252,9 @@ static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
 		if (reg_is_pkt_pointer_any(&regs[i]))
 			mark_reg_unknown(env, regs, i);
 
-	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
-		if (state->stack[i].slot_type[0] != STACK_SPILL)
+	bpf_for_each_spilled_reg(i, state, reg) {
+		if (!reg)
 			continue;
-		reg = &state->stack[i].spilled_ptr;
 		if (reg_is_pkt_pointer_any(reg))
 			__mark_reg_unknown(reg);
 	}
@@ -3395,10 +3394,9 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
 
 	for (j = 0; j <= vstate->curframe; j++) {
 		state = vstate->frame[j];
-		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
-			if (state->stack[i].slot_type[0] != STACK_SPILL)
+		bpf_for_each_spilled_reg(i, state, reg) {
+			if (!reg)
 				continue;
-			reg = &state->stack[i].spilled_ptr;
 			if (reg->type == type && reg->id == dst_reg->id)
 				reg->range = max(reg->range, new_range);
 		}
@@ -3643,7 +3641,7 @@ static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno,
 			  bool is_null)
 {
 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
-	struct bpf_reg_state *regs = state->regs;
+	struct bpf_reg_state *reg, *regs = state->regs;
 	u32 id = regs[regno].id;
 	int i, j;
 
@@ -3652,8 +3650,8 @@ static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno,
 
 	for (j = 0; j <= vstate->curframe; j++) {
 		state = vstate->frame[j];
-		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
-			if (state->stack[i].slot_type[0] != STACK_SPILL)
+		bpf_for_each_spilled_reg(i, state, reg) {
+			if (!reg)
 				continue;
 			mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
 		}
-- 
cgit v1.2.3


From c64b7983288e636356f7f5f652de4813e1cfedac Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@wand.net.nz>
Date: Tue, 2 Oct 2018 13:35:33 -0700
Subject: bpf: Add PTR_TO_SOCKET verifier type

Teach the verifier a little bit about a new type of pointer, a
PTR_TO_SOCKET. This pointer type is accessed from BPF through the
'struct bpf_sock' structure.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h          |  34 ++++++++++++
 include/linux/bpf_verifier.h |   2 +
 kernel/bpf/verifier.c        | 120 ++++++++++++++++++++++++++++++++++++++-----
 net/core/filter.c            |  30 ++++++-----
 4 files changed, 160 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 018299a595c8..027697b6a22f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -154,6 +154,7 @@ enum bpf_arg_type {
 
 	ARG_PTR_TO_CTX,		/* pointer to context */
 	ARG_ANYTHING,		/* any (initialized) argument is ok */
+	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock */
 };
 
 /* type of values returned from helper functions */
@@ -162,6 +163,7 @@ enum bpf_return_type {
 	RET_VOID,			/* function doesn't return anything */
 	RET_PTR_TO_MAP_VALUE,		/* returns a pointer to map elem value */
 	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
+	RET_PTR_TO_SOCKET_OR_NULL,	/* returns a pointer to a socket or NULL */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -213,6 +215,8 @@ enum bpf_reg_type {
 	PTR_TO_PACKET,		 /* reg points to skb->data */
 	PTR_TO_PACKET_END,	 /* skb->data + headlen */
 	PTR_TO_FLOW_KEYS,	 /* reg points to bpf_flow_keys */
+	PTR_TO_SOCKET,		 /* reg points to struct bpf_sock */
+	PTR_TO_SOCKET_OR_NULL,	 /* reg points to struct bpf_sock or NULL */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -343,6 +347,11 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
 
 typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
 					unsigned long off, unsigned long len);
+typedef u32 (*bpf_convert_ctx_access_t)(enum bpf_access_type type,
+					const struct bpf_insn *src,
+					struct bpf_insn *dst,
+					struct bpf_prog *prog,
+					u32 *target_size);
 
 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
@@ -836,4 +845,29 @@ extern const struct bpf_func_proto bpf_get_local_storage_proto;
 void bpf_user_rnd_init_once(void);
 u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
+#if defined(CONFIG_NET)
+bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+			      struct bpf_insn_access_aux *info);
+u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
+				const struct bpf_insn *si,
+				struct bpf_insn *insn_buf,
+				struct bpf_prog *prog,
+				u32 *target_size);
+#else
+static inline bool bpf_sock_is_valid_access(int off, int size,
+					    enum bpf_access_type type,
+					    struct bpf_insn_access_aux *info)
+{
+	return false;
+}
+static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
+					      const struct bpf_insn *si,
+					      struct bpf_insn *insn_buf,
+					      struct bpf_prog *prog,
+					      u32 *target_size)
+{
+	return 0;
+}
+#endif
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index d0e7f97e8b60..a411363098a5 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -58,6 +58,8 @@ struct bpf_reg_state {
 	 * offset, so they can share range knowledge.
 	 * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we
 	 * came from, when one is tested for != NULL.
+	 * For PTR_TO_SOCKET this is used to share which pointers retain the
+	 * same reference to the socket, to determine proper reference freeing.
 	 */
 	u32 id;
 	/* For scalar types (SCALAR_VALUE), this represents our knowledge of
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 98b218bd46e8..f86386c9affd 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -80,8 +80,8 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
  * (like pointer plus pointer becomes SCALAR_VALUE type)
  *
  * When verifier sees load or store instructions the type of base register
- * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK. These are three pointer
- * types recognized by check_mem_access() function.
+ * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
+ * four pointer types recognized by check_mem_access() function.
  *
  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
  * and the range of [ptr, ptr + map's value_size) is accessible.
@@ -267,6 +267,8 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_PACKET_META]	= "pkt_meta",
 	[PTR_TO_PACKET_END]	= "pkt_end",
 	[PTR_TO_FLOW_KEYS]	= "flow_keys",
+	[PTR_TO_SOCKET]		= "sock",
+	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
 };
 
 static char slot_type_char[] = {
@@ -973,6 +975,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_PACKET_END:
 	case PTR_TO_FLOW_KEYS:
 	case CONST_PTR_TO_MAP:
+	case PTR_TO_SOCKET:
+	case PTR_TO_SOCKET_OR_NULL:
 		return true;
 	default:
 		return false;
@@ -1341,6 +1345,28 @@ static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
 	return 0;
 }
 
+static int check_sock_access(struct bpf_verifier_env *env, u32 regno, int off,
+			     int size, enum bpf_access_type t)
+{
+	struct bpf_reg_state *regs = cur_regs(env);
+	struct bpf_reg_state *reg = &regs[regno];
+	struct bpf_insn_access_aux info;
+
+	if (reg->smin_value < 0) {
+		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
+			regno);
+		return -EACCES;
+	}
+
+	if (!bpf_sock_is_valid_access(off, size, t, &info)) {
+		verbose(env, "invalid bpf_sock access off=%d size=%d\n",
+			off, size);
+		return -EACCES;
+	}
+
+	return 0;
+}
+
 static bool __is_pointer_value(bool allow_ptr_leaks,
 			       const struct bpf_reg_state *reg)
 {
@@ -1459,6 +1485,9 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 		 */
 		strict = true;
 		break;
+	case PTR_TO_SOCKET:
+		pointer_desc = "sock ";
+		break;
 	default:
 		break;
 	}
@@ -1726,6 +1755,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		err = check_flow_keys_access(env, off, size);
 		if (!err && t == BPF_READ && value_regno >= 0)
 			mark_reg_unknown(env, regs, value_regno);
+	} else if (reg->type == PTR_TO_SOCKET) {
+		if (t == BPF_WRITE) {
+			verbose(env, "cannot write into socket\n");
+			return -EACCES;
+		}
+		err = check_sock_access(env, regno, off, size, t);
+		if (!err && value_regno >= 0)
+			mark_reg_unknown(env, regs, value_regno);
 	} else {
 		verbose(env, "R%d invalid mem access '%s'\n", regno,
 			reg_type_str[reg->type]);
@@ -1948,6 +1985,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		err = check_ctx_reg(env, reg, regno);
 		if (err < 0)
 			return err;
+	} else if (arg_type == ARG_PTR_TO_SOCKET) {
+		expected_type = PTR_TO_SOCKET;
+		if (type != expected_type)
+			goto err_type;
 	} else if (arg_type_is_mem_ptr(arg_type)) {
 		expected_type = PTR_TO_STACK;
 		/* One exception here. In case function allows for NULL to be
@@ -2543,6 +2584,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		}
 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
 		regs[BPF_REG_0].id = ++env->id_gen;
+	} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
+		mark_reg_known_zero(env, regs, BPF_REG_0);
+		regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
+		regs[BPF_REG_0].id = ++env->id_gen;
 	} else {
 		verbose(env, "unknown return type %d of func %s#%d\n",
 			fn->ret_type, func_id_name(func_id), func_id);
@@ -2680,6 +2725,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 		return -EACCES;
 	case CONST_PTR_TO_MAP:
 	case PTR_TO_PACKET_END:
+	case PTR_TO_SOCKET:
+	case PTR_TO_SOCKET_OR_NULL:
 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
 			dst, reg_type_str[ptr_reg->type]);
 		return -EACCES;
@@ -3627,6 +3674,8 @@ static void mark_ptr_or_null_reg(struct bpf_reg_state *reg, u32 id,
 			} else {
 				reg->type = PTR_TO_MAP_VALUE;
 			}
+		} else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
+			reg->type = PTR_TO_SOCKET;
 		}
 		/* We don't need id from this point onwards anymore, thus we
 		 * should better reset it, so that state pruning has chances
@@ -4402,6 +4451,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 	case CONST_PTR_TO_MAP:
 	case PTR_TO_PACKET_END:
 	case PTR_TO_FLOW_KEYS:
+	case PTR_TO_SOCKET:
+	case PTR_TO_SOCKET_OR_NULL:
 		/* Only valid matches are exact, which memcmp() above
 		 * would have accepted
 		 */
@@ -4679,6 +4730,37 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	return 0;
 }
 
+/* Return true if it's OK to have the same insn return a different type. */
+static bool reg_type_mismatch_ok(enum bpf_reg_type type)
+{
+	switch (type) {
+	case PTR_TO_CTX:
+	case PTR_TO_SOCKET:
+	case PTR_TO_SOCKET_OR_NULL:
+		return false;
+	default:
+		return true;
+	}
+}
+
+/* If an instruction was previously used with particular pointer types, then we
+ * need to be careful to avoid cases such as the below, where it may be ok
+ * for one branch accessing the pointer, but not ok for the other branch:
+ *
+ * R1 = sock_ptr
+ * goto X;
+ * ...
+ * R1 = some_other_valid_ptr;
+ * goto X;
+ * ...
+ * R2 = *(u32 *)(R1 + 0);
+ */
+static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
+{
+	return src != prev && (!reg_type_mismatch_ok(src) ||
+			       !reg_type_mismatch_ok(prev));
+}
+
 static int do_check(struct bpf_verifier_env *env)
 {
 	struct bpf_verifier_state *state;
@@ -4811,9 +4893,7 @@ static int do_check(struct bpf_verifier_env *env)
 				 */
 				*prev_src_type = src_reg_type;
 
-			} else if (src_reg_type != *prev_src_type &&
-				   (src_reg_type == PTR_TO_CTX ||
-				    *prev_src_type == PTR_TO_CTX)) {
+			} else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
 				/* ABuser program is trying to use the same insn
 				 * dst_reg = *(u32*) (src_reg + off)
 				 * with different pointer types:
@@ -4858,9 +4938,7 @@ static int do_check(struct bpf_verifier_env *env)
 
 			if (*prev_dst_type == NOT_INIT) {
 				*prev_dst_type = dst_reg_type;
-			} else if (dst_reg_type != *prev_dst_type &&
-				   (dst_reg_type == PTR_TO_CTX ||
-				    *prev_dst_type == PTR_TO_CTX)) {
+			} else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
 				verbose(env, "same insn cannot be used with different pointers\n");
 				return -EINVAL;
 			}
@@ -5286,8 +5364,10 @@ static void sanitize_dead_code(struct bpf_verifier_env *env)
 	}
 }
 
-/* convert load instructions that access fields of 'struct __sk_buff'
- * into sequence of instructions that access fields of 'struct sk_buff'
+/* convert load instructions that access fields of a context type into a
+ * sequence of instructions that access fields of the underlying structure:
+ *     struct __sk_buff    -> struct sk_buff
+ *     struct bpf_sock_ops -> struct sock
  */
 static int convert_ctx_accesses(struct bpf_verifier_env *env)
 {
@@ -5316,12 +5396,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		}
 	}
 
-	if (!ops->convert_ctx_access || bpf_prog_is_dev_bound(env->prog->aux))
+	if (bpf_prog_is_dev_bound(env->prog->aux))
 		return 0;
 
 	insn = env->prog->insnsi + delta;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
+		bpf_convert_ctx_access_t convert_ctx_access;
+
 		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
 		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
 		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
@@ -5363,8 +5445,18 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 			continue;
 		}
 
-		if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
+		switch (env->insn_aux_data[i + delta].ptr_type) {
+		case PTR_TO_CTX:
+			if (!ops->convert_ctx_access)
+				continue;
+			convert_ctx_access = ops->convert_ctx_access;
+			break;
+		case PTR_TO_SOCKET:
+			convert_ctx_access = bpf_sock_convert_ctx_access;
+			break;
+		default:
 			continue;
+		}
 
 		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
 		size = BPF_LDST_BYTES(insn);
@@ -5396,8 +5488,8 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
 		}
 
 		target_size = 0;
-		cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog,
-					      &target_size);
+		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
+					 &target_size);
 		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
 		    (ctx_field_size && !target_size)) {
 			verbose(env, "bpf verifier is misconfigured\n");
diff --git a/net/core/filter.c b/net/core/filter.c
index 72db8afb7cb6..b2cb186252e4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5394,23 +5394,29 @@ static bool __sock_filter_check_size(int off, int size,
 	return size == size_default;
 }
 
-static bool sock_filter_is_valid_access(int off, int size,
-					enum bpf_access_type type,
-					const struct bpf_prog *prog,
-					struct bpf_insn_access_aux *info)
+bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+			      struct bpf_insn_access_aux *info)
 {
 	if (off < 0 || off >= sizeof(struct bpf_sock))
 		return false;
 	if (off % size != 0)
 		return false;
-	if (!__sock_filter_check_attach_type(off, type,
-					     prog->expected_attach_type))
-		return false;
 	if (!__sock_filter_check_size(off, size, info))
 		return false;
 	return true;
 }
 
+static bool sock_filter_is_valid_access(int off, int size,
+					enum bpf_access_type type,
+					const struct bpf_prog *prog,
+					struct bpf_insn_access_aux *info)
+{
+	if (!bpf_sock_is_valid_access(off, size, type, info))
+		return false;
+	return __sock_filter_check_attach_type(off, type,
+					       prog->expected_attach_type);
+}
+
 static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
 				const struct bpf_prog *prog, int drop_verdict)
 {
@@ -6122,10 +6128,10 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 	return insn - insn_buf;
 }
 
-static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
-					  const struct bpf_insn *si,
-					  struct bpf_insn *insn_buf,
-					  struct bpf_prog *prog, u32 *target_size)
+u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
+				const struct bpf_insn *si,
+				struct bpf_insn *insn_buf,
+				struct bpf_prog *prog, u32 *target_size)
 {
 	struct bpf_insn *insn = insn_buf;
 	int off;
@@ -7037,7 +7043,7 @@ const struct bpf_prog_ops lwt_seg6local_prog_ops = {
 const struct bpf_verifier_ops cg_sock_verifier_ops = {
 	.get_func_proto		= sock_filter_func_proto,
 	.is_valid_access	= sock_filter_is_valid_access,
-	.convert_ctx_access	= sock_filter_convert_ctx_access,
+	.convert_ctx_access	= bpf_sock_convert_ctx_access,
 };
 
 const struct bpf_prog_ops cg_sock_prog_ops = {
-- 
cgit v1.2.3


From fd978bf7fd312581a7ca454a991f0ffb34c4204b Mon Sep 17 00:00:00 2001
From: Joe Stringer <joe@wand.net.nz>
Date: Tue, 2 Oct 2018 13:35:35 -0700
Subject: bpf: Add reference tracking to verifier

Allow helper functions to acquire a reference and return it into a
register. Specific pointer types such as the PTR_TO_SOCKET will
implicitly represent such a reference. The verifier must ensure that
these references are released exactly once in each path through the
program.

To achieve this, this commit assigns an id to the pointer and tracks it
in the 'bpf_func_state', then when the function or program exits,
verifies that all of the acquired references have been freed. When the
pointer is passed to a function that frees the reference, it is removed
from the 'bpf_func_state` and all existing copies of the pointer in
registers are marked invalid.

Signed-off-by: Joe Stringer <joe@wand.net.nz>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf_verifier.h |  24 +++-
 kernel/bpf/verifier.c        | 306 ++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 308 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index a411363098a5..7b6fd2ab3263 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -104,6 +104,17 @@ struct bpf_stack_state {
 	u8 slot_type[BPF_REG_SIZE];
 };
 
+struct bpf_reference_state {
+	/* Track each reference created with a unique id, even if the same
+	 * instruction creates the reference multiple times (eg, via CALL).
+	 */
+	int id;
+	/* Instruction where the allocation of this reference occurred. This
+	 * is used purely to inform the user of a reference leak.
+	 */
+	int insn_idx;
+};
+
 /* state of the program:
  * type of all registers and stack info
  */
@@ -121,7 +132,9 @@ struct bpf_func_state {
 	 */
 	u32 subprogno;
 
-	/* should be second to last. See copy_func_state() */
+	/* The following fields should be last. See copy_func_state() */
+	int acquired_refs;
+	struct bpf_reference_state *refs;
 	int allocated_stack;
 	struct bpf_stack_state *stack;
 };
@@ -217,11 +230,16 @@ __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
 					   const char *fmt, ...);
 
-static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
+static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
 {
 	struct bpf_verifier_state *cur = env->cur_state;
 
-	return cur->frame[cur->curframe]->regs;
+	return cur->frame[cur->curframe];
+}
+
+static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
+{
+	return cur_func(env)->regs;
 }
 
 int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 11e982381061..cd0d8bc00bd1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1,5 +1,6 @@
 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  * Copyright (c) 2016 Facebook
+ * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -140,6 +141,18 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
  *
  * After the call R0 is set to return type of the function and registers R1-R5
  * are set to NOT_INIT to indicate that they are no longer readable.
+ *
+ * The following reference types represent a potential reference to a kernel
+ * resource which, after first being allocated, must be checked and freed by
+ * the BPF program:
+ * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
+ *
+ * When the verifier sees a helper call return a reference type, it allocates a
+ * pointer id for the reference and stores it in the current function state.
+ * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
+ * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
+ * passes through a NULL-check conditional. For the branch wherein the state is
+ * changed to CONST_IMM, the verifier releases the reference.
  */
 
 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
@@ -189,6 +202,7 @@ struct bpf_call_arg_meta {
 	int access_size;
 	s64 msize_smax_value;
 	u64 msize_umax_value;
+	int ptr_id;
 };
 
 static DEFINE_MUTEX(bpf_verifier_lock);
@@ -251,7 +265,42 @@ static bool type_is_pkt_pointer(enum bpf_reg_type type)
 
 static bool reg_type_may_be_null(enum bpf_reg_type type)
 {
-	return type == PTR_TO_MAP_VALUE_OR_NULL;
+	return type == PTR_TO_MAP_VALUE_OR_NULL ||
+	       type == PTR_TO_SOCKET_OR_NULL;
+}
+
+static bool type_is_refcounted(enum bpf_reg_type type)
+{
+	return type == PTR_TO_SOCKET;
+}
+
+static bool type_is_refcounted_or_null(enum bpf_reg_type type)
+{
+	return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
+}
+
+static bool reg_is_refcounted(const struct bpf_reg_state *reg)
+{
+	return type_is_refcounted(reg->type);
+}
+
+static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
+{
+	return type_is_refcounted_or_null(reg->type);
+}
+
+static bool arg_type_is_refcounted(enum bpf_arg_type type)
+{
+	return type == ARG_PTR_TO_SOCKET;
+}
+
+/* Determine whether the function releases some resources allocated by another
+ * function call. The first reference type argument will be assumed to be
+ * released by release_reference().
+ */
+static bool is_release_function(enum bpf_func_id func_id)
+{
+	return false;
 }
 
 /* string representation of 'enum bpf_reg_type' */
@@ -385,6 +434,12 @@ static void print_verifier_state(struct bpf_verifier_env *env,
 		else
 			verbose(env, "=%s", types_buf);
 	}
+	if (state->acquired_refs && state->refs[0].id) {
+		verbose(env, " refs=%d", state->refs[0].id);
+		for (i = 1; i < state->acquired_refs; i++)
+			if (state->refs[i].id)
+				verbose(env, ",%d", state->refs[i].id);
+	}
 	verbose(env, "\n");
 }
 
@@ -403,6 +458,8 @@ static int copy_##NAME##_state(struct bpf_func_state *dst,		\
 	       sizeof(*src->FIELD) * (src->COUNT / SIZE));		\
 	return 0;							\
 }
+/* copy_reference_state() */
+COPY_STATE_FN(reference, acquired_refs, refs, 1)
 /* copy_stack_state() */
 COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
 #undef COPY_STATE_FN
@@ -441,6 +498,8 @@ static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
 	state->FIELD = new_##FIELD;					\
 	return 0;							\
 }
+/* realloc_reference_state() */
+REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
 /* realloc_stack_state() */
 REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
 #undef REALLOC_STATE_FN
@@ -452,16 +511,89 @@ REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
  * which realloc_stack_state() copies over. It points to previous
  * bpf_verifier_state which is never reallocated.
  */
-static int realloc_func_state(struct bpf_func_state *state, int size,
-			      bool copy_old)
+static int realloc_func_state(struct bpf_func_state *state, int stack_size,
+			      int refs_size, bool copy_old)
 {
-	return realloc_stack_state(state, size, copy_old);
+	int err = realloc_reference_state(state, refs_size, copy_old);
+	if (err)
+		return err;
+	return realloc_stack_state(state, stack_size, copy_old);
+}
+
+/* Acquire a pointer id from the env and update the state->refs to include
+ * this new pointer reference.
+ * On success, returns a valid pointer id to associate with the register
+ * On failure, returns a negative errno.
+ */
+static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
+{
+	struct bpf_func_state *state = cur_func(env);
+	int new_ofs = state->acquired_refs;
+	int id, err;
+
+	err = realloc_reference_state(state, state->acquired_refs + 1, true);
+	if (err)
+		return err;
+	id = ++env->id_gen;
+	state->refs[new_ofs].id = id;
+	state->refs[new_ofs].insn_idx = insn_idx;
+
+	return id;
+}
+
+/* release function corresponding to acquire_reference_state(). Idempotent. */
+static int __release_reference_state(struct bpf_func_state *state, int ptr_id)
+{
+	int i, last_idx;
+
+	if (!ptr_id)
+		return -EFAULT;
+
+	last_idx = state->acquired_refs - 1;
+	for (i = 0; i < state->acquired_refs; i++) {
+		if (state->refs[i].id == ptr_id) {
+			if (last_idx && i != last_idx)
+				memcpy(&state->refs[i], &state->refs[last_idx],
+				       sizeof(*state->refs));
+			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
+			state->acquired_refs--;
+			return 0;
+		}
+	}
+	return -EFAULT;
+}
+
+/* variation on the above for cases where we expect that there must be an
+ * outstanding reference for the specified ptr_id.
+ */
+static int release_reference_state(struct bpf_verifier_env *env, int ptr_id)
+{
+	struct bpf_func_state *state = cur_func(env);
+	int err;
+
+	err = __release_reference_state(state, ptr_id);
+	if (WARN_ON_ONCE(err != 0))
+		verbose(env, "verifier internal error: can't release reference\n");
+	return err;
+}
+
+static int transfer_reference_state(struct bpf_func_state *dst,
+				    struct bpf_func_state *src)
+{
+	int err = realloc_reference_state(dst, src->acquired_refs, false);
+	if (err)
+		return err;
+	err = copy_reference_state(dst, src);
+	if (err)
+		return err;
+	return 0;
 }
 
 static void free_func_state(struct bpf_func_state *state)
 {
 	if (!state)
 		return;
+	kfree(state->refs);
 	kfree(state->stack);
 	kfree(state);
 }
@@ -487,10 +619,14 @@ static int copy_func_state(struct bpf_func_state *dst,
 {
 	int err;
 
-	err = realloc_func_state(dst, src->allocated_stack, false);
+	err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
+				 false);
+	if (err)
+		return err;
+	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
+	err = copy_reference_state(dst, src);
 	if (err)
 		return err;
-	memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack));
 	return copy_stack_state(dst, src);
 }
 
@@ -1015,7 +1151,7 @@ static int check_stack_write(struct bpf_verifier_env *env,
 	enum bpf_reg_type type;
 
 	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
-				 true);
+				 state->acquired_refs, true);
 	if (err)
 		return err;
 	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
@@ -1399,7 +1535,8 @@ static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
 {
 	const struct bpf_reg_state *reg = cur_regs(env) + regno;
 
-	return reg->type == PTR_TO_CTX;
+	return reg->type == PTR_TO_CTX ||
+	       reg->type == PTR_TO_SOCKET;
 }
 
 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
@@ -2003,6 +2140,12 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		expected_type = PTR_TO_SOCKET;
 		if (type != expected_type)
 			goto err_type;
+		if (meta->ptr_id || !reg->id) {
+			verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n",
+				meta->ptr_id, reg->id);
+			return -EFAULT;
+		}
+		meta->ptr_id = reg->id;
 	} else if (arg_type_is_mem_ptr(arg_type)) {
 		expected_type = PTR_TO_STACK;
 		/* One exception here. In case function allows for NULL to be
@@ -2292,10 +2435,32 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
 	return true;
 }
 
+static bool check_refcount_ok(const struct bpf_func_proto *fn)
+{
+	int count = 0;
+
+	if (arg_type_is_refcounted(fn->arg1_type))
+		count++;
+	if (arg_type_is_refcounted(fn->arg2_type))
+		count++;
+	if (arg_type_is_refcounted(fn->arg3_type))
+		count++;
+	if (arg_type_is_refcounted(fn->arg4_type))
+		count++;
+	if (arg_type_is_refcounted(fn->arg5_type))
+		count++;
+
+	/* We only support one arg being unreferenced at the moment,
+	 * which is sufficient for the helper functions we have right now.
+	 */
+	return count <= 1;
+}
+
 static int check_func_proto(const struct bpf_func_proto *fn)
 {
 	return check_raw_mode_ok(fn) &&
-	       check_arg_pair_ok(fn) ? 0 : -EINVAL;
+	       check_arg_pair_ok(fn) &&
+	       check_refcount_ok(fn) ? 0 : -EINVAL;
 }
 
 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
@@ -2328,12 +2493,45 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
 		__clear_all_pkt_pointers(env, vstate->frame[i]);
 }
 
+static void release_reg_references(struct bpf_verifier_env *env,
+				   struct bpf_func_state *state, int id)
+{
+	struct bpf_reg_state *regs = state->regs, *reg;
+	int i;
+
+	for (i = 0; i < MAX_BPF_REG; i++)
+		if (regs[i].id == id)
+			mark_reg_unknown(env, regs, i);
+
+	bpf_for_each_spilled_reg(i, state, reg) {
+		if (!reg)
+			continue;
+		if (reg_is_refcounted(reg) && reg->id == id)
+			__mark_reg_unknown(reg);
+	}
+}
+
+/* The pointer with the specified id has released its reference to kernel
+ * resources. Identify all copies of the same pointer and clear the reference.
+ */
+static int release_reference(struct bpf_verifier_env *env,
+			     struct bpf_call_arg_meta *meta)
+{
+	struct bpf_verifier_state *vstate = env->cur_state;
+	int i;
+
+	for (i = 0; i <= vstate->curframe; i++)
+		release_reg_references(env, vstate->frame[i], meta->ptr_id);
+
+	return release_reference_state(env, meta->ptr_id);
+}
+
 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			   int *insn_idx)
 {
 	struct bpf_verifier_state *state = env->cur_state;
 	struct bpf_func_state *caller, *callee;
-	int i, subprog, target_insn;
+	int i, err, subprog, target_insn;
 
 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
 		verbose(env, "the call stack of %d frames is too deep\n",
@@ -2371,6 +2569,11 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			state->curframe + 1 /* frameno within this callchain */,
 			subprog /* subprog number within this prog */);
 
+	/* Transfer references to the callee */
+	err = transfer_reference_state(callee, caller);
+	if (err)
+		return err;
+
 	/* copy r1 - r5 args that callee can access.  The copy includes parent
 	 * pointers, which connects us up to the liveness chain
 	 */
@@ -2403,6 +2606,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 	struct bpf_verifier_state *state = env->cur_state;
 	struct bpf_func_state *caller, *callee;
 	struct bpf_reg_state *r0;
+	int err;
 
 	callee = state->frame[state->curframe];
 	r0 = &callee->regs[BPF_REG_0];
@@ -2422,6 +2626,11 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 	/* return to the caller whatever r0 had in the callee */
 	caller->regs[BPF_REG_0] = *r0;
 
+	/* Transfer references to the caller */
+	err = transfer_reference_state(caller, callee);
+	if (err)
+		return err;
+
 	*insn_idx = callee->callsite + 1;
 	if (env->log.level) {
 		verbose(env, "returning from callee:\n");
@@ -2478,6 +2687,18 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 	return 0;
 }
 
+static int check_reference_leak(struct bpf_verifier_env *env)
+{
+	struct bpf_func_state *state = cur_func(env);
+	int i;
+
+	for (i = 0; i < state->acquired_refs; i++) {
+		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
+			state->refs[i].id, state->refs[i].insn_idx);
+	}
+	return state->acquired_refs ? -EINVAL : 0;
+}
+
 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 {
 	const struct bpf_func_proto *fn = NULL;
@@ -2556,6 +2777,18 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 			return err;
 	}
 
+	if (func_id == BPF_FUNC_tail_call) {
+		err = check_reference_leak(env);
+		if (err) {
+			verbose(env, "tail_call would lead to reference leak\n");
+			return err;
+		}
+	} else if (is_release_function(func_id)) {
+		err = release_reference(env, &meta);
+		if (err)
+			return err;
+	}
+
 	regs = cur_regs(env);
 
 	/* check that flags argument in get_local_storage(map, flags) is 0,
@@ -2599,9 +2832,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
 		regs[BPF_REG_0].id = ++env->id_gen;
 	} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
+		int id = acquire_reference_state(env, insn_idx);
+		if (id < 0)
+			return id;
 		mark_reg_known_zero(env, regs, BPF_REG_0);
 		regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
-		regs[BPF_REG_0].id = ++env->id_gen;
+		regs[BPF_REG_0].id = id;
 	} else {
 		verbose(env, "unknown return type %d of func %s#%d\n",
 			fn->ret_type, func_id_name(func_id), func_id);
@@ -3665,7 +3901,8 @@ static void reg_combine_min_max(struct bpf_reg_state *true_src,
 	}
 }
 
-static void mark_ptr_or_null_reg(struct bpf_reg_state *reg, u32 id,
+static void mark_ptr_or_null_reg(struct bpf_func_state *state,
+				 struct bpf_reg_state *reg, u32 id,
 				 bool is_null)
 {
 	if (reg_type_may_be_null(reg->type) && reg->id == id) {
@@ -3691,11 +3928,13 @@ static void mark_ptr_or_null_reg(struct bpf_reg_state *reg, u32 id,
 		} else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
 			reg->type = PTR_TO_SOCKET;
 		}
-		/* We don't need id from this point onwards anymore, thus we
-		 * should better reset it, so that state pruning has chances
-		 * to take effect.
-		 */
-		reg->id = 0;
+		if (is_null || !reg_is_refcounted(reg)) {
+			/* We don't need id from this point onwards anymore,
+			 * thus we should better reset it, so that state
+			 * pruning has chances to take effect.
+			 */
+			reg->id = 0;
+		}
 	}
 }
 
@@ -3710,15 +3949,18 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
 	u32 id = regs[regno].id;
 	int i, j;
 
+	if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
+		__release_reference_state(state, id);
+
 	for (i = 0; i < MAX_BPF_REG; i++)
-		mark_ptr_or_null_reg(&regs[i], id, is_null);
+		mark_ptr_or_null_reg(state, &regs[i], id, is_null);
 
 	for (j = 0; j <= vstate->curframe; j++) {
 		state = vstate->frame[j];
 		bpf_for_each_spilled_reg(i, state, reg) {
 			if (!reg)
 				continue;
-			mark_ptr_or_null_reg(reg, id, is_null);
+			mark_ptr_or_null_reg(state, reg, id, is_null);
 		}
 	}
 }
@@ -4050,6 +4292,16 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 	if (err)
 		return err;
 
+	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
+	 * gen_ld_abs() may terminate the program at runtime, leading to
+	 * reference leak.
+	 */
+	err = check_reference_leak(env);
+	if (err) {
+		verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
+		return err;
+	}
+
 	if (regs[BPF_REG_6].type != PTR_TO_CTX) {
 		verbose(env,
 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
@@ -4542,6 +4794,14 @@ static bool stacksafe(struct bpf_func_state *old,
 	return true;
 }
 
+static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
+{
+	if (old->acquired_refs != cur->acquired_refs)
+		return false;
+	return !memcmp(old->refs, cur->refs,
+		       sizeof(*old->refs) * old->acquired_refs);
+}
+
 /* compare two verifier states
  *
  * all states stored in state_list are known to be valid, since
@@ -4587,6 +4847,9 @@ static bool func_states_equal(struct bpf_func_state *old,
 
 	if (!stacksafe(old, cur, idmap))
 		goto out_free;
+
+	if (!refsafe(old, cur))
+		goto out_free;
 	ret = true;
 out_free:
 	kfree(idmap);
@@ -4868,6 +5131,7 @@ static int do_check(struct bpf_verifier_env *env)
 
 		regs = cur_regs(env);
 		env->insn_aux_data[insn_idx].seen = true;
+
 		if (class == BPF_ALU || class == BPF_ALU64) {
 			err = check_alu_op(env, insn);
 			if (err)
@@ -5032,6 +5296,10 @@ static int do_check(struct bpf_verifier_env *env)
 					continue;
 				}
 
+				err = check_reference_leak(env);
+				if (err)
+					return err;
+
 				/* eBPF calling convetion is such that R0 is used
 				 * to return the value from eBPF program.
 				 * Make sure that it's readable at this time
-- 
cgit v1.2.3


From 401b25aa1a75e7fe4e3202a6336604269697d705 Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Tue, 2 Oct 2018 22:20:50 -0400
Subject: ext4: convert fault handler to use vm_fault_t type

Return type of ext4_page_mkwrite and ext4_filemap_fault are
changed to use vm_fault_t type.

With this patch all the callers of block_page_mkwrite_return()
are changed to handle vm_fault_t. So converting the return type
of block_page_mkwrite_return() to vm_fault_t.

Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Matthew Wilcox <willy@infradead.org>
---
 fs/ext4/ext4.h              |  4 ++--
 fs/ext4/inode.c             | 29 +++++++++++++++--------------
 include/linux/buffer_head.h |  2 +-
 3 files changed, 18 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 57cbc98d730f..86e1bacac757 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2487,8 +2487,8 @@ extern int ext4_writepage_trans_blocks(struct inode *);
 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
 			     loff_t lstart, loff_t lend);
-extern int ext4_page_mkwrite(struct vm_fault *vmf);
-extern int ext4_filemap_fault(struct vm_fault *vmf);
+extern vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf);
+extern vm_fault_t ext4_filemap_fault(struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
 extern void ext4_da_release_space(struct inode *inode, int to_free);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9b69f88bdacc..c3d9a42c561e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -6184,13 +6184,14 @@ static int ext4_bh_unmapped(handle_t *handle, struct buffer_head *bh)
 	return !buffer_mapped(bh);
 }
 
-int ext4_page_mkwrite(struct vm_fault *vmf)
+vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = vmf->page;
 	loff_t size;
 	unsigned long len;
-	int ret;
+	int err;
+	vm_fault_t ret;
 	struct file *file = vma->vm_file;
 	struct inode *inode = file_inode(file);
 	struct address_space *mapping = inode->i_mapping;
@@ -6203,8 +6204,8 @@ int ext4_page_mkwrite(struct vm_fault *vmf)
 
 	down_read(&EXT4_I(inode)->i_mmap_sem);
 
-	ret = ext4_convert_inline_data(inode);
-	if (ret)
+	err = ext4_convert_inline_data(inode);
+	if (err)
 		goto out_ret;
 
 	/* Delalloc case is easy... */
@@ -6212,9 +6213,9 @@ int ext4_page_mkwrite(struct vm_fault *vmf)
 	    !ext4_should_journal_data(inode) &&
 	    !ext4_nonda_switch(inode->i_sb)) {
 		do {
-			ret = block_page_mkwrite(vma, vmf,
+			err = block_page_mkwrite(vma, vmf,
 						   ext4_da_get_block_prep);
-		} while (ret == -ENOSPC &&
+		} while (err == -ENOSPC &&
 		       ext4_should_retry_alloc(inode->i_sb, &retries));
 		goto out_ret;
 	}
@@ -6259,8 +6260,8 @@ retry_alloc:
 		ret = VM_FAULT_SIGBUS;
 		goto out;
 	}
-	ret = block_page_mkwrite(vma, vmf, get_block);
-	if (!ret && ext4_should_journal_data(inode)) {
+	err = block_page_mkwrite(vma, vmf, get_block);
+	if (!err && ext4_should_journal_data(inode)) {
 		if (ext4_walk_page_buffers(handle, page_buffers(page), 0,
 			  PAGE_SIZE, NULL, do_journal_get_write_access)) {
 			unlock_page(page);
@@ -6271,24 +6272,24 @@ retry_alloc:
 		ext4_set_inode_state(inode, EXT4_STATE_JDATA);
 	}
 	ext4_journal_stop(handle);
-	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+	if (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry_alloc;
 out_ret:
-	ret = block_page_mkwrite_return(ret);
+	ret = block_page_mkwrite_return(err);
 out:
 	up_read(&EXT4_I(inode)->i_mmap_sem);
 	sb_end_pagefault(inode->i_sb);
 	return ret;
 }
 
-int ext4_filemap_fault(struct vm_fault *vmf)
+vm_fault_t ext4_filemap_fault(struct vm_fault *vmf)
 {
 	struct inode *inode = file_inode(vmf->vma->vm_file);
-	int err;
+	vm_fault_t ret;
 
 	down_read(&EXT4_I(inode)->i_mmap_sem);
-	err = filemap_fault(vmf);
+	ret = filemap_fault(vmf);
 	up_read(&EXT4_I(inode)->i_mmap_sem);
 
-	return err;
+	return ret;
 }
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 96225a77c112..7b73ef7f902d 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -242,7 +242,7 @@ int block_commit_write(struct page *page, unsigned from, unsigned to);
 int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 				get_block_t get_block);
 /* Convert errno to return value from ->page_mkwrite() call */
-static inline int block_page_mkwrite_return(int err)
+static inline vm_fault_t block_page_mkwrite_return(int err)
 {
 	if (err == 0)
 		return VM_FAULT_LOCKED;
-- 
cgit v1.2.3


From f3edc2dbe0ad0bbbd8450cd37328f99acf215fd8 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben-linux@fluff.org>
Date: Mon, 1 Oct 2018 17:02:43 +0100
Subject: net: usbnet: make driver_info const

The driver_info field that is used for describing each of the usb-net
drivers using the usbnet.c core all declare their information as const
and the usbnet.c itself does not try and modify the struct.

It is therefore a good idea to make this const in the usbnet.c structure
in case anyone tries to modify it.

Signed-off-by: Ben Dooks <ben-linux@fluff.org>
Signed-off-by: Ben Dooks <ben.dooks@codethink.co.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/usbnet.c   | 12 ++++++------
 include/linux/usb/usbnet.h |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 73aa33364d80..504282af27e5 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -802,7 +802,7 @@ static void usbnet_terminate_urbs(struct usbnet *dev)
 int usbnet_stop (struct net_device *net)
 {
 	struct usbnet		*dev = netdev_priv(net);
-	struct driver_info	*info = dev->driver_info;
+	const struct driver_info *info = dev->driver_info;
 	int			retval, pm, mpn;
 
 	clear_bit(EVENT_DEV_OPEN, &dev->flags);
@@ -865,7 +865,7 @@ int usbnet_open (struct net_device *net)
 {
 	struct usbnet		*dev = netdev_priv(net);
 	int			retval;
-	struct driver_info	*info = dev->driver_info;
+	const struct driver_info *info = dev->driver_info;
 
 	if ((retval = usb_autopm_get_interface(dev->intf)) < 0) {
 		netif_info(dev, ifup, dev->net,
@@ -1205,7 +1205,7 @@ fail_lowmem:
 	}
 
 	if (test_bit (EVENT_LINK_RESET, &dev->flags)) {
-		struct driver_info	*info = dev->driver_info;
+		const struct driver_info *info = dev->driver_info;
 		int			retval = 0;
 
 		clear_bit (EVENT_LINK_RESET, &dev->flags);
@@ -1353,7 +1353,7 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
 	unsigned int			length;
 	struct urb		*urb = NULL;
 	struct skb_data		*entry;
-	struct driver_info	*info = dev->driver_info;
+	const struct driver_info *info = dev->driver_info;
 	unsigned long		flags;
 	int retval;
 
@@ -1647,7 +1647,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
 	struct usbnet			*dev;
 	struct net_device		*net;
 	struct usb_host_interface	*interface;
-	struct driver_info		*info;
+	const struct driver_info	*info;
 	struct usb_device		*xdev;
 	int				status;
 	const char			*name;
@@ -1663,7 +1663,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
 	}
 
 	name = udev->dev.driver->name;
-	info = (struct driver_info *) prod->driver_info;
+	info = (const struct driver_info *) prod->driver_info;
 	if (!info) {
 		dev_dbg (&udev->dev, "blacklisted by %s\n", name);
 		return -ENODEV;
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index e2ec3582e549..d8860f2d0976 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -28,7 +28,7 @@ struct usbnet {
 	/* housekeeping */
 	struct usb_device	*udev;
 	struct usb_interface	*intf;
-	struct driver_info	*driver_info;
+	const struct driver_info *driver_info;
 	const char		*driver_name;
 	void			*driver_priv;
 	wait_queue_head_t	wait;
-- 
cgit v1.2.3


From 47bd59e538d4e7b3ad9c18bef5c1052657bdff59 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:13 +0200
Subject: mtd: rawnand: plat_nand: Pass a nand_chip object to all
 platform_nand_ctrl hooks

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one or
remove the mtd_info object when both are passed.

In order to do that, we first need to update the platform_nand_ctrl
hooks to take a nand_chip object instead of an mtd_info.

We add temporary plat_nand_xxx() wrappers to the do the mtd -> chip
conversion, but those will be dropped when patching nand_chip hooks to
take a nand_chip object.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Reviewed-by: Alexander Sverdlin <alexander.sverdlin@gmail.com>
Acked-by: Alexander Sverdlin <alexander.sverdlin@gmail.com>
Acked-by: Robert Jarzmik <robert.jarzmik@free.fr>
Acked-by: Krzysztof Halasa <khalasa@piap.pl>
Acked-by: Paul Burton <paul.burton@mips.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 arch/arm/mach-ep93xx/snappercl15.c      |  7 ++--
 arch/arm/mach-ep93xx/ts72xx.c           |  7 ++--
 arch/arm/mach-imx/mach-qong.c           | 11 +++----
 arch/arm/mach-ixp4xx/ixdp425-setup.c    |  3 +-
 arch/arm/mach-omap1/board-fsample.c     |  2 +-
 arch/arm/mach-omap1/board-h2.c          |  2 +-
 arch/arm/mach-omap1/board-h3.c          |  2 +-
 arch/arm/mach-omap1/board-nand.c        |  3 +-
 arch/arm/mach-omap1/board-perseus2.c    |  2 +-
 arch/arm/mach-omap1/common.h            |  4 +--
 arch/arm/mach-orion5x/ts78xx-setup.c    | 18 ++++-------
 arch/arm/mach-pxa/balloon3.c            |  8 ++---
 arch/arm/mach-pxa/em-x270.c             |  5 ++-
 arch/arm/mach-pxa/palmtx.c              |  5 ++-
 arch/mips/alchemy/devboards/db1200.c    |  5 ++-
 arch/mips/alchemy/devboards/db1300.c    |  5 ++-
 arch/mips/alchemy/devboards/db1550.c    |  5 ++-
 arch/mips/netlogic/xlr/platform-flash.c |  4 +--
 arch/mips/pnx833x/common/platform.c     |  3 +-
 arch/mips/rb532/devices.c               |  5 ++-
 arch/sh/boards/mach-migor/setup.c       |  6 ++--
 drivers/mtd/nand/raw/plat_nand.c        | 57 ++++++++++++++++++++++++++++++---
 include/linux/mtd/rawnand.h             | 10 +++---
 23 files changed, 102 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-ep93xx/snappercl15.c b/arch/arm/mach-ep93xx/snappercl15.c
index 45940c1d7787..aa03ea79c5f5 100644
--- a/arch/arm/mach-ep93xx/snappercl15.c
+++ b/arch/arm/mach-ep93xx/snappercl15.c
@@ -45,10 +45,9 @@
 
 #define NAND_CTRL_ADDR(chip) 	(chip->IO_ADDR_W + 0x40)
 
-static void snappercl15_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
+static void snappercl15_nand_cmd_ctrl(struct nand_chip *chip, int cmd,
 				      unsigned int ctrl)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	static u16 nand_state = SNAPPERCL15_NAND_WPN;
 	u16 set;
 
@@ -73,10 +72,8 @@ static void snappercl15_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
 		__raw_writew((cmd & 0xff) | nand_state, chip->IO_ADDR_W);
 }
 
-static int snappercl15_nand_dev_ready(struct mtd_info *mtd)
+static int snappercl15_nand_dev_ready(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
 	return !!(__raw_readw(NAND_CTRL_ADDR(chip)) & SNAPPERCL15_NAND_RDY);
 }
 
diff --git a/arch/arm/mach-ep93xx/ts72xx.c b/arch/arm/mach-ep93xx/ts72xx.c
index c089a2a4fe30..26259dd9e951 100644
--- a/arch/arm/mach-ep93xx/ts72xx.c
+++ b/arch/arm/mach-ep93xx/ts72xx.c
@@ -76,11 +76,9 @@ static void __init ts72xx_map_io(void)
 #define TS72XX_NAND_CONTROL_ADDR_LINE	22	/* 0xN0400000 */
 #define TS72XX_NAND_BUSY_ADDR_LINE	23	/* 0xN0800000 */
 
-static void ts72xx_nand_hwcontrol(struct mtd_info *mtd,
+static void ts72xx_nand_hwcontrol(struct nand_chip *chip,
 				  int cmd, unsigned int ctrl)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
 	if (ctrl & NAND_CTRL_CHANGE) {
 		void __iomem *addr = chip->IO_ADDR_R;
 		unsigned char bits;
@@ -99,9 +97,8 @@ static void ts72xx_nand_hwcontrol(struct mtd_info *mtd,
 		__raw_writeb(cmd, chip->IO_ADDR_W);
 }
 
-static int ts72xx_nand_device_ready(struct mtd_info *mtd)
+static int ts72xx_nand_device_ready(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	void __iomem *addr = chip->IO_ADDR_R;
 
 	addr += (1 << TS72XX_NAND_BUSY_ADDR_LINE);
diff --git a/arch/arm/mach-imx/mach-qong.c b/arch/arm/mach-imx/mach-qong.c
index 42a700053103..ff015f603ac9 100644
--- a/arch/arm/mach-imx/mach-qong.c
+++ b/arch/arm/mach-imx/mach-qong.c
@@ -129,10 +129,9 @@ static void qong_init_nor_mtd(void)
 /*
  * Hardware specific access to control-lines
  */
-static void qong_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+static void qong_nand_cmd_ctrl(struct nand_chip *nand_chip, int cmd,
+			       unsigned int ctrl)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-
 	if (cmd == NAND_CMD_NONE)
 		return;
 
@@ -145,14 +144,14 @@ static void qong_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
 /*
  * Read the Device Ready pin.
  */
-static int qong_nand_device_ready(struct mtd_info *mtd)
+static int qong_nand_device_ready(struct nand_chip *chip)
 {
 	return gpio_get_value(IOMUX_TO_GPIO(MX31_PIN_NFRB));
 }
 
-static void qong_nand_select_chip(struct mtd_info *mtd, int chip)
+static void qong_nand_select_chip(struct nand_chip *chip, int cs)
 {
-	if (chip >= 0)
+	if (cs >= 0)
 		gpio_set_value(IOMUX_TO_GPIO(MX31_PIN_NFCE_B), 0);
 	else
 		gpio_set_value(IOMUX_TO_GPIO(MX31_PIN_NFCE_B), 1);
diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c
index 3ec829d52cdd..7c39edc121ba 100644
--- a/arch/arm/mach-ixp4xx/ixdp425-setup.c
+++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c
@@ -75,9 +75,8 @@ static struct mtd_partition ixdp425_partitions[] = {
 };
 
 static void
-ixdp425_flash_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+ixdp425_flash_nand_cmd_ctrl(struct nand_chip *this, int cmd, unsigned int ctrl)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	int offset = (int)nand_get_controller_data(this);
 
 	if (ctrl & NAND_CTRL_CHANGE) {
diff --git a/arch/arm/mach-omap1/board-fsample.c b/arch/arm/mach-omap1/board-fsample.c
index 69bd601feb83..e9f512a0602e 100644
--- a/arch/arm/mach-omap1/board-fsample.c
+++ b/arch/arm/mach-omap1/board-fsample.c
@@ -186,7 +186,7 @@ static struct platform_device nor_device = {
 
 #define FSAMPLE_NAND_RB_GPIO_PIN	62
 
-static int nand_dev_ready(struct mtd_info *mtd)
+static int nand_dev_ready(struct nand_chip *chip)
 {
 	return gpio_get_value(FSAMPLE_NAND_RB_GPIO_PIN);
 }
diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c
index 9aeb8ad8c327..d5dd2acd6f78 100644
--- a/arch/arm/mach-omap1/board-h2.c
+++ b/arch/arm/mach-omap1/board-h2.c
@@ -182,7 +182,7 @@ static struct mtd_partition h2_nand_partitions[] = {
 
 #define H2_NAND_RB_GPIO_PIN	62
 
-static int h2_nand_dev_ready(struct mtd_info *mtd)
+static int h2_nand_dev_ready(struct nand_chip *chip)
 {
 	return gpio_get_value(H2_NAND_RB_GPIO_PIN);
 }
diff --git a/arch/arm/mach-omap1/board-h3.c b/arch/arm/mach-omap1/board-h3.c
index 2edcd6356f2d..a75856fe4259 100644
--- a/arch/arm/mach-omap1/board-h3.c
+++ b/arch/arm/mach-omap1/board-h3.c
@@ -185,7 +185,7 @@ static struct mtd_partition nand_partitions[] = {
 
 #define H3_NAND_RB_GPIO_PIN	10
 
-static int nand_dev_ready(struct mtd_info *mtd)
+static int nand_dev_ready(struct nand_chip *chip)
 {
 	return gpio_get_value(H3_NAND_RB_GPIO_PIN);
 }
diff --git a/arch/arm/mach-omap1/board-nand.c b/arch/arm/mach-omap1/board-nand.c
index 1bffbb4e050f..59d56a30bc63 100644
--- a/arch/arm/mach-omap1/board-nand.c
+++ b/arch/arm/mach-omap1/board-nand.c
@@ -20,9 +20,8 @@
 
 #include "common.h"
 
-void omap1_nand_cmd_ctl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+void omap1_nand_cmd_ctl(struct nand_chip *this, int cmd, unsigned int ctrl)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	unsigned long mask;
 
 	if (cmd == NAND_CMD_NONE)
diff --git a/arch/arm/mach-omap1/board-perseus2.c b/arch/arm/mach-omap1/board-perseus2.c
index b4951eb82898..c61c7c7520ca 100644
--- a/arch/arm/mach-omap1/board-perseus2.c
+++ b/arch/arm/mach-omap1/board-perseus2.c
@@ -144,7 +144,7 @@ static struct platform_device nor_device = {
 
 #define P2_NAND_RB_GPIO_PIN	62
 
-static int nand_dev_ready(struct mtd_info *mtd)
+static int nand_dev_ready(struct nand_chip *chip)
 {
 	return gpio_get_value(P2_NAND_RB_GPIO_PIN);
 }
diff --git a/arch/arm/mach-omap1/common.h b/arch/arm/mach-omap1/common.h
index c6537d2c2859..504b959ba5cf 100644
--- a/arch/arm/mach-omap1/common.h
+++ b/arch/arm/mach-omap1/common.h
@@ -26,7 +26,6 @@
 #ifndef __ARCH_ARM_MACH_OMAP1_COMMON_H
 #define __ARCH_ARM_MACH_OMAP1_COMMON_H
 
-#include <linux/mtd/mtd.h>
 #include <linux/platform_data/i2c-omap.h>
 #include <linux/reboot.h>
 
@@ -82,7 +81,8 @@ void omap1_restart(enum reboot_mode, const char *);
 
 extern void __init omap_check_revision(void);
 
-extern void omap1_nand_cmd_ctl(struct mtd_info *mtd, int cmd,
+struct nand_chip;
+extern void omap1_nand_cmd_ctl(struct nand_chip *this, int cmd,
 			       unsigned int ctrl);
 
 extern void omap1_timer_init(void);
diff --git a/arch/arm/mach-orion5x/ts78xx-setup.c b/arch/arm/mach-orion5x/ts78xx-setup.c
index 94778739e38f..48d85ddf7c31 100644
--- a/arch/arm/mach-orion5x/ts78xx-setup.c
+++ b/arch/arm/mach-orion5x/ts78xx-setup.c
@@ -131,11 +131,9 @@ static void ts78xx_ts_rtc_unload(void)
  * NAND_CLE: bit 1 -> bit 1
  * NAND_ALE: bit 2 -> bit 0
  */
-static void ts78xx_ts_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
-			unsigned int ctrl)
+static void ts78xx_ts_nand_cmd_ctrl(struct nand_chip *this, int cmd,
+				    unsigned int ctrl)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
-
 	if (ctrl & NAND_CTRL_CHANGE) {
 		unsigned char bits;
 
@@ -150,15 +148,14 @@ static void ts78xx_ts_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
 		writeb(cmd, this->IO_ADDR_W);
 }
 
-static int ts78xx_ts_nand_dev_ready(struct mtd_info *mtd)
+static int ts78xx_ts_nand_dev_ready(struct nand_chip *chip)
 {
 	return readb(TS_NAND_CTRL) & 0x20;
 }
 
-static void ts78xx_ts_nand_write_buf(struct mtd_info *mtd,
-			const uint8_t *buf, int len)
+static void ts78xx_ts_nand_write_buf(struct nand_chip *chip,
+				     const uint8_t *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	void __iomem *io_base = chip->IO_ADDR_W;
 	unsigned long off = ((unsigned long)buf & 3);
 	int sz;
@@ -182,10 +179,9 @@ static void ts78xx_ts_nand_write_buf(struct mtd_info *mtd,
 		writesb(io_base, buf, len);
 }
 
-static void ts78xx_ts_nand_read_buf(struct mtd_info *mtd,
-			uint8_t *buf, int len)
+static void ts78xx_ts_nand_read_buf(struct nand_chip *chip,
+				    uint8_t *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	void __iomem *io_base = chip->IO_ADDR_R;
 	unsigned long off = ((unsigned long)buf & 3);
 	int sz;
diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
index af46d2182533..71fda90b9599 100644
--- a/arch/arm/mach-pxa/balloon3.c
+++ b/arch/arm/mach-pxa/balloon3.c
@@ -571,9 +571,9 @@ static inline void balloon3_i2c_init(void) {}
  * NAND
  ******************************************************************************/
 #if defined(CONFIG_MTD_NAND_PLATFORM)||defined(CONFIG_MTD_NAND_PLATFORM_MODULE)
-static void balloon3_nand_cmd_ctl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+static void balloon3_nand_cmd_ctl(struct nand_chip *this, int cmd,
+				  unsigned int ctrl)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	uint8_t balloon3_ctl_set = 0, balloon3_ctl_clr = 0;
 
 	if (ctrl & NAND_CTRL_CHANGE) {
@@ -600,7 +600,7 @@ static void balloon3_nand_cmd_ctl(struct mtd_info *mtd, int cmd, unsigned int ct
 		writeb(cmd, this->IO_ADDR_W);
 }
 
-static void balloon3_nand_select_chip(struct mtd_info *mtd, int chip)
+static void balloon3_nand_select_chip(struct nand_chip *this, int chip)
 {
 	if (chip < 0 || chip > 3)
 		return;
@@ -616,7 +616,7 @@ static void balloon3_nand_select_chip(struct mtd_info *mtd, int chip)
 		BALLOON3_NAND_CONTROL_REG);
 }
 
-static int balloon3_nand_dev_ready(struct mtd_info *mtd)
+static int balloon3_nand_dev_ready(struct nand_chip *this)
 {
 	return __raw_readl(BALLOON3_NAND_STAT_REG) & BALLOON3_NAND_STAT_RNB;
 }
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 29be04c6cc48..ba1ec9992830 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -285,10 +285,9 @@ static void nand_cs_off(void)
 }
 
 /* hardware specific access to control-lines */
-static void em_x270_nand_cmd_ctl(struct mtd_info *mtd, int dat,
+static void em_x270_nand_cmd_ctl(struct nand_chip *this, int dat,
 				 unsigned int ctrl)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	unsigned long nandaddr = (unsigned long)this->IO_ADDR_W;
 
 	dsb();
@@ -317,7 +316,7 @@ static void em_x270_nand_cmd_ctl(struct mtd_info *mtd, int dat,
 }
 
 /* read device ready pin */
-static int em_x270_nand_device_ready(struct mtd_info *mtd)
+static int em_x270_nand_device_ready(struct nand_chip *this)
 {
 	dsb();
 
diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c
index 47e3e38e9bec..ed9661e70b83 100644
--- a/arch/arm/mach-pxa/palmtx.c
+++ b/arch/arm/mach-pxa/palmtx.c
@@ -247,10 +247,9 @@ static inline void palmtx_keys_init(void) {}
  ******************************************************************************/
 #if defined(CONFIG_MTD_NAND_PLATFORM) || \
 	defined(CONFIG_MTD_NAND_PLATFORM_MODULE)
-static void palmtx_nand_cmd_ctl(struct mtd_info *mtd, int cmd,
-				 unsigned int ctrl)
+static void palmtx_nand_cmd_ctl(struct nand_chip *this, int cmd,
+				unsigned int ctrl)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	char __iomem *nandaddr = this->IO_ADDR_W;
 
 	if (cmd == NAND_CMD_NONE)
diff --git a/arch/mips/alchemy/devboards/db1200.c b/arch/mips/alchemy/devboards/db1200.c
index da7663770425..f043615c1a99 100644
--- a/arch/mips/alchemy/devboards/db1200.c
+++ b/arch/mips/alchemy/devboards/db1200.c
@@ -197,10 +197,9 @@ static struct i2c_board_info db1200_i2c_devs[] __initdata = {
 
 /**********************************************************************/
 
-static void au1200_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
+static void au1200_nand_cmd_ctrl(struct nand_chip *this, int cmd,
 				 unsigned int ctrl)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	unsigned long ioaddr = (unsigned long)this->IO_ADDR_W;
 
 	ioaddr &= 0xffffff00;
@@ -220,7 +219,7 @@ static void au1200_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
 	}
 }
 
-static int au1200_nand_device_ready(struct mtd_info *mtd)
+static int au1200_nand_device_ready(struct nand_chip *this)
 {
 	return alchemy_rdsmem(AU1000_MEM_STSTAT) & 1;
 }
diff --git a/arch/mips/alchemy/devboards/db1300.c b/arch/mips/alchemy/devboards/db1300.c
index efb318e03e0a..1201fa655e78 100644
--- a/arch/mips/alchemy/devboards/db1300.c
+++ b/arch/mips/alchemy/devboards/db1300.c
@@ -149,10 +149,9 @@ static void __init db1300_gpio_config(void)
 
 /**********************************************************************/
 
-static void au1300_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
+static void au1300_nand_cmd_ctrl(struct nand_chip *this, int cmd,
 				 unsigned int ctrl)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	unsigned long ioaddr = (unsigned long)this->IO_ADDR_W;
 
 	ioaddr &= 0xffffff00;
@@ -172,7 +171,7 @@ static void au1300_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
 	}
 }
 
-static int au1300_nand_device_ready(struct mtd_info *mtd)
+static int au1300_nand_device_ready(struct nand_chip *this)
 {
 	return alchemy_rdsmem(AU1000_MEM_STSTAT) & 1;
 }
diff --git a/arch/mips/alchemy/devboards/db1550.c b/arch/mips/alchemy/devboards/db1550.c
index 7d3dfaa10231..cae39cde5de6 100644
--- a/arch/mips/alchemy/devboards/db1550.c
+++ b/arch/mips/alchemy/devboards/db1550.c
@@ -126,10 +126,9 @@ static struct i2c_board_info db1550_i2c_devs[] __initdata = {
 
 /**********************************************************************/
 
-static void au1550_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
+static void au1550_nand_cmd_ctrl(struct nand_chip *this, int cmd,
 				 unsigned int ctrl)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	unsigned long ioaddr = (unsigned long)this->IO_ADDR_W;
 
 	ioaddr &= 0xffffff00;
@@ -149,7 +148,7 @@ static void au1550_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
 	}
 }
 
-static int au1550_nand_device_ready(struct mtd_info *mtd)
+static int au1550_nand_device_ready(struct nand_chip *this)
 {
 	return alchemy_rdsmem(AU1000_MEM_STSTAT) & 1;
 }
diff --git a/arch/mips/netlogic/xlr/platform-flash.c b/arch/mips/netlogic/xlr/platform-flash.c
index 4d1b4c003376..4f76b85b44c9 100644
--- a/arch/mips/netlogic/xlr/platform-flash.c
+++ b/arch/mips/netlogic/xlr/platform-flash.c
@@ -92,8 +92,8 @@ struct xlr_nand_flash_priv {
 
 static struct xlr_nand_flash_priv nand_priv;
 
-static void xlr_nand_ctrl(struct mtd_info *mtd, int cmd,
-		unsigned int ctrl)
+static void xlr_nand_ctrl(struct nand_chip *chip, int cmd,
+			  unsigned int ctrl)
 {
 	if (ctrl & NAND_CLE)
 		nlm_write_reg(nand_priv.flash_mmio,
diff --git a/arch/mips/pnx833x/common/platform.c b/arch/mips/pnx833x/common/platform.c
index a7a4e9f5146d..ca8a2889431e 100644
--- a/arch/mips/pnx833x/common/platform.c
+++ b/arch/mips/pnx833x/common/platform.c
@@ -178,9 +178,8 @@ static struct platform_device pnx833x_sata_device = {
 };
 
 static void
-pnx833x_flash_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+pnx833x_flash_nand_cmd_ctrl(struct nand_chip *this, int cmd, unsigned int ctrl)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	unsigned long nandaddr = (unsigned long)this->IO_ADDR_W;
 
 	if (cmd == NAND_CMD_NONE)
diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c
index 354d258396ff..9173949892ed 100644
--- a/arch/mips/rb532/devices.c
+++ b/arch/mips/rb532/devices.c
@@ -141,14 +141,13 @@ static struct platform_device cf_slot0 = {
 };
 
 /* Resources and device for NAND */
-static int rb532_dev_ready(struct mtd_info *mtd)
+static int rb532_dev_ready(struct nand_chip *chip)
 {
 	return gpio_get_value(GPIO_RDY);
 }
 
-static void rb532_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+static void rb532_cmd_ctrl(struct nand_chip *chip, int cmd, unsigned int ctrl)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	unsigned char orbits, nandbits;
 
 	if (ctrl & NAND_CTRL_CHANGE) {
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index 254f2c662703..833f3e49027b 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -165,11 +165,9 @@ static struct mtd_partition migor_nand_flash_partitions[] = {
 	},
 };
 
-static void migor_nand_flash_cmd_ctl(struct mtd_info *mtd, int cmd,
+static void migor_nand_flash_cmd_ctl(struct nand_chip *chip, int cmd,
 				     unsigned int ctrl)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
 	if (cmd == NAND_CMD_NONE)
 		return;
 
@@ -181,7 +179,7 @@ static void migor_nand_flash_cmd_ctl(struct mtd_info *mtd, int cmd,
 		writeb(cmd, chip->IO_ADDR_W);
 }
 
-static int migor_nand_flash_ready(struct mtd_info *mtd)
+static int migor_nand_flash_ready(struct nand_chip *chip)
 {
 	return gpio_get_value(GPIO_PTA1); /* NAND_RBn */
 }
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index 222626df4b96..24f904300c44 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -23,6 +23,42 @@ struct plat_nand_data {
 	void __iomem		*io_base;
 };
 
+static void plat_nand_cmd_ctrl(struct mtd_info *mtd, int dat, unsigned int ctrl)
+{
+	struct platform_nand_data *pdata = dev_get_platdata(mtd->dev.parent);
+
+	pdata->ctrl.cmd_ctrl(mtd_to_nand(mtd), dat, ctrl);
+}
+
+static int plat_nand_dev_ready(struct mtd_info *mtd)
+{
+	struct platform_nand_data *pdata = dev_get_platdata(mtd->dev.parent);
+
+	return pdata->ctrl.dev_ready(mtd_to_nand(mtd));
+}
+
+static void plat_nand_select_chip(struct mtd_info *mtd, int cs)
+{
+	struct platform_nand_data *pdata = dev_get_platdata(mtd->dev.parent);
+
+	pdata->ctrl.select_chip(mtd_to_nand(mtd), cs);
+}
+
+static void plat_nand_write_buf(struct mtd_info *mtd, const uint8_t *buf,
+				int len)
+{
+	struct platform_nand_data *pdata = dev_get_platdata(mtd->dev.parent);
+
+	pdata->ctrl.write_buf(mtd_to_nand(mtd), buf, len);
+}
+
+static void plat_nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+{
+	struct platform_nand_data *pdata = dev_get_platdata(mtd->dev.parent);
+
+	pdata->ctrl.read_buf(mtd_to_nand(mtd), buf, len);
+}
+
 /*
  * Probe for the NAND device.
  */
@@ -62,11 +98,22 @@ static int plat_nand_probe(struct platform_device *pdev)
 
 	data->chip.IO_ADDR_R = data->io_base;
 	data->chip.IO_ADDR_W = data->io_base;
-	data->chip.cmd_ctrl = pdata->ctrl.cmd_ctrl;
-	data->chip.dev_ready = pdata->ctrl.dev_ready;
-	data->chip.select_chip = pdata->ctrl.select_chip;
-	data->chip.write_buf = pdata->ctrl.write_buf;
-	data->chip.read_buf = pdata->ctrl.read_buf;
+
+	if (pdata->ctrl.cmd_ctrl)
+		data->chip.cmd_ctrl = plat_nand_cmd_ctrl;
+
+	if (pdata->ctrl.dev_ready)
+		data->chip.dev_ready = plat_nand_dev_ready;
+
+	if (pdata->ctrl.select_chip)
+		data->chip.select_chip = plat_nand_select_chip;
+
+	if (pdata->ctrl.write_buf)
+		data->chip.write_buf = plat_nand_write_buf;
+
+	if (pdata->ctrl.read_buf)
+		data->chip.read_buf = plat_nand_read_buf;
+
 	data->chip.chip_delay = pdata->chip.chip_delay;
 	data->chip.options |= pdata->chip.options;
 	data->chip.bbt_options |= pdata->chip.bbt_options;
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index d155470f53c8..818cdc0a4dbb 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1595,11 +1595,11 @@ struct platform_device;
 struct platform_nand_ctrl {
 	int (*probe)(struct platform_device *pdev);
 	void (*remove)(struct platform_device *pdev);
-	int (*dev_ready)(struct mtd_info *mtd);
-	void (*select_chip)(struct mtd_info *mtd, int chip);
-	void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl);
-	void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len);
-	void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len);
+	int (*dev_ready)(struct nand_chip *chip);
+	void (*select_chip)(struct nand_chip *chip, int cs);
+	void (*cmd_ctrl)(struct nand_chip *chip, int dat, unsigned int ctrl);
+	void (*write_buf)(struct nand_chip *chip, const uint8_t *buf, int len);
+	void (*read_buf)(struct nand_chip *chip, uint8_t *buf, int len);
 	void *priv;
 };
 
-- 
cgit v1.2.3


From 00ad378f304a091ab2e2df5f944892a6ed558610 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:14 +0200
Subject: mtd: rawnand: Pass a nand_chip object to nand_scan()

Let's make the raw NAND API consistent by patching all helpers to take
a nand_chip object instead of an mtd_info one.

We start with nand_scan().

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 Documentation/driver-api/mtdnand.rst             |  2 +-
 drivers/mtd/nand/raw/ams-delta.c                 |  2 +-
 drivers/mtd/nand/raw/atmel/nand-controller.c     |  2 +-
 drivers/mtd/nand/raw/au1550nd.c                  |  2 +-
 drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c |  2 +-
 drivers/mtd/nand/raw/brcmnand/brcmnand.c         |  2 +-
 drivers/mtd/nand/raw/cafe_nand.c                 |  2 +-
 drivers/mtd/nand/raw/cmx270_nand.c               |  2 +-
 drivers/mtd/nand/raw/cs553x_nand.c               |  2 +-
 drivers/mtd/nand/raw/davinci_nand.c              |  2 +-
 drivers/mtd/nand/raw/denali.c                    |  2 +-
 drivers/mtd/nand/raw/diskonchip.c                |  2 +-
 drivers/mtd/nand/raw/docg4.c                     |  2 +-
 drivers/mtd/nand/raw/fsl_elbc_nand.c             |  2 +-
 drivers/mtd/nand/raw/fsl_ifc_nand.c              |  2 +-
 drivers/mtd/nand/raw/fsl_upm.c                   |  2 +-
 drivers/mtd/nand/raw/fsmc_nand.c                 |  2 +-
 drivers/mtd/nand/raw/gpio.c                      |  2 +-
 drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c       |  2 +-
 drivers/mtd/nand/raw/hisi504_nand.c              |  2 +-
 drivers/mtd/nand/raw/jz4740_nand.c               |  2 +-
 drivers/mtd/nand/raw/jz4780_nand.c               |  2 +-
 drivers/mtd/nand/raw/lpc32xx_mlc.c               |  2 +-
 drivers/mtd/nand/raw/lpc32xx_slc.c               |  2 +-
 drivers/mtd/nand/raw/marvell_nand.c              |  2 +-
 drivers/mtd/nand/raw/mpc5121_nfc.c               |  2 +-
 drivers/mtd/nand/raw/mtk_nand.c                  |  2 +-
 drivers/mtd/nand/raw/mxc_nand.c                  |  2 +-
 drivers/mtd/nand/raw/nand_base.c                 | 21 ++++++++++-----------
 drivers/mtd/nand/raw/nandsim.c                   |  2 +-
 drivers/mtd/nand/raw/ndfc.c                      |  2 +-
 drivers/mtd/nand/raw/nuc900_nand.c               |  2 +-
 drivers/mtd/nand/raw/omap2.c                     |  2 +-
 drivers/mtd/nand/raw/orion_nand.c                |  2 +-
 drivers/mtd/nand/raw/oxnas_nand.c                |  2 +-
 drivers/mtd/nand/raw/pasemi_nand.c               |  2 +-
 drivers/mtd/nand/raw/plat_nand.c                 |  2 +-
 drivers/mtd/nand/raw/qcom_nandc.c                |  2 +-
 drivers/mtd/nand/raw/s3c2410.c                   |  2 +-
 drivers/mtd/nand/raw/sh_flctl.c                  |  2 +-
 drivers/mtd/nand/raw/sharpsl.c                   |  2 +-
 drivers/mtd/nand/raw/sm_common.c                 |  2 +-
 drivers/mtd/nand/raw/socrates_nand.c             |  2 +-
 drivers/mtd/nand/raw/sunxi_nand.c                |  2 +-
 drivers/mtd/nand/raw/tango_nand.c                |  2 +-
 drivers/mtd/nand/raw/tegra_nand.c                |  2 +-
 drivers/mtd/nand/raw/tmio_nand.c                 |  2 +-
 drivers/mtd/nand/raw/txx9ndfmc.c                 |  2 +-
 drivers/mtd/nand/raw/vf610_nfc.c                 |  2 +-
 drivers/mtd/nand/raw/xway_nand.c                 |  2 +-
 drivers/staging/mt29f_spinand/mt29f_spinand.c    |  2 +-
 include/linux/mtd/rawnand.h                      |  7 ++++---
 52 files changed, 64 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/mtdnand.rst b/Documentation/driver-api/mtdnand.rst
index c55a6034c397..1ab6f35b6410 100644
--- a/Documentation/driver-api/mtdnand.rst
+++ b/Documentation/driver-api/mtdnand.rst
@@ -246,7 +246,7 @@ necessary information about the device.
         this->eccmode = NAND_ECC_SOFT;
 
         /* Scan to find existence of the device */
-        if (nand_scan (board_mtd, 1)) {
+        if (nand_scan (this, 1)) {
             err = -ENXIO;
             goto out_ior;
         }
diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c
index 37a3cc21c7bc..24ba7296ec08 100644
--- a/drivers/mtd/nand/raw/ams-delta.c
+++ b/drivers/mtd/nand/raw/ams-delta.c
@@ -235,7 +235,7 @@ static int ams_delta_init(struct platform_device *pdev)
 		goto out_gpio;
 
 	/* Scan to find existence of the device */
-	err = nand_scan(ams_delta_mtd, 1);
+	err = nand_scan(this, 1);
 	if (err)
 		goto out_mtd;
 
diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index 8b9e05ac24ee..cef22a79f3a6 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -1683,7 +1683,7 @@ atmel_nand_controller_add_nand(struct atmel_nand_controller *nc,
 
 	nc->caps->ops->nand_init(nc, nand);
 
-	ret = nand_scan(mtd, nand->numcs);
+	ret = nand_scan(chip, nand->numcs);
 	if (ret) {
 		dev_err(nc->dev, "NAND scan failed: %d\n", ret);
 		return ret;
diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c
index 32c0440141fb..614f5d447ba5 100644
--- a/drivers/mtd/nand/raw/au1550nd.c
+++ b/drivers/mtd/nand/raw/au1550nd.c
@@ -451,7 +451,7 @@ static int au1550nd_probe(struct platform_device *pdev)
 	this->write_buf = (pd->devwidth) ? au_write_buf16 : au_write_buf;
 	this->read_buf = (pd->devwidth) ? au_read_buf16 : au_read_buf;
 
-	ret = nand_scan(mtd, 1);
+	ret = nand_scan(this, 1);
 	if (ret) {
 		dev_err(&pdev->dev, "NAND scan failed with %d\n", ret);
 		goto out3;
diff --git a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
index 60874de430eb..9b62bc2d25a0 100644
--- a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
@@ -423,7 +423,7 @@ int bcm47xxnflash_ops_bcm4706_init(struct bcm47xxnflash *b47n)
 			(w4 << 24 | w3 << 18 | w2 << 12 | w1 << 6 | w0));
 
 	/* Scan NAND */
-	err = nand_scan(nand_to_mtd(&b47n->nand_chip), 1);
+	err = nand_scan(&b47n->nand_chip, 1);
 	if (err) {
 		pr_err("Could not scan NAND flash: %d\n", err);
 		goto exit;
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index 4b90d5b380c2..a9a94c102654 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -2301,7 +2301,7 @@ static int brcmnand_init_cs(struct brcmnand_host *host, struct device_node *dn)
 	nand_writereg(ctrl, cfg_offs,
 		      nand_readreg(ctrl, cfg_offs) & ~CFG_BUS_WIDTH);
 
-	ret = nand_scan(mtd, 1);
+	ret = nand_scan(chip, 1);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index 1dbe43adcfe7..e497b95d624e 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -783,7 +783,7 @@ static int cafe_nand_probe(struct pci_dev *pdev,
 
 	/* Scan to find existence of the device */
 	cafe->nand.dummy_controller.ops = &cafe_nand_controller_ops;
-	err = nand_scan(mtd, 2);
+	err = nand_scan(&cafe->nand, 2);
 	if (err)
 		goto out_irq;
 
diff --git a/drivers/mtd/nand/raw/cmx270_nand.c b/drivers/mtd/nand/raw/cmx270_nand.c
index b66e254b6802..e92c0f113eb3 100644
--- a/drivers/mtd/nand/raw/cmx270_nand.c
+++ b/drivers/mtd/nand/raw/cmx270_nand.c
@@ -193,7 +193,7 @@ static int __init cmx270_init(void)
 	this->write_buf = cmx270_write_buf;
 
 	/* Scan to find existence of the device */
-	ret = nand_scan(cmx270_nand_mtd, 1);
+	ret = nand_scan(this, 1);
 	if (ret) {
 		pr_notice("No NAND device\n");
 		goto err_scan;
diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c
index beafad62e7d5..4065bcd12e64 100644
--- a/drivers/mtd/nand/raw/cs553x_nand.c
+++ b/drivers/mtd/nand/raw/cs553x_nand.c
@@ -241,7 +241,7 @@ static int __init cs553x_init_one(int cs, int mmio, unsigned long adr)
 	}
 
 	/* Scan to find existence of the device */
-	err = nand_scan(new_mtd, 1);
+	err = nand_scan(this, 1);
 	if (err)
 		goto out_free;
 
diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index 40145e206a6b..1021624195f7 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -807,7 +807,7 @@ static int nand_davinci_probe(struct platform_device *pdev)
 
 	/* Scan to find existence of the device(s) */
 	info->chip.dummy_controller.ops = &davinci_nand_controller_ops;
-	ret = nand_scan(mtd, pdata->mask_chipsel ? 2 : 1);
+	ret = nand_scan(&info->chip, pdata->mask_chipsel ? 2 : 1);
 	if (ret < 0) {
 		dev_dbg(&pdev->dev, "no NAND chip(s) found\n");
 		return ret;
diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index 177453dfaa07..2e8a825c740e 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -1355,7 +1355,7 @@ int denali_init(struct denali_nand_info *denali)
 		chip->setup_data_interface = denali_setup_data_interface;
 
 	chip->dummy_controller.ops = &denali_controller_ops;
-	ret = nand_scan(mtd, denali->max_banks);
+	ret = nand_scan(chip, denali->max_banks);
 	if (ret)
 		goto disable_irq;
 
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 3c46188dd6d2..9159748a2ef0 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -1620,7 +1620,7 @@ static int __init doc_probe(unsigned long physadr)
 	else
 		numchips = doc2001_init(mtd);
 
-	if ((ret = nand_scan(mtd, numchips)) || (ret = doc->late_init(mtd))) {
+	if ((ret = nand_scan(nand, numchips)) || (ret = doc->late_init(mtd))) {
 		/* DBB note: i believe nand_release is necessary here, as
 		   buffers may have been allocated in nand_base.  Check with
 		   Thomas. FIX ME! */
diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c
index 427fcbc1b71c..69f60755f38a 100644
--- a/drivers/mtd/nand/raw/docg4.c
+++ b/drivers/mtd/nand/raw/docg4.c
@@ -1391,7 +1391,7 @@ static int __init probe_docg4(struct platform_device *pdev)
 	 * ->attach_chip callback.
 	 */
 	nand->dummy_controller.ops = &docg4_controller_ops;
-	retval = nand_scan(mtd, 0);
+	retval = nand_scan(nand, 0);
 	if (retval)
 		goto free_nand;
 
diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
index 55f449b711fd..541343d142e0 100644
--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
@@ -915,7 +915,7 @@ static int fsl_elbc_nand_probe(struct platform_device *pdev)
 		goto err;
 
 	priv->chip.controller->ops = &fsl_elbc_controller_ops;
-	ret = nand_scan(mtd, 1);
+	ret = nand_scan(&priv->chip, 1);
 	if (ret)
 		goto err;
 
diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index 7e7729df7827..ad010c72df78 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -1079,7 +1079,7 @@ static int fsl_ifc_nand_probe(struct platform_device *dev)
 		goto err;
 
 	priv->chip.controller->ops = &fsl_ifc_controller_ops;
-	ret = nand_scan(mtd, 1);
+	ret = nand_scan(&priv->chip, 1);
 	if (ret)
 		goto err;
 
diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c
index ca82727eca94..99edae365d16 100644
--- a/drivers/mtd/nand/raw/fsl_upm.c
+++ b/drivers/mtd/nand/raw/fsl_upm.c
@@ -191,7 +191,7 @@ static int fun_chip_init(struct fsl_upm_nand *fun,
 		goto err;
 	}
 
-	ret = nand_scan(mtd, fun->mchip_count);
+	ret = nand_scan(&fun->chip, fun->mchip_count);
 	if (ret)
 		goto err;
 
diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c
index f418236fa020..9991e3b8e237 100644
--- a/drivers/mtd/nand/raw/fsmc_nand.c
+++ b/drivers/mtd/nand/raw/fsmc_nand.c
@@ -1125,7 +1125,7 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	 * Scan to find existence of the device
 	 */
 	nand->dummy_controller.ops = &fsmc_nand_controller_ops;
-	ret = nand_scan(mtd, 1);
+	ret = nand_scan(nand, 1);
 	if (ret)
 		goto release_dma_write_chan;
 
diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c
index 2780af26d9ab..983d3be48019 100644
--- a/drivers/mtd/nand/raw/gpio.c
+++ b/drivers/mtd/nand/raw/gpio.c
@@ -289,7 +289,7 @@ static int gpio_nand_probe(struct platform_device *pdev)
 	if (gpiomtd->nwp && !IS_ERR(gpiomtd->nwp))
 		gpiod_direction_output(gpiomtd->nwp, 1);
 
-	ret = nand_scan(mtd, 1);
+	ret = nand_scan(chip, 1);
 	if (ret)
 		goto err_wp;
 
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index 1c1ebbc82824..7af207bc3ab5 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -1934,7 +1934,7 @@ static int gpmi_nand_init(struct gpmi_nand_data *this)
 		goto err_out;
 
 	chip->dummy_controller.ops = &gpmi_nand_controller_ops;
-	ret = nand_scan(mtd, GPMI_IS_MX6(this) ? 2 : 1);
+	ret = nand_scan(chip, GPMI_IS_MX6(this) ? 2 : 1);
 	if (ret)
 		goto err_out;
 
diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c
index 0f5c48aa5673..81baa2e6ae56 100644
--- a/drivers/mtd/nand/raw/hisi504_nand.c
+++ b/drivers/mtd/nand/raw/hisi504_nand.c
@@ -801,7 +801,7 @@ static int hisi_nfc_probe(struct platform_device *pdev)
 	}
 
 	chip->dummy_controller.ops = &hisi_nfc_controller_ops;
-	ret = nand_scan(mtd, max_chips);
+	ret = nand_scan(chip, max_chips);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c
index a7515452bc59..75bb26645c82 100644
--- a/drivers/mtd/nand/raw/jz4740_nand.c
+++ b/drivers/mtd/nand/raw/jz4740_nand.c
@@ -331,7 +331,7 @@ static int jz_nand_detect_bank(struct platform_device *pdev,
 
 	if (chipnr == 0) {
 		/* Detect first chip. */
-		ret = nand_scan(mtd, 1);
+		ret = nand_scan(chip, 1);
 		if (ret)
 			goto notfound_id;
 
diff --git a/drivers/mtd/nand/raw/jz4780_nand.c b/drivers/mtd/nand/raw/jz4780_nand.c
index ac6239588f26..80f29b28bcc4 100644
--- a/drivers/mtd/nand/raw/jz4780_nand.c
+++ b/drivers/mtd/nand/raw/jz4780_nand.c
@@ -286,7 +286,7 @@ static int jz4780_nand_init_chip(struct platform_device *pdev,
 	nand_set_flash_node(chip, np);
 
 	chip->controller->ops = &jz4780_nand_controller_ops;
-	ret = nand_scan(mtd, 1);
+	ret = nand_scan(chip, 1);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index e82abada130a..453a83b82d73 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -802,7 +802,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
 	 * SMALL block or LARGE block.
 	 */
 	nand_chip->dummy_controller.ops = &lpc32xx_nand_controller_ops;
-	res = nand_scan(mtd, 1);
+	res = nand_scan(nand_chip, 1);
 	if (res)
 		goto free_irq;
 
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index a4e8b7e75135..ad6eff0591d2 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -925,7 +925,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
 
 	/* Find NAND device */
 	chip->dummy_controller.ops = &lpc32xx_nand_controller_ops;
-	res = nand_scan(mtd, 1);
+	res = nand_scan(chip, 1);
 	if (res)
 		goto release_dma;
 
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index 270f281067ab..dde64609415f 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -2605,7 +2605,7 @@ static int marvell_nand_chip_init(struct device *dev, struct marvell_nfc *nfc,
 
 	chip->options |= NAND_BUSWIDTH_AUTO;
 
-	ret = nand_scan(mtd, marvell_nand->nsels);
+	ret = nand_scan(chip, marvell_nand->nsels);
 	if (ret) {
 		dev_err(dev, "could not scan the nand chip\n");
 		return ret;
diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c
index c0be9c30b4cf..efaaec462bd7 100644
--- a/drivers/mtd/nand/raw/mpc5121_nfc.c
+++ b/drivers/mtd/nand/raw/mpc5121_nfc.c
@@ -767,7 +767,7 @@ static int mpc5121_nfc_probe(struct platform_device *op)
 	}
 
 	/* Detect NAND chips */
-	retval = nand_scan(mtd, be32_to_cpup(chips_no));
+	retval = nand_scan(chip, be32_to_cpup(chips_no));
 	if (retval) {
 		dev_err(dev, "NAND Flash not found !\n");
 		goto error;
diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c
index 57b5ed1699e3..7a2ce405f914 100644
--- a/drivers/mtd/nand/raw/mtk_nand.c
+++ b/drivers/mtd/nand/raw/mtk_nand.c
@@ -1365,7 +1365,7 @@ static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc,
 
 	mtk_nfc_hw_init(nfc);
 
-	ret = nand_scan(mtd, nsels);
+	ret = nand_scan(nand, nsels);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index a0884a621053..1ca03d88adf1 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -1887,7 +1887,7 @@ static int mxcnd_probe(struct platform_device *pdev)
 
 	/* Scan the NAND device */
 	this->dummy_controller.ops = &mxcnd_controller_ops;
-	err = nand_scan(mtd, is_imx25_nfc(host) ? 4 : 1);
+	err = nand_scan(this, is_imx25_nfc(host) ? 4 : 1);
 	if (err)
 		goto escan;
 
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 306ef6ec41db..974cbfbde5e2 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5960,7 +5960,7 @@ static int nand_dt_init(struct nand_chip *chip)
 
 /**
  * nand_scan_ident - Scan for the NAND device
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @maxchips: number of chips to scan for
  * @table: alternative NAND ID table
  *
@@ -5972,11 +5972,11 @@ static int nand_dt_init(struct nand_chip *chip)
  * prevented dynamic allocations during this phase which was unconvenient and
  * as been banned for the benefit of the ->init_ecc()/cleanup_ecc() hooks.
  */
-static int nand_scan_ident(struct mtd_info *mtd, int maxchips,
+static int nand_scan_ident(struct nand_chip *chip, int maxchips,
 			   struct nand_flash_dev *table)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int i, nand_maf_id, nand_dev_id;
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	int ret;
 
 	/* Enforce the right timings for reset/detection */
@@ -6430,15 +6430,15 @@ static bool nand_ecc_strength_good(struct mtd_info *mtd)
 
 /**
  * nand_scan_tail - Scan for the NAND device
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  *
  * This is the second phase of the normal nand_scan() function. It fills out
  * all the uninitialized function pointers with the defaults and scans for a
  * bad block table if appropriate.
  */
-static int nand_scan_tail(struct mtd_info *mtd)
+static int nand_scan_tail(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
 	int ret, i;
 
@@ -6777,7 +6777,7 @@ static void nand_detach(struct nand_chip *chip)
 
 /**
  * nand_scan_with_ids - [NAND Interface] Scan for the NAND device
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @maxchips: number of chips to scan for. @nand_scan_ident() will not be run if
  *	      this parameter is zero (useful for specific drivers that must
  *	      handle this part of the process themselves, e.g docg4).
@@ -6787,14 +6787,13 @@ static void nand_detach(struct nand_chip *chip)
  * The flash ID is read and the mtd/chip structures are filled with the
  * appropriate values.
  */
-int nand_scan_with_ids(struct mtd_info *mtd, int maxchips,
+int nand_scan_with_ids(struct nand_chip *chip, int maxchips,
 		       struct nand_flash_dev *ids)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	int ret;
 
 	if (maxchips) {
-		ret = nand_scan_ident(mtd, maxchips, ids);
+		ret = nand_scan_ident(chip, maxchips, ids);
 		if (ret)
 			return ret;
 	}
@@ -6803,7 +6802,7 @@ int nand_scan_with_ids(struct mtd_info *mtd, int maxchips,
 	if (ret)
 		goto cleanup_ident;
 
-	ret = nand_scan_tail(mtd);
+	ret = nand_scan_tail(chip);
 	if (ret)
 		goto detach_chip;
 
diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c
index 47a81d1b1397..60761175e531 100644
--- a/drivers/mtd/nand/raw/nandsim.c
+++ b/drivers/mtd/nand/raw/nandsim.c
@@ -2309,7 +2309,7 @@ static int __init ns_init_module(void)
 		goto error;
 
 	chip->dummy_controller.ops = &ns_controller_ops;
-	retval = nand_scan(nsmtd, 1);
+	retval = nand_scan(chip, 1);
 	if (retval) {
 		NS_ERR("Could not scan NAND Simulator device\n");
 		goto error;
diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index b193f373f235..7ce7f37dc67a 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -181,7 +181,7 @@ static int ndfc_chip_init(struct ndfc_controller *ndfc,
 		goto err;
 	}
 
-	ret = nand_scan(mtd, 1);
+	ret = nand_scan(chip, 1);
 	if (ret)
 		goto err;
 
diff --git a/drivers/mtd/nand/raw/nuc900_nand.c b/drivers/mtd/nand/raw/nuc900_nand.c
index af5b32c9a791..41ed993b9523 100644
--- a/drivers/mtd/nand/raw/nuc900_nand.c
+++ b/drivers/mtd/nand/raw/nuc900_nand.c
@@ -270,7 +270,7 @@ static int nuc900_nand_probe(struct platform_device *pdev)
 
 	nuc900_nand_enable(nuc900_nand);
 
-	if (nand_scan(mtd, 1))
+	if (nand_scan(chip, 1))
 		return -ENXIO;
 
 	mtd_device_register(mtd, partitions, ARRAY_SIZE(partitions));
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index 4546ac0bed4a..c2aff2492fa1 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -2254,7 +2254,7 @@ static int omap_nand_probe(struct platform_device *pdev)
 	/* scan NAND device connected to chip controller */
 	nand_chip->options |= info->devsize & NAND_BUSWIDTH_16;
 
-	err = nand_scan(mtd, 1);
+	err = nand_scan(nand_chip, 1);
 	if (err)
 		goto return_error;
 
diff --git a/drivers/mtd/nand/raw/orion_nand.c b/drivers/mtd/nand/raw/orion_nand.c
index 52d435285a3f..256a6b018bdc 100644
--- a/drivers/mtd/nand/raw/orion_nand.c
+++ b/drivers/mtd/nand/raw/orion_nand.c
@@ -174,7 +174,7 @@ static int __init orion_nand_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	ret = nand_scan(mtd, 1);
+	ret = nand_scan(nc, 1);
 	if (ret)
 		goto no_dev;
 
diff --git a/drivers/mtd/nand/raw/oxnas_nand.c b/drivers/mtd/nand/raw/oxnas_nand.c
index 01b00bb69c1e..9aeb024c2a0e 100644
--- a/drivers/mtd/nand/raw/oxnas_nand.c
+++ b/drivers/mtd/nand/raw/oxnas_nand.c
@@ -142,7 +142,7 @@ static int oxnas_nand_probe(struct platform_device *pdev)
 		chip->chip_delay = 30;
 
 		/* Scan to find existence of the device */
-		err = nand_scan(mtd, 1);
+		err = nand_scan(chip, 1);
 		if (err)
 			goto err_clk_unprepare;
 
diff --git a/drivers/mtd/nand/raw/pasemi_nand.c b/drivers/mtd/nand/raw/pasemi_nand.c
index a47a7e4bd25a..eca4e41d2be3 100644
--- a/drivers/mtd/nand/raw/pasemi_nand.c
+++ b/drivers/mtd/nand/raw/pasemi_nand.c
@@ -156,7 +156,7 @@ static int pasemi_nand_probe(struct platform_device *ofdev)
 	chip->bbt_options = NAND_BBT_USE_FLASH;
 
 	/* Scan to find existence of the device */
-	err = nand_scan(pasemi_nand_mtd, 1);
+	err = nand_scan(chip, 1);
 	if (err)
 		goto out_lpc;
 
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index 24f904300c44..c9a23fb21718 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -131,7 +131,7 @@ static int plat_nand_probe(struct platform_device *pdev)
 	}
 
 	/* Scan to find existence of the device */
-	err = nand_scan(mtd, pdata->chip.nr_chips);
+	err = nand_scan(&data->chip, pdata->chip.nr_chips);
 	if (err)
 		goto out;
 
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index d2831b0b28fb..d800347e74da 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -2834,7 +2834,7 @@ static int qcom_nand_host_init_and_register(struct qcom_nand_controller *nandc,
 	/* set up initial status value */
 	host->status = NAND_STATUS_READY | NAND_STATUS_WP;
 
-	ret = nand_scan(mtd, 1);
+	ret = nand_scan(chip, 1);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index c21e8892394a..7f30d801d642 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -1170,7 +1170,7 @@ static int s3c24xx_nand_probe(struct platform_device *pdev)
 		mtd->dev.parent = &pdev->dev;
 		s3c2410_nand_init_chip(info, nmtd, sets);
 
-		err = nand_scan(mtd, sets ? sets->nr_chips : 1);
+		err = nand_scan(&nmtd->chip, sets ? sets->nr_chips : 1);
 		if (err)
 			goto exit_error;
 
diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c
index ef3036d9bf15..abcc3be89b89 100644
--- a/drivers/mtd/nand/raw/sh_flctl.c
+++ b/drivers/mtd/nand/raw/sh_flctl.c
@@ -1193,7 +1193,7 @@ static int flctl_probe(struct platform_device *pdev)
 	flctl_setup_dma(flctl);
 
 	nand->dummy_controller.ops = &flctl_nand_controller_ops;
-	ret = nand_scan(flctl_mtd, 1);
+	ret = nand_scan(nand, 1);
 	if (ret)
 		goto err_chip;
 
diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c
index fc171b17a39b..4afacb0dcf00 100644
--- a/drivers/mtd/nand/raw/sharpsl.c
+++ b/drivers/mtd/nand/raw/sharpsl.c
@@ -171,7 +171,7 @@ static int sharpsl_nand_probe(struct platform_device *pdev)
 	this->ecc.correct = nand_correct_data;
 
 	/* Scan to find existence of the device */
-	err = nand_scan(mtd, 1);
+	err = nand_scan(this, 1);
 	if (err)
 		goto err_scan;
 
diff --git a/drivers/mtd/nand/raw/sm_common.c b/drivers/mtd/nand/raw/sm_common.c
index 73aafe8c3ef3..02ac6e9b2d16 100644
--- a/drivers/mtd/nand/raw/sm_common.c
+++ b/drivers/mtd/nand/raw/sm_common.c
@@ -195,7 +195,7 @@ int sm_register_device(struct mtd_info *mtd, int smartmedia)
 	/* Scan for card properties */
 	chip->dummy_controller.ops = &sm_controller_ops;
 	flash_ids = smartmedia ? nand_smartmedia_flash_ids : nand_xd_flash_ids;
-	ret = nand_scan_with_ids(mtd, 1, flash_ids);
+	ret = nand_scan_with_ids(chip, 1, flash_ids);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/nand/raw/socrates_nand.c b/drivers/mtd/nand/raw/socrates_nand.c
index 36c45aa21f66..e335560f87af 100644
--- a/drivers/mtd/nand/raw/socrates_nand.c
+++ b/drivers/mtd/nand/raw/socrates_nand.c
@@ -173,7 +173,7 @@ static int socrates_nand_probe(struct platform_device *ofdev)
 
 	dev_set_drvdata(&ofdev->dev, host);
 
-	res = nand_scan(mtd, 1);
+	res = nand_scan(nand_chip, 1);
 	if (res)
 		goto out;
 
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index 1f0b7ee38df5..179f74b6edf6 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -1940,7 +1940,7 @@ static int sunxi_nand_chip_init(struct device *dev, struct sunxi_nfc *nfc,
 	mtd = nand_to_mtd(nand);
 	mtd->dev.parent = dev;
 
-	ret = nand_scan(mtd, nsels);
+	ret = nand_scan(nand, nsels);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c
index 72698691727d..45beb87aec93 100644
--- a/drivers/mtd/nand/raw/tango_nand.c
+++ b/drivers/mtd/nand/raw/tango_nand.c
@@ -588,7 +588,7 @@ static int chip_init(struct device *dev, struct device_node *np)
 	mtd_set_ooblayout(mtd, &tango_nand_ooblayout_ops);
 	mtd->dev.parent = dev;
 
-	err = nand_scan(mtd, 1);
+	err = nand_scan(chip, 1);
 	if (err)
 		return err;
 
diff --git a/drivers/mtd/nand/raw/tegra_nand.c b/drivers/mtd/nand/raw/tegra_nand.c
index 79da1efc88d1..5dcee20e2a8c 100644
--- a/drivers/mtd/nand/raw/tegra_nand.c
+++ b/drivers/mtd/nand/raw/tegra_nand.c
@@ -1119,7 +1119,7 @@ static int tegra_nand_chips_init(struct device *dev,
 	chip->select_chip = tegra_nand_select_chip;
 	chip->setup_data_interface = tegra_nand_setup_data_interface;
 
-	ret = nand_scan(mtd, 1);
+	ret = nand_scan(chip, 1);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index dcaa924502de..6df499a239ae 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -436,7 +436,7 @@ static int tmio_probe(struct platform_device *dev)
 	nand_chip->waitfunc = tmio_nand_wait;
 
 	/* Scan to find existence of the device */
-	retval = nand_scan(mtd, 1);
+	retval = nand_scan(nand_chip, 1);
 	if (retval)
 		goto err_irq;
 
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index 4d61a14fcb65..169e8bcee61e 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -359,7 +359,7 @@ static int __init txx9ndfmc_probe(struct platform_device *dev)
 		if (plat->wide_mask & (1 << i))
 			chip->options |= NAND_BUSWIDTH_16;
 
-		if (nand_scan(mtd, 1)) {
+		if (nand_scan(chip, 1)) {
 			kfree(txx9_priv->mtdname);
 			kfree(txx9_priv);
 			continue;
diff --git a/drivers/mtd/nand/raw/vf610_nfc.c b/drivers/mtd/nand/raw/vf610_nfc.c
index 6f6dcbf9095b..3b486f4ce868 100644
--- a/drivers/mtd/nand/raw/vf610_nfc.c
+++ b/drivers/mtd/nand/raw/vf610_nfc.c
@@ -892,7 +892,7 @@ static int vf610_nfc_probe(struct platform_device *pdev)
 
 	/* Scan the NAND chip */
 	chip->dummy_controller.ops = &vf610_nfc_controller_ops;
-	err = nand_scan(mtd, 1);
+	err = nand_scan(chip, 1);
 	if (err)
 		goto err_disable_clk;
 
diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c
index 9926b4e3d69d..e670d3b5a646 100644
--- a/drivers/mtd/nand/raw/xway_nand.c
+++ b/drivers/mtd/nand/raw/xway_nand.c
@@ -205,7 +205,7 @@ static int xway_nand_probe(struct platform_device *pdev)
 		    | cs_flag, EBU_NAND_CON);
 
 	/* Scan to find existence of the device */
-	err = nand_scan(mtd, 1);
+	err = nand_scan(&data->chip, 1);
 	if (err)
 		return err;
 
diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c
index 448478451c4c..b50788b2d1d9 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.c
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c
@@ -934,7 +934,7 @@ static int spinand_probe(struct spi_device *spi_nand)
 	mtd_set_ooblayout(mtd, &spinand_oob_64_ops);
 #endif
 
-	if (nand_scan(mtd, 1))
+	if (nand_scan(chip, 1))
 		return -ENXIO;
 
 	return mtd_device_register(mtd, NULL, 0);
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 818cdc0a4dbb..733b228d94a5 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -24,15 +24,16 @@
 #include <linux/of.h>
 #include <linux/types.h>
 
+struct nand_chip;
 struct nand_flash_dev;
 
 /* Scan and identify a NAND device */
-int nand_scan_with_ids(struct mtd_info *mtd, int max_chips,
+int nand_scan_with_ids(struct nand_chip *chip, int max_chips,
 		       struct nand_flash_dev *ids);
 
-static inline int nand_scan(struct mtd_info *mtd, int max_chips)
+static inline int nand_scan(struct nand_chip *chip, int max_chips)
 {
-	return nand_scan_with_ids(mtd, max_chips, NULL);
+	return nand_scan_with_ids(chip, max_chips, NULL);
 }
 
 /* Internal helper for board drivers which need to override command function */
-- 
cgit v1.2.3


From 59ac276f22270fb2094910f9a734c17f41c25e70 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:15 +0200
Subject: mtd: rawnand: Pass a nand_chip object to nand_release()

Let's make the raw NAND API consistent by patching all helpers to
take a nand_chip object instead of an mtd_info one.

Now is nand_release()'s turn.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 Documentation/driver-api/mtdnand.rst       | 2 +-
 drivers/mtd/nand/raw/ams-delta.c           | 2 +-
 drivers/mtd/nand/raw/au1550nd.c            | 2 +-
 drivers/mtd/nand/raw/bcm47xxnflash/main.c  | 2 +-
 drivers/mtd/nand/raw/brcmnand/brcmnand.c   | 2 +-
 drivers/mtd/nand/raw/cafe_nand.c           | 2 +-
 drivers/mtd/nand/raw/cmx270_nand.c         | 2 +-
 drivers/mtd/nand/raw/cs553x_nand.c         | 2 +-
 drivers/mtd/nand/raw/davinci_nand.c        | 2 +-
 drivers/mtd/nand/raw/denali.c              | 4 +---
 drivers/mtd/nand/raw/diskonchip.c          | 4 ++--
 drivers/mtd/nand/raw/docg4.c               | 2 +-
 drivers/mtd/nand/raw/fsl_elbc_nand.c       | 3 +--
 drivers/mtd/nand/raw/fsl_ifc_nand.c        | 3 +--
 drivers/mtd/nand/raw/fsl_upm.c             | 2 +-
 drivers/mtd/nand/raw/fsmc_nand.c           | 2 +-
 drivers/mtd/nand/raw/gpio.c                | 2 +-
 drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c | 2 +-
 drivers/mtd/nand/raw/hisi504_nand.c        | 3 +--
 drivers/mtd/nand/raw/jz4740_nand.c         | 2 +-
 drivers/mtd/nand/raw/jz4780_nand.c         | 4 ++--
 drivers/mtd/nand/raw/lpc32xx_mlc.c         | 3 +--
 drivers/mtd/nand/raw/lpc32xx_slc.c         | 3 +--
 drivers/mtd/nand/raw/marvell_nand.c        | 4 ++--
 drivers/mtd/nand/raw/mpc5121_nfc.c         | 2 +-
 drivers/mtd/nand/raw/mtk_nand.c            | 4 ++--
 drivers/mtd/nand/raw/mxc_nand.c            | 2 +-
 drivers/mtd/nand/raw/nand_base.c           | 8 ++++----
 drivers/mtd/nand/raw/nandsim.c             | 4 ++--
 drivers/mtd/nand/raw/ndfc.c                | 2 +-
 drivers/mtd/nand/raw/nuc900_nand.c         | 2 +-
 drivers/mtd/nand/raw/omap2.c               | 2 +-
 drivers/mtd/nand/raw/orion_nand.c          | 5 ++---
 drivers/mtd/nand/raw/oxnas_nand.c          | 4 ++--
 drivers/mtd/nand/raw/pasemi_nand.c         | 2 +-
 drivers/mtd/nand/raw/plat_nand.c           | 4 ++--
 drivers/mtd/nand/raw/qcom_nandc.c          | 2 +-
 drivers/mtd/nand/raw/r852.c                | 4 ++--
 drivers/mtd/nand/raw/s3c2410.c             | 2 +-
 drivers/mtd/nand/raw/sh_flctl.c            | 2 +-
 drivers/mtd/nand/raw/sharpsl.c             | 4 ++--
 drivers/mtd/nand/raw/socrates_nand.c       | 5 ++---
 drivers/mtd/nand/raw/sunxi_nand.c          | 4 ++--
 drivers/mtd/nand/raw/tango_nand.c          | 2 +-
 drivers/mtd/nand/raw/tmio_nand.c           | 4 ++--
 drivers/mtd/nand/raw/txx9ndfmc.c           | 2 +-
 drivers/mtd/nand/raw/vf610_nfc.c           | 2 +-
 drivers/mtd/nand/raw/xway_nand.c           | 4 ++--
 include/linux/mtd/rawnand.h                | 2 +-
 49 files changed, 66 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/mtdnand.rst b/Documentation/driver-api/mtdnand.rst
index 1ab6f35b6410..5470a3d6bd9e 100644
--- a/Documentation/driver-api/mtdnand.rst
+++ b/Documentation/driver-api/mtdnand.rst
@@ -277,7 +277,7 @@ unregisters the partitions in the MTD layer.
     static void __exit board_cleanup (void)
     {
         /* Release resources, unregister device */
-        nand_release (board_mtd);
+        nand_release (mtd_to_nand(board_mtd));
 
         /* unmap physical address */
         iounmap(baseaddr);
diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c
index 24ba7296ec08..acf7971e815d 100644
--- a/drivers/mtd/nand/raw/ams-delta.c
+++ b/drivers/mtd/nand/raw/ams-delta.c
@@ -264,7 +264,7 @@ static int ams_delta_cleanup(struct platform_device *pdev)
 	void __iomem *io_base = platform_get_drvdata(pdev);
 
 	/* Release resources, unregister device */
-	nand_release(ams_delta_mtd);
+	nand_release(mtd_to_nand(ams_delta_mtd));
 
 	gpio_free_array(_mandatory_gpio, ARRAY_SIZE(_mandatory_gpio));
 	gpio_free(AMS_DELTA_GPIO_PIN_NAND_RB);
diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c
index 614f5d447ba5..d277a141c7d3 100644
--- a/drivers/mtd/nand/raw/au1550nd.c
+++ b/drivers/mtd/nand/raw/au1550nd.c
@@ -477,7 +477,7 @@ static int au1550nd_remove(struct platform_device *pdev)
 	struct au1550nd_ctx *ctx = platform_get_drvdata(pdev);
 	struct resource *r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 
-	nand_release(nand_to_mtd(&ctx->chip));
+	nand_release(&ctx->chip);
 	iounmap(ctx->base);
 	release_mem_region(r->start, 0x1000);
 	kfree(ctx);
diff --git a/drivers/mtd/nand/raw/bcm47xxnflash/main.c b/drivers/mtd/nand/raw/bcm47xxnflash/main.c
index fb31429b70a9..d79694160845 100644
--- a/drivers/mtd/nand/raw/bcm47xxnflash/main.c
+++ b/drivers/mtd/nand/raw/bcm47xxnflash/main.c
@@ -65,7 +65,7 @@ static int bcm47xxnflash_remove(struct platform_device *pdev)
 {
 	struct bcm47xxnflash *nflash = platform_get_drvdata(pdev);
 
-	nand_release(nand_to_mtd(&nflash->nand_chip));
+	nand_release(&nflash->nand_chip);
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index a9a94c102654..19e6e918f896 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -2616,7 +2616,7 @@ int brcmnand_remove(struct platform_device *pdev)
 	struct brcmnand_host *host;
 
 	list_for_each_entry(host, &ctrl->host_list, node)
-		nand_release(nand_to_mtd(&host->chip));
+		nand_release(&host->chip);
 
 	clk_disable_unprepare(ctrl->clk);
 
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index e497b95d624e..3304594177c6 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -819,7 +819,7 @@ static void cafe_nand_remove(struct pci_dev *pdev)
 	/* Disable NAND IRQ in global IRQ mask register */
 	cafe_writel(cafe, ~1 & cafe_readl(cafe, GLOBAL_IRQ_MASK), GLOBAL_IRQ_MASK);
 	free_irq(pdev->irq, mtd);
-	nand_release(mtd);
+	nand_release(chip);
 	free_rs(cafe->rs);
 	pci_iounmap(pdev, cafe->mmio);
 	dma_free_coherent(&cafe->pdev->dev, 2112, cafe->dmabuf, cafe->dmaaddr);
diff --git a/drivers/mtd/nand/raw/cmx270_nand.c b/drivers/mtd/nand/raw/cmx270_nand.c
index e92c0f113eb3..2eb933a8f99e 100644
--- a/drivers/mtd/nand/raw/cmx270_nand.c
+++ b/drivers/mtd/nand/raw/cmx270_nand.c
@@ -228,7 +228,7 @@ module_init(cmx270_init);
 static void __exit cmx270_cleanup(void)
 {
 	/* Release resources, unregister device */
-	nand_release(cmx270_nand_mtd);
+	nand_release(mtd_to_nand(cmx270_nand_mtd));
 
 	gpio_free(GPIO_NAND_RB);
 	gpio_free(GPIO_NAND_CS);
diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c
index 4065bcd12e64..d4be416bb2fa 100644
--- a/drivers/mtd/nand/raw/cs553x_nand.c
+++ b/drivers/mtd/nand/raw/cs553x_nand.c
@@ -336,7 +336,7 @@ static void __exit cs553x_cleanup(void)
 		mmio_base = this->IO_ADDR_R;
 
 		/* Release resources, unregister device */
-		nand_release(mtd);
+		nand_release(this);
 		kfree(mtd->name);
 		cs553x_mtd[i] = NULL;
 
diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index 1021624195f7..66d3d5966013 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -841,7 +841,7 @@ static int nand_davinci_remove(struct platform_device *pdev)
 		ecc4_busy = false;
 	spin_unlock_irq(&davinci_nand_lock);
 
-	nand_release(nand_to_mtd(&info->chip));
+	nand_release(&info->chip);
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index 2e8a825c740e..958619fd4d1b 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -1378,9 +1378,7 @@ EXPORT_SYMBOL(denali_init);
 
 void denali_remove(struct denali_nand_info *denali)
 {
-	struct mtd_info *mtd = nand_to_mtd(&denali->nand);
-
-	nand_release(mtd);
+	nand_release(&denali->nand);
 	denali_disable_irq(denali);
 }
 EXPORT_SYMBOL(denali_remove);
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 9159748a2ef0..43d1e08133ce 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -1627,7 +1627,7 @@ static int __init doc_probe(unsigned long physadr)
 		/* nand_release will call mtd_device_unregister, but we
 		   haven't yet added it.  This is handled without incident by
 		   mtd_device_unregister, as far as I can tell. */
-		nand_release(mtd);
+		nand_release(nand);
 		goto fail;
 	}
 
@@ -1662,7 +1662,7 @@ static void release_nanddoc(void)
 		doc = nand_get_controller_data(nand);
 
 		nextmtd = doc->nextdoc;
-		nand_release(mtd);
+		nand_release(nand);
 		iounmap(doc->virtadr);
 		release_mem_region(doc->physadr, DOC_IOREMAP_LEN);
 		free_rs(doc->rs_decoder);
diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c
index 69f60755f38a..2d86bc5a886d 100644
--- a/drivers/mtd/nand/raw/docg4.c
+++ b/drivers/mtd/nand/raw/docg4.c
@@ -1420,7 +1420,7 @@ unmap:
 static int __exit cleanup_docg4(struct platform_device *pdev)
 {
 	struct docg4_priv *doc = platform_get_drvdata(pdev);
-	nand_release(doc->mtd);
+	nand_release(mtd_to_nand(doc->mtd));
 	kfree(mtd_to_nand(doc->mtd));
 	iounmap(doc->virtadr);
 	return 0;
diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
index 541343d142e0..22bcd64a66c8 100644
--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
@@ -942,9 +942,8 @@ static int fsl_elbc_nand_remove(struct platform_device *pdev)
 {
 	struct fsl_elbc_fcm_ctrl *elbc_fcm_ctrl = fsl_lbc_ctrl_dev->nand;
 	struct fsl_elbc_mtd *priv = dev_get_drvdata(&pdev->dev);
-	struct mtd_info *mtd = nand_to_mtd(&priv->chip);
 
-	nand_release(mtd);
+	nand_release(&priv->chip);
 	fsl_elbc_chip_remove(priv);
 
 	mutex_lock(&fsl_elbc_nand_mutex);
diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index ad010c72df78..70bf8e1552a5 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -1105,9 +1105,8 @@ err:
 static int fsl_ifc_nand_remove(struct platform_device *dev)
 {
 	struct fsl_ifc_mtd *priv = dev_get_drvdata(&dev->dev);
-	struct mtd_info *mtd = nand_to_mtd(&priv->chip);
 
-	nand_release(mtd);
+	nand_release(&priv->chip);
 	fsl_ifc_chip_remove(priv);
 
 	mutex_lock(&fsl_ifc_nand_mutex);
diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c
index 99edae365d16..ffddfc9721ac 100644
--- a/drivers/mtd/nand/raw/fsl_upm.c
+++ b/drivers/mtd/nand/raw/fsl_upm.c
@@ -326,7 +326,7 @@ static int fun_remove(struct platform_device *ofdev)
 	struct mtd_info *mtd = nand_to_mtd(&fun->chip);
 	int i;
 
-	nand_release(mtd);
+	nand_release(&fun->chip);
 	kfree(mtd->name);
 
 	for (i = 0; i < fun->mchip_count; i++) {
diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c
index 9991e3b8e237..25d354e9448e 100644
--- a/drivers/mtd/nand/raw/fsmc_nand.c
+++ b/drivers/mtd/nand/raw/fsmc_nand.c
@@ -1161,7 +1161,7 @@ static int fsmc_nand_remove(struct platform_device *pdev)
 	struct fsmc_nand_data *host = platform_get_drvdata(pdev);
 
 	if (host) {
-		nand_release(nand_to_mtd(&host->nand));
+		nand_release(&host->nand);
 
 		if (host->mode == USE_DMA_ACCESS) {
 			dma_release_channel(host->write_dma_chan);
diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c
index 983d3be48019..0e7d00faf33c 100644
--- a/drivers/mtd/nand/raw/gpio.c
+++ b/drivers/mtd/nand/raw/gpio.c
@@ -194,7 +194,7 @@ static int gpio_nand_remove(struct platform_device *pdev)
 {
 	struct gpiomtd *gpiomtd = platform_get_drvdata(pdev);
 
-	nand_release(nand_to_mtd(&gpiomtd->nand_chip));
+	nand_release(&gpiomtd->nand_chip);
 
 	/* Enable write protection and disable the chip */
 	if (gpiomtd->nwp && !IS_ERR(gpiomtd->nwp))
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index 7af207bc3ab5..fe99d9323d4a 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -2026,7 +2026,7 @@ static int gpmi_nand_remove(struct platform_device *pdev)
 {
 	struct gpmi_nand_data *this = platform_get_drvdata(pdev);
 
-	nand_release(nand_to_mtd(&this->nand));
+	nand_release(&this->nand);
 	gpmi_free_dma_buffer(this);
 	release_resources(this);
 	return 0;
diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c
index 81baa2e6ae56..9106a1d60bca 100644
--- a/drivers/mtd/nand/raw/hisi504_nand.c
+++ b/drivers/mtd/nand/raw/hisi504_nand.c
@@ -818,9 +818,8 @@ static int hisi_nfc_probe(struct platform_device *pdev)
 static int hisi_nfc_remove(struct platform_device *pdev)
 {
 	struct hinfc_host *host = platform_get_drvdata(pdev);
-	struct mtd_info *mtd = nand_to_mtd(&host->chip);
 
-	nand_release(mtd);
+	nand_release(&host->chip);
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c
index 75bb26645c82..27603d78b157 100644
--- a/drivers/mtd/nand/raw/jz4740_nand.c
+++ b/drivers/mtd/nand/raw/jz4740_nand.c
@@ -507,7 +507,7 @@ static int jz_nand_remove(struct platform_device *pdev)
 	struct jz_nand *nand = platform_get_drvdata(pdev);
 	size_t i;
 
-	nand_release(nand_to_mtd(&nand->chip));
+	nand_release(&nand->chip);
 
 	/* Deassert and disable all chips */
 	writel(0, nand->base + JZ_REG_NAND_CTRL);
diff --git a/drivers/mtd/nand/raw/jz4780_nand.c b/drivers/mtd/nand/raw/jz4780_nand.c
index 80f29b28bcc4..7d008aeae165 100644
--- a/drivers/mtd/nand/raw/jz4780_nand.c
+++ b/drivers/mtd/nand/raw/jz4780_nand.c
@@ -292,7 +292,7 @@ static int jz4780_nand_init_chip(struct platform_device *pdev,
 
 	ret = mtd_device_register(mtd, NULL, 0);
 	if (ret) {
-		nand_release(mtd);
+		nand_release(chip);
 		return ret;
 	}
 
@@ -307,7 +307,7 @@ static void jz4780_nand_cleanup_chips(struct jz4780_nand_controller *nfc)
 
 	while (!list_empty(&nfc->chips)) {
 		chip = list_first_entry(&nfc->chips, struct jz4780_nand_chip, chip_list);
-		nand_release(nand_to_mtd(&chip->chip));
+		nand_release(&chip->chip);
 		list_del(&chip->chip_list);
 	}
 }
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index 453a83b82d73..d240b8ff40ca 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -839,9 +839,8 @@ free_gpio:
 static int lpc32xx_nand_remove(struct platform_device *pdev)
 {
 	struct lpc32xx_nand_host *host = platform_get_drvdata(pdev);
-	struct mtd_info *mtd = nand_to_mtd(&host->nand_chip);
 
-	nand_release(mtd);
+	nand_release(&host->nand_chip);
 	free_irq(host->irq, host);
 	if (use_dma)
 		dma_release_channel(host->dma_chan);
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index ad6eff0591d2..607e4bdfae03 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -956,9 +956,8 @@ static int lpc32xx_nand_remove(struct platform_device *pdev)
 {
 	uint32_t tmp;
 	struct lpc32xx_nand_host *host = platform_get_drvdata(pdev);
-	struct mtd_info *mtd = nand_to_mtd(&host->nand_chip);
 
-	nand_release(mtd);
+	nand_release(&host->nand_chip);
 	dma_release_channel(host->dma_chan);
 
 	/* Force CE high */
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index dde64609415f..5a9836c5093c 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -2618,7 +2618,7 @@ static int marvell_nand_chip_init(struct device *dev, struct marvell_nfc *nfc,
 		ret = mtd_device_register(mtd, NULL, 0);
 	if (ret) {
 		dev_err(dev, "failed to register mtd device: %d\n", ret);
-		nand_release(mtd);
+		nand_release(chip);
 		return ret;
 	}
 
@@ -2673,7 +2673,7 @@ static void marvell_nand_chips_cleanup(struct marvell_nfc *nfc)
 	struct marvell_nand_chip *entry, *temp;
 
 	list_for_each_entry_safe(entry, temp, &nfc->chips, node) {
-		nand_release(nand_to_mtd(&entry->chip));
+		nand_release(&entry->chip);
 		list_del(&entry->node);
 	}
 }
diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c
index efaaec462bd7..3c90d6955476 100644
--- a/drivers/mtd/nand/raw/mpc5121_nfc.c
+++ b/drivers/mtd/nand/raw/mpc5121_nfc.c
@@ -817,7 +817,7 @@ static int mpc5121_nfc_remove(struct platform_device *op)
 	struct device *dev = &op->dev;
 	struct mtd_info *mtd = dev_get_drvdata(dev);
 
-	nand_release(mtd);
+	nand_release(mtd_to_nand(mtd));
 	mpc5121_nfc_free(dev, mtd);
 
 	return 0;
diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c
index 7a2ce405f914..46d447f148f1 100644
--- a/drivers/mtd/nand/raw/mtk_nand.c
+++ b/drivers/mtd/nand/raw/mtk_nand.c
@@ -1372,7 +1372,7 @@ static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc,
 	ret = mtd_device_register(mtd, NULL, 0);
 	if (ret) {
 		dev_err(dev, "mtd parse partition error\n");
-		nand_release(mtd);
+		nand_release(nand);
 		return ret;
 	}
 
@@ -1538,7 +1538,7 @@ static int mtk_nfc_remove(struct platform_device *pdev)
 	while (!list_empty(&nfc->chips)) {
 		chip = list_first_entry(&nfc->chips, struct mtk_nfc_nand_chip,
 					node);
-		nand_release(nand_to_mtd(&chip->nand));
+		nand_release(&chip->nand);
 		list_del(&chip->node);
 	}
 
diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index 1ca03d88adf1..3c57e14e1c7c 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -1915,7 +1915,7 @@ static int mxcnd_remove(struct platform_device *pdev)
 {
 	struct mxc_nand_host *host = platform_get_drvdata(pdev);
 
-	nand_release(nand_to_mtd(&host->nand));
+	nand_release(&host->nand);
 	if (host->clk_act)
 		clk_disable_unprepare(host->clk);
 
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 974cbfbde5e2..f937efe145af 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -6853,12 +6853,12 @@ EXPORT_SYMBOL_GPL(nand_cleanup);
 /**
  * nand_release - [NAND Interface] Unregister the MTD device and free resources
  *		  held by the NAND device
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  */
-void nand_release(struct mtd_info *mtd)
+void nand_release(struct nand_chip *chip)
 {
-	mtd_device_unregister(mtd);
-	nand_cleanup(mtd_to_nand(mtd));
+	mtd_device_unregister(nand_to_mtd(chip));
+	nand_cleanup(chip);
 }
 EXPORT_SYMBOL_GPL(nand_release);
 
diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c
index 60761175e531..e9f7b9e1aead 100644
--- a/drivers/mtd/nand/raw/nandsim.c
+++ b/drivers/mtd/nand/raw/nandsim.c
@@ -2354,7 +2354,7 @@ static int __init ns_init_module(void)
 
 err_exit:
 	free_nandsim(nand);
-	nand_release(nsmtd);
+	nand_release(chip);
 	for (i = 0;i < ARRAY_SIZE(nand->partitions); ++i)
 		kfree(nand->partitions[i].name);
 error:
@@ -2376,7 +2376,7 @@ static void __exit ns_cleanup_module(void)
 	int i;
 
 	free_nandsim(ns);    /* Free nandsim private resources */
-	nand_release(nsmtd); /* Unregister driver */
+	nand_release(chip); /* Unregister driver */
 	for (i = 0;i < ARRAY_SIZE(ns->partitions); ++i)
 		kfree(ns->partitions[i].name);
 	kfree(mtd_to_nand(nsmtd));        /* Free other structures */
diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index 7ce7f37dc67a..ab24e9ca769b 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -258,7 +258,7 @@ static int ndfc_remove(struct platform_device *ofdev)
 	struct ndfc_controller *ndfc = dev_get_drvdata(&ofdev->dev);
 	struct mtd_info *mtd = nand_to_mtd(&ndfc->chip);
 
-	nand_release(mtd);
+	nand_release(&ndfc->chip);
 	kfree(mtd->name);
 
 	return 0;
diff --git a/drivers/mtd/nand/raw/nuc900_nand.c b/drivers/mtd/nand/raw/nuc900_nand.c
index 41ed993b9523..0c675b6c0b6e 100644
--- a/drivers/mtd/nand/raw/nuc900_nand.c
+++ b/drivers/mtd/nand/raw/nuc900_nand.c
@@ -284,7 +284,7 @@ static int nuc900_nand_remove(struct platform_device *pdev)
 {
 	struct nuc900_nand *nuc900_nand = platform_get_drvdata(pdev);
 
-	nand_release(nand_to_mtd(&nuc900_nand->chip));
+	nand_release(&nuc900_nand->chip);
 	clk_disable(nuc900_nand->clk);
 
 	return 0;
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index c2aff2492fa1..b243f2ab3622 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -2290,7 +2290,7 @@ static int omap_nand_remove(struct platform_device *pdev)
 	}
 	if (info->dma)
 		dma_release_channel(info->dma);
-	nand_release(mtd);
+	nand_release(nand_chip);
 	return 0;
 }
 
diff --git a/drivers/mtd/nand/raw/orion_nand.c b/drivers/mtd/nand/raw/orion_nand.c
index 256a6b018bdc..5c58d91ffaee 100644
--- a/drivers/mtd/nand/raw/orion_nand.c
+++ b/drivers/mtd/nand/raw/orion_nand.c
@@ -181,7 +181,7 @@ static int __init orion_nand_probe(struct platform_device *pdev)
 	mtd->name = "orion_nand";
 	ret = mtd_device_register(mtd, board->parts, board->nr_parts);
 	if (ret) {
-		nand_release(mtd);
+		nand_release(nc);
 		goto no_dev;
 	}
 
@@ -196,9 +196,8 @@ static int orion_nand_remove(struct platform_device *pdev)
 {
 	struct orion_nand_info *info = platform_get_drvdata(pdev);
 	struct nand_chip *chip = &info->chip;
-	struct mtd_info *mtd = nand_to_mtd(chip);
 
-	nand_release(mtd);
+	nand_release(chip);
 
 	clk_disable_unprepare(info->clk);
 
diff --git a/drivers/mtd/nand/raw/oxnas_nand.c b/drivers/mtd/nand/raw/oxnas_nand.c
index 9aeb024c2a0e..5bc180536320 100644
--- a/drivers/mtd/nand/raw/oxnas_nand.c
+++ b/drivers/mtd/nand/raw/oxnas_nand.c
@@ -148,7 +148,7 @@ static int oxnas_nand_probe(struct platform_device *pdev)
 
 		err = mtd_device_register(mtd, NULL, 0);
 		if (err) {
-			nand_release(mtd);
+			nand_release(chip);
 			goto err_clk_unprepare;
 		}
 
@@ -176,7 +176,7 @@ static int oxnas_nand_remove(struct platform_device *pdev)
 	struct oxnas_nand_ctrl *oxnas = platform_get_drvdata(pdev);
 
 	if (oxnas->chips[0])
-		nand_release(nand_to_mtd(oxnas->chips[0]));
+		nand_release(oxnas->chips[0]);
 
 	clk_disable_unprepare(oxnas->clk);
 
diff --git a/drivers/mtd/nand/raw/pasemi_nand.c b/drivers/mtd/nand/raw/pasemi_nand.c
index eca4e41d2be3..c8e2ac04fb86 100644
--- a/drivers/mtd/nand/raw/pasemi_nand.c
+++ b/drivers/mtd/nand/raw/pasemi_nand.c
@@ -191,7 +191,7 @@ static int pasemi_nand_remove(struct platform_device *ofdev)
 	chip = mtd_to_nand(pasemi_nand_mtd);
 
 	/* Release resources, unregister device */
-	nand_release(pasemi_nand_mtd);
+	nand_release(chip);
 
 	release_region(lpcctl, 4);
 
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index c9a23fb21718..80e1a44f0465 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -144,7 +144,7 @@ static int plat_nand_probe(struct platform_device *pdev)
 	if (!err)
 		return err;
 
-	nand_release(mtd);
+	nand_release(&data->chip);
 out:
 	if (pdata->ctrl.remove)
 		pdata->ctrl.remove(pdev);
@@ -159,7 +159,7 @@ static int plat_nand_remove(struct platform_device *pdev)
 	struct plat_nand_data *data = platform_get_drvdata(pdev);
 	struct platform_nand_data *pdata = dev_get_platdata(&pdev->dev);
 
-	nand_release(nand_to_mtd(&data->chip));
+	nand_release(&data->chip);
 	if (pdata->ctrl.remove)
 		pdata->ctrl.remove(pdev);
 
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index d800347e74da..312cfd786b0f 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -2999,7 +2999,7 @@ static int qcom_nandc_remove(struct platform_device *pdev)
 	struct qcom_nand_host *host;
 
 	list_for_each_entry(host, &nandc->host_list, node)
-		nand_release(nand_to_mtd(&host->chip));
+		nand_release(&host->chip);
 
 
 	qcom_nandc_unalloc(nandc);
diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c
index dcdeb0660e5e..bb74a0ac697e 100644
--- a/drivers/mtd/nand/raw/r852.c
+++ b/drivers/mtd/nand/raw/r852.c
@@ -656,7 +656,7 @@ static int r852_register_nand_device(struct r852_device *dev)
 	dev->card_registred = 1;
 	return 0;
 error3:
-	nand_release(mtd);
+	nand_release(dev->chip);
 error1:
 	/* Force card redetect */
 	dev->card_detected = 0;
@@ -675,7 +675,7 @@ static void r852_unregister_nand_device(struct r852_device *dev)
 		return;
 
 	device_remove_file(&mtd->dev, &dev_attr_media_type);
-	nand_release(mtd);
+	nand_release(dev->chip);
 	r852_engine_disable(dev);
 	dev->card_registred = 0;
 }
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index 7f30d801d642..cf045813c160 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -781,7 +781,7 @@ static int s3c24xx_nand_remove(struct platform_device *pdev)
 
 		for (mtdno = 0; mtdno < info->mtd_count; mtdno++, ptr++) {
 			pr_debug("releasing mtd %d (%p)\n", mtdno, ptr);
-			nand_release(nand_to_mtd(&ptr->chip));
+			nand_release(&ptr->chip);
 		}
 	}
 
diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c
index abcc3be89b89..2580fd981077 100644
--- a/drivers/mtd/nand/raw/sh_flctl.c
+++ b/drivers/mtd/nand/raw/sh_flctl.c
@@ -1216,7 +1216,7 @@ static int flctl_remove(struct platform_device *pdev)
 	struct sh_flctl *flctl = platform_get_drvdata(pdev);
 
 	flctl_release_dma(flctl);
-	nand_release(nand_to_mtd(&flctl->chip));
+	nand_release(&flctl->chip);
 	pm_runtime_disable(&pdev->dev);
 
 	return 0;
diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c
index 4afacb0dcf00..c8eb4654bb1c 100644
--- a/drivers/mtd/nand/raw/sharpsl.c
+++ b/drivers/mtd/nand/raw/sharpsl.c
@@ -187,7 +187,7 @@ static int sharpsl_nand_probe(struct platform_device *pdev)
 	return 0;
 
 err_add:
-	nand_release(mtd);
+	nand_release(this);
 
 err_scan:
 	iounmap(sharpsl->io);
@@ -205,7 +205,7 @@ static int sharpsl_nand_remove(struct platform_device *pdev)
 	struct sharpsl_nand *sharpsl = platform_get_drvdata(pdev);
 
 	/* Release resources, unregister device */
-	nand_release(nand_to_mtd(&sharpsl->chip));
+	nand_release(&sharpsl->chip);
 
 	iounmap(sharpsl->io);
 
diff --git a/drivers/mtd/nand/raw/socrates_nand.c b/drivers/mtd/nand/raw/socrates_nand.c
index e335560f87af..82ba371a8e18 100644
--- a/drivers/mtd/nand/raw/socrates_nand.c
+++ b/drivers/mtd/nand/raw/socrates_nand.c
@@ -181,7 +181,7 @@ static int socrates_nand_probe(struct platform_device *ofdev)
 	if (!res)
 		return res;
 
-	nand_release(mtd);
+	nand_release(nand_chip);
 
 out:
 	iounmap(host->io_base);
@@ -194,9 +194,8 @@ out:
 static int socrates_nand_remove(struct platform_device *ofdev)
 {
 	struct socrates_nand_host *host = dev_get_drvdata(&ofdev->dev);
-	struct mtd_info *mtd = nand_to_mtd(&host->nand_chip);
 
-	nand_release(mtd);
+	nand_release(&host->nand_chip);
 
 	iounmap(host->io_base);
 
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index 179f74b6edf6..e31ab86bebee 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -1947,7 +1947,7 @@ static int sunxi_nand_chip_init(struct device *dev, struct sunxi_nfc *nfc,
 	ret = mtd_device_register(mtd, NULL, 0);
 	if (ret) {
 		dev_err(dev, "failed to register mtd device: %d\n", ret);
-		nand_release(mtd);
+		nand_release(nand);
 		return ret;
 	}
 
@@ -1986,7 +1986,7 @@ static void sunxi_nand_chips_cleanup(struct sunxi_nfc *nfc)
 	while (!list_empty(&nfc->chips)) {
 		chip = list_first_entry(&nfc->chips, struct sunxi_nand_chip,
 					node);
-		nand_release(nand_to_mtd(&chip->nand));
+		nand_release(&chip->nand);
 		sunxi_nand_ecc_cleanup(&chip->nand.ecc);
 		list_del(&chip->node);
 	}
diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c
index 45beb87aec93..1061eb60ee60 100644
--- a/drivers/mtd/nand/raw/tango_nand.c
+++ b/drivers/mtd/nand/raw/tango_nand.c
@@ -617,7 +617,7 @@ static int tango_nand_remove(struct platform_device *pdev)
 
 	for (cs = 0; cs < MAX_CS; ++cs) {
 		if (nfc->chips[cs])
-			nand_release(nand_to_mtd(&nfc->chips[cs]->nand_chip));
+			nand_release(&nfc->chips[cs]->nand_chip);
 	}
 
 	return 0;
diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index 6df499a239ae..39594910e6f0 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -449,7 +449,7 @@ static int tmio_probe(struct platform_device *dev)
 	if (!retval)
 		return retval;
 
-	nand_release(mtd);
+	nand_release(nand_chip);
 
 err_irq:
 	tmio_hw_stop(dev, tmio);
@@ -460,7 +460,7 @@ static int tmio_remove(struct platform_device *dev)
 {
 	struct tmio_nand *tmio = platform_get_drvdata(dev);
 
-	nand_release(nand_to_mtd(&tmio->chip));
+	nand_release(&tmio->chip);
 	tmio_hw_stop(dev, tmio);
 	return 0;
 }
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index 169e8bcee61e..f722aae2b244 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -390,7 +390,7 @@ static int __exit txx9ndfmc_remove(struct platform_device *dev)
 		chip = mtd_to_nand(mtd);
 		txx9_priv = nand_get_controller_data(chip);
 
-		nand_release(mtd);
+		nand_release(chip);
 		kfree(txx9_priv->mtdname);
 		kfree(txx9_priv);
 	}
diff --git a/drivers/mtd/nand/raw/vf610_nfc.c b/drivers/mtd/nand/raw/vf610_nfc.c
index 3b486f4ce868..a73213c835a5 100644
--- a/drivers/mtd/nand/raw/vf610_nfc.c
+++ b/drivers/mtd/nand/raw/vf610_nfc.c
@@ -916,7 +916,7 @@ static int vf610_nfc_remove(struct platform_device *pdev)
 	struct mtd_info *mtd = platform_get_drvdata(pdev);
 	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
 
-	nand_release(mtd);
+	nand_release(mtd_to_nand(mtd));
 	clk_disable_unprepare(nfc->clk);
 	return 0;
 }
diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c
index e670d3b5a646..1adb41acebfc 100644
--- a/drivers/mtd/nand/raw/xway_nand.c
+++ b/drivers/mtd/nand/raw/xway_nand.c
@@ -211,7 +211,7 @@ static int xway_nand_probe(struct platform_device *pdev)
 
 	err = mtd_device_register(mtd, NULL, 0);
 	if (err)
-		nand_release(mtd);
+		nand_release(&data->chip);
 
 	return err;
 }
@@ -223,7 +223,7 @@ static int xway_nand_remove(struct platform_device *pdev)
 {
 	struct xway_nand_data *data = platform_get_drvdata(pdev);
 
-	nand_release(nand_to_mtd(&data->chip));
+	nand_release(&data->chip);
 
 	return 0;
 }
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 733b228d94a5..e9c59f0624ad 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1739,7 +1739,7 @@ int nand_write_data_op(struct nand_chip *chip, const void *buf,
  */
 void nand_cleanup(struct nand_chip *chip);
 /* Unregister the MTD device and calls nand_cleanup() */
-void nand_release(struct mtd_info *mtd);
+void nand_release(struct nand_chip *chip);
 
 /* Default extended ID decoding function */
 void nand_decode_ext_id(struct nand_chip *chip);
-- 
cgit v1.2.3


From 2b356ab46ebe9b1bc63bda6708b81c9200382e78 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:16 +0200
Subject: mtd: rawnand: Pass a nand_chip object to nand_wait_ready()

Let's make the raw NAND API consistent by patching all helpers to
take a nand_chip object instead of an mtd_info one.

Now is nand_wait_ready()'s turn.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c |  2 +-
 drivers/mtd/nand/raw/cafe_nand.c                 |  2 +-
 drivers/mtd/nand/raw/nand_base.c                 | 12 ++++++------
 include/linux/mtd/rawnand.h                      |  2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
index 9b62bc2d25a0..7022ffd271ad 100644
--- a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
@@ -232,7 +232,7 @@ static void bcm47xxnflash_ops_bcm4706_cmdfunc(struct mtd_info *mtd,
 		nand_chip->cmd_ctrl(mtd, command, NAND_CTRL_CLE);
 
 		ndelay(100);
-		nand_wait_ready(mtd);
+		nand_wait_ready(nand_chip);
 		break;
 	case NAND_CMD_READID:
 		ctlcode = NCTL_CSA | 0x01000000 | NCTL_CMD1W | NCTL_CMD0;
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index 3304594177c6..94e5f7a56084 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -313,7 +313,7 @@ static void cafe_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
 		cafe_writel(cafe, cafe->ctl2, NAND_CTRL2);
 		return;
 	}
-	nand_wait_ready(mtd);
+	nand_wait_ready(chip);
 	cafe_writel(cafe, cafe->ctl2, NAND_CTRL2);
 }
 
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index f937efe145af..9d684f1d9e26 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -624,13 +624,13 @@ static void panic_nand_wait_ready(struct mtd_info *mtd, unsigned long timeo)
 
 /**
  * nand_wait_ready - [GENERIC] Wait for the ready pin after commands.
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  *
  * Wait for the ready pin after a command, and warn if a timeout occurs.
  */
-void nand_wait_ready(struct mtd_info *mtd)
+void nand_wait_ready(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	unsigned long timeo = 400;
 
 	if (in_interrupt() || oops_in_progress)
@@ -852,7 +852,7 @@ static void nand_command(struct mtd_info *mtd, unsigned int command,
 	 */
 	ndelay(100);
 
-	nand_wait_ready(mtd);
+	nand_wait_ready(chip);
 }
 
 static void nand_ccs_delay(struct nand_chip *chip)
@@ -1004,7 +1004,7 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
 	 */
 	ndelay(100);
 
-	nand_wait_ready(mtd);
+	nand_wait_ready(chip);
 }
 
 /**
@@ -2251,7 +2251,7 @@ static int nand_wait_rdy_op(struct nand_chip *chip, unsigned int timeout_ms,
 	if (!chip->dev_ready)
 		udelay(chip->chip_delay);
 	else
-		nand_wait_ready(nand_to_mtd(chip));
+		nand_wait_ready(chip);
 
 	return 0;
 }
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index e9c59f0624ad..55014e42912a 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -37,7 +37,7 @@ static inline int nand_scan(struct nand_chip *chip, int max_chips)
 }
 
 /* Internal helper for board drivers which need to override command function */
-void nand_wait_ready(struct mtd_info *mtd);
+void nand_wait_ready(struct nand_chip *chip);
 
 /* The maximum number of NAND chips in an array */
 #define NAND_MAX_CHIPS		8
-- 
cgit v1.2.3


From ec47636cd7e7b0dd53e526ec832ebb21b67ad9c6 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:17 +0200
Subject: mtd: rawnand: Pass a nand_chip object to ecc->hwctl()

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one.

Now is ecc->hwctl()'s turn.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/cs553x_nand.c  |  3 +--
 drivers/mtd/nand/raw/davinci_nand.c | 10 +++++-----
 drivers/mtd/nand/raw/diskonchip.c   |  6 ++----
 drivers/mtd/nand/raw/fsmc_nand.c    |  6 +++---
 drivers/mtd/nand/raw/jz4740_nand.c  |  4 ++--
 drivers/mtd/nand/raw/jz4780_nand.c  |  4 ++--
 drivers/mtd/nand/raw/lpc32xx_mlc.c  |  2 +-
 drivers/mtd/nand/raw/lpc32xx_slc.c  |  2 +-
 drivers/mtd/nand/raw/nand_base.c    | 14 +++++++-------
 drivers/mtd/nand/raw/ndfc.c         |  3 +--
 drivers/mtd/nand/raw/omap2.c        | 17 ++++++++---------
 drivers/mtd/nand/raw/r852.c         |  4 ++--
 drivers/mtd/nand/raw/s3c2410.c      | 15 +++++++++------
 drivers/mtd/nand/raw/sharpsl.c      |  4 ++--
 drivers/mtd/nand/raw/tmio_nand.c    |  4 ++--
 drivers/mtd/nand/raw/txx9ndfmc.c    |  4 ++--
 include/linux/mtd/rawnand.h         |  2 +-
 17 files changed, 51 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c
index d4be416bb2fa..508bcb3d134f 100644
--- a/drivers/mtd/nand/raw/cs553x_nand.c
+++ b/drivers/mtd/nand/raw/cs553x_nand.c
@@ -157,9 +157,8 @@ static int cs553x_device_ready(struct mtd_info *mtd)
 	return (foo & CS_NAND_STS_FLASH_RDY) && !(foo & CS_NAND_CTLR_BUSY);
 }
 
-static void cs_enable_hwecc(struct mtd_info *mtd, int mode)
+static void cs_enable_hwecc(struct nand_chip *this, int mode)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	void __iomem *mmio_base = this->IO_ADDR_R;
 
 	writeb(0x07, mmio_base + MM_NAND_ECC_CTL);
diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index 66d3d5966013..329de266c953 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -146,16 +146,16 @@ static inline uint32_t nand_davinci_readecc_1bit(struct mtd_info *mtd)
 			+ 4 * info->core_chipsel);
 }
 
-static void nand_davinci_hwctl_1bit(struct mtd_info *mtd, int mode)
+static void nand_davinci_hwctl_1bit(struct nand_chip *chip, int mode)
 {
 	struct davinci_nand_info *info;
 	uint32_t nandcfr;
 	unsigned long flags;
 
-	info = to_davinci_nand(mtd);
+	info = to_davinci_nand(nand_to_mtd(chip));
 
 	/* Reset ECC hardware */
-	nand_davinci_readecc_1bit(mtd);
+	nand_davinci_readecc_1bit(nand_to_mtd(chip));
 
 	spin_lock_irqsave(&davinci_nand_lock, flags);
 
@@ -231,9 +231,9 @@ static int nand_davinci_correct_1bit(struct mtd_info *mtd, u_char *dat,
  * OOB without recomputing ECC.
  */
 
-static void nand_davinci_hwctl_4bit(struct mtd_info *mtd, int mode)
+static void nand_davinci_hwctl_4bit(struct nand_chip *chip, int mode)
 {
-	struct davinci_nand_info *info = to_davinci_nand(mtd);
+	struct davinci_nand_info *info = to_davinci_nand(nand_to_mtd(chip));
 	unsigned long flags;
 	u32 val;
 
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 43d1e08133ce..d007f0704654 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -797,9 +797,8 @@ static int doc200x_block_bad(struct mtd_info *mtd, loff_t ofs)
 	return 0;
 }
 
-static void doc200x_enable_hwecc(struct mtd_info *mtd, int mode)
+static void doc200x_enable_hwecc(struct nand_chip *this, int mode)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 
@@ -816,9 +815,8 @@ static void doc200x_enable_hwecc(struct mtd_info *mtd, int mode)
 	}
 }
 
-static void doc2001plus_enable_hwecc(struct mtd_info *mtd, int mode)
+static void doc2001plus_enable_hwecc(struct nand_chip *this, int mode)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 
diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c
index 25d354e9448e..0291a43d9f6e 100644
--- a/drivers/mtd/nand/raw/fsmc_nand.c
+++ b/drivers/mtd/nand/raw/fsmc_nand.c
@@ -368,9 +368,9 @@ static int fsmc_setup_data_interface(struct mtd_info *mtd, int csline,
 /*
  * fsmc_enable_hwecc - Enables Hardware ECC through FSMC registers
  */
-static void fsmc_enable_hwecc(struct mtd_info *mtd, int mode)
+static void fsmc_enable_hwecc(struct nand_chip *chip, int mode)
 {
-	struct fsmc_nand_data *host = mtd_to_fsmc(mtd);
+	struct fsmc_nand_data *host = mtd_to_fsmc(nand_to_mtd(chip));
 
 	writel_relaxed(readl(host->regs_va + FSMC_PC) & ~FSMC_ECCPLEN_256,
 		       host->regs_va + FSMC_PC);
@@ -740,7 +740,7 @@ static int fsmc_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
 
 	for (i = 0, s = 0; s < eccsteps; s++, i += eccbytes, p += eccsize) {
 		nand_read_page_op(chip, page, s * eccsize, NULL, 0);
-		chip->ecc.hwctl(mtd, NAND_ECC_READ);
+		chip->ecc.hwctl(chip, NAND_ECC_READ);
 		nand_read_data_op(chip, p, eccsize, false);
 
 		for (j = 0; j < eccbytes;) {
diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c
index 27603d78b157..0bf5d7b7f185 100644
--- a/drivers/mtd/nand/raw/jz4740_nand.c
+++ b/drivers/mtd/nand/raw/jz4740_nand.c
@@ -134,9 +134,9 @@ static int jz_nand_dev_ready(struct mtd_info *mtd)
 	return gpiod_get_value_cansleep(nand->busy_gpio);
 }
 
-static void jz_nand_hwctl(struct mtd_info *mtd, int mode)
+static void jz_nand_hwctl(struct nand_chip *chip, int mode)
 {
-	struct jz_nand *nand = mtd_to_jz_nand(mtd);
+	struct jz_nand *nand = mtd_to_jz_nand(nand_to_mtd(chip));
 	uint32_t reg;
 
 	writel(0, nand->base + JZ_REG_NAND_IRQ_STAT);
diff --git a/drivers/mtd/nand/raw/jz4780_nand.c b/drivers/mtd/nand/raw/jz4780_nand.c
index 7d008aeae165..1604214ee4b8 100644
--- a/drivers/mtd/nand/raw/jz4780_nand.c
+++ b/drivers/mtd/nand/raw/jz4780_nand.c
@@ -116,9 +116,9 @@ static int jz4780_nand_dev_ready(struct mtd_info *mtd)
 	return !gpiod_get_value_cansleep(nand->busy_gpio);
 }
 
-static void jz4780_nand_ecc_hwctl(struct mtd_info *mtd, int mode)
+static void jz4780_nand_ecc_hwctl(struct nand_chip *chip, int mode)
 {
-	struct jz4780_nand_chip *nand = to_jz4780_nand_chip(mtd);
+	struct jz4780_nand_chip *nand = to_jz4780_nand_chip(nand_to_mtd(chip));
 
 	nand->reading = (mode == NAND_ECC_READ);
 }
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index d240b8ff40ca..84e421710297 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -576,7 +576,7 @@ static int lpc32xx_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
 }
 
 /* Prepares MLC for transfers with H/W ECC enabled: always enabled anyway */
-static void lpc32xx_ecc_enable(struct mtd_info *mtd, int mode)
+static void lpc32xx_ecc_enable(struct nand_chip *chip, int mode)
 {
 	/* Always enabled! */
 }
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index 607e4bdfae03..a6c635053bd5 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -337,7 +337,7 @@ static void lpc32xx_wp_disable(struct lpc32xx_nand_host *host)
 /*
  * Prepares SLC for transfers with H/W ECC enabled
  */
-static void lpc32xx_nand_ecc_enable(struct mtd_info *mtd, int mode)
+static void lpc32xx_nand_ecc_enable(struct nand_chip *chip, int mode)
 {
 	/* Hardware ECC is enabled automatically in hardware as needed */
 }
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 9d684f1d9e26..fd0563fc4ad2 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -3272,7 +3272,7 @@ static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
 		return ret;
 
 	for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
-		chip->ecc.hwctl(mtd, NAND_ECC_READ);
+		chip->ecc.hwctl(chip, NAND_ECC_READ);
 
 		ret = nand_read_data_op(chip, p, eccsize, false);
 		if (ret)
@@ -3358,7 +3358,7 @@ static int nand_read_page_hwecc_oob_first(struct mtd_info *mtd,
 	for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
 		int stat;
 
-		chip->ecc.hwctl(mtd, NAND_ECC_READ);
+		chip->ecc.hwctl(chip, NAND_ECC_READ);
 
 		ret = nand_read_data_op(chip, p, eccsize, false);
 		if (ret)
@@ -3415,7 +3415,7 @@ static int nand_read_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
 	for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
 		int stat;
 
-		chip->ecc.hwctl(mtd, NAND_ECC_READ);
+		chip->ecc.hwctl(chip, NAND_ECC_READ);
 
 		ret = nand_read_data_op(chip, p, eccsize, false);
 		if (ret)
@@ -3430,7 +3430,7 @@ static int nand_read_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
 			oob += chip->ecc.prepad;
 		}
 
-		chip->ecc.hwctl(mtd, NAND_ECC_READSYN);
+		chip->ecc.hwctl(chip, NAND_ECC_READSYN);
 
 		ret = nand_read_data_op(chip, oob, eccbytes, false);
 		if (ret)
@@ -4151,7 +4151,7 @@ static int nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
 		return ret;
 
 	for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
-		chip->ecc.hwctl(mtd, NAND_ECC_WRITE);
+		chip->ecc.hwctl(chip, NAND_ECC_WRITE);
 
 		ret = nand_write_data_op(chip, p, eccsize, false);
 		if (ret)
@@ -4204,7 +4204,7 @@ static int nand_write_subpage_hwecc(struct mtd_info *mtd,
 
 	for (step = 0; step < ecc_steps; step++) {
 		/* configure controller for WRITE access */
-		chip->ecc.hwctl(mtd, NAND_ECC_WRITE);
+		chip->ecc.hwctl(chip, NAND_ECC_WRITE);
 
 		/* write data (untouched subpages already masked by 0xFF) */
 		ret = nand_write_data_op(chip, buf, ecc_size, false);
@@ -4272,7 +4272,7 @@ static int nand_write_page_syndrome(struct mtd_info *mtd,
 		return ret;
 
 	for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
-		chip->ecc.hwctl(mtd, NAND_ECC_WRITE);
+		chip->ecc.hwctl(chip, NAND_ECC_WRITE);
 
 		ret = nand_write_data_op(chip, p, eccsize, false);
 		if (ret)
diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index ab24e9ca769b..f9648d87b2e7 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -81,10 +81,9 @@ static int ndfc_ready(struct mtd_info *mtd)
 	return in_be32(ndfc->ndfcbase + NDFC_STAT) & NDFC_STAT_IS_READY;
 }
 
-static void ndfc_enable_hwecc(struct mtd_info *mtd, int mode)
+static void ndfc_enable_hwecc(struct nand_chip *chip, int mode)
 {
 	uint32_t ccr;
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct ndfc_controller *ndfc = nand_get_controller_data(chip);
 
 	ccr = in_be32(ndfc->ndfcbase + NDFC_CCR);
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index b243f2ab3622..bba403b4e262 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -935,10 +935,9 @@ static int omap_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
  * @mtd: MTD device structure
  * @mode: Read/Write mode
  */
-static void omap_enable_hwecc(struct mtd_info *mtd, int mode)
+static void omap_enable_hwecc(struct nand_chip *chip, int mode)
 {
-	struct omap_nand_info *info = mtd_to_omap(mtd);
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct omap_nand_info *info = mtd_to_omap(nand_to_mtd(chip));
 	unsigned int dev_width = (chip->options & NAND_BUSWIDTH_16) ? 1 : 0;
 	u32 val;
 
@@ -1030,13 +1029,13 @@ static int omap_dev_ready(struct mtd_info *mtd)
  * eccsize0 = 0  (no additional protected byte in spare area)
  * eccsize1 = 32 (skip 32 nibbles = 16 bytes per sector in spare area)
  */
-static void __maybe_unused omap_enable_hwecc_bch(struct mtd_info *mtd, int mode)
+static void __maybe_unused omap_enable_hwecc_bch(struct nand_chip *chip,
+						 int mode)
 {
 	unsigned int bch_type;
 	unsigned int dev_width, nsectors;
-	struct omap_nand_info *info = mtd_to_omap(mtd);
+	struct omap_nand_info *info = mtd_to_omap(nand_to_mtd(chip));
 	enum omap_ecc ecc_opt = info->ecc_opt;
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	u32 val, wr_mode;
 	unsigned int ecc_size1, ecc_size0;
 
@@ -1529,7 +1528,7 @@ static int omap_write_page_bch(struct mtd_info *mtd, struct nand_chip *chip,
 	nand_prog_page_begin_op(chip, page, 0, NULL, 0);
 
 	/* Enable GPMC ecc engine */
-	chip->ecc.hwctl(mtd, NAND_ECC_WRITE);
+	chip->ecc.hwctl(chip, NAND_ECC_WRITE);
 
 	/* Write data */
 	chip->write_buf(mtd, buf, mtd->writesize);
@@ -1582,7 +1581,7 @@ static int omap_write_subpage_bch(struct mtd_info *mtd,
 	nand_prog_page_begin_op(chip, page, 0, NULL, 0);
 
 	/* Enable GPMC ECC engine */
-	chip->ecc.hwctl(mtd, NAND_ECC_WRITE);
+	chip->ecc.hwctl(chip, NAND_ECC_WRITE);
 
 	/* Write data */
 	chip->write_buf(mtd, buf, mtd->writesize);
@@ -1641,7 +1640,7 @@ static int omap_read_page_bch(struct mtd_info *mtd, struct nand_chip *chip,
 	nand_read_page_op(chip, page, 0, NULL, 0);
 
 	/* Enable GPMC ecc engine */
-	chip->ecc.hwctl(mtd, NAND_ECC_READ);
+	chip->ecc.hwctl(chip, NAND_ECC_READ);
 
 	/* Read data */
 	chip->read_buf(mtd, buf, mtd->writesize);
diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c
index bb74a0ac697e..b5e0cc611b14 100644
--- a/drivers/mtd/nand/raw/r852.c
+++ b/drivers/mtd/nand/raw/r852.c
@@ -401,9 +401,9 @@ static int r852_ready(struct mtd_info *mtd)
  * Set ECC engine mode
 */
 
-static void r852_ecc_hwctl(struct mtd_info *mtd, int mode)
+static void r852_ecc_hwctl(struct nand_chip *chip, int mode)
 {
-	struct r852_device *dev = r852_get_dev(mtd);
+	struct r852_device *dev = r852_get_dev(nand_to_mtd(chip));
 
 	if (dev->card_unstable)
 		return;
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index cf045813c160..ca2d006cc846 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -591,31 +591,34 @@ static int s3c2410_nand_correct_data(struct mtd_info *mtd, u_char *dat,
  * generator block to ECC the data as it passes through]
 */
 
-static void s3c2410_nand_enable_hwecc(struct mtd_info *mtd, int mode)
+static void s3c2410_nand_enable_hwecc(struct nand_chip *chip, int mode)
 {
-	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
+	struct s3c2410_nand_info *info;
 	unsigned long ctrl;
 
+	info = s3c2410_nand_mtd_toinfo(nand_to_mtd(chip));
 	ctrl = readl(info->regs + S3C2410_NFCONF);
 	ctrl |= S3C2410_NFCONF_INITECC;
 	writel(ctrl, info->regs + S3C2410_NFCONF);
 }
 
-static void s3c2412_nand_enable_hwecc(struct mtd_info *mtd, int mode)
+static void s3c2412_nand_enable_hwecc(struct nand_chip *chip, int mode)
 {
-	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
+	struct s3c2410_nand_info *info;
 	unsigned long ctrl;
 
+	info = s3c2410_nand_mtd_toinfo(nand_to_mtd(chip));
 	ctrl = readl(info->regs + S3C2440_NFCONT);
 	writel(ctrl | S3C2412_NFCONT_INIT_MAIN_ECC,
 	       info->regs + S3C2440_NFCONT);
 }
 
-static void s3c2440_nand_enable_hwecc(struct mtd_info *mtd, int mode)
+static void s3c2440_nand_enable_hwecc(struct nand_chip *chip, int mode)
 {
-	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
+	struct s3c2410_nand_info *info;
 	unsigned long ctrl;
 
+	info = s3c2410_nand_mtd_toinfo(nand_to_mtd(chip));
 	ctrl = readl(info->regs + S3C2440_NFCONT);
 	writel(ctrl | S3C2440_NFCONT_INITECC, info->regs + S3C2440_NFCONT);
 }
diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c
index c8eb4654bb1c..37fdaad82f37 100644
--- a/drivers/mtd/nand/raw/sharpsl.c
+++ b/drivers/mtd/nand/raw/sharpsl.c
@@ -85,9 +85,9 @@ static int sharpsl_nand_dev_ready(struct mtd_info *mtd)
 	return !((readb(sharpsl->io + FLASHCTL) & FLRYBY) == 0);
 }
 
-static void sharpsl_nand_enable_hwecc(struct mtd_info *mtd, int mode)
+static void sharpsl_nand_enable_hwecc(struct nand_chip *chip, int mode)
 {
-	struct sharpsl_nand *sharpsl = mtd_to_sharpsl(mtd);
+	struct sharpsl_nand *sharpsl = mtd_to_sharpsl(nand_to_mtd(chip));
 	writeb(0, sharpsl->io + ECCCLRR);
 }
 
diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index 39594910e6f0..2578216ff5c0 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -259,9 +259,9 @@ static void tmio_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
 	tmio_ioread16_rep(tmio->fcr + FCR_DATA, buf, len >> 1);
 }
 
-static void tmio_nand_enable_hwecc(struct mtd_info *mtd, int mode)
+static void tmio_nand_enable_hwecc(struct nand_chip *chip, int mode)
 {
-	struct tmio_nand *tmio = mtd_to_tmio(mtd);
+	struct tmio_nand *tmio = mtd_to_tmio(nand_to_mtd(chip));
 
 	tmio_iowrite8(FCR_MODE_HWECC_RESET, tmio->fcr + FCR_MODE);
 	tmio_ioread8(tmio->fcr + FCR_DATA);	/* dummy read */
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index f722aae2b244..fea5bc684aa1 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -211,9 +211,9 @@ static int txx9ndfmc_correct_data(struct mtd_info *mtd, unsigned char *buf,
 	return corrected;
 }
 
-static void txx9ndfmc_enable_hwecc(struct mtd_info *mtd, int mode)
+static void txx9ndfmc_enable_hwecc(struct nand_chip *chip, int mode)
 {
-	struct platform_device *dev = mtd_to_platdev(mtd);
+	struct platform_device *dev = mtd_to_platdev(nand_to_mtd(chip));
 	u32 mcr = txx9ndfmc_read(dev, TXX9_NDFMCR);
 
 	mcr &= ~TXX9_NDFMCR_ECC_ALL;
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 55014e42912a..029fef900f33 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -647,7 +647,7 @@ struct nand_ecc_ctrl {
 	void *priv;
 	u8 *calc_buf;
 	u8 *code_buf;
-	void (*hwctl)(struct mtd_info *mtd, int mode);
+	void (*hwctl)(struct nand_chip *chip, int mode);
 	int (*calculate)(struct mtd_info *mtd, const uint8_t *dat,
 			uint8_t *ecc_code);
 	int (*correct)(struct mtd_info *mtd, uint8_t *dat, uint8_t *read_ecc,
-- 
cgit v1.2.3


From af37d2c3a8c7a05cf5b0fe6b61e2a6b9e357928b Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:18 +0200
Subject: mtd: rawnand: Pass a nand_chip object to ecc->calculate()

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one.

Now is ecc->calculate()'s turn.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/cs553x_nand.c  |  4 ++--
 drivers/mtd/nand/raw/davinci_nand.c | 12 ++++++------
 drivers/mtd/nand/raw/diskonchip.c   |  4 ++--
 drivers/mtd/nand/raw/fsmc_nand.c    | 10 +++++-----
 drivers/mtd/nand/raw/jz4740_nand.c  |  6 +++---
 drivers/mtd/nand/raw/jz4780_nand.c  |  4 ++--
 drivers/mtd/nand/raw/lpc32xx_slc.c  |  2 +-
 drivers/mtd/nand/raw/nand_base.c    | 16 ++++++++--------
 drivers/mtd/nand/raw/nand_bch.c     |  5 ++---
 drivers/mtd/nand/raw/nand_ecc.c     |  5 ++---
 drivers/mtd/nand/raw/ndfc.c         |  3 +--
 drivers/mtd/nand/raw/omap2.c        | 14 +++++++-------
 drivers/mtd/nand/raw/r852.c         |  6 +++---
 drivers/mtd/nand/raw/s3c2410.c      | 15 +++++++++------
 drivers/mtd/nand/raw/sharpsl.c      |  5 +++--
 drivers/mtd/nand/raw/tmio_nand.c    |  6 +++---
 drivers/mtd/nand/raw/txx9ndfmc.c    |  5 ++---
 include/linux/mtd/nand_bch.h        |  5 +++--
 include/linux/mtd/nand_ecc.h        |  4 +++-
 include/linux/mtd/rawnand.h         |  4 ++--
 20 files changed, 69 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c
index 508bcb3d134f..193c3e8fa118 100644
--- a/drivers/mtd/nand/raw/cs553x_nand.c
+++ b/drivers/mtd/nand/raw/cs553x_nand.c
@@ -164,10 +164,10 @@ static void cs_enable_hwecc(struct nand_chip *this, int mode)
 	writeb(0x07, mmio_base + MM_NAND_ECC_CTL);
 }
 
-static int cs_calculate_ecc(struct mtd_info *mtd, const u_char *dat, u_char *ecc_code)
+static int cs_calculate_ecc(struct nand_chip *this, const u_char *dat,
+			    u_char *ecc_code)
 {
 	uint32_t ecc;
-	struct nand_chip *this = mtd_to_nand(mtd);
 	void __iomem *mmio_base = this->IO_ADDR_R;
 
 	ecc = readl(mmio_base + MM_NAND_STS);
diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index 329de266c953..af221e1c8a87 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -170,10 +170,10 @@ static void nand_davinci_hwctl_1bit(struct nand_chip *chip, int mode)
 /*
  * Read hardware ECC value and pack into three bytes
  */
-static int nand_davinci_calculate_1bit(struct mtd_info *mtd,
-				      const u_char *dat, u_char *ecc_code)
+static int nand_davinci_calculate_1bit(struct nand_chip *chip,
+				       const u_char *dat, u_char *ecc_code)
 {
-	unsigned int ecc_val = nand_davinci_readecc_1bit(mtd);
+	unsigned int ecc_val = nand_davinci_readecc_1bit(nand_to_mtd(chip));
 	unsigned int ecc24 = (ecc_val & 0x0fff) | ((ecc_val & 0x0fff0000) >> 4);
 
 	/* invert so that erased block ecc is correct */
@@ -266,10 +266,10 @@ nand_davinci_readecc_4bit(struct davinci_nand_info *info, u32 code[4])
 }
 
 /* Terminate read ECC; or return ECC (as bytes) of data written to NAND. */
-static int nand_davinci_calculate_4bit(struct mtd_info *mtd,
-		const u_char *dat, u_char *ecc_code)
+static int nand_davinci_calculate_4bit(struct nand_chip *chip,
+				       const u_char *dat, u_char *ecc_code)
 {
-	struct davinci_nand_info *info = to_davinci_nand(mtd);
+	struct davinci_nand_info *info = to_davinci_nand(nand_to_mtd(chip));
 	u32 raw_ecc[4], *p;
 	unsigned i;
 
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index d007f0704654..942a5ee83fbd 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -834,9 +834,9 @@ static void doc2001plus_enable_hwecc(struct nand_chip *this, int mode)
 }
 
 /* This code is only called on write */
-static int doc200x_calculate_ecc(struct mtd_info *mtd, const u_char *dat, unsigned char *ecc_code)
+static int doc200x_calculate_ecc(struct nand_chip *this, const u_char *dat,
+				 unsigned char *ecc_code)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 	int i;
diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c
index 0291a43d9f6e..d4e91465042c 100644
--- a/drivers/mtd/nand/raw/fsmc_nand.c
+++ b/drivers/mtd/nand/raw/fsmc_nand.c
@@ -385,10 +385,10 @@ static void fsmc_enable_hwecc(struct nand_chip *chip, int mode)
  * FSMC. ECC is 13 bytes for 512 bytes of data (supports error correction up to
  * max of 8-bits)
  */
-static int fsmc_read_hwecc_ecc4(struct mtd_info *mtd, const uint8_t *data,
+static int fsmc_read_hwecc_ecc4(struct nand_chip *chip, const uint8_t *data,
 				uint8_t *ecc)
 {
-	struct fsmc_nand_data *host = mtd_to_fsmc(mtd);
+	struct fsmc_nand_data *host = mtd_to_fsmc(nand_to_mtd(chip));
 	uint32_t ecc_tmp;
 	unsigned long deadline = jiffies + FSMC_BUSY_WAIT_TIMEOUT;
 
@@ -433,10 +433,10 @@ static int fsmc_read_hwecc_ecc4(struct mtd_info *mtd, const uint8_t *data,
  * FSMC. ECC is 3 bytes for 512 bytes of data (supports error correction up to
  * max of 1-bit)
  */
-static int fsmc_read_hwecc_ecc1(struct mtd_info *mtd, const uint8_t *data,
+static int fsmc_read_hwecc_ecc1(struct nand_chip *chip, const uint8_t *data,
 				uint8_t *ecc)
 {
-	struct fsmc_nand_data *host = mtd_to_fsmc(mtd);
+	struct fsmc_nand_data *host = mtd_to_fsmc(nand_to_mtd(chip));
 	uint32_t ecc_tmp;
 
 	ecc_tmp = readl_relaxed(host->regs_va + ECC1);
@@ -767,7 +767,7 @@ static int fsmc_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
 		}
 
 		memcpy(&ecc_code[i], oob, chip->ecc.bytes);
-		chip->ecc.calculate(mtd, p, &ecc_calc[i]);
+		chip->ecc.calculate(chip, p, &ecc_calc[i]);
 
 		stat = chip->ecc.correct(mtd, p, &ecc_code[i], &ecc_calc[i]);
 		if (stat < 0) {
diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c
index 0bf5d7b7f185..98ea5172ac74 100644
--- a/drivers/mtd/nand/raw/jz4740_nand.c
+++ b/drivers/mtd/nand/raw/jz4740_nand.c
@@ -162,10 +162,10 @@ static void jz_nand_hwctl(struct nand_chip *chip, int mode)
 	writel(reg, nand->base + JZ_REG_NAND_ECC_CTRL);
 }
 
-static int jz_nand_calculate_ecc_rs(struct mtd_info *mtd, const uint8_t *dat,
-	uint8_t *ecc_code)
+static int jz_nand_calculate_ecc_rs(struct nand_chip *chip, const uint8_t *dat,
+				    uint8_t *ecc_code)
 {
-	struct jz_nand *nand = mtd_to_jz_nand(mtd);
+	struct jz_nand *nand = mtd_to_jz_nand(nand_to_mtd(chip));
 	uint32_t reg, status;
 	int i;
 	unsigned int timeout = 1000;
diff --git a/drivers/mtd/nand/raw/jz4780_nand.c b/drivers/mtd/nand/raw/jz4780_nand.c
index 1604214ee4b8..e53a2bdfc263 100644
--- a/drivers/mtd/nand/raw/jz4780_nand.c
+++ b/drivers/mtd/nand/raw/jz4780_nand.c
@@ -123,10 +123,10 @@ static void jz4780_nand_ecc_hwctl(struct nand_chip *chip, int mode)
 	nand->reading = (mode == NAND_ECC_READ);
 }
 
-static int jz4780_nand_ecc_calculate(struct mtd_info *mtd, const u8 *dat,
+static int jz4780_nand_ecc_calculate(struct nand_chip *chip, const u8 *dat,
 				     u8 *ecc_code)
 {
-	struct jz4780_nand_chip *nand = to_jz4780_nand_chip(mtd);
+	struct jz4780_nand_chip *nand = to_jz4780_nand_chip(nand_to_mtd(chip));
 	struct jz4780_nand_controller *nfc = to_jz4780_nand_controller(nand->chip.controller);
 	struct jz4780_bch_params params;
 
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index a6c635053bd5..c35a61c453da 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -345,7 +345,7 @@ static void lpc32xx_nand_ecc_enable(struct nand_chip *chip, int mode)
 /*
  * Calculates the ECC for the data
  */
-static int lpc32xx_nand_ecc_calculate(struct mtd_info *mtd,
+static int lpc32xx_nand_ecc_calculate(struct nand_chip *chip,
 				      const unsigned char *buf,
 				      unsigned char *code)
 {
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index fd0563fc4ad2..f147b7948e64 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -3110,7 +3110,7 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
 	chip->ecc.read_page_raw(mtd, chip, buf, 1, page);
 
 	for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize)
-		chip->ecc.calculate(mtd, p, &ecc_calc[i]);
+		chip->ecc.calculate(chip, p, &ecc_calc[i]);
 
 	ret = mtd_ooblayout_get_eccbytes(mtd, ecc_code, chip->oob_poi, 0,
 					 chip->ecc.total);
@@ -3175,7 +3175,7 @@ static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip,
 
 	/* Calculate ECC */
 	for (i = 0; i < eccfrag_len ; i += chip->ecc.bytes, p += chip->ecc.size)
-		chip->ecc.calculate(mtd, p, &chip->ecc.calc_buf[i]);
+		chip->ecc.calculate(chip, p, &chip->ecc.calc_buf[i]);
 
 	/*
 	 * The performance is faster if we position offsets according to
@@ -3278,7 +3278,7 @@ static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
 		if (ret)
 			return ret;
 
-		chip->ecc.calculate(mtd, p, &ecc_calc[i]);
+		chip->ecc.calculate(chip, p, &ecc_calc[i]);
 	}
 
 	ret = nand_read_data_op(chip, chip->oob_poi, mtd->oobsize, false);
@@ -3364,7 +3364,7 @@ static int nand_read_page_hwecc_oob_first(struct mtd_info *mtd,
 		if (ret)
 			return ret;
 
-		chip->ecc.calculate(mtd, p, &ecc_calc[i]);
+		chip->ecc.calculate(chip, p, &ecc_calc[i]);
 
 		stat = chip->ecc.correct(mtd, p, &ecc_code[i], NULL);
 		if (stat == -EBADMSG &&
@@ -4118,7 +4118,7 @@ static int nand_write_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
 
 	/* Software ECC calculation */
 	for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize)
-		chip->ecc.calculate(mtd, p, &ecc_calc[i]);
+		chip->ecc.calculate(chip, p, &ecc_calc[i]);
 
 	ret = mtd_ooblayout_set_eccbytes(mtd, ecc_calc, chip->oob_poi, 0,
 					 chip->ecc.total);
@@ -4157,7 +4157,7 @@ static int nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
 		if (ret)
 			return ret;
 
-		chip->ecc.calculate(mtd, p, &ecc_calc[i]);
+		chip->ecc.calculate(chip, p, &ecc_calc[i]);
 	}
 
 	ret = mtd_ooblayout_set_eccbytes(mtd, ecc_calc, chip->oob_poi, 0,
@@ -4215,7 +4215,7 @@ static int nand_write_subpage_hwecc(struct mtd_info *mtd,
 		if ((step < start_step) || (step > end_step))
 			memset(ecc_calc, 0xff, ecc_bytes);
 		else
-			chip->ecc.calculate(mtd, buf, ecc_calc);
+			chip->ecc.calculate(chip, buf, ecc_calc);
 
 		/* mask OOB of un-touched subpages by padding 0xFF */
 		/* if oob_required, preserve OOB metadata of written subpage */
@@ -4287,7 +4287,7 @@ static int nand_write_page_syndrome(struct mtd_info *mtd,
 			oob += chip->ecc.prepad;
 		}
 
-		chip->ecc.calculate(mtd, p, oob);
+		chip->ecc.calculate(chip, p, oob);
 
 		ret = nand_write_data_op(chip, oob, eccbytes, false);
 		if (ret)
diff --git a/drivers/mtd/nand/raw/nand_bch.c b/drivers/mtd/nand/raw/nand_bch.c
index b7387ace567a..9e3c2da0f3b1 100644
--- a/drivers/mtd/nand/raw/nand_bch.c
+++ b/drivers/mtd/nand/raw/nand_bch.c
@@ -43,14 +43,13 @@ struct nand_bch_control {
 
 /**
  * nand_bch_calculate_ecc - [NAND Interface] Calculate ECC for data block
- * @mtd:	MTD block structure
+ * @chip:	NAND chip object
  * @buf:	input buffer with raw data
  * @code:	output buffer with ECC
  */
-int nand_bch_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
+int nand_bch_calculate_ecc(struct nand_chip *chip, const unsigned char *buf,
 			   unsigned char *code)
 {
-	const struct nand_chip *chip = mtd_to_nand(mtd);
 	struct nand_bch_control *nbc = chip->ecc.priv;
 	unsigned int i;
 
diff --git a/drivers/mtd/nand/raw/nand_ecc.c b/drivers/mtd/nand/raw/nand_ecc.c
index 8e132edbc5ce..1dbfcaecf8c5 100644
--- a/drivers/mtd/nand/raw/nand_ecc.c
+++ b/drivers/mtd/nand/raw/nand_ecc.c
@@ -398,11 +398,10 @@ EXPORT_SYMBOL(__nand_calculate_ecc);
  * @buf:	input buffer with raw data
  * @code:	output buffer with ECC
  */
-int nand_calculate_ecc(struct mtd_info *mtd, const unsigned char *buf,
+int nand_calculate_ecc(struct nand_chip *chip, const unsigned char *buf,
 		       unsigned char *code)
 {
-	__nand_calculate_ecc(buf,
-			mtd_to_nand(mtd)->ecc.size, code);
+	__nand_calculate_ecc(buf, chip->ecc.size, code);
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index f9648d87b2e7..9241cfaab5ac 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -92,10 +92,9 @@ static void ndfc_enable_hwecc(struct nand_chip *chip, int mode)
 	wmb();
 }
 
-static int ndfc_calculate_ecc(struct mtd_info *mtd,
+static int ndfc_calculate_ecc(struct nand_chip *chip,
 			      const u_char *dat, u_char *ecc_code)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct ndfc_controller *ndfc = nand_get_controller_data(chip);
 	uint32_t ecc;
 	uint8_t *p = (uint8_t *)&ecc;
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index bba403b4e262..adc300b6d243 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -900,7 +900,7 @@ static int omap_correct_data(struct mtd_info *mtd, u_char *dat,
 
 /**
  * omap_calcuate_ecc - Generate non-inverted ECC bytes.
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @dat: The pointer to data on which ecc is computed
  * @ecc_code: The ecc_code buffer
  *
@@ -910,10 +910,10 @@ static int omap_correct_data(struct mtd_info *mtd, u_char *dat,
  * an erased page will produce an ECC mismatch between generated and read
  * ECC bytes that has to be dealt with separately.
  */
-static int omap_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
-				u_char *ecc_code)
+static int omap_calculate_ecc(struct nand_chip *chip, const u_char *dat,
+			      u_char *ecc_code)
 {
-	struct omap_nand_info *info = mtd_to_omap(mtd);
+	struct omap_nand_info *info = mtd_to_omap(nand_to_mtd(chip));
 	u32 val;
 
 	val = readl(info->reg.gpmc_ecc_config);
@@ -1255,7 +1255,7 @@ static int _omap_calculate_ecc_bch(struct mtd_info *mtd,
 
 /**
  * omap_calculate_ecc_bch_sw - ECC generator for sector for SW based correction
- * @mtd:	MTD device structure
+ * @chip:	NAND chip object
  * @dat:	The pointer to data on which ecc is computed
  * @ecc_code:	The ecc_code buffer
  *
@@ -1263,10 +1263,10 @@ static int _omap_calculate_ecc_bch(struct mtd_info *mtd,
  * when SW based correction is required as ECC is required for one sector
  * at a time.
  */
-static int omap_calculate_ecc_bch_sw(struct mtd_info *mtd,
+static int omap_calculate_ecc_bch_sw(struct nand_chip *chip,
 				     const u_char *dat, u_char *ecc_calc)
 {
-	return _omap_calculate_ecc_bch(mtd, dat, ecc_calc, 0);
+	return _omap_calculate_ecc_bch(nand_to_mtd(chip), dat, ecc_calc, 0);
 }
 
 /**
diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c
index b5e0cc611b14..f58d633ec062 100644
--- a/drivers/mtd/nand/raw/r852.c
+++ b/drivers/mtd/nand/raw/r852.c
@@ -433,10 +433,10 @@ static void r852_ecc_hwctl(struct nand_chip *chip, int mode)
  * Calculate ECC, only used for writes
  */
 
-static int r852_ecc_calculate(struct mtd_info *mtd, const uint8_t *dat,
-							uint8_t *ecc_code)
+static int r852_ecc_calculate(struct nand_chip *chip, const uint8_t *dat,
+			      uint8_t *ecc_code)
 {
-	struct r852_device *dev = r852_get_dev(mtd);
+	struct r852_device *dev = r852_get_dev(nand_to_mtd(chip));
 	struct sm_oob *oob = (struct sm_oob *)ecc_code;
 	uint32_t ecc1, ecc2;
 
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index ca2d006cc846..c94e1f62362f 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -623,9 +623,10 @@ static void s3c2440_nand_enable_hwecc(struct nand_chip *chip, int mode)
 	writel(ctrl | S3C2440_NFCONT_INITECC, info->regs + S3C2440_NFCONT);
 }
 
-static int s3c2410_nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
-				      u_char *ecc_code)
+static int s3c2410_nand_calculate_ecc(struct nand_chip *chip,
+				      const u_char *dat, u_char *ecc_code)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
 
 	ecc_code[0] = readb(info->regs + S3C2410_NFECC + 0);
@@ -637,9 +638,10 @@ static int s3c2410_nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
 	return 0;
 }
 
-static int s3c2412_nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
-				      u_char *ecc_code)
+static int s3c2412_nand_calculate_ecc(struct nand_chip *chip,
+				      const u_char *dat, u_char *ecc_code)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
 	unsigned long ecc = readl(info->regs + S3C2412_NFMECC0);
 
@@ -652,9 +654,10 @@ static int s3c2412_nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
 	return 0;
 }
 
-static int s3c2440_nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
-				      u_char *ecc_code)
+static int s3c2440_nand_calculate_ecc(struct nand_chip *chip,
+				      const u_char *dat, u_char *ecc_code)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
 	unsigned long ecc = readl(info->regs + S3C2440_NFMECC0);
 
diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c
index 37fdaad82f37..4d931ce71af5 100644
--- a/drivers/mtd/nand/raw/sharpsl.c
+++ b/drivers/mtd/nand/raw/sharpsl.c
@@ -91,9 +91,10 @@ static void sharpsl_nand_enable_hwecc(struct nand_chip *chip, int mode)
 	writeb(0, sharpsl->io + ECCCLRR);
 }
 
-static int sharpsl_nand_calculate_ecc(struct mtd_info *mtd, const u_char * dat, u_char * ecc_code)
+static int sharpsl_nand_calculate_ecc(struct nand_chip *chip,
+				      const u_char * dat, u_char * ecc_code)
 {
-	struct sharpsl_nand *sharpsl = mtd_to_sharpsl(mtd);
+	struct sharpsl_nand *sharpsl = mtd_to_sharpsl(nand_to_mtd(chip));
 	ecc_code[0] = ~readb(sharpsl->io + ECCLPUB);
 	ecc_code[1] = ~readb(sharpsl->io + ECCLPLB);
 	ecc_code[2] = (~readb(sharpsl->io + ECCCP) << 2) | 0x03;
diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index 2578216ff5c0..03d6428589c8 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -268,10 +268,10 @@ static void tmio_nand_enable_hwecc(struct nand_chip *chip, int mode)
 	tmio_iowrite8(FCR_MODE_HWECC_CALC, tmio->fcr + FCR_MODE);
 }
 
-static int tmio_nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
-							u_char *ecc_code)
+static int tmio_nand_calculate_ecc(struct nand_chip *chip, const u_char *dat,
+				   u_char *ecc_code)
 {
-	struct tmio_nand *tmio = mtd_to_tmio(mtd);
+	struct tmio_nand *tmio = mtd_to_tmio(nand_to_mtd(chip));
 	unsigned int ecc;
 
 	tmio_iowrite8(FCR_MODE_HWECC_RESULT, tmio->fcr + FCR_MODE);
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index fea5bc684aa1..55a5c4d42a81 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -170,11 +170,10 @@ static int txx9ndfmc_dev_ready(struct mtd_info *mtd)
 	return !(txx9ndfmc_read(dev, TXX9_NDFSR) & TXX9_NDFSR_BUSY);
 }
 
-static int txx9ndfmc_calculate_ecc(struct mtd_info *mtd, const uint8_t *dat,
+static int txx9ndfmc_calculate_ecc(struct nand_chip *chip, const uint8_t *dat,
 				   uint8_t *ecc_code)
 {
-	struct platform_device *dev = mtd_to_platdev(mtd);
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct platform_device *dev = mtd_to_platdev(nand_to_mtd(chip));
 	int eccbytes;
 	u32 mcr = txx9ndfmc_read(dev, TXX9_NDFMCR);
 
diff --git a/include/linux/mtd/nand_bch.h b/include/linux/mtd/nand_bch.h
index 98f20ef05d60..6db133508960 100644
--- a/include/linux/mtd/nand_bch.h
+++ b/include/linux/mtd/nand_bch.h
@@ -12,6 +12,7 @@
 #define __MTD_NAND_BCH_H__
 
 struct mtd_info;
+struct nand_chip;
 struct nand_bch_control;
 
 #if defined(CONFIG_MTD_NAND_ECC_BCH)
@@ -21,7 +22,7 @@ static inline int mtd_nand_has_bch(void) { return 1; }
 /*
  * Calculate BCH ecc code
  */
-int nand_bch_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
+int nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat,
 			   u_char *ecc_code);
 
 /*
@@ -43,7 +44,7 @@ void nand_bch_free(struct nand_bch_control *nbc);
 static inline int mtd_nand_has_bch(void) { return 0; }
 
 static inline int
-nand_bch_calculate_ecc(struct mtd_info *mtd, const u_char *dat,
+nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat,
 		       u_char *ecc_code)
 {
 	return -1;
diff --git a/include/linux/mtd/nand_ecc.h b/include/linux/mtd/nand_ecc.h
index 8a2decf7462c..a514e62ff54f 100644
--- a/include/linux/mtd/nand_ecc.h
+++ b/include/linux/mtd/nand_ecc.h
@@ -14,6 +14,7 @@
 #define __MTD_NAND_ECC_H__
 
 struct mtd_info;
+struct nand_chip;
 
 /*
  * Calculate 3 byte ECC code for eccsize byte block
@@ -24,7 +25,8 @@ void __nand_calculate_ecc(const u_char *dat, unsigned int eccsize,
 /*
  * Calculate 3 byte ECC code for 256/512 byte block
  */
-int nand_calculate_ecc(struct mtd_info *mtd, const u_char *dat, u_char *ecc_code);
+int nand_calculate_ecc(struct nand_chip *chip, const u_char *dat,
+		       u_char *ecc_code);
 
 /*
  * Detect and correct a 1 bit error for eccsize byte block
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 029fef900f33..b2f51b2fb110 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -648,8 +648,8 @@ struct nand_ecc_ctrl {
 	u8 *calc_buf;
 	u8 *code_buf;
 	void (*hwctl)(struct nand_chip *chip, int mode);
-	int (*calculate)(struct mtd_info *mtd, const uint8_t *dat,
-			uint8_t *ecc_code);
+	int (*calculate)(struct nand_chip *chip, const uint8_t *dat,
+			 uint8_t *ecc_code);
 	int (*correct)(struct mtd_info *mtd, uint8_t *dat, uint8_t *read_ecc,
 			uint8_t *calc_ecc);
 	int (*read_page_raw)(struct mtd_info *mtd, struct nand_chip *chip,
-- 
cgit v1.2.3


From 00da2ea97229946450c8145d680db480b5da1f94 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:19 +0200
Subject: mtd: rawnand: Pass a nand_chip object to ecc->correct()

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one.

Now is ecc->correct()'s turn.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/davinci_nand.c |  9 ++++-----
 drivers/mtd/nand/raw/diskonchip.c   |  3 +--
 drivers/mtd/nand/raw/fsmc_nand.c    |  9 ++++-----
 drivers/mtd/nand/raw/jz4740_nand.c  |  6 +++---
 drivers/mtd/nand/raw/jz4780_nand.c  |  4 ++--
 drivers/mtd/nand/raw/lpc32xx_slc.c  |  2 +-
 drivers/mtd/nand/raw/nand_base.c    | 10 +++++-----
 drivers/mtd/nand/raw/nand_bch.c     |  5 ++---
 drivers/mtd/nand/raw/nand_ecc.c     |  7 +++----
 drivers/mtd/nand/raw/omap2.c        | 18 +++++++++---------
 drivers/mtd/nand/raw/r852.c         |  6 +++---
 drivers/mtd/nand/raw/s3c2410.c      |  3 ++-
 drivers/mtd/nand/raw/tmio_nand.c    |  5 +++--
 drivers/mtd/nand/raw/txx9ndfmc.c    |  6 +++---
 include/linux/mtd/nand_bch.h        |  6 +++---
 include/linux/mtd/nand_ecc.h        |  4 ++--
 include/linux/mtd/rawnand.h         |  4 ++--
 17 files changed, 52 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index af221e1c8a87..c80b6c6da4aa 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -185,10 +185,9 @@ static int nand_davinci_calculate_1bit(struct nand_chip *chip,
 	return 0;
 }
 
-static int nand_davinci_correct_1bit(struct mtd_info *mtd, u_char *dat,
+static int nand_davinci_correct_1bit(struct nand_chip *chip, u_char *dat,
 				     u_char *read_ecc, u_char *calc_ecc)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	uint32_t eccNand = read_ecc[0] | (read_ecc[1] << 8) |
 					  (read_ecc[2] << 16);
 	uint32_t eccCalc = calc_ecc[0] | (calc_ecc[1] << 8) |
@@ -303,11 +302,11 @@ static int nand_davinci_calculate_4bit(struct nand_chip *chip,
 /* Correct up to 4 bits in data we just read, using state left in the
  * hardware plus the ecc_code computed when it was first written.
  */
-static int nand_davinci_correct_4bit(struct mtd_info *mtd,
-		u_char *data, u_char *ecc_code, u_char *null)
+static int nand_davinci_correct_4bit(struct nand_chip *chip, u_char *data,
+				     u_char *ecc_code, u_char *null)
 {
 	int i;
-	struct davinci_nand_info *info = to_davinci_nand(mtd);
+	struct davinci_nand_info *info = to_davinci_nand(nand_to_mtd(chip));
 	unsigned short ecc10[8];
 	unsigned short *ecc16;
 	u32 syndrome[4];
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 942a5ee83fbd..142d21be874e 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -893,11 +893,10 @@ static int doc200x_calculate_ecc(struct nand_chip *this, const u_char *dat,
 	return 0;
 }
 
-static int doc200x_correct_data(struct mtd_info *mtd, u_char *dat,
+static int doc200x_correct_data(struct nand_chip *this, u_char *dat,
 				u_char *read_ecc, u_char *isnull)
 {
 	int i, ret = 0;
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 	uint8_t calc_ecc[6];
diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c
index d4e91465042c..b41fd09fa389 100644
--- a/drivers/mtd/nand/raw/fsmc_nand.c
+++ b/drivers/mtd/nand/raw/fsmc_nand.c
@@ -769,7 +769,7 @@ static int fsmc_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
 		memcpy(&ecc_code[i], oob, chip->ecc.bytes);
 		chip->ecc.calculate(chip, p, &ecc_calc[i]);
 
-		stat = chip->ecc.correct(mtd, p, &ecc_code[i], &ecc_calc[i]);
+		stat = chip->ecc.correct(chip, p, &ecc_code[i], &ecc_calc[i]);
 		if (stat < 0) {
 			mtd->ecc_stats.failed++;
 		} else {
@@ -791,11 +791,10 @@ static int fsmc_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
  * calc_ecc is a 104 bit information containing maximum of 8 error
  * offset informations of 13 bits each in 512 bytes of read data.
  */
-static int fsmc_bch8_correct_data(struct mtd_info *mtd, uint8_t *dat,
-			     uint8_t *read_ecc, uint8_t *calc_ecc)
+static int fsmc_bch8_correct_data(struct nand_chip *chip, uint8_t *dat,
+				  uint8_t *read_ecc, uint8_t *calc_ecc)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	struct fsmc_nand_data *host = mtd_to_fsmc(mtd);
+	struct fsmc_nand_data *host = mtd_to_fsmc(nand_to_mtd(chip));
 	uint32_t err_idx[8];
 	uint32_t num_err, i;
 	uint32_t ecc1, ecc2, ecc3, ecc4;
diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c
index 98ea5172ac74..e926ed6ed296 100644
--- a/drivers/mtd/nand/raw/jz4740_nand.c
+++ b/drivers/mtd/nand/raw/jz4740_nand.c
@@ -215,10 +215,10 @@ static void jz_nand_correct_data(uint8_t *dat, int index, int mask)
 	dat[index+1] = (data >> 8) & 0xff;
 }
 
-static int jz_nand_correct_ecc_rs(struct mtd_info *mtd, uint8_t *dat,
-	uint8_t *read_ecc, uint8_t *calc_ecc)
+static int jz_nand_correct_ecc_rs(struct nand_chip *chip, uint8_t *dat,
+				  uint8_t *read_ecc, uint8_t *calc_ecc)
 {
-	struct jz_nand *nand = mtd_to_jz_nand(mtd);
+	struct jz_nand *nand = mtd_to_jz_nand(nand_to_mtd(chip));
 	int i, error_count, index;
 	uint32_t reg, status, error;
 	unsigned int timeout = 1000;
diff --git a/drivers/mtd/nand/raw/jz4780_nand.c b/drivers/mtd/nand/raw/jz4780_nand.c
index e53a2bdfc263..42c5dcdea4a9 100644
--- a/drivers/mtd/nand/raw/jz4780_nand.c
+++ b/drivers/mtd/nand/raw/jz4780_nand.c
@@ -144,10 +144,10 @@ static int jz4780_nand_ecc_calculate(struct nand_chip *chip, const u8 *dat,
 	return jz4780_bch_calculate(nfc->bch, &params, dat, ecc_code);
 }
 
-static int jz4780_nand_ecc_correct(struct mtd_info *mtd, u8 *dat,
+static int jz4780_nand_ecc_correct(struct nand_chip *chip, u8 *dat,
 				   u8 *read_ecc, u8 *calc_ecc)
 {
-	struct jz4780_nand_chip *nand = to_jz4780_nand_chip(mtd);
+	struct jz4780_nand_chip *nand = to_jz4780_nand_chip(nand_to_mtd(chip));
 	struct jz4780_nand_controller *nfc = to_jz4780_nand_controller(nand->chip.controller);
 	struct jz4780_bch_params params;
 
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index c35a61c453da..d5cb1b40a235 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -639,7 +639,7 @@ static int lpc32xx_nand_read_page_syndrome(struct mtd_info *mtd,
 	oobecc = chip->oob_poi + oobregion.offset;
 
 	for (i = 0; i < chip->ecc.steps; i++) {
-		stat = chip->ecc.correct(mtd, buf, oobecc,
+		stat = chip->ecc.correct(chip, buf, oobecc,
 					 &tmpecc[i * chip->ecc.bytes]);
 		if (stat < 0)
 			mtd->ecc_stats.failed++;
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index f147b7948e64..0444bd23c84b 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -3123,7 +3123,7 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
 	for (i = 0 ; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
 		int stat;
 
-		stat = chip->ecc.correct(mtd, p, &ecc_code[i], &ecc_calc[i]);
+		stat = chip->ecc.correct(chip, p, &ecc_code[i], &ecc_calc[i]);
 		if (stat < 0) {
 			mtd->ecc_stats.failed++;
 		} else {
@@ -3224,7 +3224,7 @@ static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip,
 	for (i = 0; i < eccfrag_len ; i += chip->ecc.bytes, p += chip->ecc.size) {
 		int stat;
 
-		stat = chip->ecc.correct(mtd, p, &chip->ecc.code_buf[i],
+		stat = chip->ecc.correct(chip, p, &chip->ecc.code_buf[i],
 					 &chip->ecc.calc_buf[i]);
 		if (stat == -EBADMSG &&
 		    (chip->ecc.options & NAND_ECC_GENERIC_ERASED_CHECK)) {
@@ -3296,7 +3296,7 @@ static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
 	for (i = 0 ; eccsteps; eccsteps--, i += eccbytes, p += eccsize) {
 		int stat;
 
-		stat = chip->ecc.correct(mtd, p, &ecc_code[i], &ecc_calc[i]);
+		stat = chip->ecc.correct(chip, p, &ecc_code[i], &ecc_calc[i]);
 		if (stat == -EBADMSG &&
 		    (chip->ecc.options & NAND_ECC_GENERIC_ERASED_CHECK)) {
 			/* check for empty pages with bitflips */
@@ -3366,7 +3366,7 @@ static int nand_read_page_hwecc_oob_first(struct mtd_info *mtd,
 
 		chip->ecc.calculate(chip, p, &ecc_calc[i]);
 
-		stat = chip->ecc.correct(mtd, p, &ecc_code[i], NULL);
+		stat = chip->ecc.correct(chip, p, &ecc_code[i], NULL);
 		if (stat == -EBADMSG &&
 		    (chip->ecc.options & NAND_ECC_GENERIC_ERASED_CHECK)) {
 			/* check for empty pages with bitflips */
@@ -3436,7 +3436,7 @@ static int nand_read_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
 		if (ret)
 			return ret;
 
-		stat = chip->ecc.correct(mtd, p, oob, NULL);
+		stat = chip->ecc.correct(chip, p, oob, NULL);
 
 		oob += eccbytes;
 
diff --git a/drivers/mtd/nand/raw/nand_bch.c b/drivers/mtd/nand/raw/nand_bch.c
index 9e3c2da0f3b1..574c0ca16160 100644
--- a/drivers/mtd/nand/raw/nand_bch.c
+++ b/drivers/mtd/nand/raw/nand_bch.c
@@ -66,17 +66,16 @@ EXPORT_SYMBOL(nand_bch_calculate_ecc);
 
 /**
  * nand_bch_correct_data - [NAND Interface] Detect and correct bit error(s)
- * @mtd:	MTD block structure
+ * @chip:	NAND chip object
  * @buf:	raw data read from the chip
  * @read_ecc:	ECC from the chip
  * @calc_ecc:	the ECC calculated from raw data
  *
  * Detect and correct bit errors for a data byte block
  */
-int nand_bch_correct_data(struct mtd_info *mtd, unsigned char *buf,
+int nand_bch_correct_data(struct nand_chip *chip, unsigned char *buf,
 			  unsigned char *read_ecc, unsigned char *calc_ecc)
 {
-	const struct nand_chip *chip = mtd_to_nand(mtd);
 	struct nand_bch_control *nbc = chip->ecc.priv;
 	unsigned int *errloc = nbc->errloc;
 	int i, count;
diff --git a/drivers/mtd/nand/raw/nand_ecc.c b/drivers/mtd/nand/raw/nand_ecc.c
index 1dbfcaecf8c5..8f86eed40b70 100644
--- a/drivers/mtd/nand/raw/nand_ecc.c
+++ b/drivers/mtd/nand/raw/nand_ecc.c
@@ -490,18 +490,17 @@ EXPORT_SYMBOL(__nand_correct_data);
 
 /**
  * nand_correct_data - [NAND Interface] Detect and correct bit error(s)
- * @mtd:	MTD block structure
+ * @chip:	NAND chip object
  * @buf:	raw data read from the chip
  * @read_ecc:	ECC from the chip
  * @calc_ecc:	the ECC calculated from raw data
  *
  * Detect and correct a 1 bit error for 256/512 byte block
  */
-int nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
+int nand_correct_data(struct nand_chip *chip, unsigned char *buf,
 		      unsigned char *read_ecc, unsigned char *calc_ecc)
 {
-	return __nand_correct_data(buf, read_ecc, calc_ecc,
-				   mtd_to_nand(mtd)->ecc.size);
+	return __nand_correct_data(buf, read_ecc, calc_ecc, chip->ecc.size);
 }
 EXPORT_SYMBOL(nand_correct_data);
 
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index adc300b6d243..4e0bc2da63fd 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -857,7 +857,7 @@ static int omap_compare_ecc(u8 *ecc_data1,	/* read from NAND memory */
 
 /**
  * omap_correct_data - Compares the ECC read with HW generated ECC
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @dat: page data
  * @read_ecc: ecc read from nand flash
  * @calc_ecc: ecc read from HW ECC registers
@@ -869,10 +869,10 @@ static int omap_compare_ecc(u8 *ecc_data1,	/* read from NAND memory */
  * corrected errors is returned. If uncorrectable errors exist, %-1 is
  * returned.
  */
-static int omap_correct_data(struct mtd_info *mtd, u_char *dat,
-				u_char *read_ecc, u_char *calc_ecc)
+static int omap_correct_data(struct nand_chip *chip, u_char *dat,
+			     u_char *read_ecc, u_char *calc_ecc)
 {
-	struct omap_nand_info *info = mtd_to_omap(mtd);
+	struct omap_nand_info *info = mtd_to_omap(nand_to_mtd(chip));
 	int blockCnt = 0, i = 0, ret = 0;
 	int stat = 0;
 
@@ -1338,7 +1338,7 @@ static int erased_sector_bitflips(u_char *data, u_char *oob,
 
 /**
  * omap_elm_correct_data - corrects page data area in case error reported
- * @mtd:	MTD device structure
+ * @chip:	NAND chip object
  * @data:	page data
  * @read_ecc:	ecc read from nand flash
  * @calc_ecc:	ecc read from HW ECC registers
@@ -1347,10 +1347,10 @@ static int erased_sector_bitflips(u_char *data, u_char *oob,
  * In case of non-zero ecc vector, first filter out erased-pages, and
  * then process data via ELM to detect bit-flips.
  */
-static int omap_elm_correct_data(struct mtd_info *mtd, u_char *data,
-				u_char *read_ecc, u_char *calc_ecc)
+static int omap_elm_correct_data(struct nand_chip *chip, u_char *data,
+				 u_char *read_ecc, u_char *calc_ecc)
 {
-	struct omap_nand_info *info = mtd_to_omap(mtd);
+	struct omap_nand_info *info = mtd_to_omap(nand_to_mtd(chip));
 	struct nand_ecc_ctrl *ecc = &info->nand.ecc;
 	int eccsteps = info->nand.ecc.steps;
 	int i , j, stat = 0;
@@ -1659,7 +1659,7 @@ static int omap_read_page_bch(struct mtd_info *mtd, struct nand_chip *chip,
 	if (ret)
 		return ret;
 
-	stat = chip->ecc.correct(mtd, buf, ecc_code, ecc_calc);
+	stat = chip->ecc.correct(chip, buf, ecc_code, ecc_calc);
 
 	if (stat < 0) {
 		mtd->ecc_stats.failed++;
diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c
index f58d633ec062..7673aa140009 100644
--- a/drivers/mtd/nand/raw/r852.c
+++ b/drivers/mtd/nand/raw/r852.c
@@ -465,14 +465,14 @@ static int r852_ecc_calculate(struct nand_chip *chip, const uint8_t *dat,
  * Correct the data using ECC, hw did almost everything for us
  */
 
-static int r852_ecc_correct(struct mtd_info *mtd, uint8_t *dat,
-				uint8_t *read_ecc, uint8_t *calc_ecc)
+static int r852_ecc_correct(struct nand_chip *chip, uint8_t *dat,
+			    uint8_t *read_ecc, uint8_t *calc_ecc)
 {
 	uint32_t ecc_reg;
 	uint8_t ecc_status, err_byte;
 	int i, error = 0;
 
-	struct r852_device *dev = r852_get_dev(mtd);
+	struct r852_device *dev = r852_get_dev(nand_to_mtd(chip));
 
 	if (dev->card_unstable)
 		return 0;
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index c94e1f62362f..d57201d118d8 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -512,9 +512,10 @@ static int s3c2412_nand_devready(struct mtd_info *mtd)
 
 /* ECC handling functions */
 
-static int s3c2410_nand_correct_data(struct mtd_info *mtd, u_char *dat,
+static int s3c2410_nand_correct_data(struct nand_chip *chip, u_char *dat,
 				     u_char *read_ecc, u_char *calc_ecc)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
 	unsigned int diff0, diff1, diff2;
 	unsigned int bit, byte;
diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index 03d6428589c8..734ff29705ce 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -290,8 +290,9 @@ static int tmio_nand_calculate_ecc(struct nand_chip *chip, const u_char *dat,
 	return 0;
 }
 
-static int tmio_nand_correct_data(struct mtd_info *mtd, unsigned char *buf,
-		unsigned char *read_ecc, unsigned char *calc_ecc)
+static int tmio_nand_correct_data(struct nand_chip *chip, unsigned char *buf,
+				  unsigned char *read_ecc,
+				  unsigned char *calc_ecc)
 {
 	int r0, r1;
 
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index 55a5c4d42a81..3c69d834de62 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -190,10 +190,10 @@ static int txx9ndfmc_calculate_ecc(struct nand_chip *chip, const uint8_t *dat,
 	return 0;
 }
 
-static int txx9ndfmc_correct_data(struct mtd_info *mtd, unsigned char *buf,
-		unsigned char *read_ecc, unsigned char *calc_ecc)
+static int txx9ndfmc_correct_data(struct nand_chip *chip, unsigned char *buf,
+				  unsigned char *read_ecc,
+				  unsigned char *calc_ecc)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	int eccsize;
 	int corrected = 0;
 	int stat;
diff --git a/include/linux/mtd/nand_bch.h b/include/linux/mtd/nand_bch.h
index 6db133508960..b8106651f807 100644
--- a/include/linux/mtd/nand_bch.h
+++ b/include/linux/mtd/nand_bch.h
@@ -28,8 +28,8 @@ int nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat,
 /*
  * Detect and correct bit errors
  */
-int nand_bch_correct_data(struct mtd_info *mtd, u_char *dat, u_char *read_ecc,
-			  u_char *calc_ecc);
+int nand_bch_correct_data(struct nand_chip *chip, u_char *dat,
+			  u_char *read_ecc, u_char *calc_ecc);
 /*
  * Initialize BCH encoder/decoder
  */
@@ -51,7 +51,7 @@ nand_bch_calculate_ecc(struct nand_chip *chip, const u_char *dat,
 }
 
 static inline int
-nand_bch_correct_data(struct mtd_info *mtd, unsigned char *buf,
+nand_bch_correct_data(struct nand_chip *chip, unsigned char *buf,
 		      unsigned char *read_ecc, unsigned char *calc_ecc)
 {
 	return -ENOTSUPP;
diff --git a/include/linux/mtd/nand_ecc.h b/include/linux/mtd/nand_ecc.h
index a514e62ff54f..b81fecd5e719 100644
--- a/include/linux/mtd/nand_ecc.h
+++ b/include/linux/mtd/nand_ecc.h
@@ -13,7 +13,6 @@
 #ifndef __MTD_NAND_ECC_H__
 #define __MTD_NAND_ECC_H__
 
-struct mtd_info;
 struct nand_chip;
 
 /*
@@ -37,6 +36,7 @@ int __nand_correct_data(u_char *dat, u_char *read_ecc, u_char *calc_ecc,
 /*
  * Detect and correct a 1 bit error for 256/512 byte block
  */
-int nand_correct_data(struct mtd_info *mtd, u_char *dat, u_char *read_ecc, u_char *calc_ecc);
+int nand_correct_data(struct nand_chip *chip, u_char *dat, u_char *read_ecc,
+		      u_char *calc_ecc);
 
 #endif /* __MTD_NAND_ECC_H__ */
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index b2f51b2fb110..24434310d126 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -650,8 +650,8 @@ struct nand_ecc_ctrl {
 	void (*hwctl)(struct nand_chip *chip, int mode);
 	int (*calculate)(struct nand_chip *chip, const uint8_t *dat,
 			 uint8_t *ecc_code);
-	int (*correct)(struct mtd_info *mtd, uint8_t *dat, uint8_t *read_ecc,
-			uint8_t *calc_ecc);
+	int (*correct)(struct nand_chip *chip, uint8_t *dat, uint8_t *read_ecc,
+		       uint8_t *calc_ecc);
 	int (*read_page_raw)(struct mtd_info *mtd, struct nand_chip *chip,
 			uint8_t *buf, int oob_required, int page);
 	int (*write_page_raw)(struct mtd_info *mtd, struct nand_chip *chip,
-- 
cgit v1.2.3


From b976168757f7f4adf05215884e8557aaa9f6792c Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:20 +0200
Subject: mtd: rawnand: Pass a nand_chip object to ecc->read_xxx() hooks

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one or
remove the mtd_info object when both are passed.

Let's tackle all ecc->read_xxx() hooks at once.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Acked-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/atmel/nand-controller.c  | 12 ++---
 drivers/mtd/nand/raw/brcmnand/brcmnand.c      | 21 ++++----
 drivers/mtd/nand/raw/cafe_nand.c              | 10 ++--
 drivers/mtd/nand/raw/denali.c                 | 17 +++---
 drivers/mtd/nand/raw/docg4.c                  | 20 ++++----
 drivers/mtd/nand/raw/fsl_elbc_nand.c          |  5 +-
 drivers/mtd/nand/raw/fsl_ifc_nand.c           |  5 +-
 drivers/mtd/nand/raw/fsmc_nand.c              |  6 +--
 drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c    | 23 ++++-----
 drivers/mtd/nand/raw/hisi504_nand.c           |  9 ++--
 drivers/mtd/nand/raw/lpc32xx_mlc.c            | 10 ++--
 drivers/mtd/nand/raw/lpc32xx_slc.c            | 14 ++---
 drivers/mtd/nand/raw/marvell_nand.c           | 30 +++++------
 drivers/mtd/nand/raw/mtk_nand.c               | 23 +++++----
 drivers/mtd/nand/raw/mxc_nand.c               | 11 ++--
 drivers/mtd/nand/raw/nand_base.c              | 74 +++++++++++++--------------
 drivers/mtd/nand/raw/nand_micron.c            |  6 +--
 drivers/mtd/nand/raw/nand_toshiba.c           | 10 ++--
 drivers/mtd/nand/raw/omap2.c                  |  6 +--
 drivers/mtd/nand/raw/qcom_nandc.c             | 11 ++--
 drivers/mtd/nand/raw/r852.c                   |  5 +-
 drivers/mtd/nand/raw/sh_flctl.c               |  6 ++-
 drivers/mtd/nand/raw/sunxi_nand.c             | 26 +++++-----
 drivers/mtd/nand/raw/tango_nand.c             | 16 +++---
 drivers/mtd/nand/raw/tegra_nand.c             | 15 +++---
 drivers/mtd/nand/raw/vf610_nfc.c              | 18 +++----
 drivers/staging/mt29f_spinand/mt29f_spinand.c |  5 +-
 include/linux/mtd/rawnand.h                   | 30 +++++------
 28 files changed, 221 insertions(+), 223 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index cef22a79f3a6..45061b591346 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -895,15 +895,13 @@ static int atmel_nand_pmecc_read_pg(struct nand_chip *chip, u8 *buf,
 	return ret;
 }
 
-static int atmel_nand_pmecc_read_page(struct mtd_info *mtd,
-				      struct nand_chip *chip, u8 *buf,
+static int atmel_nand_pmecc_read_page(struct nand_chip *chip, u8 *buf,
 				      int oob_required, int page)
 {
 	return atmel_nand_pmecc_read_pg(chip, buf, oob_required, page, false);
 }
 
-static int atmel_nand_pmecc_read_page_raw(struct mtd_info *mtd,
-					  struct nand_chip *chip, u8 *buf,
+static int atmel_nand_pmecc_read_page_raw(struct nand_chip *chip, u8 *buf,
 					  int oob_required, int page)
 {
 	return atmel_nand_pmecc_read_pg(chip, buf, oob_required, page, true);
@@ -1037,16 +1035,14 @@ static int atmel_hsmc_nand_pmecc_read_pg(struct nand_chip *chip, u8 *buf,
 	return ret;
 }
 
-static int atmel_hsmc_nand_pmecc_read_page(struct mtd_info *mtd,
-					   struct nand_chip *chip, u8 *buf,
+static int atmel_hsmc_nand_pmecc_read_page(struct nand_chip *chip, u8 *buf,
 					   int oob_required, int page)
 {
 	return atmel_hsmc_nand_pmecc_read_pg(chip, buf, oob_required, page,
 					     false);
 }
 
-static int atmel_hsmc_nand_pmecc_read_page_raw(struct mtd_info *mtd,
-					       struct nand_chip *chip,
+static int atmel_hsmc_nand_pmecc_read_page_raw(struct nand_chip *chip,
 					       u8 *buf, int oob_required,
 					       int page)
 {
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index 19e6e918f896..a17ae692aee9 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -1689,7 +1689,7 @@ static int brcmstb_nand_verify_erased_page(struct mtd_info *mtd,
 	sas = mtd->oobsize / chip->ecc.steps;
 
 	/* read without ecc for verification */
-	ret = chip->ecc.read_page_raw(mtd, chip, buf, true, page);
+	ret = chip->ecc.read_page_raw(chip, buf, true, page);
 	if (ret)
 		return ret;
 
@@ -1786,9 +1786,10 @@ try_dmaread:
 	return 0;
 }
 
-static int brcmnand_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-			      uint8_t *buf, int oob_required, int page)
+static int brcmnand_read_page(struct nand_chip *chip, uint8_t *buf,
+			      int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct brcmnand_host *host = nand_get_controller_data(chip);
 	u8 *oob = oob_required ? (u8 *)chip->oob_poi : NULL;
 
@@ -1798,10 +1799,11 @@ static int brcmnand_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 			mtd->writesize >> FC_SHIFT, (u32 *)buf, oob);
 }
 
-static int brcmnand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				  uint8_t *buf, int oob_required, int page)
+static int brcmnand_read_page_raw(struct nand_chip *chip, uint8_t *buf,
+				  int oob_required, int page)
 {
 	struct brcmnand_host *host = nand_get_controller_data(chip);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	u8 *oob = oob_required ? (u8 *)chip->oob_poi : NULL;
 	int ret;
 
@@ -1814,17 +1816,18 @@ static int brcmnand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 	return ret;
 }
 
-static int brcmnand_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			     int page)
+static int brcmnand_read_oob(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	return brcmnand_read(mtd, chip, (u64)page << chip->page_shift,
 			mtd->writesize >> FC_SHIFT,
 			NULL, (u8 *)chip->oob_poi);
 }
 
-static int brcmnand_read_oob_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				 int page)
+static int brcmnand_read_oob_raw(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct brcmnand_host *host = nand_get_controller_data(chip);
 
 	brcmnand_set_ecc_enabled(host, 0);
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index 94e5f7a56084..c6071d71cc1b 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -354,9 +354,10 @@ static int cafe_nand_write_oob(struct mtd_info *mtd,
 }
 
 /* Don't use -- use nand_read_oob_std for now */
-static int cafe_nand_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			      int page)
+static int cafe_nand_read_oob(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	return nand_read_oob_op(chip, page, 0, chip->oob_poi, mtd->oobsize);
 }
 /**
@@ -369,9 +370,10 @@ static int cafe_nand_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
  * The hw generator calculates the error syndrome automatically. Therefore
  * we need a special oob layout and handling.
  */
-static int cafe_nand_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-			       uint8_t *buf, int oob_required, int page)
+static int cafe_nand_read_page(struct nand_chip *chip, uint8_t *buf,
+			       int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct cafe_priv *cafe = nand_get_controller_data(chip);
 	unsigned int max_bitflips = 0;
 
diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index 958619fd4d1b..994921814d76 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -676,9 +676,10 @@ static void denali_oob_xfer(struct mtd_info *mtd, struct nand_chip *chip,
 					   false);
 }
 
-static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf, int oob_required, int page)
+static int denali_read_page_raw(struct nand_chip *chip, uint8_t *buf,
+				int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 	int writesize = mtd->writesize;
 	int oobsize = mtd->oobsize;
@@ -751,9 +752,10 @@ static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 	return 0;
 }
 
-static int denali_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			   int page)
+static int denali_read_oob(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	denali_oob_xfer(mtd, chip, page, 0);
 
 	return 0;
@@ -771,9 +773,10 @@ static int denali_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
 	return nand_prog_page_end_op(chip);
 }
 
-static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-			    uint8_t *buf, int oob_required, int page)
+static int denali_read_page(struct nand_chip *chip, uint8_t *buf,
+			    int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 	unsigned long uncor_ecc_flags = 0;
 	int stat = 0;
@@ -792,7 +795,7 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 		return stat;
 
 	if (uncor_ecc_flags) {
-		ret = denali_read_oob(mtd, chip, page);
+		ret = denali_read_oob(chip, page);
 		if (ret)
 			return ret;
 
diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c
index 2d86bc5a886d..ebaa479ffcb2 100644
--- a/drivers/mtd/nand/raw/docg4.c
+++ b/drivers/mtd/nand/raw/docg4.c
@@ -845,21 +845,21 @@ static int read_page(struct mtd_info *mtd, struct nand_chip *nand,
 }
 
 
-static int docg4_read_page_raw(struct mtd_info *mtd, struct nand_chip *nand,
-			       uint8_t *buf, int oob_required, int page)
+static int docg4_read_page_raw(struct nand_chip *nand, uint8_t *buf,
+			       int oob_required, int page)
 {
-	return read_page(mtd, nand, buf, page, false);
+	return read_page(nand_to_mtd(nand), nand, buf, page, false);
 }
 
-static int docg4_read_page(struct mtd_info *mtd, struct nand_chip *nand,
-			   uint8_t *buf, int oob_required, int page)
+static int docg4_read_page(struct nand_chip *nand, uint8_t *buf,
+			   int oob_required, int page)
 {
-	return read_page(mtd, nand, buf, page, true);
+	return read_page(nand_to_mtd(nand), nand, buf, page, true);
 }
 
-static int docg4_read_oob(struct mtd_info *mtd, struct nand_chip *nand,
-			  int page)
+static int docg4_read_oob(struct nand_chip *nand, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(nand);
 	struct docg4_priv *doc = nand_get_controller_data(nand);
 	void __iomem *docptr = doc->virtadr;
 	uint16_t status;
@@ -1059,7 +1059,7 @@ static int __init read_factory_bbt(struct mtd_info *mtd)
 		return -ENOMEM;
 
 	read_page_prologue(mtd, g4_addr);
-	docg4_read_page(mtd, nand, buf, 0, DOCG4_FACTORY_BBT_PAGE);
+	docg4_read_page(nand, buf, 0, DOCG4_FACTORY_BBT_PAGE);
 
 	/*
 	 * If no memory-based bbt was created, exit.  This will happen if module
@@ -1077,7 +1077,7 @@ static int __init read_factory_bbt(struct mtd_info *mtd)
 		 * It is stored redundantly, so we get another chance.
 		 */
 		eccfailed_stats = mtd->ecc_stats.failed;
-		docg4_read_page(mtd, nand, buf, 0, DOCG4_REDUNDANT_BBT_PAGE);
+		docg4_read_page(nand, buf, 0, DOCG4_REDUNDANT_BBT_PAGE);
 		if (mtd->ecc_stats.failed > eccfailed_stats) {
 			dev_warn(doc->dev,
 				 "The factory bbt could not be read!\n");
diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
index 22bcd64a66c8..26fcb8ea0c2e 100644
--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
@@ -710,9 +710,10 @@ static const struct nand_controller_ops fsl_elbc_controller_ops = {
 	.attach_chip = fsl_elbc_attach_chip,
 };
 
-static int fsl_elbc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-			      uint8_t *buf, int oob_required, int page)
+static int fsl_elbc_read_page(struct nand_chip *chip, uint8_t *buf,
+			      int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct fsl_elbc_mtd *priv = nand_get_controller_data(chip);
 	struct fsl_lbc_ctrl *ctrl = priv->ctrl;
 	struct fsl_elbc_fcm_ctrl *elbc_fcm_ctrl = ctrl->nand;
diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index 70bf8e1552a5..8c6016932aaa 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -679,9 +679,10 @@ static int check_erased_page(struct nand_chip *chip, u8 *buf)
 	return bitflips;
 }
 
-static int fsl_ifc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-			     uint8_t *buf, int oob_required, int page)
+static int fsl_ifc_read_page(struct nand_chip *chip, uint8_t *buf,
+			     int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct fsl_ifc_mtd *priv = nand_get_controller_data(chip);
 	struct fsl_ifc_ctrl *ctrl = priv->ctrl;
 	struct fsl_ifc_nand_ctrl *nctrl = ifc_nand_ctrl;
diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c
index b41fd09fa389..5fc036c89cc8 100644
--- a/drivers/mtd/nand/raw/fsmc_nand.c
+++ b/drivers/mtd/nand/raw/fsmc_nand.c
@@ -707,7 +707,6 @@ static int fsmc_exec_op(struct nand_chip *chip, const struct nand_operation *op,
 
 /*
  * fsmc_read_page_hwecc
- * @mtd:	mtd info structure
  * @chip:	nand chip info structure
  * @buf:	buffer to store read data
  * @oob_required:	caller expects OOB data read to chip->oob_poi
@@ -719,9 +718,10 @@ static int fsmc_exec_op(struct nand_chip *chip, const struct nand_operation *op,
  * After this read, fsmc hardware generates and reports error data bits(up to a
  * max of 8 bits)
  */
-static int fsmc_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
-				 uint8_t *buf, int oob_required, int page)
+static int fsmc_read_page_hwecc(struct nand_chip *chip, uint8_t *buf,
+				int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int i, j, s, stat, eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
 	int eccsteps = chip->ecc.steps;
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index fe99d9323d4a..5650ebf28903 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -1085,8 +1085,8 @@ static int gpmi_ecc_read_page_data(struct nand_chip *chip,
 	return max_bitflips;
 }
 
-static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-			      uint8_t *buf, int oob_required, int page)
+static int gpmi_ecc_read_page(struct nand_chip *chip, uint8_t *buf,
+			      int oob_required, int page)
 {
 	nand_read_page_op(chip, page, 0, NULL, 0);
 
@@ -1094,8 +1094,8 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 }
 
 /* Fake a virtual small page for the subpage read */
-static int gpmi_ecc_read_subpage(struct mtd_info *mtd, struct nand_chip *chip,
-			uint32_t offs, uint32_t len, uint8_t *buf, int page)
+static int gpmi_ecc_read_subpage(struct nand_chip *chip, uint32_t offs,
+				 uint32_t len, uint8_t *buf, int page)
 {
 	struct gpmi_nand_data *this = nand_get_controller_data(chip);
 	void __iomem *bch_regs = this->resources.bch_regs;
@@ -1130,7 +1130,7 @@ static int gpmi_ecc_read_subpage(struct mtd_info *mtd, struct nand_chip *chip,
 			dev_dbg(this->dev,
 				"page:%d, first:%d, last:%d, marker at:%d\n",
 				page, first, last, marker_pos);
-			return gpmi_ecc_read_page(mtd, chip, buf, 0, page);
+			return gpmi_ecc_read_page(chip, buf, 0, page);
 		}
 	}
 
@@ -1324,9 +1324,9 @@ exit_auxiliary:
  * ECC-based or raw view of the page is implicit in which function it calls
  * (there is a similar pair of ECC-based/raw functions for writing).
  */
-static int gpmi_ecc_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
-				int page)
+static int gpmi_ecc_read_oob(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct gpmi_nand_data *this = nand_get_controller_data(chip);
 
 	dev_dbg(this->dev, "page number is %d\n", page);
@@ -1380,10 +1380,10 @@ gpmi_ecc_write_oob(struct mtd_info *mtd, struct nand_chip *chip, int page)
  * See set_geometry_by_ecc_info inline comments to have a full description
  * of the layout used by the GPMI controller.
  */
-static int gpmi_ecc_read_page_raw(struct mtd_info *mtd,
-				  struct nand_chip *chip, uint8_t *buf,
+static int gpmi_ecc_read_page_raw(struct nand_chip *chip, uint8_t *buf,
 				  int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct gpmi_nand_data *this = nand_get_controller_data(chip);
 	struct bch_geometry *nfc_geo = &this->bch_geometry;
 	int eccsize = nfc_geo->ecc_chunk_size;
@@ -1536,10 +1536,9 @@ static int gpmi_ecc_write_page_raw(struct mtd_info *mtd,
 				 mtd->writesize + mtd->oobsize);
 }
 
-static int gpmi_ecc_read_oob_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				 int page)
+static int gpmi_ecc_read_oob_raw(struct nand_chip *chip, int page)
 {
-	return gpmi_ecc_read_page_raw(mtd, chip, NULL, 1, page);
+	return gpmi_ecc_read_page_raw(chip, NULL, 1, page);
 }
 
 static int gpmi_ecc_write_oob_raw(struct mtd_info *mtd, struct nand_chip *chip,
diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c
index 9106a1d60bca..f4078086c14c 100644
--- a/drivers/mtd/nand/raw/hisi504_nand.c
+++ b/drivers/mtd/nand/raw/hisi504_nand.c
@@ -528,9 +528,10 @@ static irqreturn_t hinfc_irq_handle(int irq, void *devid)
 	return IRQ_HANDLED;
 }
 
-static int hisi_nand_read_page_hwecc(struct mtd_info *mtd,
-	struct nand_chip *chip, uint8_t *buf, int oob_required, int page)
+static int hisi_nand_read_page_hwecc(struct nand_chip *chip, uint8_t *buf,
+				     int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct hinfc_host *host = nand_get_controller_data(chip);
 	int max_bitflips = 0, stat = 0, stat_max = 0, status_ecc;
 	int stat_1, stat_2;
@@ -560,9 +561,9 @@ static int hisi_nand_read_page_hwecc(struct mtd_info *mtd,
 	return max_bitflips;
 }
 
-static int hisi_nand_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
-				int page)
+static int hisi_nand_read_oob(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct hinfc_host *host = nand_get_controller_data(chip);
 
 	nand_read_oob_op(chip, page, 0, chip->oob_poi, mtd->oobsize);
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index 84e421710297..1849e9858d45 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -442,9 +442,10 @@ out1:
 	return -ENXIO;
 }
 
-static int lpc32xx_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-			     uint8_t *buf, int oob_required, int page)
+static int lpc32xx_read_page(struct nand_chip *chip, uint8_t *buf,
+			     int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct lpc32xx_nand_host *host = nand_get_controller_data(chip);
 	int i, j;
 	uint8_t *oobbuf = chip->oob_poi;
@@ -557,13 +558,12 @@ static int lpc32xx_write_page_lowlevel(struct mtd_info *mtd,
 	return nand_prog_page_end_op(chip);
 }
 
-static int lpc32xx_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			    int page)
+static int lpc32xx_read_oob(struct nand_chip *chip, int page)
 {
 	struct lpc32xx_nand_host *host = nand_get_controller_data(chip);
 
 	/* Read whole page - necessary with MLC controller! */
-	lpc32xx_read_page(mtd, chip, host->dummy_buf, 1, page);
+	lpc32xx_read_page(chip, host->dummy_buf, 1, page);
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index d5cb1b40a235..a9cb089923be 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -396,9 +396,10 @@ static void lpc32xx_nand_write_buf(struct mtd_info *mtd, const uint8_t *buf, int
 /*
  * Read the OOB data from the device without ECC using FIFO method
  */
-static int lpc32xx_nand_read_oob_syndrome(struct mtd_info *mtd,
-					  struct nand_chip *chip, int page)
+static int lpc32xx_nand_read_oob_syndrome(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	return nand_read_oob_op(chip, page, 0, chip->oob_poi, mtd->oobsize);
 }
 
@@ -610,10 +611,10 @@ static int lpc32xx_xfer(struct mtd_info *mtd, uint8_t *buf, int eccsubpages,
  * Read the data and OOB data from the device, use ECC correction with the
  * data, disable ECC for the OOB data
  */
-static int lpc32xx_nand_read_page_syndrome(struct mtd_info *mtd,
-					   struct nand_chip *chip, uint8_t *buf,
+static int lpc32xx_nand_read_page_syndrome(struct nand_chip *chip, uint8_t *buf,
 					   int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct lpc32xx_nand_host *host = nand_get_controller_data(chip);
 	struct mtd_oob_region oobregion = { };
 	int stat, i, status, error;
@@ -657,11 +658,12 @@ static int lpc32xx_nand_read_page_syndrome(struct mtd_info *mtd,
  * Read the data and OOB data from the device, no ECC correction with the
  * data or OOB data
  */
-static int lpc32xx_nand_read_page_raw_syndrome(struct mtd_info *mtd,
-					       struct nand_chip *chip,
+static int lpc32xx_nand_read_page_raw_syndrome(struct nand_chip *chip,
 					       uint8_t *buf, int oob_required,
 					       int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	/* Issue read command */
 	nand_read_page_op(chip, page, 0, NULL, 0);
 
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index 5a9836c5093c..a81018f3a2f4 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -1026,18 +1026,15 @@ static int marvell_nfc_hw_ecc_hmg_do_read_page(struct nand_chip *chip,
 	return ret;
 }
 
-static int marvell_nfc_hw_ecc_hmg_read_page_raw(struct mtd_info *mtd,
-						struct nand_chip *chip, u8 *buf,
+static int marvell_nfc_hw_ecc_hmg_read_page_raw(struct nand_chip *chip, u8 *buf,
 						int oob_required, int page)
 {
 	return marvell_nfc_hw_ecc_hmg_do_read_page(chip, buf, chip->oob_poi,
 						   true, page);
 }
 
-static int marvell_nfc_hw_ecc_hmg_read_page(struct mtd_info *mtd,
-					    struct nand_chip *chip,
-					    u8 *buf, int oob_required,
-					    int page)
+static int marvell_nfc_hw_ecc_hmg_read_page(struct nand_chip *chip, u8 *buf,
+					    int oob_required, int page)
 {
 	const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout;
 	unsigned int full_sz = lt->data_bytes + lt->spare_bytes + lt->ecc_bytes;
@@ -1075,8 +1072,7 @@ static int marvell_nfc_hw_ecc_hmg_read_page(struct mtd_info *mtd,
  * it appears before the ECC bytes when reading), the ->read_oob_raw() function
  * also stands for ->read_oob().
  */
-static int marvell_nfc_hw_ecc_hmg_read_oob_raw(struct mtd_info *mtd,
-					       struct nand_chip *chip, int page)
+static int marvell_nfc_hw_ecc_hmg_read_oob_raw(struct nand_chip *chip, int page)
 {
 	/* Invalidate page cache */
 	chip->pagebuf = -1;
@@ -1183,10 +1179,10 @@ static int marvell_nfc_hw_ecc_hmg_write_oob_raw(struct mtd_info *mtd,
 }
 
 /* BCH read helpers */
-static int marvell_nfc_hw_ecc_bch_read_page_raw(struct mtd_info *mtd,
-						struct nand_chip *chip, u8 *buf,
+static int marvell_nfc_hw_ecc_bch_read_page_raw(struct nand_chip *chip, u8 *buf,
 						int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout;
 	u8 *oob = chip->oob_poi;
 	int chunk_size = lt->data_bytes + lt->spare_bytes + lt->ecc_bytes;
@@ -1295,11 +1291,11 @@ static void marvell_nfc_hw_ecc_bch_read_chunk(struct nand_chip *chip, int chunk,
 	}
 }
 
-static int marvell_nfc_hw_ecc_bch_read_page(struct mtd_info *mtd,
-					    struct nand_chip *chip,
+static int marvell_nfc_hw_ecc_bch_read_page(struct nand_chip *chip,
 					    u8 *buf, int oob_required,
 					    int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout;
 	int data_len = lt->data_bytes, spare_len = lt->spare_bytes, ecc_len;
 	u8 *data = buf, *spare = chip->oob_poi, *ecc;
@@ -1392,22 +1388,20 @@ static int marvell_nfc_hw_ecc_bch_read_page(struct mtd_info *mtd,
 	return max_bitflips;
 }
 
-static int marvell_nfc_hw_ecc_bch_read_oob_raw(struct mtd_info *mtd,
-					       struct nand_chip *chip, int page)
+static int marvell_nfc_hw_ecc_bch_read_oob_raw(struct nand_chip *chip, int page)
 {
 	/* Invalidate page cache */
 	chip->pagebuf = -1;
 
-	return chip->ecc.read_page_raw(mtd, chip, chip->data_buf, true, page);
+	return chip->ecc.read_page_raw(chip, chip->data_buf, true, page);
 }
 
-static int marvell_nfc_hw_ecc_bch_read_oob(struct mtd_info *mtd,
-					   struct nand_chip *chip, int page)
+static int marvell_nfc_hw_ecc_bch_read_oob(struct nand_chip *chip, int page)
 {
 	/* Invalidate page cache */
 	chip->pagebuf = -1;
 
-	return chip->ecc.read_page(mtd, chip, chip->data_buf, true, page);
+	return chip->ecc.read_page(chip, chip->data_buf, true, page);
 }
 
 /* BCH write helpers */
diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c
index 46d447f148f1..32d5b59eb879 100644
--- a/drivers/mtd/nand/raw/mtk_nand.c
+++ b/drivers/mtd/nand/raw/mtk_nand.c
@@ -969,23 +969,25 @@ done:
 	return bitflips;
 }
 
-static int mtk_nfc_read_subpage_hwecc(struct mtd_info *mtd,
-				      struct nand_chip *chip, u32 off,
+static int mtk_nfc_read_subpage_hwecc(struct nand_chip *chip, u32 off,
 				      u32 len, u8 *p, int pg)
 {
-	return mtk_nfc_read_subpage(mtd, chip, off, len, p, pg, 0);
+	return mtk_nfc_read_subpage(nand_to_mtd(chip), chip, off, len, p, pg,
+				    0);
 }
 
-static int mtk_nfc_read_page_hwecc(struct mtd_info *mtd,
-				   struct nand_chip *chip, u8 *p,
-				   int oob_on, int pg)
+static int mtk_nfc_read_page_hwecc(struct nand_chip *chip, u8 *p, int oob_on,
+				   int pg)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	return mtk_nfc_read_subpage(mtd, chip, 0, mtd->writesize, p, pg, 0);
 }
 
-static int mtk_nfc_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				 u8 *buf, int oob_on, int page)
+static int mtk_nfc_read_page_raw(struct nand_chip *chip, u8 *buf, int oob_on,
+				 int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(chip);
 	struct mtk_nfc *nfc = nand_get_controller_data(chip);
 	struct mtk_nfc_fdm *fdm = &mtk_nand->fdm;
@@ -1011,10 +1013,9 @@ static int mtk_nfc_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 	return ret;
 }
 
-static int mtk_nfc_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip,
-				int page)
+static int mtk_nfc_read_oob_std(struct nand_chip *chip, int page)
 {
-	return mtk_nfc_read_page_raw(mtd, chip, NULL, 1, page);
+	return mtk_nfc_read_page_raw(chip, NULL, 1, page);
 }
 
 static inline void mtk_nfc_hw_init(struct mtk_nfc *nfc)
diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index 3c57e14e1c7c..35fcec595c3e 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -816,8 +816,8 @@ static int mxc_nand_read_page_v2_v3(struct nand_chip *chip, void *buf,
 	return max_bitflips;
 }
 
-static int mxc_nand_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-			      uint8_t *buf, int oob_required, int page)
+static int mxc_nand_read_page(struct nand_chip *chip, uint8_t *buf,
+			      int oob_required, int page)
 {
 	struct mxc_nand_host *host = nand_get_controller_data(chip);
 	void *oob_buf;
@@ -830,8 +830,8 @@ static int mxc_nand_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 	return host->devtype_data->read_page(chip, buf, oob_buf, 1, page);
 }
 
-static int mxc_nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				  uint8_t *buf, int oob_required, int page)
+static int mxc_nand_read_page_raw(struct nand_chip *chip, uint8_t *buf,
+				  int oob_required, int page)
 {
 	struct mxc_nand_host *host = nand_get_controller_data(chip);
 	void *oob_buf;
@@ -844,8 +844,7 @@ static int mxc_nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 	return host->devtype_data->read_page(chip, buf, oob_buf, 0, page);
 }
 
-static int mxc_nand_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			     int page)
+static int mxc_nand_read_oob(struct nand_chip *chip, int page)
 {
 	struct mxc_nand_host *host = nand_get_controller_data(chip);
 
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 0444bd23c84b..e1f60c841348 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -427,7 +427,7 @@ static int nand_block_bad(struct mtd_info *mtd, loff_t ofs)
 	page_end = page + (chip->bbt_options & NAND_BBT_SCAN2NDPAGE ? 2 : 1);
 
 	for (; page < page_end; page++) {
-		res = chip->ecc.read_oob(mtd, chip, page);
+		res = chip->ecc.read_oob(chip, page);
 		if (res < 0)
 			return res;
 
@@ -2978,7 +2978,6 @@ EXPORT_SYMBOL(nand_check_erased_ecc_chunk);
 
 /**
  * nand_read_page_raw_notsupp - dummy read raw page function
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: buffer to store read data
  * @oob_required: caller requires OOB data read to chip->oob_poi
@@ -2986,8 +2985,8 @@ EXPORT_SYMBOL(nand_check_erased_ecc_chunk);
  *
  * Returns -ENOTSUPP unconditionally.
  */
-int nand_read_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip,
-			       u8 *buf, int oob_required, int page)
+int nand_read_page_raw_notsupp(struct nand_chip *chip, u8 *buf,
+			       int oob_required, int page)
 {
 	return -ENOTSUPP;
 }
@@ -2995,7 +2994,6 @@ EXPORT_SYMBOL(nand_read_page_raw_notsupp);
 
 /**
  * nand_read_page_raw - [INTERN] read raw page data without ecc
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: buffer to store read data
  * @oob_required: caller requires OOB data read to chip->oob_poi
@@ -3003,9 +3001,10 @@ EXPORT_SYMBOL(nand_read_page_raw_notsupp);
  *
  * Not for syndrome calculating ECC controllers, which use a special oob layout.
  */
-int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-		       uint8_t *buf, int oob_required, int page)
+int nand_read_page_raw(struct nand_chip *chip, uint8_t *buf, int oob_required,
+		       int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 
 	ret = nand_read_page_op(chip, page, 0, buf, mtd->writesize);
@@ -3025,7 +3024,6 @@ EXPORT_SYMBOL(nand_read_page_raw);
 
 /**
  * nand_read_page_raw_syndrome - [INTERN] read raw page data without ecc
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: buffer to store read data
  * @oob_required: caller requires OOB data read to chip->oob_poi
@@ -3033,10 +3031,10 @@ EXPORT_SYMBOL(nand_read_page_raw);
  *
  * We need a special oob layout and handling even when OOB isn't used.
  */
-static int nand_read_page_raw_syndrome(struct mtd_info *mtd,
-				       struct nand_chip *chip, uint8_t *buf,
+static int nand_read_page_raw_syndrome(struct nand_chip *chip, uint8_t *buf,
 				       int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
 	uint8_t *oob = chip->oob_poi;
@@ -3090,15 +3088,15 @@ static int nand_read_page_raw_syndrome(struct mtd_info *mtd,
 
 /**
  * nand_read_page_swecc - [REPLACEABLE] software ECC based page read function
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: buffer to store read data
  * @oob_required: caller requires OOB data read to chip->oob_poi
  * @page: page number to read
  */
-static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf, int oob_required, int page)
+static int nand_read_page_swecc(struct nand_chip *chip, uint8_t *buf,
+				int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int i, eccsize = chip->ecc.size, ret;
 	int eccbytes = chip->ecc.bytes;
 	int eccsteps = chip->ecc.steps;
@@ -3107,7 +3105,7 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
 	uint8_t *ecc_code = chip->ecc.code_buf;
 	unsigned int max_bitflips = 0;
 
-	chip->ecc.read_page_raw(mtd, chip, buf, 1, page);
+	chip->ecc.read_page_raw(chip, buf, 1, page);
 
 	for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize)
 		chip->ecc.calculate(chip, p, &ecc_calc[i]);
@@ -3136,17 +3134,16 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
 
 /**
  * nand_read_subpage - [REPLACEABLE] ECC based sub-page read function
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @data_offs: offset of requested data within the page
  * @readlen: data length
  * @bufpoi: buffer to store read data
  * @page: page number to read
  */
-static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip,
-			uint32_t data_offs, uint32_t readlen, uint8_t *bufpoi,
-			int page)
+static int nand_read_subpage(struct nand_chip *chip, uint32_t data_offs,
+			     uint32_t readlen, uint8_t *bufpoi, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int start_step, end_step, num_steps, ret;
 	uint8_t *p;
 	int data_col_addr, i, gaps = 0;
@@ -3248,7 +3245,6 @@ static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip,
 
 /**
  * nand_read_page_hwecc - [REPLACEABLE] hardware ECC based page read function
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: buffer to store read data
  * @oob_required: caller requires OOB data read to chip->oob_poi
@@ -3256,9 +3252,10 @@ static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip,
  *
  * Not for syndrome calculating ECC controllers which need a special oob layout.
  */
-static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf, int oob_required, int page)
+static int nand_read_page_hwecc(struct nand_chip *chip, uint8_t *buf,
+				int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int i, eccsize = chip->ecc.size, ret;
 	int eccbytes = chip->ecc.bytes;
 	int eccsteps = chip->ecc.steps;
@@ -3318,7 +3315,6 @@ static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
 
 /**
  * nand_read_page_hwecc_oob_first - [REPLACEABLE] hw ecc, read oob first
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: buffer to store read data
  * @oob_required: caller requires OOB data read to chip->oob_poi
@@ -3330,9 +3326,10 @@ static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
  * multiple ECC steps, follows the "infix ECC" scheme and reads/writes ECC from
  * the data area, by overwriting the NAND manufacturer bad block markings.
  */
-static int nand_read_page_hwecc_oob_first(struct mtd_info *mtd,
-	struct nand_chip *chip, uint8_t *buf, int oob_required, int page)
+static int nand_read_page_hwecc_oob_first(struct nand_chip *chip, uint8_t *buf,
+					  int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int i, eccsize = chip->ecc.size, ret;
 	int eccbytes = chip->ecc.bytes;
 	int eccsteps = chip->ecc.steps;
@@ -3388,7 +3385,6 @@ static int nand_read_page_hwecc_oob_first(struct mtd_info *mtd,
 
 /**
  * nand_read_page_syndrome - [REPLACEABLE] hardware ECC syndrome based page read
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: buffer to store read data
  * @oob_required: caller requires OOB data read to chip->oob_poi
@@ -3397,9 +3393,10 @@ static int nand_read_page_hwecc_oob_first(struct mtd_info *mtd,
  * The hw generator calculates the error syndrome automatically. Therefore we
  * need a special oob layout and handling.
  */
-static int nand_read_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
-				   uint8_t *buf, int oob_required, int page)
+static int nand_read_page_syndrome(struct nand_chip *chip, uint8_t *buf,
+				   int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret, i, eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
 	int eccsteps = chip->ecc.steps;
@@ -3610,16 +3607,15 @@ read_retry:
 			 * the read methods return max bitflips per ecc step.
 			 */
 			if (unlikely(ops->mode == MTD_OPS_RAW))
-				ret = chip->ecc.read_page_raw(mtd, chip, bufpoi,
+				ret = chip->ecc.read_page_raw(chip, bufpoi,
 							      oob_required,
 							      page);
 			else if (!aligned && NAND_HAS_SUBPAGE_READ(chip) &&
 				 !oob)
-				ret = chip->ecc.read_subpage(mtd, chip,
-							col, bytes, bufpoi,
-							page);
+				ret = chip->ecc.read_subpage(chip, col, bytes,
+							     bufpoi, page);
 			else
-				ret = chip->ecc.read_page(mtd, chip, bufpoi,
+				ret = chip->ecc.read_page(chip, bufpoi,
 							  oob_required, page);
 			if (ret < 0) {
 				if (use_bufpoi)
@@ -3723,12 +3719,13 @@ read_retry:
 
 /**
  * nand_read_oob_std - [REPLACEABLE] the most common OOB data read function
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @page: page number to read
  */
-int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page)
+int nand_read_oob_std(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	return nand_read_oob_op(chip, page, 0, chip->oob_poi, mtd->oobsize);
 }
 EXPORT_SYMBOL(nand_read_oob_std);
@@ -3736,13 +3733,12 @@ EXPORT_SYMBOL(nand_read_oob_std);
 /**
  * nand_read_oob_syndrome - [REPLACEABLE] OOB data read function for HW ECC
  *			    with syndromes
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @page: page number to read
  */
-int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
-			   int page)
+int nand_read_oob_syndrome(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int length = mtd->oobsize;
 	int chunk = chip->ecc.bytes + chip->ecc.prepad + chip->ecc.postpad;
 	int eccsize = chip->ecc.size;
@@ -3913,9 +3909,9 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from,
 
 	while (1) {
 		if (ops->mode == MTD_OPS_RAW)
-			ret = chip->ecc.read_oob_raw(mtd, chip, page);
+			ret = chip->ecc.read_oob_raw(chip, page);
 		else
-			ret = chip->ecc.read_oob(mtd, chip, page);
+			ret = chip->ecc.read_oob(chip, page);
 
 		if (ret < 0)
 			break;
diff --git a/drivers/mtd/nand/raw/nand_micron.c b/drivers/mtd/nand/raw/nand_micron.c
index f5dc0a7a2456..d83a86ba9d09 100644
--- a/drivers/mtd/nand/raw/nand_micron.c
+++ b/drivers/mtd/nand/raw/nand_micron.c
@@ -290,10 +290,10 @@ static int micron_nand_on_die_ecc_status_8(struct nand_chip *chip, u8 status)
 }
 
 static int
-micron_nand_read_page_on_die_ecc(struct mtd_info *mtd, struct nand_chip *chip,
-				 uint8_t *buf, int oob_required,
-				 int page)
+micron_nand_read_page_on_die_ecc(struct nand_chip *chip, uint8_t *buf,
+				 int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	u8 status;
 	int ret, max_bitflips = 0;
 
diff --git a/drivers/mtd/nand/raw/nand_toshiba.c b/drivers/mtd/nand/raw/nand_toshiba.c
index 8aec3fa6c5d9..952fe9e62ab4 100644
--- a/drivers/mtd/nand/raw/nand_toshiba.c
+++ b/drivers/mtd/nand/raw/nand_toshiba.c
@@ -48,13 +48,13 @@ static int toshiba_nand_benand_eccstatus(struct mtd_info *mtd,
 }
 
 static int
-toshiba_nand_read_page_benand(struct mtd_info *mtd,
-			      struct nand_chip *chip, uint8_t *buf,
+toshiba_nand_read_page_benand(struct nand_chip *chip, uint8_t *buf,
 			      int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 
-	ret = nand_read_page_raw(mtd, chip, buf, oob_required, page);
+	ret = nand_read_page_raw(chip, buf, oob_required, page);
 	if (ret)
 		return ret;
 
@@ -62,10 +62,10 @@ toshiba_nand_read_page_benand(struct mtd_info *mtd,
 }
 
 static int
-toshiba_nand_read_subpage_benand(struct mtd_info *mtd,
-				 struct nand_chip *chip, uint32_t data_offs,
+toshiba_nand_read_subpage_benand(struct nand_chip *chip, uint32_t data_offs,
 				 uint32_t readlen, uint8_t *bufpoi, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 
 	ret = nand_read_page_op(chip, page, data_offs,
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index 4e0bc2da63fd..dfe96098f3f6 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -1616,7 +1616,6 @@ static int omap_write_subpage_bch(struct mtd_info *mtd,
 
 /**
  * omap_read_page_bch - BCH ecc based page read function for entire page
- * @mtd:		mtd info structure
  * @chip:		nand chip info structure
  * @buf:		buffer to store read data
  * @oob_required:	caller requires OOB data read to chip->oob_poi
@@ -1629,9 +1628,10 @@ static int omap_write_subpage_bch(struct mtd_info *mtd,
  * ecc engine enabled. ecc vector updated after read of OOB data.
  * For non error pages ecc vector reported as zero.
  */
-static int omap_read_page_bch(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf, int oob_required, int page)
+static int omap_read_page_bch(struct nand_chip *chip, uint8_t *buf,
+			      int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	uint8_t *ecc_calc = chip->ecc.calc_buf;
 	uint8_t *ecc_code = chip->ecc.code_buf;
 	int stat, ret;
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index 312cfd786b0f..49113d4cee10 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -1948,8 +1948,8 @@ static int copy_last_cw(struct qcom_nand_host *host, int page)
 }
 
 /* implements ecc->read_page() */
-static int qcom_nandc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf, int oob_required, int page)
+static int qcom_nandc_read_page(struct nand_chip *chip, uint8_t *buf,
+				int oob_required, int page)
 {
 	struct qcom_nand_host *host = to_qcom_nand_host(chip);
 	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
@@ -1965,10 +1965,10 @@ static int qcom_nandc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 }
 
 /* implements ecc->read_page_raw() */
-static int qcom_nandc_read_page_raw(struct mtd_info *mtd,
-				    struct nand_chip *chip, uint8_t *buf,
+static int qcom_nandc_read_page_raw(struct nand_chip *chip, uint8_t *buf,
 				    int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct qcom_nand_host *host = to_qcom_nand_host(chip);
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
 	int cw, ret;
@@ -1988,8 +1988,7 @@ static int qcom_nandc_read_page_raw(struct mtd_info *mtd,
 }
 
 /* implements ecc->read_oob() */
-static int qcom_nandc_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			       int page)
+static int qcom_nandc_read_oob(struct nand_chip *chip, int page)
 {
 	struct qcom_nand_host *host = to_qcom_nand_host(chip);
 	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c
index 7673aa140009..aa5516b3b45f 100644
--- a/drivers/mtd/nand/raw/r852.c
+++ b/drivers/mtd/nand/raw/r852.c
@@ -521,9 +521,10 @@ exit:
  * This is copy of nand_read_oob_std
  * nand_read_oob_syndrome assumes we can send column address - we can't
  */
-static int r852_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			     int page)
+static int r852_read_oob(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	return nand_read_oob_op(chip, page, 0, chip->oob_poi, mtd->oobsize);
 }
 
diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c
index 2580fd981077..fb5df6099d7b 100644
--- a/drivers/mtd/nand/raw/sh_flctl.c
+++ b/drivers/mtd/nand/raw/sh_flctl.c
@@ -611,9 +611,11 @@ static void set_cmd_regs(struct mtd_info *mtd, uint32_t cmd, uint32_t flcmcdr_va
 	writel(flcmcdr_val, FLCMCDR(flctl));
 }
 
-static int flctl_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf, int oob_required, int page)
+static int flctl_read_page_hwecc(struct nand_chip *chip, uint8_t *buf,
+				 int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	nand_read_page_op(chip, page, 0, buf, mtd->writesize);
 	if (oob_required)
 		chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index e31ab86bebee..26d5c6c41c49 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -1189,10 +1189,10 @@ static void sunxi_nfc_hw_ecc_write_extra_oob(struct mtd_info *mtd,
 		*cur_off = mtd->oobsize + mtd->writesize;
 }
 
-static int sunxi_nfc_hw_ecc_read_page(struct mtd_info *mtd,
-				      struct nand_chip *chip, uint8_t *buf,
+static int sunxi_nfc_hw_ecc_read_page(struct nand_chip *chip, uint8_t *buf,
 				      int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
 	unsigned int max_bitflips = 0;
 	int ret, i, cur_off = 0;
@@ -1227,10 +1227,10 @@ static int sunxi_nfc_hw_ecc_read_page(struct mtd_info *mtd,
 	return max_bitflips;
 }
 
-static int sunxi_nfc_hw_ecc_read_page_dma(struct mtd_info *mtd,
-					  struct nand_chip *chip, u8 *buf,
+static int sunxi_nfc_hw_ecc_read_page_dma(struct nand_chip *chip, u8 *buf,
 					  int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 
 	nand_read_page_op(chip, page, 0, NULL, 0);
@@ -1241,14 +1241,14 @@ static int sunxi_nfc_hw_ecc_read_page_dma(struct mtd_info *mtd,
 		return ret;
 
 	/* Fallback to PIO mode */
-	return sunxi_nfc_hw_ecc_read_page(mtd, chip, buf, oob_required, page);
+	return sunxi_nfc_hw_ecc_read_page(chip, buf, oob_required, page);
 }
 
-static int sunxi_nfc_hw_ecc_read_subpage(struct mtd_info *mtd,
-					 struct nand_chip *chip,
+static int sunxi_nfc_hw_ecc_read_subpage(struct nand_chip *chip,
 					 u32 data_offs, u32 readlen,
 					 u8 *bufpoi, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
 	int ret, i, cur_off = 0;
 	unsigned int max_bitflips = 0;
@@ -1278,11 +1278,11 @@ static int sunxi_nfc_hw_ecc_read_subpage(struct mtd_info *mtd,
 	return max_bitflips;
 }
 
-static int sunxi_nfc_hw_ecc_read_subpage_dma(struct mtd_info *mtd,
-					     struct nand_chip *chip,
+static int sunxi_nfc_hw_ecc_read_subpage_dma(struct nand_chip *chip,
 					     u32 data_offs, u32 readlen,
 					     u8 *buf, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int nchunks = DIV_ROUND_UP(data_offs + readlen, chip->ecc.size);
 	int ret;
 
@@ -1293,7 +1293,7 @@ static int sunxi_nfc_hw_ecc_read_subpage_dma(struct mtd_info *mtd,
 		return ret;
 
 	/* Fallback to PIO mode */
-	return sunxi_nfc_hw_ecc_read_subpage(mtd, chip, data_offs, readlen,
+	return sunxi_nfc_hw_ecc_read_subpage(chip, data_offs, readlen,
 					     buf, page);
 }
 
@@ -1428,13 +1428,11 @@ pio_fallback:
 	return sunxi_nfc_hw_ecc_write_page(mtd, chip, buf, oob_required, page);
 }
 
-static int sunxi_nfc_hw_ecc_read_oob(struct mtd_info *mtd,
-				     struct nand_chip *chip,
-				     int page)
+static int sunxi_nfc_hw_ecc_read_oob(struct nand_chip *chip, int page)
 {
 	chip->pagebuf = -1;
 
-	return chip->ecc.read_page(mtd, chip, chip->data_buf, 1, page);
+	return chip->ecc.read_page(chip, chip->data_buf, 1, page);
 }
 
 static int sunxi_nfc_hw_ecc_write_oob(struct mtd_info *mtd,
diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c
index 1061eb60ee60..c53d47159195 100644
--- a/drivers/mtd/nand/raw/tango_nand.c
+++ b/drivers/mtd/nand/raw/tango_nand.c
@@ -277,14 +277,15 @@ dma_unmap:
 	return err;
 }
 
-static int tango_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-			   u8 *buf, int oob_required, int page)
+static int tango_read_page(struct nand_chip *chip, u8 *buf,
+			   int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct tango_nfc *nfc = to_tango_nfc(chip->controller);
 	int err, res, len = mtd->writesize;
 
 	if (oob_required)
-		chip->ecc.read_oob(mtd, chip, page);
+		chip->ecc.read_oob(chip, page);
 
 	err = do_dma(nfc, DMA_FROM_DEVICE, NFC_READ, buf, len, page);
 	if (err)
@@ -292,7 +293,7 @@ static int tango_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 
 	res = decode_error_report(chip);
 	if (res < 0) {
-		chip->ecc.read_oob_raw(mtd, chip, page);
+		chip->ecc.read_oob_raw(chip, page);
 		res = check_erased_page(chip, buf);
 	}
 
@@ -424,8 +425,8 @@ static void raw_write(struct nand_chip *chip, const u8 *buf, const u8 *oob)
 	aux_write(chip, &oob, ecc_size, &pos);
 }
 
-static int tango_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-			       u8 *buf, int oob_required, int page)
+static int tango_read_page_raw(struct nand_chip *chip, u8 *buf,
+			       int oob_required, int page)
 {
 	nand_read_page_op(chip, page, 0, NULL, 0);
 	raw_read(chip, buf, chip->oob_poi);
@@ -440,8 +441,7 @@ static int tango_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 	return nand_prog_page_end_op(chip);
 }
 
-static int tango_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			  int page)
+static int tango_read_oob(struct nand_chip *chip, int page)
 {
 	nand_read_page_op(chip, page, 0, NULL, 0);
 	raw_read(chip, NULL, chip->oob_poi);
diff --git a/drivers/mtd/nand/raw/tegra_nand.c b/drivers/mtd/nand/raw/tegra_nand.c
index 5dcee20e2a8c..bcc3a2888c4f 100644
--- a/drivers/mtd/nand/raw/tegra_nand.c
+++ b/drivers/mtd/nand/raw/tegra_nand.c
@@ -615,10 +615,10 @@ err_unmap_dma_page:
 	return ret;
 }
 
-static int tegra_nand_read_page_raw(struct mtd_info *mtd,
-				    struct nand_chip *chip, u8 *buf,
+static int tegra_nand_read_page_raw(struct nand_chip *chip, u8 *buf,
 				    int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	void *oob_buf = oob_required ? chip->oob_poi : NULL;
 
 	return tegra_nand_page_xfer(mtd, chip, buf, oob_buf,
@@ -635,9 +635,10 @@ static int tegra_nand_write_page_raw(struct mtd_info *mtd,
 				     mtd->oobsize, page, false);
 }
 
-static int tegra_nand_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			       int page)
+static int tegra_nand_read_oob(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	return tegra_nand_page_xfer(mtd, chip, NULL, chip->oob_poi,
 				    mtd->oobsize, page, true);
 }
@@ -649,10 +650,10 @@ static int tegra_nand_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
 				    mtd->oobsize, page, false);
 }
 
-static int tegra_nand_read_page_hwecc(struct mtd_info *mtd,
-				      struct nand_chip *chip, u8 *buf,
+static int tegra_nand_read_page_hwecc(struct nand_chip *chip, u8 *buf,
 				      int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct tegra_nand_controller *ctrl = to_tegra_ctrl(chip->controller);
 	struct tegra_nand_chip *nand = to_tegra_chip(chip);
 	void *oob_buf = oob_required ? chip->oob_poi : NULL;
@@ -716,7 +717,7 @@ static int tegra_nand_read_page_hwecc(struct mtd_info *mtd,
 		 * erased or if error correction just failed for all sub-
 		 * pages.
 		 */
-		ret = tegra_nand_read_oob(mtd, chip, page);
+		ret = tegra_nand_read_oob(chip, page);
 		if (ret < 0)
 			return ret;
 
diff --git a/drivers/mtd/nand/raw/vf610_nfc.c b/drivers/mtd/nand/raw/vf610_nfc.c
index a73213c835a5..7cbcc41cea95 100644
--- a/drivers/mtd/nand/raw/vf610_nfc.c
+++ b/drivers/mtd/nand/raw/vf610_nfc.c
@@ -557,9 +557,10 @@ static void vf610_nfc_fill_row(struct nand_chip *chip, int page, u32 *code,
 	}
 }
 
-static int vf610_nfc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf, int oob_required, int page)
+static int vf610_nfc_read_page(struct nand_chip *chip, uint8_t *buf,
+			       int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
 	int trfr_sz = mtd->writesize + mtd->oobsize;
 	u32 row = 0, cmd1 = 0, cmd2 = 0, code = 0;
@@ -643,15 +644,15 @@ static int vf610_nfc_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	return 0;
 }
 
-static int vf610_nfc_read_page_raw(struct mtd_info *mtd,
-				   struct nand_chip *chip, u8 *buf,
+static int vf610_nfc_read_page_raw(struct nand_chip *chip, u8 *buf,
 				   int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
 	int ret;
 
 	nfc->data_access = true;
-	ret = nand_read_page_raw(mtd, chip, buf, oob_required, page);
+	ret = nand_read_page_raw(chip, buf, oob_required, page);
 	nfc->data_access = false;
 
 	return ret;
@@ -677,14 +678,13 @@ static int vf610_nfc_write_page_raw(struct mtd_info *mtd,
 	return nand_prog_page_end_op(chip);
 }
 
-static int vf610_nfc_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			      int page)
+static int vf610_nfc_read_oob(struct nand_chip *chip, int page)
 {
-	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
+	struct vf610_nfc *nfc = mtd_to_nfc(nand_to_mtd(chip));
 	int ret;
 
 	nfc->data_access = true;
-	ret = nand_read_oob_std(mtd, chip, page);
+	ret = nand_read_oob_std(chip, page);
 	nfc->data_access = false;
 
 	return ret;
diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c
index b50788b2d1d9..0776d38d4498 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.c
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c
@@ -643,14 +643,15 @@ static int spinand_write_page_hwecc(struct mtd_info *mtd,
 	return nand_prog_page_op(chip, page, 0, p, eccsize * eccsteps);
 }
 
-static int spinand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
-				   u8 *buf, int oob_required, int page)
+static int spinand_read_page_hwecc(struct nand_chip *chip, u8 *buf,
+				   int oob_required, int page)
 {
 	int retval;
 	u8 status;
 	u8 *p = buf;
 	int eccsize = chip->ecc.size;
 	int eccsteps = chip->ecc.steps;
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct spinand_info *info = nand_get_controller_data(chip);
 
 	enable_read_hw_ecc = 1;
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 24434310d126..a5f4a585f749 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -652,14 +652,14 @@ struct nand_ecc_ctrl {
 			 uint8_t *ecc_code);
 	int (*correct)(struct nand_chip *chip, uint8_t *dat, uint8_t *read_ecc,
 		       uint8_t *calc_ecc);
-	int (*read_page_raw)(struct mtd_info *mtd, struct nand_chip *chip,
-			uint8_t *buf, int oob_required, int page);
+	int (*read_page_raw)(struct nand_chip *chip, uint8_t *buf,
+			     int oob_required, int page);
 	int (*write_page_raw)(struct mtd_info *mtd, struct nand_chip *chip,
 			const uint8_t *buf, int oob_required, int page);
-	int (*read_page)(struct mtd_info *mtd, struct nand_chip *chip,
-			uint8_t *buf, int oob_required, int page);
-	int (*read_subpage)(struct mtd_info *mtd, struct nand_chip *chip,
-			uint32_t offs, uint32_t len, uint8_t *buf, int page);
+	int (*read_page)(struct nand_chip *chip, uint8_t *buf,
+			 int oob_required, int page);
+	int (*read_subpage)(struct nand_chip *chip, uint32_t offs,
+			    uint32_t len, uint8_t *buf, int page);
 	int (*write_subpage)(struct mtd_info *mtd, struct nand_chip *chip,
 			uint32_t offset, uint32_t data_len,
 			const uint8_t *data_buf, int oob_required, int page);
@@ -667,9 +667,8 @@ struct nand_ecc_ctrl {
 			const uint8_t *buf, int oob_required, int page);
 	int (*write_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip,
 			int page);
-	int (*read_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip,
-			int page);
-	int (*read_oob)(struct mtd_info *mtd, struct nand_chip *chip, int page);
+	int (*read_oob_raw)(struct nand_chip *chip, int page);
+	int (*read_oob)(struct nand_chip *chip, int page);
 	int (*write_oob)(struct mtd_info *mtd, struct nand_chip *chip,
 			int page);
 };
@@ -1676,11 +1675,10 @@ int nand_write_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
 			    int page);
 
 /* Default read_oob implementation */
-int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page);
+int nand_read_oob_std(struct nand_chip *chip, int page);
 
 /* Default read_oob syndrome implementation */
-int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
-			   int page);
+int nand_read_oob_syndrome(struct nand_chip *chip, int page);
 
 /* Wrapper to use in order for controllers/vendors to GET/SET FEATURES */
 int nand_get_features(struct nand_chip *chip, int addr, u8 *subfeature_param);
@@ -1690,10 +1688,10 @@ int nand_get_set_features_notsupp(struct mtd_info *mtd, struct nand_chip *chip,
 				  int addr, u8 *subfeature_param);
 
 /* Default read_page_raw implementation */
-int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-		       uint8_t *buf, int oob_required, int page);
-int nand_read_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip,
-			       u8 *buf, int oob_required, int page);
+int nand_read_page_raw(struct nand_chip *chip, uint8_t *buf, int oob_required,
+		       int page);
+int nand_read_page_raw_notsupp(struct nand_chip *chip, u8 *buf,
+			       int oob_required, int page);
 
 /* Default write_page_raw implementation */
 int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-- 
cgit v1.2.3


From 767eb6fbdedb7d8b9c7a87d640a8bc8091eba002 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:21 +0200
Subject: mtd: rawnand: Pass a nand_chip object to ecc->write_xxx() hooks

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one or
remove the mtd_info object when both are passed.

Let's tackle all ecc->write_xxx() hooks at once.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/atmel/nand-controller.c  | 12 ++---
 drivers/mtd/nand/raw/brcmnand/brcmnand.c      | 21 ++++----
 drivers/mtd/nand/raw/cafe_nand.c              | 13 ++---
 drivers/mtd/nand/raw/denali.c                 | 14 ++---
 drivers/mtd/nand/raw/docg4.c                  | 17 +++---
 drivers/mtd/nand/raw/fsl_elbc_nand.c          | 14 +++--
 drivers/mtd/nand/raw/fsl_ifc_nand.c           |  6 ++-
 drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c    | 21 ++++----
 drivers/mtd/nand/raw/hisi504_nand.c           |  8 +--
 drivers/mtd/nand/raw/lpc32xx_mlc.c            |  7 ++-
 drivers/mtd/nand/raw/lpc32xx_slc.c            | 14 ++---
 drivers/mtd/nand/raw/marvell_nand.c           | 32 ++++++------
 drivers/mtd/nand/raw/mtk_nand.c               | 19 ++++---
 drivers/mtd/nand/raw/mxc_nand.c               | 13 +++--
 drivers/mtd/nand/raw/nand_base.c              | 74 ++++++++++++---------------
 drivers/mtd/nand/raw/nand_ecc.c               |  2 +-
 drivers/mtd/nand/raw/nand_micron.c            |  7 ++-
 drivers/mtd/nand/raw/omap2.c                  | 11 ++--
 drivers/mtd/nand/raw/qcom_nandc.c             | 15 +++---
 drivers/mtd/nand/raw/sh_flctl.c               |  7 +--
 drivers/mtd/nand/raw/sunxi_nand.c             | 21 ++++----
 drivers/mtd/nand/raw/tango_nand.c             | 12 ++---
 drivers/mtd/nand/raw/tegra_nand.c             | 13 ++---
 drivers/mtd/nand/raw/vf610_nfc.c              | 13 ++---
 drivers/staging/mt29f_spinand/mt29f_spinand.c |  3 +-
 include/linux/mtd/rawnand.h                   | 33 ++++++------
 26 files changed, 208 insertions(+), 214 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index 45061b591346..3ebe9b727315 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -858,15 +858,13 @@ static int atmel_nand_pmecc_write_pg(struct nand_chip *chip, const u8 *buf,
 	return nand_prog_page_end_op(chip);
 }
 
-static int atmel_nand_pmecc_write_page(struct mtd_info *mtd,
-				       struct nand_chip *chip, const u8 *buf,
+static int atmel_nand_pmecc_write_page(struct nand_chip *chip, const u8 *buf,
 				       int oob_required, int page)
 {
 	return atmel_nand_pmecc_write_pg(chip, buf, oob_required, page, false);
 }
 
-static int atmel_nand_pmecc_write_page_raw(struct mtd_info *mtd,
-					   struct nand_chip *chip,
+static int atmel_nand_pmecc_write_page_raw(struct nand_chip *chip,
 					   const u8 *buf, int oob_required,
 					   int page)
 {
@@ -963,8 +961,7 @@ static int atmel_hsmc_nand_pmecc_write_pg(struct nand_chip *chip,
 	return ret;
 }
 
-static int atmel_hsmc_nand_pmecc_write_page(struct mtd_info *mtd,
-					    struct nand_chip *chip,
+static int atmel_hsmc_nand_pmecc_write_page(struct nand_chip *chip,
 					    const u8 *buf, int oob_required,
 					    int page)
 {
@@ -972,8 +969,7 @@ static int atmel_hsmc_nand_pmecc_write_page(struct mtd_info *mtd,
 					      false);
 }
 
-static int atmel_hsmc_nand_pmecc_write_page_raw(struct mtd_info *mtd,
-						struct nand_chip *chip,
+static int atmel_hsmc_nand_pmecc_write_page_raw(struct nand_chip *chip,
 						const u8 *buf,
 						int oob_required, int page)
 {
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index a17ae692aee9..d8fb2b5c19c9 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -1909,9 +1909,10 @@ out:
 	return ret;
 }
 
-static int brcmnand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
-			       const uint8_t *buf, int oob_required, int page)
+static int brcmnand_write_page(struct nand_chip *chip, const uint8_t *buf,
+			       int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct brcmnand_host *host = nand_get_controller_data(chip);
 	void *oob = oob_required ? chip->oob_poi : NULL;
 
@@ -1921,10 +1922,10 @@ static int brcmnand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	return nand_prog_page_end_op(chip);
 }
 
-static int brcmnand_write_page_raw(struct mtd_info *mtd,
-				   struct nand_chip *chip, const uint8_t *buf,
+static int brcmnand_write_page_raw(struct nand_chip *chip, const uint8_t *buf,
 				   int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct brcmnand_host *host = nand_get_controller_data(chip);
 	void *oob = oob_required ? chip->oob_poi : NULL;
 
@@ -1936,16 +1937,16 @@ static int brcmnand_write_page_raw(struct mtd_info *mtd,
 	return nand_prog_page_end_op(chip);
 }
 
-static int brcmnand_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
-				  int page)
+static int brcmnand_write_oob(struct nand_chip *chip, int page)
 {
-	return brcmnand_write(mtd, chip, (u64)page << chip->page_shift,
-				  NULL, chip->oob_poi);
+	return brcmnand_write(nand_to_mtd(chip), chip,
+			      (u64)page << chip->page_shift, NULL,
+			      chip->oob_poi);
 }
 
-static int brcmnand_write_oob_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				  int page)
+static int brcmnand_write_oob_raw(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct brcmnand_host *host = nand_get_controller_data(chip);
 	int ret;
 
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index c6071d71cc1b..fe7c7db3cfe7 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -346,9 +346,10 @@ static irqreturn_t cafe_nand_interrupt(int irq, void *id)
 	return IRQ_HANDLED;
 }
 
-static int cafe_nand_write_oob(struct mtd_info *mtd,
-			       struct nand_chip *chip, int page)
+static int cafe_nand_write_oob(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	return nand_prog_page_op(chip, page, mtd->writesize, chip->oob_poi,
 				 mtd->oobsize);
 }
@@ -533,11 +534,11 @@ static struct nand_bbt_descr cafe_bbt_mirror_descr_512 = {
 };
 
 
-static int cafe_nand_write_page_lowlevel(struct mtd_info *mtd,
-					  struct nand_chip *chip,
-					  const uint8_t *buf, int oob_required,
-					  int page)
+static int cafe_nand_write_page_lowlevel(struct nand_chip *chip,
+					 const uint8_t *buf, int oob_required,
+					 int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct cafe_priv *cafe = nand_get_controller_data(chip);
 
 	nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize);
diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index 994921814d76..52fe5115ed6e 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -761,9 +761,9 @@ static int denali_read_oob(struct nand_chip *chip, int page)
 	return 0;
 }
 
-static int denali_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			    int page)
+static int denali_write_oob(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 
 	denali_reset_irq(denali);
@@ -806,9 +806,10 @@ static int denali_read_page(struct nand_chip *chip, uint8_t *buf,
 	return stat;
 }
 
-static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				 const uint8_t *buf, int oob_required, int page)
+static int denali_write_page_raw(struct nand_chip *chip, const uint8_t *buf,
+				 int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 	int writesize = mtd->writesize;
 	int oobsize = mtd->oobsize;
@@ -884,9 +885,10 @@ static int denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
 	return denali_data_xfer(denali, tmp_buf, size, page, 1, 1);
 }
 
-static int denali_write_page(struct mtd_info *mtd, struct nand_chip *chip,
-			     const uint8_t *buf, int oob_required, int page)
+static int denali_write_page(struct nand_chip *chip, const uint8_t *buf,
+			     int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 
 	return denali_data_xfer(denali, (void *)buf, mtd->writesize,
diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c
index ebaa479ffcb2..37935fd04020 100644
--- a/drivers/mtd/nand/raw/docg4.c
+++ b/drivers/mtd/nand/raw/docg4.c
@@ -1007,20 +1007,19 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *nand,
 	return nand_prog_page_end_op(nand);
 }
 
-static int docg4_write_page_raw(struct mtd_info *mtd, struct nand_chip *nand,
-				const uint8_t *buf, int oob_required, int page)
+static int docg4_write_page_raw(struct nand_chip *nand, const uint8_t *buf,
+				int oob_required, int page)
 {
-	return write_page(mtd, nand, buf, page, false);
+	return write_page(nand_to_mtd(nand), nand, buf, page, false);
 }
 
-static int docg4_write_page(struct mtd_info *mtd, struct nand_chip *nand,
-			     const uint8_t *buf, int oob_required, int page)
+static int docg4_write_page(struct nand_chip *nand, const uint8_t *buf,
+			    int oob_required, int page)
 {
-	return write_page(mtd, nand, buf, page, true);
+	return write_page(nand_to_mtd(nand), nand, buf, page, true);
 }
 
-static int docg4_write_oob(struct mtd_info *mtd, struct nand_chip *nand,
-			   int page)
+static int docg4_write_oob(struct nand_chip *nand, int page)
 {
 	/*
 	 * Writing oob-only is not really supported, because MLC nand must write
@@ -1144,7 +1143,7 @@ static int docg4_block_markbad(struct mtd_info *mtd, loff_t ofs)
 
 	/* write first page of block */
 	write_page_prologue(mtd, g4_addr);
-	docg4_write_page(mtd, nand, buf, 1, page);
+	docg4_write_page(nand, buf, 1, page);
 	ret = pageprog(mtd);
 
 	kfree(buf);
diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
index 26fcb8ea0c2e..c992d7ad39d9 100644
--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
@@ -731,9 +731,11 @@ static int fsl_elbc_read_page(struct nand_chip *chip, uint8_t *buf,
 /* ECC will be calculated automatically, and errors will be detected in
  * waitfunc.
  */
-static int fsl_elbc_write_page(struct mtd_info *mtd, struct nand_chip *chip,
-				const uint8_t *buf, int oob_required, int page)
+static int fsl_elbc_write_page(struct nand_chip *chip, const uint8_t *buf,
+			       int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize);
 	fsl_elbc_write_buf(mtd, chip->oob_poi, mtd->oobsize);
 
@@ -743,10 +745,12 @@ static int fsl_elbc_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 /* ECC will be calculated automatically, and errors will be detected in
  * waitfunc.
  */
-static int fsl_elbc_write_subpage(struct mtd_info *mtd, struct nand_chip *chip,
-				uint32_t offset, uint32_t data_len,
-				const uint8_t *buf, int oob_required, int page)
+static int fsl_elbc_write_subpage(struct nand_chip *chip, uint32_t offset,
+				  uint32_t data_len, const uint8_t *buf,
+				  int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	nand_prog_page_begin_op(chip, page, 0, NULL, 0);
 	fsl_elbc_write_buf(mtd, buf, mtd->writesize);
 	fsl_elbc_write_buf(mtd, chip->oob_poi, mtd->oobsize);
diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index 8c6016932aaa..945f3dab7ebf 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -707,9 +707,11 @@ static int fsl_ifc_read_page(struct nand_chip *chip, uint8_t *buf,
 /* ECC will be calculated automatically, and errors will be detected in
  * waitfunc.
  */
-static int fsl_ifc_write_page(struct mtd_info *mtd, struct nand_chip *chip,
-			       const uint8_t *buf, int oob_required, int page)
+static int fsl_ifc_write_page(struct nand_chip *chip, const uint8_t *buf,
+			      int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize);
 	fsl_ifc_write_buf(mtd, chip->oob_poi, mtd->oobsize);
 
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index 5650ebf28903..09f33f6006a3 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -1182,9 +1182,10 @@ static int gpmi_ecc_read_subpage(struct nand_chip *chip, uint32_t offs,
 	return max_bitflips;
 }
 
-static int gpmi_ecc_write_page(struct mtd_info *mtd, struct nand_chip *chip,
-				const uint8_t *buf, int oob_required, int page)
+static int gpmi_ecc_write_page(struct nand_chip *chip, const uint8_t *buf,
+			       int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct gpmi_nand_data *this = nand_get_controller_data(chip);
 	struct bch_geometry *nfc_geo = &this->bch_geometry;
 	const void *payload_virt;
@@ -1351,9 +1352,9 @@ static int gpmi_ecc_read_oob(struct nand_chip *chip, int page)
 	return 0;
 }
 
-static int
-gpmi_ecc_write_oob(struct mtd_info *mtd, struct nand_chip *chip, int page)
+static int gpmi_ecc_write_oob(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct mtd_oob_region of = { };
 
 	/* Do we have available oob area? */
@@ -1464,11 +1465,10 @@ static int gpmi_ecc_read_page_raw(struct nand_chip *chip, uint8_t *buf,
  * See set_geometry_by_ecc_info inline comments to have a full description
  * of the layout used by the GPMI controller.
  */
-static int gpmi_ecc_write_page_raw(struct mtd_info *mtd,
-				   struct nand_chip *chip,
-				   const uint8_t *buf,
+static int gpmi_ecc_write_page_raw(struct nand_chip *chip, const uint8_t *buf,
 				   int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct gpmi_nand_data *this = nand_get_controller_data(chip);
 	struct bch_geometry *nfc_geo = &this->bch_geometry;
 	int eccsize = nfc_geo->ecc_chunk_size;
@@ -1541,10 +1541,9 @@ static int gpmi_ecc_read_oob_raw(struct nand_chip *chip, int page)
 	return gpmi_ecc_read_page_raw(chip, NULL, 1, page);
 }
 
-static int gpmi_ecc_write_oob_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				 int page)
+static int gpmi_ecc_write_oob_raw(struct nand_chip *chip, int page)
 {
-	return gpmi_ecc_write_page_raw(mtd, chip, NULL, 1, page);
+	return gpmi_ecc_write_page_raw(chip, NULL, 1, page);
 }
 
 static int gpmi_block_markbad(struct mtd_info *mtd, loff_t ofs)
@@ -1715,7 +1714,7 @@ static int mx23_write_transcription_stamp(struct gpmi_nand_data *this)
 		/* Write the first page of the current stride. */
 		dev_dbg(dev, "Writing an NCB fingerprint in page 0x%x\n", page);
 
-		status = chip->ecc.write_page_raw(mtd, chip, buffer, 0, page);
+		status = chip->ecc.write_page_raw(chip, buffer, 0, page);
 		if (status)
 			dev_err(dev, "[%s] Write failed.\n", __func__);
 	}
diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c
index f4078086c14c..fab3c7fcf77b 100644
--- a/drivers/mtd/nand/raw/hisi504_nand.c
+++ b/drivers/mtd/nand/raw/hisi504_nand.c
@@ -577,10 +577,12 @@ static int hisi_nand_read_oob(struct nand_chip *chip, int page)
 	return 0;
 }
 
-static int hisi_nand_write_page_hwecc(struct mtd_info *mtd,
-		struct nand_chip *chip, const uint8_t *buf, int oob_required,
-		int page)
+static int hisi_nand_write_page_hwecc(struct nand_chip *chip,
+				      const uint8_t *buf, int oob_required,
+				      int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize);
 	if (oob_required)
 		chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index 1849e9858d45..79a02acb0517 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -508,11 +508,11 @@ static int lpc32xx_read_page(struct nand_chip *chip, uint8_t *buf,
 	return 0;
 }
 
-static int lpc32xx_write_page_lowlevel(struct mtd_info *mtd,
-				       struct nand_chip *chip,
+static int lpc32xx_write_page_lowlevel(struct nand_chip *chip,
 				       const uint8_t *buf, int oob_required,
 				       int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct lpc32xx_nand_host *host = nand_get_controller_data(chip);
 	const uint8_t *oobbuf = chip->oob_poi;
 	uint8_t *dma_buf = (uint8_t *)buf;
@@ -568,8 +568,7 @@ static int lpc32xx_read_oob(struct nand_chip *chip, int page)
 	return 0;
 }
 
-static int lpc32xx_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			      int page)
+static int lpc32xx_write_oob(struct nand_chip *chip, int page)
 {
 	/* None, write_oob conflicts with the automatic LPC MLC ECC decoder! */
 	return 0;
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index a9cb089923be..6e4017ddacad 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -406,9 +406,10 @@ static int lpc32xx_nand_read_oob_syndrome(struct nand_chip *chip, int page)
 /*
  * Write the OOB data to the device without ECC using FIFO method
  */
-static int lpc32xx_nand_write_oob_syndrome(struct mtd_info *mtd,
-	struct nand_chip *chip, int page)
+static int lpc32xx_nand_write_oob_syndrome(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	return nand_prog_page_op(chip, page, mtd->writesize, chip->oob_poi,
 				 mtd->oobsize);
 }
@@ -678,11 +679,11 @@ static int lpc32xx_nand_read_page_raw_syndrome(struct nand_chip *chip,
  * Write the data and OOB data to the device, use ECC with the data,
  * disable ECC for the OOB data
  */
-static int lpc32xx_nand_write_page_syndrome(struct mtd_info *mtd,
-					    struct nand_chip *chip,
+static int lpc32xx_nand_write_page_syndrome(struct nand_chip *chip,
 					    const uint8_t *buf,
 					    int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct lpc32xx_nand_host *host = nand_get_controller_data(chip);
 	struct mtd_oob_region oobregion = { };
 	uint8_t *pb;
@@ -716,11 +717,12 @@ static int lpc32xx_nand_write_page_syndrome(struct mtd_info *mtd,
  * Write the data and OOB data to the device, no ECC correction with the
  * data or OOB data
  */
-static int lpc32xx_nand_write_page_raw_syndrome(struct mtd_info *mtd,
-						struct nand_chip *chip,
+static int lpc32xx_nand_write_page_raw_syndrome(struct nand_chip *chip,
 						const uint8_t *buf,
 						int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	/* Raw writes can just use the FIFO interface */
 	nand_prog_page_begin_op(chip, page, 0, buf,
 				chip->ecc.size * chip->ecc.steps);
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index a81018f3a2f4..5f5709c2e58e 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -1136,8 +1136,7 @@ static int marvell_nfc_hw_ecc_hmg_do_write_page(struct nand_chip *chip,
 	return ret;
 }
 
-static int marvell_nfc_hw_ecc_hmg_write_page_raw(struct mtd_info *mtd,
-						 struct nand_chip *chip,
+static int marvell_nfc_hw_ecc_hmg_write_page_raw(struct nand_chip *chip,
 						 const u8 *buf,
 						 int oob_required, int page)
 {
@@ -1145,8 +1144,7 @@ static int marvell_nfc_hw_ecc_hmg_write_page_raw(struct mtd_info *mtd,
 						    true, page);
 }
 
-static int marvell_nfc_hw_ecc_hmg_write_page(struct mtd_info *mtd,
-					     struct nand_chip *chip,
+static int marvell_nfc_hw_ecc_hmg_write_page(struct nand_chip *chip,
 					     const u8 *buf,
 					     int oob_required, int page)
 {
@@ -1165,10 +1163,11 @@ static int marvell_nfc_hw_ecc_hmg_write_page(struct mtd_info *mtd,
  * it appears before the ECC bytes when reading), the ->write_oob_raw() function
  * also stands for ->write_oob().
  */
-static int marvell_nfc_hw_ecc_hmg_write_oob_raw(struct mtd_info *mtd,
-						struct nand_chip *chip,
+static int marvell_nfc_hw_ecc_hmg_write_oob_raw(struct nand_chip *chip,
 						int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	/* Invalidate page cache */
 	chip->pagebuf = -1;
 
@@ -1405,8 +1404,7 @@ static int marvell_nfc_hw_ecc_bch_read_oob(struct nand_chip *chip, int page)
 }
 
 /* BCH write helpers */
-static int marvell_nfc_hw_ecc_bch_write_page_raw(struct mtd_info *mtd,
-						 struct nand_chip *chip,
+static int marvell_nfc_hw_ecc_bch_write_page_raw(struct nand_chip *chip,
 						 const u8 *buf,
 						 int oob_required, int page)
 {
@@ -1519,11 +1517,11 @@ marvell_nfc_hw_ecc_bch_write_chunk(struct nand_chip *chip, int chunk,
 	return 0;
 }
 
-static int marvell_nfc_hw_ecc_bch_write_page(struct mtd_info *mtd,
-					     struct nand_chip *chip,
+static int marvell_nfc_hw_ecc_bch_write_page(struct nand_chip *chip,
 					     const u8 *buf,
 					     int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	const struct marvell_hw_ecc_layout *lt = to_marvell_nand(chip)->layout;
 	const u8 *data = buf;
 	const u8 *spare = chip->oob_poi;
@@ -1568,27 +1566,29 @@ static int marvell_nfc_hw_ecc_bch_write_page(struct mtd_info *mtd,
 	return 0;
 }
 
-static int marvell_nfc_hw_ecc_bch_write_oob_raw(struct mtd_info *mtd,
-						struct nand_chip *chip,
+static int marvell_nfc_hw_ecc_bch_write_oob_raw(struct nand_chip *chip,
 						int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	/* Invalidate page cache */
 	chip->pagebuf = -1;
 
 	memset(chip->data_buf, 0xFF, mtd->writesize);
 
-	return chip->ecc.write_page_raw(mtd, chip, chip->data_buf, true, page);
+	return chip->ecc.write_page_raw(chip, chip->data_buf, true, page);
 }
 
-static int marvell_nfc_hw_ecc_bch_write_oob(struct mtd_info *mtd,
-					    struct nand_chip *chip, int page)
+static int marvell_nfc_hw_ecc_bch_write_oob(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	/* Invalidate page cache */
 	chip->pagebuf = -1;
 
 	memset(chip->data_buf, 0xFF, mtd->writesize);
 
-	return chip->ecc.write_page(mtd, chip, chip->data_buf, true, page);
+	return chip->ecc.write_page(chip, chip->data_buf, true, page);
 }
 
 /* NAND framework ->exec_op() hooks and related helpers */
diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c
index 32d5b59eb879..c338a9646433 100644
--- a/drivers/mtd/nand/raw/mtk_nand.c
+++ b/drivers/mtd/nand/raw/mtk_nand.c
@@ -807,27 +807,27 @@ static int mtk_nfc_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 	return nand_prog_page_end_op(chip);
 }
 
-static int mtk_nfc_write_page_hwecc(struct mtd_info *mtd,
-				    struct nand_chip *chip, const u8 *buf,
+static int mtk_nfc_write_page_hwecc(struct nand_chip *chip, const u8 *buf,
 				    int oob_on, int page)
 {
-	return mtk_nfc_write_page(mtd, chip, buf, page, 0);
+	return mtk_nfc_write_page(nand_to_mtd(chip), chip, buf, page, 0);
 }
 
-static int mtk_nfc_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				  const u8 *buf, int oob_on, int pg)
+static int mtk_nfc_write_page_raw(struct nand_chip *chip, const u8 *buf,
+				  int oob_on, int pg)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct mtk_nfc *nfc = nand_get_controller_data(chip);
 
 	mtk_nfc_format_page(mtd, buf);
 	return mtk_nfc_write_page(mtd, chip, nfc->buffer, pg, 1);
 }
 
-static int mtk_nfc_write_subpage_hwecc(struct mtd_info *mtd,
-				       struct nand_chip *chip, u32 offset,
+static int mtk_nfc_write_subpage_hwecc(struct nand_chip *chip, u32 offset,
 				       u32 data_len, const u8 *buf,
 				       int oob_on, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct mtk_nfc *nfc = nand_get_controller_data(chip);
 	int ret;
 
@@ -839,10 +839,9 @@ static int mtk_nfc_write_subpage_hwecc(struct mtd_info *mtd,
 	return mtk_nfc_write_page(mtd, chip, nfc->buffer, page, 1);
 }
 
-static int mtk_nfc_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip,
-				 int page)
+static int mtk_nfc_write_oob_std(struct nand_chip *chip, int page)
 {
-	return mtk_nfc_write_page_raw(mtd, chip, NULL, 1, page);
+	return mtk_nfc_write_page_raw(chip, NULL, 1, page);
 }
 
 static int mtk_nfc_update_ecc_stats(struct mtd_info *mtd, u8 *buf, u32 sectors)
diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index 35fcec595c3e..597c74ea7e5e 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -873,22 +873,21 @@ static int mxc_nand_write_page(struct nand_chip *chip, const uint8_t *buf,
 	return 0;
 }
 
-static int mxc_nand_write_page_ecc(struct mtd_info *mtd, struct nand_chip *chip,
-				   const uint8_t *buf, int oob_required,
-				   int page)
+static int mxc_nand_write_page_ecc(struct nand_chip *chip, const uint8_t *buf,
+				   int oob_required, int page)
 {
 	return mxc_nand_write_page(chip, buf, true, page);
 }
 
-static int mxc_nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				   const uint8_t *buf, int oob_required, int page)
+static int mxc_nand_write_page_raw(struct nand_chip *chip, const uint8_t *buf,
+				   int oob_required, int page)
 {
 	return mxc_nand_write_page(chip, buf, false, page);
 }
 
-static int mxc_nand_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			      int page)
+static int mxc_nand_write_oob(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct mxc_nand_host *host = nand_get_controller_data(chip);
 
 	memset(host->data_buf, 0xff, mtd->writesize);
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index e1f60c841348..cc386ee64a1b 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -3787,12 +3787,13 @@ EXPORT_SYMBOL(nand_read_oob_syndrome);
 
 /**
  * nand_write_oob_std - [REPLACEABLE] the most common OOB data write function
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @page: page number to write
  */
-int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page)
+int nand_write_oob_std(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	return nand_prog_page_op(chip, page, mtd->writesize, chip->oob_poi,
 				 mtd->oobsize);
 }
@@ -3801,13 +3802,12 @@ EXPORT_SYMBOL(nand_write_oob_std);
 /**
  * nand_write_oob_syndrome - [REPLACEABLE] OOB data write function for HW ECC
  *			     with syndrome - only for large page flash
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @page: page number to write
  */
-int nand_write_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
-			    int page)
+int nand_write_oob_syndrome(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int chunk = chip->ecc.bytes + chip->ecc.prepad + chip->ecc.postpad;
 	int eccsize = chip->ecc.size, length = mtd->oobsize;
 	int ret, i, len, pos, sndcmd = 0, steps = chip->ecc.steps;
@@ -3984,7 +3984,6 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from,
 
 /**
  * nand_write_page_raw_notsupp - dummy raw page write function
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: data buffer
  * @oob_required: must write chip->oob_poi to OOB
@@ -3992,8 +3991,8 @@ static int nand_read_oob(struct mtd_info *mtd, loff_t from,
  *
  * Returns -ENOTSUPP unconditionally.
  */
-int nand_write_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip,
-				const u8 *buf, int oob_required, int page)
+int nand_write_page_raw_notsupp(struct nand_chip *chip, const u8 *buf,
+				int oob_required, int page)
 {
 	return -ENOTSUPP;
 }
@@ -4001,7 +4000,6 @@ EXPORT_SYMBOL(nand_write_page_raw_notsupp);
 
 /**
  * nand_write_page_raw - [INTERN] raw page write function
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: data buffer
  * @oob_required: must write chip->oob_poi to OOB
@@ -4009,9 +4007,10 @@ EXPORT_SYMBOL(nand_write_page_raw_notsupp);
  *
  * Not for syndrome calculating ECC controllers, which use a special oob layout.
  */
-int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-			const uint8_t *buf, int oob_required, int page)
+int nand_write_page_raw(struct nand_chip *chip, const uint8_t *buf,
+			int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 
 	ret = nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize);
@@ -4031,7 +4030,6 @@ EXPORT_SYMBOL(nand_write_page_raw);
 
 /**
  * nand_write_page_raw_syndrome - [INTERN] raw page write function
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: data buffer
  * @oob_required: must write chip->oob_poi to OOB
@@ -4039,11 +4037,11 @@ EXPORT_SYMBOL(nand_write_page_raw);
  *
  * We need a special oob layout and handling even when ECC isn't checked.
  */
-static int nand_write_page_raw_syndrome(struct mtd_info *mtd,
-					struct nand_chip *chip,
+static int nand_write_page_raw_syndrome(struct nand_chip *chip,
 					const uint8_t *buf, int oob_required,
 					int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
 	uint8_t *oob = chip->oob_poi;
@@ -4096,16 +4094,15 @@ static int nand_write_page_raw_syndrome(struct mtd_info *mtd,
 }
 /**
  * nand_write_page_swecc - [REPLACEABLE] software ECC based page write function
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: data buffer
  * @oob_required: must write chip->oob_poi to OOB
  * @page: page number to write
  */
-static int nand_write_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
-				 const uint8_t *buf, int oob_required,
-				 int page)
+static int nand_write_page_swecc(struct nand_chip *chip, const uint8_t *buf,
+				 int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int i, eccsize = chip->ecc.size, ret;
 	int eccbytes = chip->ecc.bytes;
 	int eccsteps = chip->ecc.steps;
@@ -4121,21 +4118,20 @@ static int nand_write_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
 	if (ret)
 		return ret;
 
-	return chip->ecc.write_page_raw(mtd, chip, buf, 1, page);
+	return chip->ecc.write_page_raw(chip, buf, 1, page);
 }
 
 /**
  * nand_write_page_hwecc - [REPLACEABLE] hardware ECC based page write function
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: data buffer
  * @oob_required: must write chip->oob_poi to OOB
  * @page: page number to write
  */
-static int nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
-				  const uint8_t *buf, int oob_required,
-				  int page)
+static int nand_write_page_hwecc(struct nand_chip *chip, const uint8_t *buf,
+				 int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int i, eccsize = chip->ecc.size, ret;
 	int eccbytes = chip->ecc.bytes;
 	int eccsteps = chip->ecc.steps;
@@ -4171,7 +4167,6 @@ static int nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
 
 /**
  * nand_write_subpage_hwecc - [REPLACEABLE] hardware ECC based subpage write
- * @mtd:	mtd info structure
  * @chip:	nand chip info structure
  * @offset:	column address of subpage within the page
  * @data_len:	data length
@@ -4179,11 +4174,11 @@ static int nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
  * @oob_required: must write chip->oob_poi to OOB
  * @page: page number to write
  */
-static int nand_write_subpage_hwecc(struct mtd_info *mtd,
-				struct nand_chip *chip, uint32_t offset,
-				uint32_t data_len, const uint8_t *buf,
-				int oob_required, int page)
+static int nand_write_subpage_hwecc(struct nand_chip *chip, uint32_t offset,
+				    uint32_t data_len, const uint8_t *buf,
+				    int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	uint8_t *oob_buf  = chip->oob_poi;
 	uint8_t *ecc_calc = chip->ecc.calc_buf;
 	int ecc_size      = chip->ecc.size;
@@ -4242,7 +4237,6 @@ static int nand_write_subpage_hwecc(struct mtd_info *mtd,
 
 /**
  * nand_write_page_syndrome - [REPLACEABLE] hardware ECC syndrome based page write
- * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: data buffer
  * @oob_required: must write chip->oob_poi to OOB
@@ -4251,11 +4245,10 @@ static int nand_write_subpage_hwecc(struct mtd_info *mtd,
  * The hw generator calculates the error syndrome automatically. Therefore we
  * need a special oob layout and handling.
  */
-static int nand_write_page_syndrome(struct mtd_info *mtd,
-				    struct nand_chip *chip,
-				    const uint8_t *buf, int oob_required,
-				    int page)
+static int nand_write_page_syndrome(struct nand_chip *chip, const uint8_t *buf,
+				    int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int i, eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
 	int eccsteps = chip->ecc.steps;
@@ -4336,14 +4329,13 @@ static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 		subpage = 0;
 
 	if (unlikely(raw))
-		status = chip->ecc.write_page_raw(mtd, chip, buf,
-						  oob_required, page);
+		status = chip->ecc.write_page_raw(chip, buf, oob_required,
+						  page);
 	else if (subpage)
-		status = chip->ecc.write_subpage(mtd, chip, offset, data_len,
-						 buf, oob_required, page);
+		status = chip->ecc.write_subpage(chip, offset, data_len, buf,
+						 oob_required, page);
 	else
-		status = chip->ecc.write_page(mtd, chip, buf, oob_required,
-					      page);
+		status = chip->ecc.write_page(chip, buf, oob_required, page);
 
 	if (status < 0)
 		return status;
@@ -4610,9 +4602,9 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to,
 	nand_fill_oob(mtd, ops->oobbuf, ops->ooblen, ops);
 
 	if (ops->mode == MTD_OPS_RAW)
-		status = chip->ecc.write_oob_raw(mtd, chip, page & chip->pagemask);
+		status = chip->ecc.write_oob_raw(chip, page & chip->pagemask);
 	else
-		status = chip->ecc.write_oob(mtd, chip, page & chip->pagemask);
+		status = chip->ecc.write_oob(chip, page & chip->pagemask);
 
 	chip->select_chip(mtd, -1);
 
diff --git a/drivers/mtd/nand/raw/nand_ecc.c b/drivers/mtd/nand/raw/nand_ecc.c
index 8f86eed40b70..93df8e73f577 100644
--- a/drivers/mtd/nand/raw/nand_ecc.c
+++ b/drivers/mtd/nand/raw/nand_ecc.c
@@ -394,7 +394,7 @@ EXPORT_SYMBOL(__nand_calculate_ecc);
 /**
  * nand_calculate_ecc - [NAND Interface] Calculate 3-byte ECC for 256/512-byte
  *			 block
- * @mtd:	MTD block structure
+ * @chip:	NAND chip object
  * @buf:	input buffer with raw data
  * @code:	output buffer with ECC
  */
diff --git a/drivers/mtd/nand/raw/nand_micron.c b/drivers/mtd/nand/raw/nand_micron.c
index d83a86ba9d09..2f26dbeb5428 100644
--- a/drivers/mtd/nand/raw/nand_micron.c
+++ b/drivers/mtd/nand/raw/nand_micron.c
@@ -332,9 +332,8 @@ out:
 }
 
 static int
-micron_nand_write_page_on_die_ecc(struct mtd_info *mtd, struct nand_chip *chip,
-				  const uint8_t *buf, int oob_required,
-				  int page)
+micron_nand_write_page_on_die_ecc(struct nand_chip *chip, const uint8_t *buf,
+				  int oob_required, int page)
 {
 	int ret;
 
@@ -342,7 +341,7 @@ micron_nand_write_page_on_die_ecc(struct mtd_info *mtd, struct nand_chip *chip,
 	if (ret)
 		return ret;
 
-	ret = nand_write_page_raw(mtd, chip, buf, oob_required, page);
+	ret = nand_write_page_raw(chip, buf, oob_required, page);
 	micron_nand_on_die_ecc_setup(chip, false);
 
 	return ret;
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index dfe96098f3f6..f1f8b6c1d654 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -1511,7 +1511,6 @@ static int omap_elm_correct_data(struct nand_chip *chip, u_char *data,
 
 /**
  * omap_write_page_bch - BCH ecc based write page function for entire page
- * @mtd:		mtd info structure
  * @chip:		nand chip info structure
  * @buf:		data buffer
  * @oob_required:	must write chip->oob_poi to OOB
@@ -1519,9 +1518,10 @@ static int omap_elm_correct_data(struct nand_chip *chip, u_char *data,
  *
  * Custom write page method evolved to support multi sector writing in one shot
  */
-static int omap_write_page_bch(struct mtd_info *mtd, struct nand_chip *chip,
-			       const uint8_t *buf, int oob_required, int page)
+static int omap_write_page_bch(struct nand_chip *chip, const uint8_t *buf,
+			       int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 	uint8_t *ecc_calc = chip->ecc.calc_buf;
 
@@ -1549,7 +1549,6 @@ static int omap_write_page_bch(struct mtd_info *mtd, struct nand_chip *chip,
 
 /**
  * omap_write_subpage_bch - BCH hardware ECC based subpage write
- * @mtd:	mtd info structure
  * @chip:	nand chip info structure
  * @offset:	column address of subpage within the page
  * @data_len:	data length
@@ -1559,11 +1558,11 @@ static int omap_write_page_bch(struct mtd_info *mtd, struct nand_chip *chip,
  *
  * OMAP optimized subpage write method.
  */
-static int omap_write_subpage_bch(struct mtd_info *mtd,
-				  struct nand_chip *chip, u32 offset,
+static int omap_write_subpage_bch(struct nand_chip *chip, u32 offset,
 				  u32 data_len, const u8 *buf,
 				  int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	u8 *ecc_calc = chip->ecc.calc_buf;
 	int ecc_size      = chip->ecc.size;
 	int ecc_bytes     = chip->ecc.bytes;
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index 49113d4cee10..e0cec027572c 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -2005,8 +2005,8 @@ static int qcom_nandc_read_oob(struct nand_chip *chip, int page)
 }
 
 /* implements ecc->write_page() */
-static int qcom_nandc_write_page(struct mtd_info *mtd, struct nand_chip *chip,
-				 const uint8_t *buf, int oob_required, int page)
+static int qcom_nandc_write_page(struct nand_chip *chip, const uint8_t *buf,
+				 int oob_required, int page)
 {
 	struct qcom_nand_host *host = to_qcom_nand_host(chip);
 	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
@@ -2075,10 +2075,11 @@ static int qcom_nandc_write_page(struct mtd_info *mtd, struct nand_chip *chip,
 }
 
 /* implements ecc->write_page_raw() */
-static int qcom_nandc_write_page_raw(struct mtd_info *mtd,
-				     struct nand_chip *chip, const uint8_t *buf,
-				     int oob_required, int page)
+static int qcom_nandc_write_page_raw(struct nand_chip *chip,
+				     const uint8_t *buf, int oob_required,
+				     int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct qcom_nand_host *host = to_qcom_nand_host(chip);
 	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
@@ -2153,9 +2154,9 @@ static int qcom_nandc_write_page_raw(struct mtd_info *mtd,
  * since ECC is calculated for the combined codeword. So update the OOB from
  * chip->oob_poi, and pad the data area with OxFF before writing.
  */
-static int qcom_nandc_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
-				int page)
+static int qcom_nandc_write_oob(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct qcom_nand_host *host = to_qcom_nand_host(chip);
 	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c
index fb5df6099d7b..bb58edd2bdf0 100644
--- a/drivers/mtd/nand/raw/sh_flctl.c
+++ b/drivers/mtd/nand/raw/sh_flctl.c
@@ -622,10 +622,11 @@ static int flctl_read_page_hwecc(struct nand_chip *chip, uint8_t *buf,
 	return 0;
 }
 
-static int flctl_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
-				  const uint8_t *buf, int oob_required,
-				  int page)
+static int flctl_write_page_hwecc(struct nand_chip *chip, const uint8_t *buf,
+				  int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize);
 	chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
 	return nand_prog_page_end_op(chip);
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index 26d5c6c41c49..86d666c0c03c 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -1297,11 +1297,11 @@ static int sunxi_nfc_hw_ecc_read_subpage_dma(struct nand_chip *chip,
 					     buf, page);
 }
 
-static int sunxi_nfc_hw_ecc_write_page(struct mtd_info *mtd,
-				       struct nand_chip *chip,
+static int sunxi_nfc_hw_ecc_write_page(struct nand_chip *chip,
 				       const uint8_t *buf, int oob_required,
 				       int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
 	int ret, i, cur_off = 0;
 
@@ -1331,12 +1331,12 @@ static int sunxi_nfc_hw_ecc_write_page(struct mtd_info *mtd,
 	return nand_prog_page_end_op(chip);
 }
 
-static int sunxi_nfc_hw_ecc_write_subpage(struct mtd_info *mtd,
-					  struct nand_chip *chip,
+static int sunxi_nfc_hw_ecc_write_subpage(struct nand_chip *chip,
 					  u32 data_offs, u32 data_len,
 					  const u8 *buf, int oob_required,
 					  int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
 	int ret, i, cur_off = 0;
 
@@ -1363,12 +1363,12 @@ static int sunxi_nfc_hw_ecc_write_subpage(struct mtd_info *mtd,
 	return nand_prog_page_end_op(chip);
 }
 
-static int sunxi_nfc_hw_ecc_write_page_dma(struct mtd_info *mtd,
-					   struct nand_chip *chip,
+static int sunxi_nfc_hw_ecc_write_page_dma(struct nand_chip *chip,
 					   const u8 *buf,
 					   int oob_required,
 					   int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct sunxi_nfc *nfc = to_sunxi_nfc(nand->controller);
 	struct nand_ecc_ctrl *ecc = &nand->ecc;
@@ -1425,7 +1425,7 @@ static int sunxi_nfc_hw_ecc_write_page_dma(struct mtd_info *mtd,
 	return nand_prog_page_end_op(chip);
 
 pio_fallback:
-	return sunxi_nfc_hw_ecc_write_page(mtd, chip, buf, oob_required, page);
+	return sunxi_nfc_hw_ecc_write_page(chip, buf, oob_required, page);
 }
 
 static int sunxi_nfc_hw_ecc_read_oob(struct nand_chip *chip, int page)
@@ -1435,16 +1435,15 @@ static int sunxi_nfc_hw_ecc_read_oob(struct nand_chip *chip, int page)
 	return chip->ecc.read_page(chip, chip->data_buf, 1, page);
 }
 
-static int sunxi_nfc_hw_ecc_write_oob(struct mtd_info *mtd,
-				      struct nand_chip *chip,
-				      int page)
+static int sunxi_nfc_hw_ecc_write_oob(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 
 	chip->pagebuf = -1;
 
 	memset(chip->data_buf, 0xff, mtd->writesize);
-	ret = chip->ecc.write_page(mtd, chip, chip->data_buf, 1, page);
+	ret = chip->ecc.write_page(chip, chip->data_buf, 1, page);
 	if (ret)
 		return ret;
 
diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c
index c53d47159195..7c8f47546002 100644
--- a/drivers/mtd/nand/raw/tango_nand.c
+++ b/drivers/mtd/nand/raw/tango_nand.c
@@ -300,9 +300,10 @@ static int tango_read_page(struct nand_chip *chip, u8 *buf,
 	return res;
 }
 
-static int tango_write_page(struct mtd_info *mtd, struct nand_chip *chip,
-			    const u8 *buf, int oob_required, int page)
+static int tango_write_page(struct nand_chip *chip, const u8 *buf,
+			    int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct tango_nfc *nfc = to_tango_nfc(chip->controller);
 	int err, status, len = mtd->writesize;
 
@@ -433,8 +434,8 @@ static int tango_read_page_raw(struct nand_chip *chip, u8 *buf,
 	return 0;
 }
 
-static int tango_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				const u8 *buf, int oob_required, int page)
+static int tango_write_page_raw(struct nand_chip *chip, const u8 *buf,
+				int oob_required, int page)
 {
 	nand_prog_page_begin_op(chip, page, 0, NULL, 0);
 	raw_write(chip, buf, chip->oob_poi);
@@ -448,8 +449,7 @@ static int tango_read_oob(struct nand_chip *chip, int page)
 	return 0;
 }
 
-static int tango_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			   int page)
+static int tango_write_oob(struct nand_chip *chip, int page)
 {
 	nand_prog_page_begin_op(chip, page, 0, NULL, 0);
 	raw_write(chip, NULL, chip->oob_poi);
diff --git a/drivers/mtd/nand/raw/tegra_nand.c b/drivers/mtd/nand/raw/tegra_nand.c
index bcc3a2888c4f..df8e78814a08 100644
--- a/drivers/mtd/nand/raw/tegra_nand.c
+++ b/drivers/mtd/nand/raw/tegra_nand.c
@@ -625,10 +625,10 @@ static int tegra_nand_read_page_raw(struct nand_chip *chip, u8 *buf,
 				    mtd->oobsize, page, true);
 }
 
-static int tegra_nand_write_page_raw(struct mtd_info *mtd,
-				     struct nand_chip *chip, const u8 *buf,
+static int tegra_nand_write_page_raw(struct nand_chip *chip, const u8 *buf,
 				     int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	void *oob_buf = oob_required ? chip->oob_poi : NULL;
 
 	return tegra_nand_page_xfer(mtd, chip, (void *)buf, oob_buf,
@@ -643,9 +643,10 @@ static int tegra_nand_read_oob(struct nand_chip *chip, int page)
 				    mtd->oobsize, page, true);
 }
 
-static int tegra_nand_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
-				int page)
+static int tegra_nand_write_oob(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	return tegra_nand_page_xfer(mtd, chip, NULL, chip->oob_poi,
 				    mtd->oobsize, page, false);
 }
@@ -760,10 +761,10 @@ static int tegra_nand_read_page_hwecc(struct nand_chip *chip, u8 *buf,
 	}
 }
 
-static int tegra_nand_write_page_hwecc(struct mtd_info *mtd,
-				       struct nand_chip *chip, const u8 *buf,
+static int tegra_nand_write_page_hwecc(struct nand_chip *chip, const u8 *buf,
 				       int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct tegra_nand_controller *ctrl = to_tegra_ctrl(chip->controller);
 	void *oob_buf = oob_required ? chip->oob_poi : NULL;
 	int ret;
diff --git a/drivers/mtd/nand/raw/vf610_nfc.c b/drivers/mtd/nand/raw/vf610_nfc.c
index 7cbcc41cea95..bce6f6769cd6 100644
--- a/drivers/mtd/nand/raw/vf610_nfc.c
+++ b/drivers/mtd/nand/raw/vf610_nfc.c
@@ -603,9 +603,10 @@ static int vf610_nfc_read_page(struct nand_chip *chip, uint8_t *buf,
 	}
 }
 
-static int vf610_nfc_write_page(struct mtd_info *mtd, struct nand_chip *chip,
-				const uint8_t *buf, int oob_required, int page)
+static int vf610_nfc_write_page(struct nand_chip *chip, const uint8_t *buf,
+				int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
 	int trfr_sz = mtd->writesize + mtd->oobsize;
 	u32 row = 0, cmd1 = 0, cmd2 = 0, code = 0;
@@ -658,10 +659,10 @@ static int vf610_nfc_read_page_raw(struct nand_chip *chip, u8 *buf,
 	return ret;
 }
 
-static int vf610_nfc_write_page_raw(struct mtd_info *mtd,
-				    struct nand_chip *chip, const u8 *buf,
+static int vf610_nfc_write_page_raw(struct nand_chip *chip, const u8 *buf,
 				    int oob_required, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
 	int ret;
 
@@ -690,9 +691,9 @@ static int vf610_nfc_read_oob(struct nand_chip *chip, int page)
 	return ret;
 }
 
-static int vf610_nfc_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			       int page)
+static int vf610_nfc_write_oob(struct nand_chip *chip, int page)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
 	int ret;
 
diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c
index 0776d38d4498..2b2f98efdb54 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.c
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c
@@ -630,8 +630,7 @@ static int spinand_erase_block(struct spi_device *spi_nand, u16 block_id)
 }
 
 #ifdef CONFIG_MTD_SPINAND_ONDIEECC
-static int spinand_write_page_hwecc(struct mtd_info *mtd,
-				    struct nand_chip *chip,
+static int spinand_write_page_hwecc(struct nand_chip *chip,
 				    const u8 *buf, int oob_required,
 				    int page)
 {
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index a5f4a585f749..527947e81447 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -654,23 +654,21 @@ struct nand_ecc_ctrl {
 		       uint8_t *calc_ecc);
 	int (*read_page_raw)(struct nand_chip *chip, uint8_t *buf,
 			     int oob_required, int page);
-	int (*write_page_raw)(struct mtd_info *mtd, struct nand_chip *chip,
-			const uint8_t *buf, int oob_required, int page);
+	int (*write_page_raw)(struct nand_chip *chip, const uint8_t *buf,
+			      int oob_required, int page);
 	int (*read_page)(struct nand_chip *chip, uint8_t *buf,
 			 int oob_required, int page);
 	int (*read_subpage)(struct nand_chip *chip, uint32_t offs,
 			    uint32_t len, uint8_t *buf, int page);
-	int (*write_subpage)(struct mtd_info *mtd, struct nand_chip *chip,
-			uint32_t offset, uint32_t data_len,
-			const uint8_t *data_buf, int oob_required, int page);
-	int (*write_page)(struct mtd_info *mtd, struct nand_chip *chip,
-			const uint8_t *buf, int oob_required, int page);
-	int (*write_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip,
-			int page);
+	int (*write_subpage)(struct nand_chip *chip, uint32_t offset,
+			     uint32_t data_len, const uint8_t *data_buf,
+			     int oob_required, int page);
+	int (*write_page)(struct nand_chip *chip, const uint8_t *buf,
+			  int oob_required, int page);
+	int (*write_oob_raw)(struct nand_chip *chip, int page);
 	int (*read_oob_raw)(struct nand_chip *chip, int page);
 	int (*read_oob)(struct nand_chip *chip, int page);
-	int (*write_oob)(struct mtd_info *mtd, struct nand_chip *chip,
-			int page);
+	int (*write_oob)(struct nand_chip *chip, int page);
 };
 
 /**
@@ -1668,11 +1666,10 @@ int nand_ecc_choose_conf(struct nand_chip *chip,
 			 const struct nand_ecc_caps *caps, int oobavail);
 
 /* Default write_oob implementation */
-int nand_write_oob_std(struct mtd_info *mtd, struct nand_chip *chip, int page);
+int nand_write_oob_std(struct nand_chip *chip, int page);
 
 /* Default write_oob syndrome implementation */
-int nand_write_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
-			    int page);
+int nand_write_oob_syndrome(struct nand_chip *chip, int page);
 
 /* Default read_oob implementation */
 int nand_read_oob_std(struct nand_chip *chip, int page);
@@ -1694,10 +1691,10 @@ int nand_read_page_raw_notsupp(struct nand_chip *chip, u8 *buf,
 			       int oob_required, int page);
 
 /* Default write_page_raw implementation */
-int nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-			const uint8_t *buf, int oob_required, int page);
-int nand_write_page_raw_notsupp(struct mtd_info *mtd, struct nand_chip *chip,
-				const u8 *buf, int oob_required, int page);
+int nand_write_page_raw(struct nand_chip *chip, const uint8_t *buf,
+			int oob_required, int page);
+int nand_write_page_raw_notsupp(struct nand_chip *chip, const u8 *buf,
+				int oob_required, int page);
 
 /* Reset and initialize a NAND device */
 int nand_reset(struct nand_chip *chip, int chipnr);
-- 
cgit v1.2.3


From 7e534323c416216e8ac45b5633fb0a5e5137e5b5 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:22 +0200
Subject: mtd: rawnand: Pass a nand_chip object to chip->read_xxx() hooks

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one or
remove the mtd_info object when both are passed.

Let's tackle all chip->read_xxx() hooks at once.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/ams-delta.c                 |  7 ++--
 drivers/mtd/nand/raw/atmel/nand-controller.c     | 10 +++---
 drivers/mtd/nand/raw/au1550nd.c                  | 15 ++++-----
 drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c | 10 +++---
 drivers/mtd/nand/raw/brcmnand/brcmnand.c         |  7 ++--
 drivers/mtd/nand/raw/cafe_nand.c                 | 10 +++---
 drivers/mtd/nand/raw/cmx270_nand.c               |  7 ++--
 drivers/mtd/nand/raw/cs553x_nand.c               |  7 ++--
 drivers/mtd/nand/raw/davinci_nand.c              |  5 ++-
 drivers/mtd/nand/raw/denali.c                    | 11 ++++---
 drivers/mtd/nand/raw/diskonchip.c                | 32 +++++++-----------
 drivers/mtd/nand/raw/docg4.c                     | 13 +++-----
 drivers/mtd/nand/raw/fsl_elbc_nand.c             |  8 ++---
 drivers/mtd/nand/raw/fsl_ifc_nand.c              | 13 +++-----
 drivers/mtd/nand/raw/fsl_upm.c                   |  8 ++---
 drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c       | 16 ++++-----
 drivers/mtd/nand/raw/hisi504_nand.c              |  8 ++---
 drivers/mtd/nand/raw/lpc32xx_slc.c               | 12 +++----
 drivers/mtd/nand/raw/mpc5121_nfc.c               |  8 ++---
 drivers/mtd/nand/raw/mtk_nand.c                  |  7 ++--
 drivers/mtd/nand/raw/mxc_nand.c                  | 10 +++---
 drivers/mtd/nand/raw/nand_base.c                 | 41 ++++++++++--------------
 drivers/mtd/nand/raw/nandsim.c                   |  8 ++---
 drivers/mtd/nand/raw/ndfc.c                      |  3 +-
 drivers/mtd/nand/raw/nuc900_nand.c               |  8 ++---
 drivers/mtd/nand/raw/omap2.c                     | 24 ++++++++------
 drivers/mtd/nand/raw/orion_nand.c                |  3 +-
 drivers/mtd/nand/raw/oxnas_nand.c                |  6 ++--
 drivers/mtd/nand/raw/pasemi_nand.c               |  4 +--
 drivers/mtd/nand/raw/plat_nand.c                 | 11 +------
 drivers/mtd/nand/raw/qcom_nandc.c                |  6 ++--
 drivers/mtd/nand/raw/r852.c                      |  8 ++---
 drivers/mtd/nand/raw/s3c2410.c                   |  6 ++--
 drivers/mtd/nand/raw/sh_flctl.c                  | 10 +++---
 drivers/mtd/nand/raw/socrates_nand.c             | 10 +++---
 drivers/mtd/nand/raw/sunxi_nand.c                | 11 +++----
 drivers/mtd/nand/raw/tango_nand.c                | 12 +++----
 drivers/mtd/nand/raw/tmio_nand.c                 |  8 ++---
 drivers/mtd/nand/raw/txx9ndfmc.c                 |  8 ++---
 drivers/mtd/nand/raw/xway_nand.c                 |  8 ++---
 drivers/staging/mt29f_spinand/mt29f_spinand.c    | 10 +++---
 include/linux/mtd/rawnand.h                      |  4 +--
 42 files changed, 186 insertions(+), 247 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c
index acf7971e815d..eb48c939c4ae 100644
--- a/drivers/mtd/nand/raw/ams-delta.c
+++ b/drivers/mtd/nand/raw/ams-delta.c
@@ -75,10 +75,9 @@ static void ams_delta_write_byte(struct mtd_info *mtd, u_char byte)
 	gpio_set_value(AMS_DELTA_GPIO_PIN_NAND_NWE, 1);
 }
 
-static u_char ams_delta_read_byte(struct mtd_info *mtd)
+static u_char ams_delta_read_byte(struct nand_chip *this)
 {
 	u_char res;
-	struct nand_chip *this = mtd_to_nand(mtd);
 	void __iomem *io_base = (void __iomem *)nand_get_controller_data(this);
 
 	gpio_set_value(AMS_DELTA_GPIO_PIN_NAND_NRE, 0);
@@ -99,12 +98,12 @@ static void ams_delta_write_buf(struct mtd_info *mtd, const u_char *buf,
 		ams_delta_write_byte(mtd, buf[i]);
 }
 
-static void ams_delta_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+static void ams_delta_read_buf(struct nand_chip *this, u_char *buf, int len)
 {
 	int i;
 
 	for (i=0; i<len; i++)
-		buf[i] = ams_delta_read_byte(mtd);
+		buf[i] = ams_delta_read_byte(this);
 }
 
 /*
diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index 3ebe9b727315..371223e19dcc 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -410,9 +410,8 @@ err:
 	return -EIO;
 }
 
-static u8 atmel_nand_read_byte(struct mtd_info *mtd)
+static u8 atmel_nand_read_byte(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct atmel_nand *nand = to_atmel_nand(chip);
 
 	return ioread8(nand->activecs->io.virt);
@@ -429,9 +428,8 @@ static void atmel_nand_write_byte(struct mtd_info *mtd, u8 byte)
 		iowrite8(byte, nand->activecs->io.virt);
 }
 
-static void atmel_nand_read_buf(struct mtd_info *mtd, u8 *buf, int len)
+static void atmel_nand_read_buf(struct nand_chip *chip, u8 *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct atmel_nand *nand = to_atmel_nand(chip);
 	struct atmel_nand_controller *nc;
 
@@ -883,8 +881,8 @@ static int atmel_nand_pmecc_read_pg(struct nand_chip *chip, u8 *buf,
 	if (ret)
 		return ret;
 
-	atmel_nand_read_buf(mtd, buf, mtd->writesize);
-	atmel_nand_read_buf(mtd, chip->oob_poi, mtd->oobsize);
+	atmel_nand_read_buf(chip, buf, mtd->writesize);
+	atmel_nand_read_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	ret = atmel_nand_pmecc_correct_data(chip, buf, raw);
 
diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c
index d277a141c7d3..76ea4141eb10 100644
--- a/drivers/mtd/nand/raw/au1550nd.c
+++ b/drivers/mtd/nand/raw/au1550nd.c
@@ -29,13 +29,12 @@ struct au1550nd_ctx {
 
 /**
  * au_read_byte -  read one byte from the chip
- * @mtd:	MTD device structure
+ * @this:	NAND chip object
  *
  * read function for 8bit buswidth
  */
-static u_char au_read_byte(struct mtd_info *mtd)
+static u_char au_read_byte(struct nand_chip *this)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	u_char ret = readb(this->IO_ADDR_R);
 	wmb(); /* drain writebuffer */
 	return ret;
@@ -57,13 +56,12 @@ static void au_write_byte(struct mtd_info *mtd, u_char byte)
 
 /**
  * au_read_byte16 -  read one byte endianness aware from the chip
- * @mtd:	MTD device structure
+ * @this:	NAND chip object
  *
  * read function for 16bit buswidth with endianness conversion
  */
-static u_char au_read_byte16(struct mtd_info *mtd)
+static u_char au_read_byte16(struct nand_chip *this)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	u_char ret = (u_char) cpu_to_le16(readw(this->IO_ADDR_R));
 	wmb(); /* drain writebuffer */
 	return ret;
@@ -104,16 +102,15 @@ static void au_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
 
 /**
  * au_read_buf -  read chip data into buffer
- * @mtd:	MTD device structure
+ * @this:	NAND chip object
  * @buf:	buffer to store date
  * @len:	number of bytes to read
  *
  * read function for 8bit buswidth
  */
-static void au_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+static void au_read_buf(struct nand_chip *this, u_char *buf, int len)
 {
 	int i;
-	struct nand_chip *this = mtd_to_nand(mtd);
 
 	for (i = 0; i < len; i++) {
 		buf[i] = readb(this->IO_ADDR_R);
diff --git a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
index 7022ffd271ad..cf3e45358c60 100644
--- a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
@@ -310,9 +310,9 @@ static void bcm47xxnflash_ops_bcm4706_cmdfunc(struct mtd_info *mtd,
 	b47n->curr_command = command;
 }
 
-static u8 bcm47xxnflash_ops_bcm4706_read_byte(struct mtd_info *mtd)
+static u8 bcm47xxnflash_ops_bcm4706_read_byte(struct nand_chip *nand_chip)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(nand_chip);
 	struct bcm47xxnflash *b47n = nand_get_controller_data(nand_chip);
 	struct bcma_drv_cc *cc = b47n->cc;
 	u32 tmp = 0;
@@ -338,16 +338,16 @@ static u8 bcm47xxnflash_ops_bcm4706_read_byte(struct mtd_info *mtd)
 	return 0;
 }
 
-static void bcm47xxnflash_ops_bcm4706_read_buf(struct mtd_info *mtd,
+static void bcm47xxnflash_ops_bcm4706_read_buf(struct nand_chip *nand_chip,
 					       uint8_t *buf, int len)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
 	struct bcm47xxnflash *b47n = nand_get_controller_data(nand_chip);
 
 	switch (b47n->curr_command) {
 	case NAND_CMD_READ0:
 	case NAND_CMD_READOOB:
-		bcm47xxnflash_ops_bcm4706_read(mtd, buf, len);
+		bcm47xxnflash_ops_bcm4706_read(nand_to_mtd(nand_chip), buf,
+					       len);
 		return;
 	}
 
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index d8fb2b5c19c9..7cbc6045f16d 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -1417,9 +1417,8 @@ static void brcmnand_cmdfunc(struct mtd_info *mtd, unsigned command,
 		brcmnand_wp(mtd, 1);
 }
 
-static uint8_t brcmnand_read_byte(struct mtd_info *mtd)
+static uint8_t brcmnand_read_byte(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct brcmnand_host *host = nand_get_controller_data(chip);
 	struct brcmnand_controller *ctrl = host->ctrl;
 	uint8_t ret = 0;
@@ -1474,12 +1473,12 @@ static uint8_t brcmnand_read_byte(struct mtd_info *mtd)
 	return ret;
 }
 
-static void brcmnand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void brcmnand_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 {
 	int i;
 
 	for (i = 0; i < len; i++, buf++)
-		*buf = brcmnand_read_byte(mtd);
+		*buf = brcmnand_read_byte(chip);
 }
 
 static void brcmnand_write_buf(struct mtd_info *mtd, const uint8_t *buf,
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index fe7c7db3cfe7..97d835f88b86 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -133,9 +133,8 @@ static void cafe_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 		len, cafe->datalen);
 }
 
-static void cafe_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void cafe_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct cafe_priv *cafe = nand_get_controller_data(chip);
 
 	if (cafe->usedma)
@@ -148,13 +147,12 @@ static void cafe_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
 	cafe->datalen += len;
 }
 
-static uint8_t cafe_read_byte(struct mtd_info *mtd)
+static uint8_t cafe_read_byte(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct cafe_priv *cafe = nand_get_controller_data(chip);
 	uint8_t d;
 
-	cafe_read_buf(mtd, &d, 1);
+	cafe_read_buf(chip, &d, 1);
 	cafe_dev_dbg(&cafe->pdev->dev, "Read %02x\n", d);
 
 	return d;
@@ -383,7 +381,7 @@ static int cafe_nand_read_page(struct nand_chip *chip, uint8_t *buf,
 		     cafe_readl(cafe, NAND_ECC_SYN01));
 
 	nand_read_page_op(chip, page, 0, buf, mtd->writesize);
-	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
+	chip->read_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	if (checkecc && cafe_readl(cafe, NAND_ECC_RESULT) & (1<<18)) {
 		unsigned short syn[8], pat[4];
diff --git a/drivers/mtd/nand/raw/cmx270_nand.c b/drivers/mtd/nand/raw/cmx270_nand.c
index 2eb933a8f99e..232d32391b1f 100644
--- a/drivers/mtd/nand/raw/cmx270_nand.c
+++ b/drivers/mtd/nand/raw/cmx270_nand.c
@@ -49,10 +49,8 @@ static const struct mtd_partition partition_info[] = {
 };
 #define NUM_PARTITIONS (ARRAY_SIZE(partition_info))
 
-static u_char cmx270_read_byte(struct mtd_info *mtd)
+static u_char cmx270_read_byte(struct nand_chip *this)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
-
 	return (readl(this->IO_ADDR_R) >> 16);
 }
 
@@ -65,10 +63,9 @@ static void cmx270_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
 		writel((*buf++ << 16), this->IO_ADDR_W);
 }
 
-static void cmx270_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+static void cmx270_read_buf(struct nand_chip *this, u_char *buf, int len)
 {
 	int i;
-	struct nand_chip *this = mtd_to_nand(mtd);
 
 	for (i=0; i<len; i++)
 		*buf++ = readl(this->IO_ADDR_R) >> 16;
diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c
index 193c3e8fa118..4394eeebec7f 100644
--- a/drivers/mtd/nand/raw/cs553x_nand.c
+++ b/drivers/mtd/nand/raw/cs553x_nand.c
@@ -93,10 +93,8 @@
 #define CS_NAND_ECC_CLRECC	(1<<1)
 #define CS_NAND_ECC_ENECC	(1<<0)
 
-static void cs553x_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+static void cs553x_read_buf(struct nand_chip *this, u_char *buf, int len)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
-
 	while (unlikely(len > 0x800)) {
 		memcpy_fromio(buf, this->IO_ADDR_R, 0x800);
 		buf += 0x800;
@@ -117,9 +115,8 @@ static void cs553x_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
 	memcpy_toio(this->IO_ADDR_R, buf, len);
 }
 
-static unsigned char cs553x_read_byte(struct mtd_info *mtd)
+static unsigned char cs553x_read_byte(struct nand_chip *this)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	return readb(this->IO_ADDR_R);
 }
 
diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index c80b6c6da4aa..b879049e51c6 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -435,10 +435,9 @@ correct:
  * the two LSBs for NAND access ... so we can issue 32-bit reads/writes
  * and have that transparently morphed into multiple NAND operations.
  */
-static void nand_davinci_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void nand_davinci_read_buf(struct nand_chip *chip, uint8_t *buf,
+				  int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
 	if ((0x03 & ((uintptr_t)buf)) == 0 && (0x03 & len) == 0)
 		ioread32_rep(chip->IO_ADDR_R, buf, len >> 2);
 	else if ((0x01 & ((uintptr_t)buf)) == 0 && (0x01 & len) == 0)
diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index 52fe5115ed6e..6e5be3efcb5d 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -215,8 +215,9 @@ static uint32_t denali_check_irq(struct denali_nand_info *denali)
 	return irq_status;
 }
 
-static void denali_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void denali_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 	u32 addr = DENALI_MAP11_DATA | DENALI_BANK(denali);
 	int i;
@@ -235,9 +236,9 @@ static void denali_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 		denali->host_write(denali, addr, buf[i]);
 }
 
-static void denali_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len)
+static void denali_read_buf16(struct nand_chip *chip, uint8_t *buf, int len)
 {
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	struct denali_nand_info *denali = mtd_to_denali(nand_to_mtd(chip));
 	u32 addr = DENALI_MAP11_DATA | DENALI_BANK(denali);
 	uint16_t *buf16 = (uint16_t *)buf;
 	int i;
@@ -258,11 +259,11 @@ static void denali_write_buf16(struct mtd_info *mtd, const uint8_t *buf,
 		denali->host_write(denali, addr, buf16[i]);
 }
 
-static uint8_t denali_read_byte(struct mtd_info *mtd)
+static uint8_t denali_read_byte(struct nand_chip *chip)
 {
 	uint8_t byte;
 
-	denali_read_buf(mtd, &byte, 1);
+	denali_read_buf(chip, &byte, 1);
 
 	return byte;
 }
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 142d21be874e..de1059069e8f 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -302,9 +302,8 @@ static void doc2000_write_byte(struct mtd_info *mtd, u_char datum)
 	WriteDOC(datum, docptr, 2k_CDSN_IO);
 }
 
-static u_char doc2000_read_byte(struct mtd_info *mtd)
+static u_char doc2000_read_byte(struct nand_chip *this)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 	u_char ret;
@@ -334,9 +333,8 @@ static void doc2000_writebuf(struct mtd_info *mtd, const u_char *buf, int len)
 		printk("\n");
 }
 
-static void doc2000_readbuf(struct mtd_info *mtd, u_char *buf, int len)
+static void doc2000_readbuf(struct nand_chip *this, u_char *buf, int len)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 	int i;
@@ -344,14 +342,12 @@ static void doc2000_readbuf(struct mtd_info *mtd, u_char *buf, int len)
 	if (debug)
 		printk("readbuf of %d bytes: ", len);
 
-	for (i = 0; i < len; i++) {
+	for (i = 0; i < len; i++)
 		buf[i] = ReadDOC(docptr, 2k_CDSN_IO + i);
-	}
 }
 
-static void doc2000_readbuf_dword(struct mtd_info *mtd, u_char *buf, int len)
+static void doc2000_readbuf_dword(struct nand_chip *this, u_char *buf, int len)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 	int i;
@@ -387,8 +383,8 @@ static uint16_t __init doc200x_ident_chip(struct mtd_info *mtd, int nr)
 	 */
 	udelay(50);
 
-	ret = this->read_byte(mtd) << 8;
-	ret |= this->read_byte(mtd);
+	ret = this->read_byte(this) << 8;
+	ret |= this->read_byte(this);
 
 	if (doc->ChipID == DOC_ChipID_Doc2k && try_dword && !nr) {
 		/* First chip probe. See if we get same results by 32-bit access */
@@ -447,7 +443,7 @@ static int doc200x_wait(struct mtd_info *mtd, struct nand_chip *this)
 	DoC_WaitReady(doc);
 	nand_status_op(this, NULL);
 	DoC_WaitReady(doc);
-	status = (int)this->read_byte(mtd);
+	status = (int)this->read_byte(this);
 
 	return status;
 }
@@ -463,9 +459,8 @@ static void doc2001_write_byte(struct mtd_info *mtd, u_char datum)
 	WriteDOC(datum, docptr, WritePipeTerm);
 }
 
-static u_char doc2001_read_byte(struct mtd_info *mtd)
+static u_char doc2001_read_byte(struct nand_chip *this)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 
@@ -490,9 +485,8 @@ static void doc2001_writebuf(struct mtd_info *mtd, const u_char *buf, int len)
 	WriteDOC(0x00, docptr, WritePipeTerm);
 }
 
-static void doc2001_readbuf(struct mtd_info *mtd, u_char *buf, int len)
+static void doc2001_readbuf(struct nand_chip *this, u_char *buf, int len)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 	int i;
@@ -507,9 +501,8 @@ static void doc2001_readbuf(struct mtd_info *mtd, u_char *buf, int len)
 	buf[i] = ReadDOC(docptr, LastDataRead);
 }
 
-static u_char doc2001plus_read_byte(struct mtd_info *mtd)
+static u_char doc2001plus_read_byte(struct nand_chip *this)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 	u_char ret;
@@ -540,9 +533,8 @@ static void doc2001plus_writebuf(struct mtd_info *mtd, const u_char *buf, int le
 		printk("\n");
 }
 
-static void doc2001plus_readbuf(struct mtd_info *mtd, u_char *buf, int len)
+static void doc2001plus_readbuf(struct nand_chip *this, u_char *buf, int len)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 	int i;
@@ -735,7 +727,7 @@ static void doc2001plus_command(struct mtd_info *mtd, unsigned command, int colu
 		WriteDOC(NAND_CMD_STATUS, docptr, Mplus_FlashCmd);
 		WriteDOC(0, docptr, Mplus_WritePipeTerm);
 		WriteDOC(0, docptr, Mplus_WritePipeTerm);
-		while (!(this->read_byte(mtd) & 0x40)) ;
+		while (!(this->read_byte(this) & 0x40)) ;
 		return;
 
 		/* This applies to read commands */
diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c
index 37935fd04020..284bc96dacd3 100644
--- a/drivers/mtd/nand/raw/docg4.c
+++ b/drivers/mtd/nand/raw/docg4.c
@@ -261,10 +261,9 @@ static inline void write_nop(void __iomem *docptr)
 	writew(0, docptr + DOC_NOP);
 }
 
-static void docg4_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void docg4_read_buf(struct nand_chip *nand, uint8_t *buf, int len)
 {
 	int i;
-	struct nand_chip *nand = mtd_to_nand(mtd);
 	uint16_t *p = (uint16_t *) buf;
 	len >>= 1;
 
@@ -484,9 +483,8 @@ static int correct_data(struct mtd_info *mtd, uint8_t *buf, int page)
 	return numerrs;
 }
 
-static uint8_t docg4_read_byte(struct mtd_info *mtd)
+static uint8_t docg4_read_byte(struct nand_chip *nand)
 {
-	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct docg4_priv *doc = nand_get_controller_data(nand);
 
 	dev_dbg(doc->dev, "%s\n", __func__);
@@ -809,11 +807,11 @@ static int read_page(struct mtd_info *mtd, struct nand_chip *nand,
 
 	dev_dbg(doc->dev, "%s: status = 0x%x\n", __func__, status);
 
-	docg4_read_buf(mtd, buf, DOCG4_PAGE_SIZE); /* read the page data */
+	docg4_read_buf(nand, buf, DOCG4_PAGE_SIZE); /* read the page data */
 
 	/* this device always reads oob after page data */
 	/* first 14 oob bytes read from I/O reg */
-	docg4_read_buf(mtd, nand->oob_poi, 14);
+	docg4_read_buf(nand, nand->oob_poi, 14);
 
 	/* last 2 read from another reg */
 	buf16 = (uint16_t *)(nand->oob_poi + 14);
@@ -859,7 +857,6 @@ static int docg4_read_page(struct nand_chip *nand, uint8_t *buf,
 
 static int docg4_read_oob(struct nand_chip *nand, int page)
 {
-	struct mtd_info *mtd = nand_to_mtd(nand);
 	struct docg4_priv *doc = nand_get_controller_data(nand);
 	void __iomem *docptr = doc->virtadr;
 	uint16_t status;
@@ -885,7 +882,7 @@ static int docg4_read_oob(struct nand_chip *nand, int page)
 
 	dev_dbg(doc->dev, "%s: status = 0x%x\n", __func__, status);
 
-	docg4_read_buf(mtd, nand->oob_poi, 16);
+	docg4_read_buf(nand, nand->oob_poi, 16);
 
 	write_nop(docptr);
 	write_nop(docptr);
diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
index c992d7ad39d9..22326bcb8b62 100644
--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
@@ -581,9 +581,8 @@ static void fsl_elbc_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
  * read a byte from either the FCM hardware buffer if it has any data left
  * otherwise issue a command to read a single byte.
  */
-static u8 fsl_elbc_read_byte(struct mtd_info *mtd)
+static u8 fsl_elbc_read_byte(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct fsl_elbc_mtd *priv = nand_get_controller_data(chip);
 	struct fsl_elbc_fcm_ctrl *elbc_fcm_ctrl = priv->ctrl->nand;
 
@@ -598,9 +597,8 @@ static u8 fsl_elbc_read_byte(struct mtd_info *mtd)
 /*
  * Read from the FCM Controller Data Buffer
  */
-static void fsl_elbc_read_buf(struct mtd_info *mtd, u8 *buf, int len)
+static void fsl_elbc_read_buf(struct nand_chip *chip, u8 *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct fsl_elbc_mtd *priv = nand_get_controller_data(chip);
 	struct fsl_elbc_fcm_ctrl *elbc_fcm_ctrl = priv->ctrl->nand;
 	int avail;
@@ -720,7 +718,7 @@ static int fsl_elbc_read_page(struct nand_chip *chip, uint8_t *buf,
 
 	nand_read_page_op(chip, page, 0, buf, mtd->writesize);
 	if (oob_required)
-		fsl_elbc_read_buf(mtd, chip->oob_poi, mtd->oobsize);
+		fsl_elbc_read_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	if (fsl_elbc_wait(mtd, chip) & NAND_STATUS_FAIL)
 		mtd->ecc_stats.failed++;
diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index 945f3dab7ebf..7b6d0913a5bb 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -545,9 +545,8 @@ static void fsl_ifc_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
  * Read a byte from either the IFC hardware buffer
  * read function for 8-bit buswidth
  */
-static uint8_t fsl_ifc_read_byte(struct mtd_info *mtd)
+static uint8_t fsl_ifc_read_byte(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct fsl_ifc_mtd *priv = nand_get_controller_data(chip);
 	unsigned int offset;
 
@@ -568,9 +567,8 @@ static uint8_t fsl_ifc_read_byte(struct mtd_info *mtd)
  * Read two bytes from the IFC hardware buffer
  * read function for 16-bit buswith
  */
-static uint8_t fsl_ifc_read_byte16(struct mtd_info *mtd)
+static uint8_t fsl_ifc_read_byte16(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct fsl_ifc_mtd *priv = nand_get_controller_data(chip);
 	uint16_t data;
 
@@ -591,9 +589,8 @@ static uint8_t fsl_ifc_read_byte16(struct mtd_info *mtd)
 /*
  * Read from the IFC Controller Data Buffer
  */
-static void fsl_ifc_read_buf(struct mtd_info *mtd, u8 *buf, int len)
+static void fsl_ifc_read_buf(struct nand_chip *chip, u8 *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct fsl_ifc_mtd *priv = nand_get_controller_data(chip);
 	int avail;
 
@@ -689,11 +686,11 @@ static int fsl_ifc_read_page(struct nand_chip *chip, uint8_t *buf,
 
 	nand_read_page_op(chip, page, 0, buf, mtd->writesize);
 	if (oob_required)
-		fsl_ifc_read_buf(mtd, chip->oob_poi, mtd->oobsize);
+		fsl_ifc_read_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	if (ctrl->nand_stat & IFC_NAND_EVTER_STAT_ECCER) {
 		if (!oob_required)
-			fsl_ifc_read_buf(mtd, chip->oob_poi, mtd->oobsize);
+			fsl_ifc_read_buf(chip, chip->oob_poi, mtd->oobsize);
 
 		return check_erased_page(chip, buf);
 	}
diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c
index ffddfc9721ac..340547f1b6c7 100644
--- a/drivers/mtd/nand/raw/fsl_upm.c
+++ b/drivers/mtd/nand/raw/fsl_upm.c
@@ -124,16 +124,16 @@ static void fun_select_chip(struct mtd_info *mtd, int mchip_nr)
 	}
 }
 
-static uint8_t fun_read_byte(struct mtd_info *mtd)
+static uint8_t fun_read_byte(struct nand_chip *chip)
 {
-	struct fsl_upm_nand *fun = to_fsl_upm_nand(mtd);
+	struct fsl_upm_nand *fun = to_fsl_upm_nand(nand_to_mtd(chip));
 
 	return in_8(fun->chip.IO_ADDR_R);
 }
 
-static void fun_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void fun_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 {
-	struct fsl_upm_nand *fun = to_fsl_upm_nand(mtd);
+	struct fsl_upm_nand *fun = to_fsl_upm_nand(nand_to_mtd(chip));
 	int i;
 
 	for (i = 0; i < len; i++)
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index 09f33f6006a3..d0d5caa1b7a6 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -859,9 +859,8 @@ static void gpmi_select_chip(struct mtd_info *mtd, int chipnr)
 	this->current_chip = chipnr;
 }
 
-static void gpmi_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void gpmi_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct gpmi_nand_data *this = nand_get_controller_data(chip);
 
 	dev_dbg(this->dev, "len is %d\n", len);
@@ -879,13 +878,12 @@ static void gpmi_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 	gpmi_send_data(this, buf, len);
 }
 
-static uint8_t gpmi_read_byte(struct mtd_info *mtd)
+static uint8_t gpmi_read_byte(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct gpmi_nand_data *this = nand_get_controller_data(chip);
 	uint8_t *buf = this->data_buffer_dma;
 
-	gpmi_read_buf(mtd, buf, 1);
+	gpmi_read_buf(chip, buf, 1);
 	return buf[0];
 }
 
@@ -1336,7 +1334,7 @@ static int gpmi_ecc_read_oob(struct nand_chip *chip, int page)
 
 	/* Read out the conventional OOB. */
 	nand_read_page_op(chip, page, mtd->writesize, NULL, 0);
-	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
+	chip->read_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	/*
 	 * Now, we want to make sure the block mark is correct. In the
@@ -1346,7 +1344,7 @@ static int gpmi_ecc_read_oob(struct nand_chip *chip, int page)
 	if (GPMI_IS_MX23(this)) {
 		/* Read the block mark into the first byte of the OOB buffer. */
 		nand_read_page_op(chip, page, 0, NULL, 0);
-		chip->oob_poi[0] = chip->read_byte(mtd);
+		chip->oob_poi[0] = chip->read_byte(chip);
 	}
 
 	return 0;
@@ -1635,7 +1633,7 @@ static int mx23_check_transcription_stamp(struct gpmi_nand_data *this)
 		 * and starts in the 12th byte of the page.
 		 */
 		nand_read_page_op(chip, page, 12, NULL, 0);
-		chip->read_buf(mtd, buffer, strlen(fingerprint));
+		chip->read_buf(chip, buffer, strlen(fingerprint));
 
 		/* Look for the fingerprint. */
 		if (!memcmp(buffer, fingerprint, strlen(fingerprint))) {
@@ -1771,7 +1769,7 @@ static int mx23_boot_init(struct gpmi_nand_data  *this)
 		/* Send the command to read the conventional block mark. */
 		chip->select_chip(mtd, chipnr);
 		nand_read_page_op(chip, page, mtd->writesize, NULL, 0);
-		block_mark = chip->read_byte(mtd);
+		block_mark = chip->read_byte(chip);
 		chip->select_chip(mtd, -1);
 
 		/*
diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c
index fab3c7fcf77b..e1fe6963c908 100644
--- a/drivers/mtd/nand/raw/hisi504_nand.c
+++ b/drivers/mtd/nand/raw/hisi504_nand.c
@@ -364,9 +364,8 @@ static void hisi_nfc_select_chip(struct mtd_info *mtd, int chipselect)
 	host->chipselect = chipselect;
 }
 
-static uint8_t hisi_nfc_read_byte(struct mtd_info *mtd)
+static uint8_t hisi_nfc_read_byte(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct hinfc_host *host = nand_get_controller_data(chip);
 
 	if (host->command == NAND_CMD_STATUS)
@@ -390,9 +389,8 @@ hisi_nfc_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 	host->offset += len;
 }
 
-static void hisi_nfc_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void hisi_nfc_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct hinfc_host *host = nand_get_controller_data(chip);
 
 	memcpy(buf, host->buffer + host->offset, len);
@@ -537,7 +535,7 @@ static int hisi_nand_read_page_hwecc(struct nand_chip *chip, uint8_t *buf,
 	int stat_1, stat_2;
 
 	nand_read_page_op(chip, page, 0, buf, mtd->writesize);
-	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
+	chip->read_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	/* errors which can not be corrected by ECC */
 	if (host->irq_status & HINFC504_INTS_UE) {
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index 6e4017ddacad..5820c86cb1f1 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -359,9 +359,8 @@ static int lpc32xx_nand_ecc_calculate(struct nand_chip *chip,
 /*
  * Read a single byte from NAND device
  */
-static uint8_t lpc32xx_nand_read_byte(struct mtd_info *mtd)
+static uint8_t lpc32xx_nand_read_byte(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct lpc32xx_nand_host *host = nand_get_controller_data(chip);
 
 	return (uint8_t)readl(SLC_DATA(host->io_base));
@@ -370,9 +369,8 @@ static uint8_t lpc32xx_nand_read_byte(struct mtd_info *mtd)
 /*
  * Simple device read without ECC
  */
-static void lpc32xx_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+static void lpc32xx_nand_read_buf(struct nand_chip *chip, u_char *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct lpc32xx_nand_host *host = nand_get_controller_data(chip);
 
 	/* Direct device read with no ECC */
@@ -628,7 +626,7 @@ static int lpc32xx_nand_read_page_syndrome(struct nand_chip *chip, uint8_t *buf,
 	status = lpc32xx_xfer(mtd, buf, chip->ecc.steps, 1);
 
 	/* Get OOB data */
-	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
+	chip->read_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	/* Convert to stored ECC format */
 	lpc32xx_slc_ecc_copy(tmpecc, (uint32_t *) host->ecc_buf, chip->ecc.steps);
@@ -669,8 +667,8 @@ static int lpc32xx_nand_read_page_raw_syndrome(struct nand_chip *chip,
 	nand_read_page_op(chip, page, 0, NULL, 0);
 
 	/* Raw reads can just use the FIFO interface */
-	chip->read_buf(mtd, buf, chip->ecc.size * chip->ecc.steps);
-	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
+	chip->read_buf(chip, buf, chip->ecc.size * chip->ecc.steps);
+	chip->read_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c
index 3c90d6955476..49031f5a3b6d 100644
--- a/drivers/mtd/nand/raw/mpc5121_nfc.c
+++ b/drivers/mtd/nand/raw/mpc5121_nfc.c
@@ -493,9 +493,9 @@ static void mpc5121_nfc_buf_copy(struct mtd_info *mtd, u_char *buf, int len,
 }
 
 /* Read data from NFC buffers */
-static void mpc5121_nfc_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+static void mpc5121_nfc_read_buf(struct nand_chip *chip, u_char *buf, int len)
 {
-	mpc5121_nfc_buf_copy(mtd, buf, len, 0);
+	mpc5121_nfc_buf_copy(nand_to_mtd(chip), buf, len, 0);
 }
 
 /* Write data to NFC buffers */
@@ -506,11 +506,11 @@ static void mpc5121_nfc_write_buf(struct mtd_info *mtd,
 }
 
 /* Read byte from NFC buffers */
-static u8 mpc5121_nfc_read_byte(struct mtd_info *mtd)
+static u8 mpc5121_nfc_read_byte(struct nand_chip *chip)
 {
 	u8 tmp;
 
-	mpc5121_nfc_read_buf(mtd, &tmp, sizeof(tmp));
+	mpc5121_nfc_read_buf(chip, &tmp, sizeof(tmp));
 
 	return tmp;
 }
diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c
index c338a9646433..1c7392242d4d 100644
--- a/drivers/mtd/nand/raw/mtk_nand.c
+++ b/drivers/mtd/nand/raw/mtk_nand.c
@@ -438,9 +438,8 @@ static inline void mtk_nfc_wait_ioready(struct mtk_nfc *nfc)
 		dev_err(nfc->dev, "data not ready\n");
 }
 
-static inline u8 mtk_nfc_read_byte(struct mtd_info *mtd)
+static inline u8 mtk_nfc_read_byte(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct mtk_nfc *nfc = nand_get_controller_data(chip);
 	u32 reg;
 
@@ -467,12 +466,12 @@ static inline u8 mtk_nfc_read_byte(struct mtd_info *mtd)
 	return nfi_readb(nfc, NFI_DATAR);
 }
 
-static void mtk_nfc_read_buf(struct mtd_info *mtd, u8 *buf, int len)
+static void mtk_nfc_read_buf(struct nand_chip *chip, u8 *buf, int len)
 {
 	int i;
 
 	for (i = 0; i < len; i++)
-		buf[i] = mtk_nfc_read_byte(mtd);
+		buf[i] = mtk_nfc_read_byte(chip);
 }
 
 static void mtk_nfc_write_byte(struct mtd_info *mtd, u8 byte)
diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index 597c74ea7e5e..8fed2919f35e 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -895,9 +895,8 @@ static int mxc_nand_write_oob(struct nand_chip *chip, int page)
 	return mxc_nand_write_page(chip, host->data_buf, false, page);
 }
 
-static u_char mxc_nand_read_byte(struct mtd_info *mtd)
+static u_char mxc_nand_read_byte(struct nand_chip *nand_chip)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
 	struct mxc_nand_host *host = nand_get_controller_data(nand_chip);
 	uint8_t ret;
 
@@ -941,9 +940,10 @@ static void mxc_nand_write_buf(struct mtd_info *mtd,
  * Flash first the data output cycle is initiated by the NFC, which copies
  * the data to RAMbuffer. This data of length len is then copied to buffer buf.
  */
-static void mxc_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+static void mxc_nand_read_buf(struct nand_chip *nand_chip, u_char *buf,
+			      int len)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(nand_chip);
 	struct mxc_nand_host *host = nand_get_controller_data(nand_chip);
 	u16 col = host->buf_start;
 	int n = mtd->oobsize + mtd->writesize - col;
@@ -1429,7 +1429,7 @@ static int mxc_nand_get_features(struct mtd_info *mtd, struct nand_chip *chip,
 	host->buf_start = 0;
 
 	for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i)
-		*subfeature_param++ = chip->read_byte(mtd);
+		*subfeature_param++ = chip->read_byte(chip);
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index cc386ee64a1b..e4686078011d 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -254,26 +254,24 @@ static void nand_release_device(struct mtd_info *mtd)
 
 /**
  * nand_read_byte - [DEFAULT] read one byte from the chip
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  *
  * Default read function for 8bit buswidth
  */
-static uint8_t nand_read_byte(struct mtd_info *mtd)
+static uint8_t nand_read_byte(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	return readb(chip->IO_ADDR_R);
 }
 
 /**
  * nand_read_byte16 - [DEFAULT] read one byte endianness aware from the chip
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  *
  * Default read function for 16bit buswidth with endianness conversion.
  *
  */
-static uint8_t nand_read_byte16(struct mtd_info *mtd)
+static uint8_t nand_read_byte16(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	return (uint8_t) cpu_to_le16(readw(chip->IO_ADDR_R));
 }
 
@@ -362,16 +360,14 @@ static void nand_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 
 /**
  * nand_read_buf - [DEFAULT] read chip data into buffer
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @buf: buffer to store date
  * @len: number of bytes to read
  *
  * Default read function for 8bit buswidth.
  */
-static void nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void nand_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
 	ioread8_rep(chip->IO_ADDR_R, buf, len);
 }
 
@@ -393,15 +389,14 @@ static void nand_write_buf16(struct mtd_info *mtd, const uint8_t *buf, int len)
 
 /**
  * nand_read_buf16 - [DEFAULT] read chip data into buffer
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @buf: buffer to store date
  * @len: number of bytes to read
  *
  * Default read function for 16bit buswidth.
  */
-static void nand_read_buf16(struct mtd_info *mtd, uint8_t *buf, int len)
+static void nand_read_buf16(struct nand_chip *chip, uint8_t *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	u16 *p = (u16 *) buf;
 
 	ioread16_rep(chip->IO_ADDR_R, p, len >> 1);
@@ -1544,7 +1539,7 @@ int nand_read_page_op(struct nand_chip *chip, unsigned int page,
 
 	chip->cmdfunc(mtd, NAND_CMD_READ0, offset_in_page, page);
 	if (len)
-		chip->read_buf(mtd, buf, len);
+		chip->read_buf(chip, buf, len);
 
 	return 0;
 }
@@ -1593,7 +1588,7 @@ static int nand_read_param_page_op(struct nand_chip *chip, u8 page, void *buf,
 
 	chip->cmdfunc(mtd, NAND_CMD_PARAM, page, -1);
 	for (i = 0; i < len; i++)
-		p[i] = chip->read_byte(mtd);
+		p[i] = chip->read_byte(chip);
 
 	return 0;
 }
@@ -1656,7 +1651,7 @@ int nand_change_read_column_op(struct nand_chip *chip,
 
 	chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset_in_page, -1);
 	if (len)
-		chip->read_buf(mtd, buf, len);
+		chip->read_buf(chip, buf, len);
 
 	return 0;
 }
@@ -1693,7 +1688,7 @@ int nand_read_oob_op(struct nand_chip *chip, unsigned int page,
 
 	chip->cmdfunc(mtd, NAND_CMD_READOOB, offset_in_oob, page);
 	if (len)
-		chip->read_buf(mtd, buf, len);
+		chip->read_buf(chip, buf, len);
 
 	return 0;
 }
@@ -2009,7 +2004,7 @@ int nand_readid_op(struct nand_chip *chip, u8 addr, void *buf,
 	chip->cmdfunc(mtd, NAND_CMD_READID, addr, -1);
 
 	for (i = 0; i < len; i++)
-		id[i] = chip->read_byte(mtd);
+		id[i] = chip->read_byte(chip);
 
 	return 0;
 }
@@ -2048,7 +2043,7 @@ int nand_status_op(struct nand_chip *chip, u8 *status)
 
 	chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1);
 	if (status)
-		*status = chip->read_byte(mtd);
+		*status = chip->read_byte(chip);
 
 	return 0;
 }
@@ -2229,7 +2224,7 @@ static int nand_get_features_op(struct nand_chip *chip, u8 feature,
 
 	chip->cmdfunc(mtd, NAND_CMD_GET_FEATURES, feature, -1);
 	for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i)
-		params[i] = chip->read_byte(mtd);
+		params[i] = chip->read_byte(chip);
 
 	return 0;
 }
@@ -2304,8 +2299,6 @@ EXPORT_SYMBOL_GPL(nand_reset_op);
 int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len,
 		      bool force_8bit)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
-
 	if (!len || !buf)
 		return -EINVAL;
 
@@ -2325,9 +2318,9 @@ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len,
 		unsigned int i;
 
 		for (i = 0; i < len; i++)
-			p[i] = chip->read_byte(mtd);
+			p[i] = chip->read_byte(chip);
 	} else {
-		chip->read_buf(mtd, buf, len);
+		chip->read_buf(chip, buf, len);
 	}
 
 	return 0;
diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c
index e9f7b9e1aead..04feb4e8d112 100644
--- a/drivers/mtd/nand/raw/nandsim.c
+++ b/drivers/mtd/nand/raw/nandsim.c
@@ -1872,9 +1872,8 @@ static void switch_state(struct nandsim *ns)
 	}
 }
 
-static u_char ns_nand_read_byte(struct mtd_info *mtd)
+static u_char ns_nand_read_byte(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct nandsim *ns = nand_get_controller_data(chip);
 	u_char outb = 0x00;
 
@@ -2136,9 +2135,8 @@ static void ns_nand_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
 	}
 }
 
-static void ns_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+static void ns_nand_read_buf(struct nand_chip *chip, u_char *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct nandsim *ns = nand_get_controller_data(chip);
 
 	/* Sanity and correctness checks */
@@ -2160,7 +2158,7 @@ static void ns_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
 		int i;
 
 		for (i = 0; i < len; i++)
-			buf[i] = mtd_to_nand(mtd)->read_byte(mtd);
+			buf[i] = chip->read_byte(chip);
 
 		return;
 	}
diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index 9241cfaab5ac..56bb1eaa5ef3 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -116,9 +116,8 @@ static int ndfc_calculate_ecc(struct nand_chip *chip,
  * functions. No further checking, as nand_base will always read/write
  * page aligned.
  */
-static void ndfc_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void ndfc_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct ndfc_controller *ndfc = nand_get_controller_data(chip);
 	uint32_t *p = (uint32_t *) buf;
 
diff --git a/drivers/mtd/nand/raw/nuc900_nand.c b/drivers/mtd/nand/raw/nuc900_nand.c
index 0c675b6c0b6e..3a88da5ec97f 100644
--- a/drivers/mtd/nand/raw/nuc900_nand.c
+++ b/drivers/mtd/nand/raw/nuc900_nand.c
@@ -79,21 +79,21 @@ static const struct mtd_partition partitions[] = {
 	}
 };
 
-static unsigned char nuc900_nand_read_byte(struct mtd_info *mtd)
+static unsigned char nuc900_nand_read_byte(struct nand_chip *chip)
 {
 	unsigned char ret;
-	struct nuc900_nand *nand = mtd_to_nuc900(mtd);
+	struct nuc900_nand *nand = mtd_to_nuc900(nand_to_mtd(chip));
 
 	ret = (unsigned char)read_data_reg(nand);
 
 	return ret;
 }
 
-static void nuc900_nand_read_buf(struct mtd_info *mtd,
+static void nuc900_nand_read_buf(struct nand_chip *chip,
 				 unsigned char *buf, int len)
 {
 	int i;
-	struct nuc900_nand *nand = mtd_to_nuc900(mtd);
+	struct nuc900_nand *nand = mtd_to_nuc900(nand_to_mtd(chip));
 
 	for (i = 0; i < len; i++)
 		buf[i] = (unsigned char)read_data_reg(nand);
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index f1f8b6c1d654..4f2e5cd86050 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -337,12 +337,13 @@ static void omap_write_buf16(struct mtd_info *mtd, const u_char * buf, int len)
 
 /**
  * omap_read_buf_pref - read data from NAND controller into buffer
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @buf: buffer to store date
  * @len: number of bytes to read
  */
-static void omap_read_buf_pref(struct mtd_info *mtd, u_char *buf, int len)
+static void omap_read_buf_pref(struct nand_chip *chip, u_char *buf, int len)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct omap_nand_info *info = mtd_to_omap(mtd);
 	uint32_t r_count = 0;
 	int ret = 0;
@@ -528,14 +529,17 @@ out_copy:
 
 /**
  * omap_read_buf_dma_pref - read data from NAND controller into buffer
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @buf: buffer to store date
  * @len: number of bytes to read
  */
-static void omap_read_buf_dma_pref(struct mtd_info *mtd, u_char *buf, int len)
+static void omap_read_buf_dma_pref(struct nand_chip *chip, u_char *buf,
+				   int len)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	if (len <= mtd->oobsize)
-		omap_read_buf_pref(mtd, buf, len);
+		omap_read_buf_pref(chip, buf, len);
 	else
 		/* start transfer in DMA mode */
 		omap_nand_dma_transfer(mtd, buf, len, 0x0);
@@ -605,17 +609,19 @@ done:
 
 /*
  * omap_read_buf_irq_pref - read data from NAND controller into buffer
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @buf: buffer to store date
  * @len: number of bytes to read
  */
-static void omap_read_buf_irq_pref(struct mtd_info *mtd, u_char *buf, int len)
+static void omap_read_buf_irq_pref(struct nand_chip *chip, u_char *buf,
+				   int len)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct omap_nand_info *info = mtd_to_omap(mtd);
 	int ret = 0;
 
 	if (len <= mtd->oobsize) {
-		omap_read_buf_pref(mtd, buf, len);
+		omap_read_buf_pref(chip, buf, len);
 		return;
 	}
 
@@ -1642,7 +1648,7 @@ static int omap_read_page_bch(struct nand_chip *chip, uint8_t *buf,
 	chip->ecc.hwctl(chip, NAND_ECC_READ);
 
 	/* Read data */
-	chip->read_buf(mtd, buf, mtd->writesize);
+	chip->read_buf(chip, buf, mtd->writesize);
 
 	/* Read oob bytes */
 	nand_change_read_column_op(chip,
diff --git a/drivers/mtd/nand/raw/orion_nand.c b/drivers/mtd/nand/raw/orion_nand.c
index 5c58d91ffaee..870eabe6fff8 100644
--- a/drivers/mtd/nand/raw/orion_nand.c
+++ b/drivers/mtd/nand/raw/orion_nand.c
@@ -48,9 +48,8 @@ static void orion_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl
 	writeb(cmd, nc->IO_ADDR_W + offs);
 }
 
-static void orion_nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void orion_nand_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	void __iomem *io_base = chip->IO_ADDR_R;
 #if defined(__LINUX_ARM_ARCH__) && __LINUX_ARM_ARCH__ >= 5
 	uint64_t *buf64;
diff --git a/drivers/mtd/nand/raw/oxnas_nand.c b/drivers/mtd/nand/raw/oxnas_nand.c
index 5bc180536320..6156abb30b7a 100644
--- a/drivers/mtd/nand/raw/oxnas_nand.c
+++ b/drivers/mtd/nand/raw/oxnas_nand.c
@@ -38,17 +38,15 @@ struct oxnas_nand_ctrl {
 	struct nand_chip *chips[OXNAS_NAND_MAX_CHIPS];
 };
 
-static uint8_t oxnas_nand_read_byte(struct mtd_info *mtd)
+static uint8_t oxnas_nand_read_byte(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct oxnas_nand_ctrl *oxnas = nand_get_controller_data(chip);
 
 	return readb(oxnas->io_base);
 }
 
-static void oxnas_nand_read_buf(struct mtd_info *mtd, u8 *buf, int len)
+static void oxnas_nand_read_buf(struct nand_chip *chip, u8 *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct oxnas_nand_ctrl *oxnas = nand_get_controller_data(chip);
 
 	ioread8_rep(oxnas->io_base, buf, len);
diff --git a/drivers/mtd/nand/raw/pasemi_nand.c b/drivers/mtd/nand/raw/pasemi_nand.c
index c8e2ac04fb86..551e5db670be 100644
--- a/drivers/mtd/nand/raw/pasemi_nand.c
+++ b/drivers/mtd/nand/raw/pasemi_nand.c
@@ -43,10 +43,8 @@ static unsigned int lpcctl;
 static struct mtd_info *pasemi_nand_mtd;
 static const char driver_name[] = "pasemi-nand";
 
-static void pasemi_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+static void pasemi_read_buf(struct nand_chip *chip, u_char *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
 	while (len > 0x800) {
 		memcpy_fromio(buf, chip->IO_ADDR_R, 0x800);
 		buf += 0x800;
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index 80e1a44f0465..5193806923ba 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -52,13 +52,6 @@ static void plat_nand_write_buf(struct mtd_info *mtd, const uint8_t *buf,
 	pdata->ctrl.write_buf(mtd_to_nand(mtd), buf, len);
 }
 
-static void plat_nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
-{
-	struct platform_nand_data *pdata = dev_get_platdata(mtd->dev.parent);
-
-	pdata->ctrl.read_buf(mtd_to_nand(mtd), buf, len);
-}
-
 /*
  * Probe for the NAND device.
  */
@@ -111,9 +104,7 @@ static int plat_nand_probe(struct platform_device *pdev)
 	if (pdata->ctrl.write_buf)
 		data->chip.write_buf = plat_nand_write_buf;
 
-	if (pdata->ctrl.read_buf)
-		data->chip.read_buf = plat_nand_read_buf;
-
+	data->chip.read_buf = pdata->ctrl.read_buf;
 	data->chip.chip_delay = pdata->chip.chip_delay;
 	data->chip.options |= pdata->chip.options;
 	data->chip.bbt_options |= pdata->chip.bbt_options;
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index e0cec027572c..63bb9f3fe23b 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -2282,9 +2282,8 @@ static int qcom_nandc_block_markbad(struct mtd_info *mtd, loff_t ofs)
  * reading/writing page data, they are used for smaller data like reading
  * id, status etc
  */
-static uint8_t qcom_nandc_read_byte(struct mtd_info *mtd)
+static uint8_t qcom_nandc_read_byte(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct qcom_nand_host *host = to_qcom_nand_host(chip);
 	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
 	u8 *buf = nandc->data_buffer;
@@ -2304,9 +2303,8 @@ static uint8_t qcom_nandc_read_byte(struct mtd_info *mtd)
 	return ret;
 }
 
-static void qcom_nandc_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void qcom_nandc_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
 	int real_len = min_t(size_t, len, nandc->buf_count - nandc->buf_start);
 
diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c
index aa5516b3b45f..07055bd657cd 100644
--- a/drivers/mtd/nand/raw/r852.c
+++ b/drivers/mtd/nand/raw/r852.c
@@ -266,9 +266,9 @@ static void r852_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 /*
  * Read data lines of the nand chip to retrieve data
  */
-static void r852_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void r852_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 {
-	struct r852_device *dev = r852_get_dev(mtd);
+	struct r852_device *dev = r852_get_dev(nand_to_mtd(chip));
 	uint32_t reg;
 
 	if (dev->card_unstable) {
@@ -303,9 +303,9 @@ static void r852_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
 /*
  * Read one byte from nand chip
  */
-static uint8_t r852_read_byte(struct mtd_info *mtd)
+static uint8_t r852_read_byte(struct nand_chip *chip)
 {
-	struct r852_device *dev = r852_get_dev(mtd);
+	struct r852_device *dev = r852_get_dev(nand_to_mtd(chip));
 
 	/* Same problem as in r852_read_buf.... */
 	if (dev->card_unstable)
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index d57201d118d8..54c86ec612dd 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -675,14 +675,14 @@ static int s3c2440_nand_calculate_ecc(struct nand_chip *chip,
  * use read/write block to move the data buffers to/from the controller
 */
 
-static void s3c2410_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+static void s3c2410_nand_read_buf(struct nand_chip *this, u_char *buf, int len)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	readsb(this->IO_ADDR_R, buf, len);
 }
 
-static void s3c2440_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+static void s3c2440_nand_read_buf(struct nand_chip *this, u_char *buf, int len)
 {
+	struct mtd_info *mtd = nand_to_mtd(this);
 	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
 
 	readsl(info->regs + S3C2440_NFDATA, buf, len >> 2);
diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c
index bb58edd2bdf0..6966a18f8ac4 100644
--- a/drivers/mtd/nand/raw/sh_flctl.c
+++ b/drivers/mtd/nand/raw/sh_flctl.c
@@ -618,7 +618,7 @@ static int flctl_read_page_hwecc(struct nand_chip *chip, uint8_t *buf,
 
 	nand_read_page_op(chip, page, 0, buf, mtd->writesize);
 	if (oob_required)
-		chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
+		chip->read_buf(chip, chip->oob_poi, mtd->oobsize);
 	return 0;
 }
 
@@ -978,9 +978,9 @@ static void flctl_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
 	flctl->index += len;
 }
 
-static uint8_t flctl_read_byte(struct mtd_info *mtd)
+static uint8_t flctl_read_byte(struct nand_chip *chip)
 {
-	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	struct sh_flctl *flctl = mtd_to_flctl(nand_to_mtd(chip));
 	uint8_t data;
 
 	data = flctl->done_buff[flctl->index];
@@ -988,9 +988,9 @@ static uint8_t flctl_read_byte(struct mtd_info *mtd)
 	return data;
 }
 
-static void flctl_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void flctl_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 {
-	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	struct sh_flctl *flctl = mtd_to_flctl(nand_to_mtd(chip));
 
 	memcpy(buf, &flctl->done_buff[flctl->index], len);
 	flctl->index += len;
diff --git a/drivers/mtd/nand/raw/socrates_nand.c b/drivers/mtd/nand/raw/socrates_nand.c
index 82ba371a8e18..007e37680b88 100644
--- a/drivers/mtd/nand/raw/socrates_nand.c
+++ b/drivers/mtd/nand/raw/socrates_nand.c
@@ -54,14 +54,14 @@ static void socrates_nand_write_buf(struct mtd_info *mtd,
 
 /**
  * socrates_nand_read_buf -  read chip data into buffer
- * @mtd:	MTD device structure
+ * @this:	NAND chip object
  * @buf:	buffer to store date
  * @len:	number of bytes to read
  */
-static void socrates_nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void socrates_nand_read_buf(struct nand_chip *this, uint8_t *buf,
+				   int len)
 {
 	int i;
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct socrates_nand_host *host = nand_get_controller_data(this);
 	uint32_t val;
 
@@ -78,10 +78,10 @@ static void socrates_nand_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
  * socrates_nand_read_byte -  read one byte from the chip
  * @mtd:	MTD device structure
  */
-static uint8_t socrates_nand_read_byte(struct mtd_info *mtd)
+static uint8_t socrates_nand_read_byte(struct nand_chip *this)
 {
 	uint8_t byte;
-	socrates_nand_read_buf(mtd, &byte, sizeof(byte));
+	socrates_nand_read_buf(this, &byte, sizeof(byte));
 	return byte;
 }
 
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index 86d666c0c03c..2a0e624eca6c 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -464,9 +464,8 @@ static void sunxi_nfc_select_chip(struct mtd_info *mtd, int chip)
 	sunxi_nand->selected = chip;
 }
 
-static void sunxi_nfc_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void sunxi_nfc_read_buf(struct nand_chip *nand, uint8_t *buf, int len)
 {
-	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand);
 	struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller);
 	int ret;
@@ -540,11 +539,11 @@ static void sunxi_nfc_write_buf(struct mtd_info *mtd, const uint8_t *buf,
 	}
 }
 
-static uint8_t sunxi_nfc_read_byte(struct mtd_info *mtd)
+static uint8_t sunxi_nfc_read_byte(struct nand_chip *nand)
 {
 	uint8_t ret = 0;
 
-	sunxi_nfc_read_buf(mtd, &ret, 1);
+	sunxi_nfc_read_buf(nand, &ret, 1);
 
 	return ret;
 }
@@ -770,7 +769,7 @@ static void sunxi_nfc_randomizer_read_buf(struct mtd_info *mtd, uint8_t *buf,
 {
 	sunxi_nfc_randomizer_config(mtd, page, ecc);
 	sunxi_nfc_randomizer_enable(mtd);
-	sunxi_nfc_read_buf(mtd, buf, len);
+	sunxi_nfc_read_buf(mtd_to_nand(mtd), buf, len);
 	sunxi_nfc_randomizer_disable(mtd);
 }
 
@@ -995,7 +994,7 @@ static void sunxi_nfc_hw_ecc_read_extra_oob(struct mtd_info *mtd,
 					   false);
 
 	if (!randomize)
-		sunxi_nfc_read_buf(mtd, oob + offset, len);
+		sunxi_nfc_read_buf(nand, oob + offset, len);
 	else
 		sunxi_nfc_randomizer_read_buf(mtd, oob + offset, len,
 					      false, page);
diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c
index 7c8f47546002..20d6fa983a6b 100644
--- a/drivers/mtd/nand/raw/tango_nand.c
+++ b/drivers/mtd/nand/raw/tango_nand.c
@@ -135,16 +135,16 @@ static int tango_dev_ready(struct mtd_info *mtd)
 	return readl_relaxed(nfc->pbus_base + PBUS_CS_CTRL) & PBUS_IORDY;
 }
 
-static u8 tango_read_byte(struct mtd_info *mtd)
+static u8 tango_read_byte(struct nand_chip *chip)
 {
-	struct tango_chip *tchip = to_tango_chip(mtd_to_nand(mtd));
+	struct tango_chip *tchip = to_tango_chip(chip);
 
 	return readb_relaxed(tchip->base + PBUS_DATA);
 }
 
-static void tango_read_buf(struct mtd_info *mtd, u8 *buf, int len)
+static void tango_read_buf(struct nand_chip *chip, u8 *buf, int len)
 {
-	struct tango_chip *tchip = to_tango_chip(mtd_to_nand(mtd));
+	struct tango_chip *tchip = to_tango_chip(chip);
 
 	ioread8_rep(tchip->base + PBUS_DATA, buf, len);
 }
@@ -325,15 +325,13 @@ static int tango_write_page(struct nand_chip *chip, const u8 *buf,
 
 static void aux_read(struct nand_chip *chip, u8 **buf, int len, int *pos)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
-
 	*pos += len;
 
 	if (!*buf) {
 		/* skip over "len" bytes */
 		nand_change_read_column_op(chip, *pos, NULL, 0, false);
 	} else {
-		tango_read_buf(mtd, *buf, len);
+		tango_read_buf(chip, *buf, len);
 		*buf += len;
 	}
 }
diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index 734ff29705ce..570ea045fbce 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -225,9 +225,9 @@ tmio_nand_wait(struct mtd_info *mtd, struct nand_chip *nand_chip)
   *To prevent stale data from being read, tmio_nand_hwcontrol() clears
   *tmio->read_good.
  */
-static u_char tmio_nand_read_byte(struct mtd_info *mtd)
+static u_char tmio_nand_read_byte(struct nand_chip *chip)
 {
-	struct tmio_nand *tmio = mtd_to_tmio(mtd);
+	struct tmio_nand *tmio = mtd_to_tmio(nand_to_mtd(chip));
 	unsigned int data;
 
 	if (tmio->read_good--)
@@ -252,9 +252,9 @@ tmio_nand_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
 	tmio_iowrite16_rep(tmio->fcr + FCR_DATA, buf, len >> 1);
 }
 
-static void tmio_nand_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+static void tmio_nand_read_buf(struct nand_chip *chip, u_char *buf, int len)
 {
-	struct tmio_nand *tmio = mtd_to_tmio(mtd);
+	struct tmio_nand *tmio = mtd_to_tmio(nand_to_mtd(chip));
 
 	tmio_ioread16_rep(tmio->fcr + FCR_DATA, buf, len >> 1);
 }
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index 3c69d834de62..c68b638c4fe8 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -102,9 +102,9 @@ static void txx9ndfmc_write(struct platform_device *dev,
 	__raw_writel(val, ndregaddr(dev, reg));
 }
 
-static uint8_t txx9ndfmc_read_byte(struct mtd_info *mtd)
+static uint8_t txx9ndfmc_read_byte(struct nand_chip *chip)
 {
-	struct platform_device *dev = mtd_to_platdev(mtd);
+	struct platform_device *dev = mtd_to_platdev(nand_to_mtd(chip));
 
 	return txx9ndfmc_read(dev, TXX9_NDFDTR);
 }
@@ -122,9 +122,9 @@ static void txx9ndfmc_write_buf(struct mtd_info *mtd, const uint8_t *buf,
 	txx9ndfmc_write(dev, mcr, TXX9_NDFMCR);
 }
 
-static void txx9ndfmc_read_buf(struct mtd_info *mtd, uint8_t *buf, int len)
+static void txx9ndfmc_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 {
-	struct platform_device *dev = mtd_to_platdev(mtd);
+	struct platform_device *dev = mtd_to_platdev(nand_to_mtd(chip));
 	void __iomem *ndfdtr = ndregaddr(dev, TXX9_NDFDTR);
 
 	while (len--)
diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c
index 1adb41acebfc..edbcfaa85ed8 100644
--- a/drivers/mtd/nand/raw/xway_nand.c
+++ b/drivers/mtd/nand/raw/xway_nand.c
@@ -125,17 +125,17 @@ static int xway_dev_ready(struct mtd_info *mtd)
 	return ltq_ebu_r32(EBU_NAND_WAIT) & NAND_WAIT_RD;
 }
 
-static unsigned char xway_read_byte(struct mtd_info *mtd)
+static unsigned char xway_read_byte(struct nand_chip *chip)
 {
-	return xway_readb(mtd, NAND_READ_DATA);
+	return xway_readb(nand_to_mtd(chip), NAND_READ_DATA);
 }
 
-static void xway_read_buf(struct mtd_info *mtd, u_char *buf, int len)
+static void xway_read_buf(struct nand_chip *chip, u_char *buf, int len)
 {
 	int i;
 
 	for (i = 0; i < len; i++)
-		buf[i] = xway_readb(mtd, NAND_WRITE_DATA);
+		buf[i] = xway_readb(nand_to_mtd(chip), NAND_WRITE_DATA);
 }
 
 static void xway_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c
index 2b2f98efdb54..644c91ff2734 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.c
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c
@@ -657,7 +657,7 @@ static int spinand_read_page_hwecc(struct nand_chip *chip, u8 *buf,
 
 	nand_read_page_op(chip, page, 0, p, eccsize * eccsteps);
 	if (oob_required)
-		chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
+		chip->read_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	while (1) {
 		retval = spinand_read_status(info->spi, &status);
@@ -685,9 +685,9 @@ static void spinand_select_chip(struct mtd_info *mtd, int dev)
 {
 }
 
-static u8 spinand_read_byte(struct mtd_info *mtd)
+static u8 spinand_read_byte(struct nand_chip *chip)
 {
-	struct spinand_state *state = mtd_to_state(mtd);
+	struct spinand_state *state = mtd_to_state(nand_to_mtd(chip));
 	u8 data;
 
 	data = state->buf[state->buf_ptr];
@@ -732,9 +732,9 @@ static void spinand_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
 	state->buf_ptr += len;
 }
 
-static void spinand_read_buf(struct mtd_info *mtd, u8 *buf, int len)
+static void spinand_read_buf(struct nand_chip *chip, u8 *buf, int len)
 {
-	struct spinand_state *state = mtd_to_state(mtd);
+	struct spinand_state *state = mtd_to_state(nand_to_mtd(chip));
 
 	memcpy(buf, state->buf + state->buf_ptr, len);
 	state->buf_ptr += len;
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 527947e81447..f324a82fe6a2 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1283,10 +1283,10 @@ struct nand_chip {
 	void __iomem *IO_ADDR_R;
 	void __iomem *IO_ADDR_W;
 
-	uint8_t (*read_byte)(struct mtd_info *mtd);
+	uint8_t (*read_byte)(struct nand_chip *chip);
 	void (*write_byte)(struct mtd_info *mtd, uint8_t byte);
 	void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len);
-	void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len);
+	void (*read_buf)(struct nand_chip *chip, uint8_t *buf, int len);
 	void (*select_chip)(struct mtd_info *mtd, int chip);
 	int (*block_bad)(struct mtd_info *mtd, loff_t ofs);
 	int (*block_markbad)(struct mtd_info *mtd, loff_t ofs);
-- 
cgit v1.2.3


From c0739d85723a381302907f9613392d7ac8515176 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:23 +0200
Subject: mtd: rawnand: Pass a nand_chip object to chip->write_xxx() hooks

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one or
remove the mtd_info object when both are passed.

Let's tackle all chip->write_xxx() hooks at once.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/ams-delta.c                 |  9 +++---
 drivers/mtd/nand/raw/atmel/nand-controller.c     | 12 +++----
 drivers/mtd/nand/raw/au1550nd.c                  | 34 +++++++++-----------
 drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c |  6 ++--
 drivers/mtd/nand/raw/brcmnand/brcmnand.c         |  5 ++-
 drivers/mtd/nand/raw/cafe_nand.c                 |  5 ++-
 drivers/mtd/nand/raw/cmx270_nand.c               |  4 +--
 drivers/mtd/nand/raw/cs553x_nand.c               |  9 ++----
 drivers/mtd/nand/raw/davinci_nand.c              |  6 ++--
 drivers/mtd/nand/raw/denali.c                    | 13 ++++----
 drivers/mtd/nand/raw/diskonchip.c                | 20 +++++-------
 drivers/mtd/nand/raw/docg4.c                     | 10 +++---
 drivers/mtd/nand/raw/fsl_elbc_nand.c             | 10 +++---
 drivers/mtd/nand/raw/fsl_ifc_nand.c              |  6 ++--
 drivers/mtd/nand/raw/fsl_upm.c                   |  4 +--
 drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c       |  3 +-
 drivers/mtd/nand/raw/hisi504_nand.c              |  5 ++-
 drivers/mtd/nand/raw/lpc32xx_slc.c               |  8 ++---
 drivers/mtd/nand/raw/mpc5121_nfc.c               |  6 ++--
 drivers/mtd/nand/raw/mtk_nand.c                  |  8 ++---
 drivers/mtd/nand/raw/mxc_nand.c                  |  8 ++---
 drivers/mtd/nand/raw/nand_base.c                 | 41 ++++++++++--------------
 drivers/mtd/nand/raw/nand_hynix.c                |  2 +-
 drivers/mtd/nand/raw/nandsim.c                   |  9 +++---
 drivers/mtd/nand/raw/ndfc.c                      |  3 +-
 drivers/mtd/nand/raw/nuc900_nand.c               |  4 +--
 drivers/mtd/nand/raw/omap2.c                     | 36 ++++++++++++---------
 drivers/mtd/nand/raw/oxnas_nand.c                |  4 +--
 drivers/mtd/nand/raw/pasemi_nand.c               |  5 ++-
 drivers/mtd/nand/raw/plat_nand.c                 | 12 +------
 drivers/mtd/nand/raw/qcom_nandc.c                |  3 +-
 drivers/mtd/nand/raw/r852.c                      |  4 +--
 drivers/mtd/nand/raw/s3c2410.c                   |  6 ++--
 drivers/mtd/nand/raw/sh_flctl.c                  |  6 ++--
 drivers/mtd/nand/raw/socrates_nand.c             |  7 ++--
 drivers/mtd/nand/raw/sunxi_nand.c                |  5 ++-
 drivers/mtd/nand/raw/tango_nand.c                |  8 ++---
 drivers/mtd/nand/raw/tmio_nand.c                 |  4 +--
 drivers/mtd/nand/raw/txx9ndfmc.c                 |  4 +--
 drivers/mtd/nand/raw/xway_nand.c                 |  4 +--
 drivers/staging/mt29f_spinand/mt29f_spinand.c    |  4 +--
 include/linux/mtd/rawnand.h                      |  4 +--
 42 files changed, 163 insertions(+), 203 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c
index eb48c939c4ae..d742b9444429 100644
--- a/drivers/mtd/nand/raw/ams-delta.c
+++ b/drivers/mtd/nand/raw/ams-delta.c
@@ -63,9 +63,8 @@ static const struct mtd_partition partition_info[] = {
 	  .size		=  3 * SZ_256K },
 };
 
-static void ams_delta_write_byte(struct mtd_info *mtd, u_char byte)
+static void ams_delta_write_byte(struct nand_chip *this, u_char byte)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	void __iomem *io_base = (void __iomem *)nand_get_controller_data(this);
 
 	writew(0, io_base + OMAP_MPUIO_IO_CNTL);
@@ -89,13 +88,13 @@ static u_char ams_delta_read_byte(struct nand_chip *this)
 	return res;
 }
 
-static void ams_delta_write_buf(struct mtd_info *mtd, const u_char *buf,
+static void ams_delta_write_buf(struct nand_chip *this, const u_char *buf,
 				int len)
 {
 	int i;
 
 	for (i=0; i<len; i++)
-		ams_delta_write_byte(mtd, buf[i]);
+		ams_delta_write_byte(this, buf[i]);
 }
 
 static void ams_delta_read_buf(struct nand_chip *this, u_char *buf, int len)
@@ -128,7 +127,7 @@ static void ams_delta_hwcontrol(struct mtd_info *mtd, int cmd,
 	}
 
 	if (cmd != NAND_CMD_NONE)
-		ams_delta_write_byte(mtd, cmd);
+		ams_delta_write_byte(mtd_to_nand(mtd), cmd);
 }
 
 static int ams_delta_nand_ready(struct mtd_info *mtd)
diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index 371223e19dcc..84ddede5ede4 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -417,9 +417,8 @@ static u8 atmel_nand_read_byte(struct nand_chip *chip)
 	return ioread8(nand->activecs->io.virt);
 }
 
-static void atmel_nand_write_byte(struct mtd_info *mtd, u8 byte)
+static void atmel_nand_write_byte(struct nand_chip *chip, u8 byte)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct atmel_nand *nand = to_atmel_nand(chip);
 
 	if (chip->options & NAND_BUSWIDTH_16)
@@ -452,9 +451,8 @@ static void atmel_nand_read_buf(struct nand_chip *chip, u8 *buf, int len)
 		ioread8_rep(nand->activecs->io.virt, buf, len);
 }
 
-static void atmel_nand_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
+static void atmel_nand_write_buf(struct nand_chip *chip, const u8 *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct atmel_nand *nand = to_atmel_nand(chip);
 	struct atmel_nand_controller *nc;
 
@@ -841,7 +839,7 @@ static int atmel_nand_pmecc_write_pg(struct nand_chip *chip, const u8 *buf,
 	if (ret)
 		return ret;
 
-	atmel_nand_write_buf(mtd, buf, mtd->writesize);
+	atmel_nand_write_buf(chip, buf, mtd->writesize);
 
 	ret = atmel_nand_pmecc_generate_eccbytes(chip, raw);
 	if (ret) {
@@ -851,7 +849,7 @@ static int atmel_nand_pmecc_write_pg(struct nand_chip *chip, const u8 *buf,
 
 	atmel_nand_pmecc_disable(chip, raw);
 
-	atmel_nand_write_buf(mtd, chip->oob_poi, mtd->oobsize);
+	atmel_nand_write_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	return nand_prog_page_end_op(chip);
 }
@@ -942,7 +940,7 @@ static int atmel_hsmc_nand_pmecc_write_pg(struct nand_chip *chip,
 	if (ret)
 		return ret;
 
-	atmel_nand_write_buf(mtd, chip->oob_poi, mtd->oobsize);
+	atmel_nand_write_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	nc->op.cmds[0] = NAND_CMD_PAGEPROG;
 	nc->op.ncmds = 1;
diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c
index 76ea4141eb10..f1cc9f672262 100644
--- a/drivers/mtd/nand/raw/au1550nd.c
+++ b/drivers/mtd/nand/raw/au1550nd.c
@@ -24,7 +24,7 @@ struct au1550nd_ctx {
 
 	int cs;
 	void __iomem *base;
-	void (*write_byte)(struct mtd_info *, u_char);
+	void (*write_byte)(struct nand_chip *, u_char);
 };
 
 /**
@@ -42,14 +42,13 @@ static u_char au_read_byte(struct nand_chip *this)
 
 /**
  * au_write_byte -  write one byte to the chip
- * @mtd:	MTD device structure
+ * @this:	NAND chip object
  * @byte:	pointer to data byte to write
  *
  * write function for 8it buswidth
  */
-static void au_write_byte(struct mtd_info *mtd, u_char byte)
+static void au_write_byte(struct nand_chip *this, u_char byte)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	writeb(byte, this->IO_ADDR_W);
 	wmb(); /* drain writebuffer */
 }
@@ -69,30 +68,28 @@ static u_char au_read_byte16(struct nand_chip *this)
 
 /**
  * au_write_byte16 -  write one byte endianness aware to the chip
- * @mtd:	MTD device structure
+ * @this:	NAND chip object
  * @byte:	pointer to data byte to write
  *
  * write function for 16bit buswidth with endianness conversion
  */
-static void au_write_byte16(struct mtd_info *mtd, u_char byte)
+static void au_write_byte16(struct nand_chip *this, u_char byte)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	writew(le16_to_cpu((u16) byte), this->IO_ADDR_W);
 	wmb(); /* drain writebuffer */
 }
 
 /**
  * au_write_buf -  write buffer to chip
- * @mtd:	MTD device structure
+ * @this:	NAND chip object
  * @buf:	data buffer
  * @len:	number of bytes to write
  *
  * write function for 8bit buswidth
  */
-static void au_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
+static void au_write_buf(struct nand_chip *this, const u_char *buf, int len)
 {
 	int i;
-	struct nand_chip *this = mtd_to_nand(mtd);
 
 	for (i = 0; i < len; i++) {
 		writeb(buf[i], this->IO_ADDR_W);
@@ -120,16 +117,15 @@ static void au_read_buf(struct nand_chip *this, u_char *buf, int len)
 
 /**
  * au_write_buf16 -  write buffer to chip
- * @mtd:	MTD device structure
+ * @this:	NAND chip object
  * @buf:	data buffer
  * @len:	number of bytes to write
  *
  * write function for 16bit buswidth
  */
-static void au_write_buf16(struct mtd_info *mtd, const u_char *buf, int len)
+static void au_write_buf16(struct nand_chip *this, const u_char *buf, int len)
 {
 	int i;
-	struct nand_chip *this = mtd_to_nand(mtd);
 	u16 *p = (u16 *) buf;
 	len >>= 1;
 
@@ -272,9 +268,9 @@ static void au1550_command(struct mtd_info *mtd, unsigned command, int column, i
 			column -= 256;
 			readcmd = NAND_CMD_READ1;
 		}
-		ctx->write_byte(mtd, readcmd);
+		ctx->write_byte(this, readcmd);
 	}
-	ctx->write_byte(mtd, command);
+	ctx->write_byte(this, command);
 
 	/* Set ALE and clear CLE to start address cycle */
 	au1550_hwcontrol(mtd, NAND_CTL_CLRCLE);
@@ -288,10 +284,10 @@ static void au1550_command(struct mtd_info *mtd, unsigned command, int column, i
 			if (this->options & NAND_BUSWIDTH_16 &&
 					!nand_opcode_8bits(command))
 				column >>= 1;
-			ctx->write_byte(mtd, column);
+			ctx->write_byte(this, column);
 		}
 		if (page_addr != -1) {
-			ctx->write_byte(mtd, (u8)(page_addr & 0xff));
+			ctx->write_byte(this, (u8)(page_addr & 0xff));
 
 			if (command == NAND_CMD_READ0 ||
 			    command == NAND_CMD_READ1 ||
@@ -309,10 +305,10 @@ static void au1550_command(struct mtd_info *mtd, unsigned command, int column, i
 				au1550_hwcontrol(mtd, NAND_CTL_SETNCE);
 			}
 
-			ctx->write_byte(mtd, (u8)(page_addr >> 8));
+			ctx->write_byte(this, (u8)(page_addr >> 8));
 
 			if (this->options & NAND_ROW_ADDR_3)
-				ctx->write_byte(mtd,
+				ctx->write_byte(this,
 						((page_addr >> 16) & 0x0f));
 		}
 		/* Latch in address */
diff --git a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
index cf3e45358c60..83eec2812aa0 100644
--- a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
@@ -354,15 +354,15 @@ static void bcm47xxnflash_ops_bcm4706_read_buf(struct nand_chip *nand_chip,
 	pr_err("Invalid command for buf read: 0x%X\n", b47n->curr_command);
 }
 
-static void bcm47xxnflash_ops_bcm4706_write_buf(struct mtd_info *mtd,
+static void bcm47xxnflash_ops_bcm4706_write_buf(struct nand_chip *nand_chip,
 						const uint8_t *buf, int len)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
 	struct bcm47xxnflash *b47n = nand_get_controller_data(nand_chip);
 
 	switch (b47n->curr_command) {
 	case NAND_CMD_SEQIN:
-		bcm47xxnflash_ops_bcm4706_write(mtd, buf, len);
+		bcm47xxnflash_ops_bcm4706_write(nand_to_mtd(nand_chip), buf,
+						len);
 		return;
 	}
 
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index 7cbc6045f16d..e24e77b27618 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -1481,11 +1481,10 @@ static void brcmnand_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 		*buf = brcmnand_read_byte(chip);
 }
 
-static void brcmnand_write_buf(struct mtd_info *mtd, const uint8_t *buf,
-				   int len)
+static void brcmnand_write_buf(struct nand_chip *chip, const uint8_t *buf,
+			       int len)
 {
 	int i;
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct brcmnand_host *host = nand_get_controller_data(chip);
 
 	switch (host->last_cmd) {
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index 97d835f88b86..f801333161c9 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -117,9 +117,8 @@ static int cafe_device_ready(struct mtd_info *mtd)
 }
 
 
-static void cafe_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+static void cafe_write_buf(struct nand_chip *chip, const uint8_t *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct cafe_priv *cafe = nand_get_controller_data(chip);
 
 	if (cafe->usedma)
@@ -540,7 +539,7 @@ static int cafe_nand_write_page_lowlevel(struct nand_chip *chip,
 	struct cafe_priv *cafe = nand_get_controller_data(chip);
 
 	nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize);
-	chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
+	chip->write_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	/* Set up ECC autogeneration */
 	cafe->ctl2 |= (1<<30);
diff --git a/drivers/mtd/nand/raw/cmx270_nand.c b/drivers/mtd/nand/raw/cmx270_nand.c
index 232d32391b1f..4e5c8b7721ab 100644
--- a/drivers/mtd/nand/raw/cmx270_nand.c
+++ b/drivers/mtd/nand/raw/cmx270_nand.c
@@ -54,10 +54,10 @@ static u_char cmx270_read_byte(struct nand_chip *this)
 	return (readl(this->IO_ADDR_R) >> 16);
 }
 
-static void cmx270_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
+static void cmx270_write_buf(struct nand_chip *this, const u_char *buf,
+			     int len)
 {
 	int i;
-	struct nand_chip *this = mtd_to_nand(mtd);
 
 	for (i=0; i<len; i++)
 		writel((*buf++ << 16), this->IO_ADDR_W);
diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c
index 4394eeebec7f..442fa583db44 100644
--- a/drivers/mtd/nand/raw/cs553x_nand.c
+++ b/drivers/mtd/nand/raw/cs553x_nand.c
@@ -103,10 +103,8 @@ static void cs553x_read_buf(struct nand_chip *this, u_char *buf, int len)
 	memcpy_fromio(buf, this->IO_ADDR_R, len);
 }
 
-static void cs553x_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
+static void cs553x_write_buf(struct nand_chip *this, const u_char *buf, int len)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
-
 	while (unlikely(len > 0x800)) {
 		memcpy_toio(this->IO_ADDR_R, buf, 0x800);
 		buf += 0x800;
@@ -120,9 +118,8 @@ static unsigned char cs553x_read_byte(struct nand_chip *this)
 	return readb(this->IO_ADDR_R);
 }
 
-static void cs553x_write_byte(struct mtd_info *mtd, u_char byte)
+static void cs553x_write_byte(struct nand_chip *this, u_char byte)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	int i = 100000;
 
 	while (i && readb(this->IO_ADDR_R + MM_NAND_STS) & CS_NAND_CTLR_BUSY) {
@@ -142,7 +139,7 @@ static void cs553x_hwcontrol(struct mtd_info *mtd, int cmd,
 		writeb(ctl, mmio_base + MM_NAND_CTL);
 	}
 	if (cmd != NAND_CMD_NONE)
-		cs553x_write_byte(mtd, cmd);
+		cs553x_write_byte(this, cmd);
 }
 
 static int cs553x_device_ready(struct mtd_info *mtd)
diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index b879049e51c6..02a2d3b05e34 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -446,11 +446,9 @@ static void nand_davinci_read_buf(struct nand_chip *chip, uint8_t *buf,
 		ioread8_rep(chip->IO_ADDR_R, buf, len);
 }
 
-static void nand_davinci_write_buf(struct mtd_info *mtd,
-		const uint8_t *buf, int len)
+static void nand_davinci_write_buf(struct nand_chip *chip, const uint8_t *buf,
+				   int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
 	if ((0x03 & ((uintptr_t)buf)) == 0 && (0x03 & len) == 0)
 		iowrite32_rep(chip->IO_ADDR_R, buf, len >> 2);
 	else if ((0x01 & ((uintptr_t)buf)) == 0 && (0x01 & len) == 0)
diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index 6e5be3efcb5d..101ffa11b606 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -226,9 +226,10 @@ static void denali_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 		buf[i] = denali->host_read(denali, addr);
 }
 
-static void denali_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+static void denali_write_buf(struct nand_chip *chip, const uint8_t *buf,
+			     int len)
 {
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	struct denali_nand_info *denali = mtd_to_denali(nand_to_mtd(chip));
 	u32 addr = DENALI_MAP11_DATA | DENALI_BANK(denali);
 	int i;
 
@@ -247,10 +248,10 @@ static void denali_read_buf16(struct nand_chip *chip, uint8_t *buf, int len)
 		buf16[i] = denali->host_read(denali, addr);
 }
 
-static void denali_write_buf16(struct mtd_info *mtd, const uint8_t *buf,
+static void denali_write_buf16(struct nand_chip *chip, const uint8_t *buf,
 			       int len)
 {
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	struct denali_nand_info *denali = mtd_to_denali(nand_to_mtd(chip));
 	u32 addr = DENALI_MAP11_DATA | DENALI_BANK(denali);
 	const uint16_t *buf16 = (const uint16_t *)buf;
 	int i;
@@ -268,9 +269,9 @@ static uint8_t denali_read_byte(struct nand_chip *chip)
 	return byte;
 }
 
-static void denali_write_byte(struct mtd_info *mtd, uint8_t byte)
+static void denali_write_byte(struct nand_chip *chip, uint8_t byte)
 {
-	denali_write_buf(mtd, &byte, 1);
+	denali_write_buf(chip, &byte, 1);
 }
 
 static void denali_cmd_ctrl(struct mtd_info *mtd, int dat, unsigned int ctrl)
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index de1059069e8f..a1bc2f7436db 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -290,9 +290,8 @@ static inline int DoC_WaitReady(struct doc_priv *doc)
 	return ret;
 }
 
-static void doc2000_write_byte(struct mtd_info *mtd, u_char datum)
+static void doc2000_write_byte(struct nand_chip *this, u_char datum)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 
@@ -316,9 +315,9 @@ static u_char doc2000_read_byte(struct nand_chip *this)
 	return ret;
 }
 
-static void doc2000_writebuf(struct mtd_info *mtd, const u_char *buf, int len)
+static void doc2000_writebuf(struct nand_chip *this, const u_char *buf,
+			     int len)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 	int i;
@@ -448,9 +447,8 @@ static int doc200x_wait(struct mtd_info *mtd, struct nand_chip *this)
 	return status;
 }
 
-static void doc2001_write_byte(struct mtd_info *mtd, u_char datum)
+static void doc2001_write_byte(struct nand_chip *this, u_char datum)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 
@@ -472,9 +470,8 @@ static u_char doc2001_read_byte(struct nand_chip *this)
 	return ReadDOC(docptr, LastDataRead);
 }
 
-static void doc2001_writebuf(struct mtd_info *mtd, const u_char *buf, int len)
+static void doc2001_writebuf(struct nand_chip *this, const u_char *buf, int len)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 	int i;
@@ -515,9 +512,8 @@ static u_char doc2001plus_read_byte(struct nand_chip *this)
 	return ret;
 }
 
-static void doc2001plus_writebuf(struct mtd_info *mtd, const u_char *buf, int len)
+static void doc2001plus_writebuf(struct nand_chip *this, const u_char *buf, int len)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 	int i;
@@ -638,9 +634,9 @@ static void doc200x_hwcontrol(struct mtd_info *mtd, int cmd,
 	}
 	if (cmd != NAND_CMD_NONE) {
 		if (DoC_is_2000(doc))
-			doc2000_write_byte(mtd, cmd);
+			doc2000_write_byte(this, cmd);
 		else
-			doc2001_write_byte(mtd, cmd);
+			doc2001_write_byte(this, cmd);
 	}
 }
 
diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c
index 284bc96dacd3..9cbe87448e77 100644
--- a/drivers/mtd/nand/raw/docg4.c
+++ b/drivers/mtd/nand/raw/docg4.c
@@ -271,10 +271,10 @@ static void docg4_read_buf(struct nand_chip *nand, uint8_t *buf, int len)
 		p[i] = readw(nand->IO_ADDR_R);
 }
 
-static void docg4_write_buf16(struct mtd_info *mtd, const uint8_t *buf, int len)
+static void docg4_write_buf16(struct nand_chip *nand, const uint8_t *buf,
+			      int len)
 {
 	int i;
-	struct nand_chip *nand = mtd_to_nand(mtd);
 	uint16_t *p = (uint16_t *) buf;
 	len >>= 1;
 
@@ -964,10 +964,10 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *nand,
 	write_nop(docptr);
 
 	/* write the page data */
-	docg4_write_buf16(mtd, buf, DOCG4_PAGE_SIZE);
+	docg4_write_buf16(nand, buf, DOCG4_PAGE_SIZE);
 
 	/* oob bytes 0 through 5 are written to I/O reg */
-	docg4_write_buf16(mtd, nand->oob_poi, 6);
+	docg4_write_buf16(nand, nand->oob_poi, 6);
 
 	/* oob byte 6 written to a separate reg */
 	writew(nand->oob_poi[6], docptr + DOCG4_OOB_6_7);
@@ -995,7 +995,7 @@ static int write_page(struct mtd_info *mtd, struct nand_chip *nand,
 		memcpy(ecc_buf, &nand->oob_poi[8], 8);
 	}
 
-	docg4_write_buf16(mtd, ecc_buf, 8);
+	docg4_write_buf16(nand, ecc_buf, 8);
 	write_nop(docptr);
 	write_nop(docptr);
 	writew(0, docptr + DOC_DATAEND);
diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
index 22326bcb8b62..14d246323e94 100644
--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
@@ -543,9 +543,9 @@ static void fsl_elbc_select_chip(struct mtd_info *mtd, int chip)
 /*
  * Write buf to the FCM Controller Data Buffer
  */
-static void fsl_elbc_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
+static void fsl_elbc_write_buf(struct nand_chip *chip, const u8 *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct fsl_elbc_mtd *priv = nand_get_controller_data(chip);
 	struct fsl_elbc_fcm_ctrl *elbc_fcm_ctrl = priv->ctrl->nand;
 	unsigned int bufsize = mtd->writesize + mtd->oobsize;
@@ -735,7 +735,7 @@ static int fsl_elbc_write_page(struct nand_chip *chip, const uint8_t *buf,
 	struct mtd_info *mtd = nand_to_mtd(chip);
 
 	nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize);
-	fsl_elbc_write_buf(mtd, chip->oob_poi, mtd->oobsize);
+	fsl_elbc_write_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	return nand_prog_page_end_op(chip);
 }
@@ -750,8 +750,8 @@ static int fsl_elbc_write_subpage(struct nand_chip *chip, uint32_t offset,
 	struct mtd_info *mtd = nand_to_mtd(chip);
 
 	nand_prog_page_begin_op(chip, page, 0, NULL, 0);
-	fsl_elbc_write_buf(mtd, buf, mtd->writesize);
-	fsl_elbc_write_buf(mtd, chip->oob_poi, mtd->oobsize);
+	fsl_elbc_write_buf(chip, buf, mtd->writesize);
+	fsl_elbc_write_buf(chip, chip->oob_poi, mtd->oobsize);
 	return nand_prog_page_end_op(chip);
 }
 
diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index 7b6d0913a5bb..2e032db997a5 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -519,9 +519,9 @@ static void fsl_ifc_select_chip(struct mtd_info *mtd, int chip)
 /*
  * Write buf to the IFC NAND Controller Data Buffer
  */
-static void fsl_ifc_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
+static void fsl_ifc_write_buf(struct nand_chip *chip, const u8 *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct fsl_ifc_mtd *priv = nand_get_controller_data(chip);
 	unsigned int bufsize = mtd->writesize + mtd->oobsize;
 
@@ -710,7 +710,7 @@ static int fsl_ifc_write_page(struct nand_chip *chip, const uint8_t *buf,
 	struct mtd_info *mtd = nand_to_mtd(chip);
 
 	nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize);
-	fsl_ifc_write_buf(mtd, chip->oob_poi, mtd->oobsize);
+	fsl_ifc_write_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	return nand_prog_page_end_op(chip);
 }
diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c
index 340547f1b6c7..d3d3adcb7282 100644
--- a/drivers/mtd/nand/raw/fsl_upm.c
+++ b/drivers/mtd/nand/raw/fsl_upm.c
@@ -140,9 +140,9 @@ static void fun_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 		buf[i] = in_8(fun->chip.IO_ADDR_R);
 }
 
-static void fun_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+static void fun_write_buf(struct nand_chip *chip, const uint8_t *buf, int len)
 {
-	struct fsl_upm_nand *fun = to_fsl_upm_nand(mtd);
+	struct fsl_upm_nand *fun = to_fsl_upm_nand(nand_to_mtd(chip));
 	int i;
 
 	for (i = 0; i < len; i++) {
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index d0d5caa1b7a6..bd1b8445b358 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -868,9 +868,8 @@ static void gpmi_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 	gpmi_read_data(this, buf, len);
 }
 
-static void gpmi_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+static void gpmi_write_buf(struct nand_chip *chip, const uint8_t *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct gpmi_nand_data *this = nand_get_controller_data(chip);
 
 	dev_dbg(this->dev, "len is %d\n", len);
diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c
index e1fe6963c908..b4e5bfd30022 100644
--- a/drivers/mtd/nand/raw/hisi504_nand.c
+++ b/drivers/mtd/nand/raw/hisi504_nand.c
@@ -380,9 +380,8 @@ static uint8_t hisi_nfc_read_byte(struct nand_chip *chip)
 }
 
 static void
-hisi_nfc_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+hisi_nfc_write_buf(struct nand_chip *chip, const uint8_t *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct hinfc_host *host = nand_get_controller_data(chip);
 
 	memcpy(host->buffer + host->offset, buf, len);
@@ -583,7 +582,7 @@ static int hisi_nand_write_page_hwecc(struct nand_chip *chip,
 
 	nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize);
 	if (oob_required)
-		chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
+		chip->write_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	return nand_prog_page_end_op(chip);
 }
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index 5820c86cb1f1..d04b30989041 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -381,9 +381,9 @@ static void lpc32xx_nand_read_buf(struct nand_chip *chip, u_char *buf, int len)
 /*
  * Simple device write without ECC
  */
-static void lpc32xx_nand_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+static void lpc32xx_nand_write_buf(struct nand_chip *chip, const uint8_t *buf,
+				   int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct lpc32xx_nand_host *host = nand_get_controller_data(chip);
 
 	/* Direct device write with no ECC */
@@ -706,7 +706,7 @@ static int lpc32xx_nand_write_page_syndrome(struct nand_chip *chip,
 	lpc32xx_slc_ecc_copy(pb, (uint32_t *)host->ecc_buf, chip->ecc.steps);
 
 	/* Write ECC data to device */
-	chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
+	chip->write_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	return nand_prog_page_end_op(chip);
 }
@@ -724,7 +724,7 @@ static int lpc32xx_nand_write_page_raw_syndrome(struct nand_chip *chip,
 	/* Raw writes can just use the FIFO interface */
 	nand_prog_page_begin_op(chip, page, 0, buf,
 				chip->ecc.size * chip->ecc.steps);
-	chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
+	chip->write_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	return nand_prog_page_end_op(chip);
 }
diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c
index 49031f5a3b6d..dc573a0b5fe1 100644
--- a/drivers/mtd/nand/raw/mpc5121_nfc.c
+++ b/drivers/mtd/nand/raw/mpc5121_nfc.c
@@ -499,10 +499,10 @@ static void mpc5121_nfc_read_buf(struct nand_chip *chip, u_char *buf, int len)
 }
 
 /* Write data to NFC buffers */
-static void mpc5121_nfc_write_buf(struct mtd_info *mtd,
-						const u_char *buf, int len)
+static void mpc5121_nfc_write_buf(struct nand_chip *chip, const u_char *buf,
+				  int len)
 {
-	mpc5121_nfc_buf_copy(mtd, (u_char *)buf, len, 1);
+	mpc5121_nfc_buf_copy(nand_to_mtd(chip), (u_char *)buf, len, 1);
 }
 
 /* Read byte from NFC buffers */
diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c
index 1c7392242d4d..bd2002a1fabd 100644
--- a/drivers/mtd/nand/raw/mtk_nand.c
+++ b/drivers/mtd/nand/raw/mtk_nand.c
@@ -474,9 +474,9 @@ static void mtk_nfc_read_buf(struct nand_chip *chip, u8 *buf, int len)
 		buf[i] = mtk_nfc_read_byte(chip);
 }
 
-static void mtk_nfc_write_byte(struct mtd_info *mtd, u8 byte)
+static void mtk_nfc_write_byte(struct nand_chip *chip, u8 byte)
 {
-	struct mtk_nfc *nfc = nand_get_controller_data(mtd_to_nand(mtd));
+	struct mtk_nfc *nfc = nand_get_controller_data(chip);
 	u32 reg;
 
 	reg = nfi_readl(nfc, NFI_STA) & NFI_FSM_MASK;
@@ -495,12 +495,12 @@ static void mtk_nfc_write_byte(struct mtd_info *mtd, u8 byte)
 	nfi_writeb(nfc, byte, NFI_DATAW);
 }
 
-static void mtk_nfc_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
+static void mtk_nfc_write_buf(struct nand_chip *chip, const u8 *buf, int len)
 {
 	int i;
 
 	for (i = 0; i < len; i++)
-		mtk_nfc_write_byte(mtd, buf[i]);
+		mtk_nfc_write_byte(chip, buf[i]);
 }
 
 static int mtk_nfc_setup_data_interface(struct mtd_info *mtd, int csline,
diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index 8fed2919f35e..d5d8f8c16b60 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -921,10 +921,10 @@ static u_char mxc_nand_read_byte(struct nand_chip *nand_chip)
 /* Write data of length len to buffer buf. The data to be
  * written on NAND Flash is first copied to RAMbuffer. After the Data Input
  * Operation by the NFC, the data is written to NAND Flash */
-static void mxc_nand_write_buf(struct mtd_info *mtd,
-				const u_char *buf, int len)
+static void mxc_nand_write_buf(struct nand_chip *nand_chip, const u_char *buf,
+			       int len)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(nand_chip);
 	struct mxc_nand_host *host = nand_get_controller_data(nand_chip);
 	u16 col = host->buf_start;
 	int n = mtd->oobsize + mtd->writesize - col;
@@ -1405,7 +1405,7 @@ static int mxc_nand_set_features(struct mtd_info *mtd, struct nand_chip *chip,
 	host->buf_start = 0;
 
 	for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i)
-		chip->write_byte(mtd, subfeature_param[i]);
+		chip->write_byte(chip, subfeature_param[i]);
 
 	memcpy32_toio(host->main_area0, host->data_buf, mtd->writesize);
 	host->devtype_data->send_cmd(host, NAND_CMD_SET_FEATURES, false);
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index e4686078011d..6c20c0b805a3 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -300,28 +300,25 @@ static void nand_select_chip(struct mtd_info *mtd, int chipnr)
 
 /**
  * nand_write_byte - [DEFAULT] write single byte to chip
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @byte: value to write
  *
  * Default function to write a byte to I/O[7:0]
  */
-static void nand_write_byte(struct mtd_info *mtd, uint8_t byte)
+static void nand_write_byte(struct nand_chip *chip, uint8_t byte)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
-	chip->write_buf(mtd, &byte, 1);
+	chip->write_buf(chip, &byte, 1);
 }
 
 /**
  * nand_write_byte16 - [DEFAULT] write single byte to a chip with width 16
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @byte: value to write
  *
  * Default function to write a byte to I/O[7:0] on a 16-bit wide chip.
  */
-static void nand_write_byte16(struct mtd_info *mtd, uint8_t byte)
+static void nand_write_byte16(struct nand_chip *chip, uint8_t byte)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	uint16_t word = byte;
 
 	/*
@@ -340,21 +337,19 @@ static void nand_write_byte16(struct mtd_info *mtd, uint8_t byte)
 	 * neither an address nor a command transfer. Let's assume a 0 on the
 	 * upper I/O lines is OK.
 	 */
-	chip->write_buf(mtd, (uint8_t *)&word, 2);
+	chip->write_buf(chip, (uint8_t *)&word, 2);
 }
 
 /**
  * nand_write_buf - [DEFAULT] write buffer to chip
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @buf: data buffer
  * @len: number of bytes to write
  *
  * Default write function for 8bit buswidth.
  */
-static void nand_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+static void nand_write_buf(struct nand_chip *chip, const uint8_t *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
 	iowrite8_rep(chip->IO_ADDR_W, buf, len);
 }
 
@@ -373,15 +368,15 @@ static void nand_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 
 /**
  * nand_write_buf16 - [DEFAULT] write buffer to chip
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @buf: data buffer
  * @len: number of bytes to write
  *
  * Default write function for 16bit buswidth.
  */
-static void nand_write_buf16(struct mtd_info *mtd, const uint8_t *buf, int len)
+static void nand_write_buf16(struct nand_chip *chip, const uint8_t *buf,
+			     int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	u16 *p = (u16 *) buf;
 
 	iowrite16_rep(chip->IO_ADDR_W, p, len >> 1);
@@ -1801,7 +1796,7 @@ int nand_prog_page_begin_op(struct nand_chip *chip, unsigned int page,
 	chip->cmdfunc(mtd, NAND_CMD_SEQIN, offset_in_page, page);
 
 	if (buf)
-		chip->write_buf(mtd, buf, len);
+		chip->write_buf(chip, buf, len);
 
 	return 0;
 }
@@ -1886,7 +1881,7 @@ int nand_prog_page_op(struct nand_chip *chip, unsigned int page,
 						len, true);
 	} else {
 		chip->cmdfunc(mtd, NAND_CMD_SEQIN, offset_in_page, page);
-		chip->write_buf(mtd, buf, len);
+		chip->write_buf(chip, buf, len);
 		chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
 		status = chip->waitfunc(mtd, chip);
 	}
@@ -1955,7 +1950,7 @@ int nand_change_write_column_op(struct nand_chip *chip,
 
 	chip->cmdfunc(mtd, NAND_CMD_RNDIN, offset_in_page, -1);
 	if (len)
-		chip->write_buf(mtd, buf, len);
+		chip->write_buf(chip, buf, len);
 
 	return 0;
 }
@@ -2175,7 +2170,7 @@ static int nand_set_features_op(struct nand_chip *chip, u8 feature,
 
 	chip->cmdfunc(mtd, NAND_CMD_SET_FEATURES, feature, -1);
 	for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i)
-		chip->write_byte(mtd, params[i]);
+		chip->write_byte(chip, params[i]);
 
 	ret = chip->waitfunc(mtd, chip);
 	if (ret < 0)
@@ -2343,8 +2338,6 @@ EXPORT_SYMBOL_GPL(nand_read_data_op);
 int nand_write_data_op(struct nand_chip *chip, const void *buf,
 		       unsigned int len, bool force_8bit)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
-
 	if (!len || !buf)
 		return -EINVAL;
 
@@ -2364,9 +2357,9 @@ int nand_write_data_op(struct nand_chip *chip, const void *buf,
 		unsigned int i;
 
 		for (i = 0; i < len; i++)
-			chip->write_byte(mtd, p[i]);
+			chip->write_byte(chip, p[i]);
 	} else {
-		chip->write_buf(mtd, buf, len);
+		chip->write_buf(chip, buf, len);
 	}
 
 	return 0;
diff --git a/drivers/mtd/nand/raw/nand_hynix.c b/drivers/mtd/nand/raw/nand_hynix.c
index 4ffbb26e76d6..197256c2e1ee 100644
--- a/drivers/mtd/nand/raw/nand_hynix.c
+++ b/drivers/mtd/nand/raw/nand_hynix.c
@@ -111,7 +111,7 @@ static int hynix_nand_reg_write_op(struct nand_chip *chip, u8 addr, u8 val)
 	}
 
 	chip->cmdfunc(mtd, NAND_CMD_NONE, column, -1);
-	chip->write_byte(mtd, val);
+	chip->write_byte(chip, val);
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c
index 04feb4e8d112..880ba12e07ba 100644
--- a/drivers/mtd/nand/raw/nandsim.c
+++ b/drivers/mtd/nand/raw/nandsim.c
@@ -1933,9 +1933,8 @@ static u_char ns_nand_read_byte(struct nand_chip *chip)
 	return outb;
 }
 
-static void ns_nand_write_byte(struct mtd_info *mtd, u_char byte)
+static void ns_nand_write_byte(struct nand_chip *chip, u_char byte)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct nandsim *ns = nand_get_controller_data(chip);
 
 	/* Sanity and correctness checks */
@@ -2098,7 +2097,7 @@ static void ns_hwcontrol(struct mtd_info *mtd, int cmd, unsigned int bitmask)
 	ns->lines.ce = bitmask & NAND_NCE ? 1 : 0;
 
 	if (cmd != NAND_CMD_NONE)
-		ns_nand_write_byte(mtd, cmd);
+		ns_nand_write_byte(chip, cmd);
 }
 
 static int ns_device_ready(struct mtd_info *mtd)
@@ -2107,9 +2106,9 @@ static int ns_device_ready(struct mtd_info *mtd)
 	return 1;
 }
 
-static void ns_nand_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
+static void ns_nand_write_buf(struct nand_chip *chip, const u_char *buf,
+			      int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct nandsim *ns = nand_get_controller_data(chip);
 
 	/* Check that chip is expecting data input */
diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index 56bb1eaa5ef3..02b102addeb5 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -125,9 +125,8 @@ static void ndfc_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 		*p++ = in_be32(ndfc->ndfcbase + NDFC_DATA);
 }
 
-static void ndfc_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+static void ndfc_write_buf(struct nand_chip *chip, const uint8_t *buf, int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct ndfc_controller *ndfc = nand_get_controller_data(chip);
 	uint32_t *p = (uint32_t *) buf;
 
diff --git a/drivers/mtd/nand/raw/nuc900_nand.c b/drivers/mtd/nand/raw/nuc900_nand.c
index 3a88da5ec97f..357b3cf03195 100644
--- a/drivers/mtd/nand/raw/nuc900_nand.c
+++ b/drivers/mtd/nand/raw/nuc900_nand.c
@@ -99,11 +99,11 @@ static void nuc900_nand_read_buf(struct nand_chip *chip,
 		buf[i] = (unsigned char)read_data_reg(nand);
 }
 
-static void nuc900_nand_write_buf(struct mtd_info *mtd,
+static void nuc900_nand_write_buf(struct nand_chip *chip,
 				  const unsigned char *buf, int len)
 {
 	int i;
-	struct nuc900_nand *nand = mtd_to_nuc900(mtd);
+	struct nuc900_nand *nand = mtd_to_nuc900(nand_to_mtd(chip));
 
 	for (i = 0; i < len; i++)
 		write_data_reg(nand, buf[i]);
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index 4f2e5cd86050..5a2bf1ed9c86 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -384,13 +384,14 @@ static void omap_read_buf_pref(struct nand_chip *chip, u_char *buf, int len)
 
 /**
  * omap_write_buf_pref - write buffer to NAND controller
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @buf: data buffer
  * @len: number of bytes to write
  */
-static void omap_write_buf_pref(struct mtd_info *mtd,
-					const u_char *buf, int len)
+static void omap_write_buf_pref(struct nand_chip *chip, const u_char *buf,
+				int len)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct omap_nand_info *info = mtd_to_omap(mtd);
 	uint32_t w_count = 0;
 	int i = 0, ret = 0;
@@ -547,18 +548,20 @@ static void omap_read_buf_dma_pref(struct nand_chip *chip, u_char *buf,
 
 /**
  * omap_write_buf_dma_pref - write buffer to NAND controller
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @buf: data buffer
  * @len: number of bytes to write
  */
-static void omap_write_buf_dma_pref(struct mtd_info *mtd,
-					const u_char *buf, int len)
+static void omap_write_buf_dma_pref(struct nand_chip *chip, const u_char *buf,
+				    int len)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	if (len <= mtd->oobsize)
-		omap_write_buf_pref(mtd, buf, len);
+		omap_write_buf_pref(chip, buf, len);
 	else
 		/* start transfer in DMA mode */
-		omap_nand_dma_transfer(mtd, (u_char *) buf, len, 0x1);
+		omap_nand_dma_transfer(mtd, (u_char *)buf, len, 0x1);
 }
 
 /*
@@ -657,20 +660,21 @@ out_copy:
 
 /*
  * omap_write_buf_irq_pref - write buffer to NAND controller
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @buf: data buffer
  * @len: number of bytes to write
  */
-static void omap_write_buf_irq_pref(struct mtd_info *mtd,
-					const u_char *buf, int len)
+static void omap_write_buf_irq_pref(struct nand_chip *chip, const u_char *buf,
+				    int len)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct omap_nand_info *info = mtd_to_omap(mtd);
 	int ret = 0;
 	unsigned long tim, limit;
 	u32 val;
 
 	if (len <= mtd->oobsize) {
-		omap_write_buf_pref(mtd, buf, len);
+		omap_write_buf_pref(chip, buf, len);
 		return;
 	}
 
@@ -1537,7 +1541,7 @@ static int omap_write_page_bch(struct nand_chip *chip, const uint8_t *buf,
 	chip->ecc.hwctl(chip, NAND_ECC_WRITE);
 
 	/* Write data */
-	chip->write_buf(mtd, buf, mtd->writesize);
+	chip->write_buf(chip, buf, mtd->writesize);
 
 	/* Update ecc vector from GPMC result registers */
 	omap_calculate_ecc_bch_multi(mtd, buf, &ecc_calc[0]);
@@ -1548,7 +1552,7 @@ static int omap_write_page_bch(struct nand_chip *chip, const uint8_t *buf,
 		return ret;
 
 	/* Write ecc vector to OOB area */
-	chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
+	chip->write_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	return nand_prog_page_end_op(chip);
 }
@@ -1589,7 +1593,7 @@ static int omap_write_subpage_bch(struct nand_chip *chip, u32 offset,
 	chip->ecc.hwctl(chip, NAND_ECC_WRITE);
 
 	/* Write data */
-	chip->write_buf(mtd, buf, mtd->writesize);
+	chip->write_buf(chip, buf, mtd->writesize);
 
 	for (step = 0; step < ecc_steps; step++) {
 		/* mask ECC of un-touched subpages by padding 0xFF */
@@ -1614,7 +1618,7 @@ static int omap_write_subpage_bch(struct nand_chip *chip, u32 offset,
 		return ret;
 
 	/* write OOB buffer to NAND device */
-	chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
+	chip->write_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	return nand_prog_page_end_op(chip);
 }
diff --git a/drivers/mtd/nand/raw/oxnas_nand.c b/drivers/mtd/nand/raw/oxnas_nand.c
index 6156abb30b7a..93c04bec471d 100644
--- a/drivers/mtd/nand/raw/oxnas_nand.c
+++ b/drivers/mtd/nand/raw/oxnas_nand.c
@@ -52,9 +52,9 @@ static void oxnas_nand_read_buf(struct nand_chip *chip, u8 *buf, int len)
 	ioread8_rep(oxnas->io_base, buf, len);
 }
 
-static void oxnas_nand_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
+static void oxnas_nand_write_buf(struct nand_chip *chip, const u8 *buf,
+				 int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct oxnas_nand_ctrl *oxnas = nand_get_controller_data(chip);
 
 	iowrite8_rep(oxnas->io_base, buf, len);
diff --git a/drivers/mtd/nand/raw/pasemi_nand.c b/drivers/mtd/nand/raw/pasemi_nand.c
index 551e5db670be..70aff4180ab7 100644
--- a/drivers/mtd/nand/raw/pasemi_nand.c
+++ b/drivers/mtd/nand/raw/pasemi_nand.c
@@ -53,10 +53,9 @@ static void pasemi_read_buf(struct nand_chip *chip, u_char *buf, int len)
 	memcpy_fromio(buf, chip->IO_ADDR_R, len);
 }
 
-static void pasemi_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
+static void pasemi_write_buf(struct nand_chip *chip, const u_char *buf,
+			     int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
 	while (len > 0x800) {
 		memcpy_toio(chip->IO_ADDR_R, buf, 0x800);
 		buf += 0x800;
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index 5193806923ba..adfc3f50e8d5 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -44,14 +44,6 @@ static void plat_nand_select_chip(struct mtd_info *mtd, int cs)
 	pdata->ctrl.select_chip(mtd_to_nand(mtd), cs);
 }
 
-static void plat_nand_write_buf(struct mtd_info *mtd, const uint8_t *buf,
-				int len)
-{
-	struct platform_nand_data *pdata = dev_get_platdata(mtd->dev.parent);
-
-	pdata->ctrl.write_buf(mtd_to_nand(mtd), buf, len);
-}
-
 /*
  * Probe for the NAND device.
  */
@@ -101,9 +93,7 @@ static int plat_nand_probe(struct platform_device *pdev)
 	if (pdata->ctrl.select_chip)
 		data->chip.select_chip = plat_nand_select_chip;
 
-	if (pdata->ctrl.write_buf)
-		data->chip.write_buf = plat_nand_write_buf;
-
+	data->chip.write_buf = pdata->ctrl.write_buf;
 	data->chip.read_buf = pdata->ctrl.read_buf;
 	data->chip.chip_delay = pdata->chip.chip_delay;
 	data->chip.options |= pdata->chip.options;
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index 63bb9f3fe23b..af4908e26766 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -2312,10 +2312,9 @@ static void qcom_nandc_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 	nandc->buf_start += real_len;
 }
 
-static void qcom_nandc_write_buf(struct mtd_info *mtd, const uint8_t *buf,
+static void qcom_nandc_write_buf(struct nand_chip *chip, const uint8_t *buf,
 				 int len)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
 	int real_len = min_t(size_t, len, nandc->buf_count - nandc->buf_start);
 
diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c
index 07055bd657cd..19f49ca1ed5b 100644
--- a/drivers/mtd/nand/raw/r852.c
+++ b/drivers/mtd/nand/raw/r852.c
@@ -232,9 +232,9 @@ static void r852_do_dma(struct r852_device *dev, uint8_t *buf, int do_read)
 /*
  * Program data lines of the nand chip to send data to it
  */
-static void r852_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+static void r852_write_buf(struct nand_chip *chip, const uint8_t *buf, int len)
 {
-	struct r852_device *dev = r852_get_dev(mtd);
+	struct r852_device *dev = r852_get_dev(nand_to_mtd(chip));
 	uint32_t reg;
 
 	/* Don't allow any access to hardware if we suspect card removal */
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index 54c86ec612dd..a420a84eaf51 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -696,16 +696,16 @@ static void s3c2440_nand_read_buf(struct nand_chip *this, u_char *buf, int len)
 	}
 }
 
-static void s3c2410_nand_write_buf(struct mtd_info *mtd, const u_char *buf,
+static void s3c2410_nand_write_buf(struct nand_chip *this, const u_char *buf,
 				   int len)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	writesb(this->IO_ADDR_W, buf, len);
 }
 
-static void s3c2440_nand_write_buf(struct mtd_info *mtd, const u_char *buf,
+static void s3c2440_nand_write_buf(struct nand_chip *this, const u_char *buf,
 				   int len)
 {
+	struct mtd_info *mtd = nand_to_mtd(this);
 	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
 
 	writesl(info->regs + S3C2440_NFDATA, buf, len >> 2);
diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c
index 6966a18f8ac4..742b7eb82ab7 100644
--- a/drivers/mtd/nand/raw/sh_flctl.c
+++ b/drivers/mtd/nand/raw/sh_flctl.c
@@ -628,7 +628,7 @@ static int flctl_write_page_hwecc(struct nand_chip *chip, const uint8_t *buf,
 	struct mtd_info *mtd = nand_to_mtd(chip);
 
 	nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize);
-	chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
+	chip->write_buf(chip, chip->oob_poi, mtd->oobsize);
 	return nand_prog_page_end_op(chip);
 }
 
@@ -970,9 +970,9 @@ static void flctl_select_chip(struct mtd_info *mtd, int chipnr)
 	}
 }
 
-static void flctl_write_buf(struct mtd_info *mtd, const uint8_t *buf, int len)
+static void flctl_write_buf(struct nand_chip *chip, const uint8_t *buf, int len)
 {
-	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	struct sh_flctl *flctl = mtd_to_flctl(nand_to_mtd(chip));
 
 	memcpy(&flctl->done_buff[flctl->index], buf, len);
 	flctl->index += len;
diff --git a/drivers/mtd/nand/raw/socrates_nand.c b/drivers/mtd/nand/raw/socrates_nand.c
index 007e37680b88..deedc1cd4dee 100644
--- a/drivers/mtd/nand/raw/socrates_nand.c
+++ b/drivers/mtd/nand/raw/socrates_nand.c
@@ -34,15 +34,14 @@ struct socrates_nand_host {
 
 /**
  * socrates_nand_write_buf -  write buffer to chip
- * @mtd:	MTD device structure
+ * @this:	NAND chip object
  * @buf:	data buffer
  * @len:	number of bytes to write
  */
-static void socrates_nand_write_buf(struct mtd_info *mtd,
-		const uint8_t *buf, int len)
+static void socrates_nand_write_buf(struct nand_chip *this, const uint8_t *buf,
+				    int len)
 {
 	int i;
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct socrates_nand_host *host = nand_get_controller_data(this);
 
 	for (i = 0; i < len; i++) {
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index 2a0e624eca6c..80d9d2f8f5de 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -501,10 +501,9 @@ static void sunxi_nfc_read_buf(struct nand_chip *nand, uint8_t *buf, int len)
 	}
 }
 
-static void sunxi_nfc_write_buf(struct mtd_info *mtd, const uint8_t *buf,
+static void sunxi_nfc_write_buf(struct nand_chip *nand, const uint8_t *buf,
 				int len)
 {
-	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand);
 	struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller);
 	int ret;
@@ -760,7 +759,7 @@ static void sunxi_nfc_randomizer_write_buf(struct mtd_info *mtd,
 {
 	sunxi_nfc_randomizer_config(mtd, page, ecc);
 	sunxi_nfc_randomizer_enable(mtd);
-	sunxi_nfc_write_buf(mtd, buf, len);
+	sunxi_nfc_write_buf(mtd_to_nand(mtd), buf, len);
 	sunxi_nfc_randomizer_disable(mtd);
 }
 
diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c
index 20d6fa983a6b..7fc95c6980a7 100644
--- a/drivers/mtd/nand/raw/tango_nand.c
+++ b/drivers/mtd/nand/raw/tango_nand.c
@@ -149,9 +149,9 @@ static void tango_read_buf(struct nand_chip *chip, u8 *buf, int len)
 	ioread8_rep(tchip->base + PBUS_DATA, buf, len);
 }
 
-static void tango_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
+static void tango_write_buf(struct nand_chip *chip, const u8 *buf, int len)
 {
-	struct tango_chip *tchip = to_tango_chip(mtd_to_nand(mtd));
+	struct tango_chip *tchip = to_tango_chip(chip);
 
 	iowrite8_rep(tchip->base + PBUS_DATA, buf, len);
 }
@@ -338,15 +338,13 @@ static void aux_read(struct nand_chip *chip, u8 **buf, int len, int *pos)
 
 static void aux_write(struct nand_chip *chip, const u8 **buf, int len, int *pos)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
-
 	*pos += len;
 
 	if (!*buf) {
 		/* skip over "len" bytes */
 		nand_change_write_column_op(chip, *pos, NULL, 0, false);
 	} else {
-		tango_write_buf(mtd, *buf, len);
+		tango_write_buf(chip, *buf, len);
 		*buf += len;
 	}
 }
diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index 570ea045fbce..d627d855b254 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -245,9 +245,9 @@ static u_char tmio_nand_read_byte(struct nand_chip *chip)
   *buffer functions.
  */
 static void
-tmio_nand_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
+tmio_nand_write_buf(struct nand_chip *chip, const u_char *buf, int len)
 {
-	struct tmio_nand *tmio = mtd_to_tmio(mtd);
+	struct tmio_nand *tmio = mtd_to_tmio(nand_to_mtd(chip));
 
 	tmio_iowrite16_rep(tmio->fcr + FCR_DATA, buf, len >> 1);
 }
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index c68b638c4fe8..b7ff8eca441b 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -109,10 +109,10 @@ static uint8_t txx9ndfmc_read_byte(struct nand_chip *chip)
 	return txx9ndfmc_read(dev, TXX9_NDFDTR);
 }
 
-static void txx9ndfmc_write_buf(struct mtd_info *mtd, const uint8_t *buf,
+static void txx9ndfmc_write_buf(struct nand_chip *chip, const uint8_t *buf,
 				int len)
 {
-	struct platform_device *dev = mtd_to_platdev(mtd);
+	struct platform_device *dev = mtd_to_platdev(nand_to_mtd(chip));
 	void __iomem *ndfdtr = ndregaddr(dev, TXX9_NDFDTR);
 	u32 mcr = txx9ndfmc_read(dev, TXX9_NDFMCR);
 
diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c
index edbcfaa85ed8..77759f27d154 100644
--- a/drivers/mtd/nand/raw/xway_nand.c
+++ b/drivers/mtd/nand/raw/xway_nand.c
@@ -138,12 +138,12 @@ static void xway_read_buf(struct nand_chip *chip, u_char *buf, int len)
 		buf[i] = xway_readb(nand_to_mtd(chip), NAND_WRITE_DATA);
 }
 
-static void xway_write_buf(struct mtd_info *mtd, const u_char *buf, int len)
+static void xway_write_buf(struct nand_chip *chip, const u_char *buf, int len)
 {
 	int i;
 
 	for (i = 0; i < len; i++)
-		xway_writeb(mtd, NAND_WRITE_DATA, buf[i]);
+		xway_writeb(nand_to_mtd(chip), NAND_WRITE_DATA, buf[i]);
 }
 
 /*
diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c
index 644c91ff2734..7e9ee17a389b 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.c
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c
@@ -724,9 +724,9 @@ static int spinand_wait(struct mtd_info *mtd, struct nand_chip *chip)
 	return 0;
 }
 
-static void spinand_write_buf(struct mtd_info *mtd, const u8 *buf, int len)
+static void spinand_write_buf(struct nand_chip *chip, const u8 *buf, int len)
 {
-	struct spinand_state *state = mtd_to_state(mtd);
+	struct spinand_state *state = mtd_to_state(nand_to_mtd(chip));
 
 	memcpy(state->buf + state->buf_ptr, buf, len);
 	state->buf_ptr += len;
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index f324a82fe6a2..cd94cb3b9c2e 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1284,8 +1284,8 @@ struct nand_chip {
 	void __iomem *IO_ADDR_W;
 
 	uint8_t (*read_byte)(struct nand_chip *chip);
-	void (*write_byte)(struct mtd_info *mtd, uint8_t byte);
-	void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len);
+	void (*write_byte)(struct nand_chip *chip, uint8_t byte);
+	void (*write_buf)(struct nand_chip *chip, const uint8_t *buf, int len);
 	void (*read_buf)(struct nand_chip *chip, uint8_t *buf, int len);
 	void (*select_chip)(struct mtd_info *mtd, int chip);
 	int (*block_bad)(struct mtd_info *mtd, loff_t ofs);
-- 
cgit v1.2.3


From 758b56f58b66bebc5bc2e0e180e1904aafa2b523 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:24 +0200
Subject: mtd: rawnand: Pass a nand_chip object to chip->select_chip()

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one or
remove the mtd_info object when both are passed.

Let's tackle the chip->select_chip() hook.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/atmel/nand-controller.c     |  9 ++-
 drivers/mtd/nand/raw/au1550nd.c                  |  4 +-
 drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c |  4 +-
 drivers/mtd/nand/raw/cafe_nand.c                 |  3 +-
 drivers/mtd/nand/raw/davinci_nand.c              |  4 +-
 drivers/mtd/nand/raw/denali.c                    |  6 +-
 drivers/mtd/nand/raw/diskonchip.c                | 11 ++-
 drivers/mtd/nand/raw/docg4.c                     |  3 +-
 drivers/mtd/nand/raw/fsl_elbc_nand.c             |  2 +-
 drivers/mtd/nand/raw/fsl_ifc_nand.c              |  2 +-
 drivers/mtd/nand/raw/fsl_upm.c                   |  4 +-
 drivers/mtd/nand/raw/fsmc_nand.c                 |  4 +-
 drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c       | 20 +++---
 drivers/mtd/nand/raw/hisi504_nand.c              |  3 +-
 drivers/mtd/nand/raw/jz4740_nand.c               |  9 ++-
 drivers/mtd/nand/raw/jz4780_nand.c               |  4 +-
 drivers/mtd/nand/raw/marvell_nand.c              |  3 +-
 drivers/mtd/nand/raw/mpc5121_nfc.c               | 12 ++--
 drivers/mtd/nand/raw/mtk_nand.c                  |  5 +-
 drivers/mtd/nand/raw/mxc_nand.c                  |  8 +--
 drivers/mtd/nand/raw/nand_base.c                 | 86 ++++++++++++------------
 drivers/mtd/nand/raw/ndfc.c                      |  3 +-
 drivers/mtd/nand/raw/plat_nand.c                 | 11 +--
 drivers/mtd/nand/raw/qcom_nandc.c                |  3 +-
 drivers/mtd/nand/raw/r852.c                      |  5 +-
 drivers/mtd/nand/raw/s3c2410.c                   |  5 +-
 drivers/mtd/nand/raw/sh_flctl.c                  |  4 +-
 drivers/mtd/nand/raw/sunxi_nand.c                |  4 +-
 drivers/mtd/nand/raw/tango_nand.c                |  3 +-
 drivers/mtd/nand/raw/tegra_nand.c                |  3 +-
 drivers/mtd/nand/raw/vf610_nfc.c                 |  8 +--
 drivers/mtd/nand/raw/xway_nand.c                 |  3 +-
 drivers/staging/mt29f_spinand/mt29f_spinand.c    |  2 +-
 include/linux/mtd/rawnand.h                      |  2 +-
 34 files changed, 117 insertions(+), 145 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index 84ddede5ede4..5c8ef476ed47 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -483,9 +483,8 @@ static int atmel_nand_dev_ready(struct mtd_info *mtd)
 	return gpiod_get_value(nand->activecs->rb.gpio);
 }
 
-static void atmel_nand_select_chip(struct mtd_info *mtd, int cs)
+static void atmel_nand_select_chip(struct nand_chip *chip, int cs)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct atmel_nand *nand = to_atmel_nand(chip);
 
 	if (cs < 0 || cs >= nand->numcs) {
@@ -514,15 +513,15 @@ static int atmel_hsmc_nand_dev_ready(struct mtd_info *mtd)
 	return status & ATMEL_HSMC_NFC_SR_RBEDGE(nand->activecs->rb.id);
 }
 
-static void atmel_hsmc_nand_select_chip(struct mtd_info *mtd, int cs)
+static void atmel_hsmc_nand_select_chip(struct nand_chip *chip, int cs)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct atmel_nand *nand = to_atmel_nand(chip);
 	struct atmel_hsmc_nand_controller *nc;
 
 	nc = to_hsmc_nand_controller(chip->controller);
 
-	atmel_nand_select_chip(mtd, cs);
+	atmel_nand_select_chip(chip, cs);
 
 	if (!nand->activecs) {
 		regmap_write(nc->base.smc, ATMEL_HSMC_NFC_CTRL,
diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c
index f1cc9f672262..1bae3b2779aa 100644
--- a/drivers/mtd/nand/raw/au1550nd.c
+++ b/drivers/mtd/nand/raw/au1550nd.c
@@ -227,10 +227,10 @@ int au1550_device_ready(struct mtd_info *mtd)
  *	chip needs it to be asserted during chip not ready time but the NAND
  *	controller keeps it released.
  *
- * @mtd:	MTD device structure
+ * @this:	NAND chip object
  * @chip:	chipnumber to select, -1 for deselect
  */
-static void au1550_select_chip(struct mtd_info *mtd, int chip)
+static void au1550_select_chip(struct nand_chip *this, int chip)
 {
 }
 
diff --git a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
index 83eec2812aa0..c8e30b0308bc 100644
--- a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
@@ -191,8 +191,8 @@ static void bcm47xxnflash_ops_bcm4706_cmd_ctrl(struct mtd_info *mtd, int cmd,
 }
 
 /* Default nand_select_chip calls cmd_ctrl, which is not used in BCM4706 */
-static void bcm47xxnflash_ops_bcm4706_select_chip(struct mtd_info *mtd,
-						  int chip)
+static void bcm47xxnflash_ops_bcm4706_select_chip(struct nand_chip *chip,
+						  int cs)
 {
 	return;
 }
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index f801333161c9..e70a47aad538 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -314,9 +314,8 @@ static void cafe_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
 	cafe_writel(cafe, cafe->ctl2, NAND_CTRL2);
 }
 
-static void cafe_select_chip(struct mtd_info *mtd, int chipnr)
+static void cafe_select_chip(struct nand_chip *chip, int chipnr)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct cafe_priv *cafe = nand_get_controller_data(chip);
 
 	cafe_dev_dbg(&cafe->pdev->dev, "select_chip %d\n", chipnr);
diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index 02a2d3b05e34..85bc801424b0 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -118,9 +118,9 @@ static void nand_davinci_hwcontrol(struct mtd_info *mtd, int cmd,
 		iowrite8(cmd, nand->IO_ADDR_W);
 }
 
-static void nand_davinci_select_chip(struct mtd_info *mtd, int chip)
+static void nand_davinci_select_chip(struct nand_chip *nand, int chip)
 {
-	struct davinci_nand_info	*info = to_davinci_nand(mtd);
+	struct davinci_nand_info *info = to_davinci_nand(nand_to_mtd(nand));
 
 	info->current_cs = info->vaddr;
 
diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index 101ffa11b606..e29ec95f24de 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -897,11 +897,11 @@ static int denali_write_page(struct nand_chip *chip, const uint8_t *buf,
 				page, 0, 1);
 }
 
-static void denali_select_chip(struct mtd_info *mtd, int chip)
+static void denali_select_chip(struct nand_chip *chip, int cs)
 {
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	struct denali_nand_info *denali = mtd_to_denali(nand_to_mtd(chip));
 
-	denali->active_bank = chip;
+	denali->active_bank = cs;
 }
 
 static int denali_waitfunc(struct mtd_info *mtd, struct nand_chip *chip)
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index a1bc2f7436db..4d7b00d066fe 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -85,7 +85,7 @@ static u_char empty_write_ecc[6] = { 0x4b, 0x00, 0xe2, 0x0e, 0x93, 0xf7 };
 
 static void doc200x_hwcontrol(struct mtd_info *mtd, int cmd,
 			      unsigned int bitmask);
-static void doc200x_select_chip(struct mtd_info *mtd, int chip);
+static void doc200x_select_chip(struct nand_chip *this, int chip);
 
 static int debug = 0;
 module_param(debug, int, 0);
@@ -371,7 +371,7 @@ static uint16_t __init doc200x_ident_chip(struct mtd_info *mtd, int nr)
 	struct doc_priv *doc = nand_get_controller_data(this);
 	uint16_t ret;
 
-	doc200x_select_chip(mtd, nr);
+	doc200x_select_chip(this, nr);
 	doc200x_hwcontrol(mtd, NAND_CMD_READID,
 			  NAND_CTRL_CLE | NAND_CTRL_CHANGE);
 	doc200x_hwcontrol(mtd, 0, NAND_CTRL_ALE | NAND_CTRL_CHANGE);
@@ -559,9 +559,8 @@ static void doc2001plus_readbuf(struct nand_chip *this, u_char *buf, int len)
 		printk("\n");
 }
 
-static void doc2001plus_select_chip(struct mtd_info *mtd, int chip)
+static void doc2001plus_select_chip(struct nand_chip *this, int chip)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 	int floor = 0;
@@ -586,9 +585,9 @@ static void doc2001plus_select_chip(struct mtd_info *mtd, int chip)
 	doc->curfloor = floor;
 }
 
-static void doc200x_select_chip(struct mtd_info *mtd, int chip)
+static void doc200x_select_chip(struct nand_chip *this, int chip)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(this);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 	int floor = 0;
diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c
index 9cbe87448e77..78c1d6fd42b2 100644
--- a/drivers/mtd/nand/raw/docg4.c
+++ b/drivers/mtd/nand/raw/docg4.c
@@ -333,13 +333,12 @@ static int docg4_wait(struct mtd_info *mtd, struct nand_chip *nand)
 	return status;
 }
 
-static void docg4_select_chip(struct mtd_info *mtd, int chip)
+static void docg4_select_chip(struct nand_chip *nand, int chip)
 {
 	/*
 	 * Select among multiple cascaded chips ("floors").  Multiple floors are
 	 * not yet supported, so the only valid non-negative value is 0.
 	 */
-	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct docg4_priv *doc = nand_get_controller_data(nand);
 	void __iomem *docptr = doc->virtadr;
 
diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
index 14d246323e94..74b804a61f2d 100644
--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
@@ -533,7 +533,7 @@ static void fsl_elbc_cmdfunc(struct mtd_info *mtd, unsigned int command,
 	}
 }
 
-static void fsl_elbc_select_chip(struct mtd_info *mtd, int chip)
+static void fsl_elbc_select_chip(struct nand_chip *chip, int cs)
 {
 	/* The hardware does not seem to support multiple
 	 * chips per bank.
diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index 2e032db997a5..da846ffa3e5c 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -509,7 +509,7 @@ static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command,
 	}
 }
 
-static void fsl_ifc_select_chip(struct mtd_info *mtd, int chip)
+static void fsl_ifc_select_chip(struct nand_chip *chip, int cs)
 {
 	/* The hardware does not seem to support multiple
 	 * chips per bank.
diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c
index d3d3adcb7282..ec3553cb737a 100644
--- a/drivers/mtd/nand/raw/fsl_upm.c
+++ b/drivers/mtd/nand/raw/fsl_upm.c
@@ -108,9 +108,9 @@ static void fun_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
 		fun_wait_rnb(fun);
 }
 
-static void fun_select_chip(struct mtd_info *mtd, int mchip_nr)
+static void fun_select_chip(struct nand_chip *chip, int mchip_nr)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct fsl_upm_nand *fun = to_fsl_upm_nand(mtd);
 
 	if (mchip_nr == -1) {
diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c
index 5fc036c89cc8..15bf533c907a 100644
--- a/drivers/mtd/nand/raw/fsmc_nand.c
+++ b/drivers/mtd/nand/raw/fsmc_nand.c
@@ -610,9 +610,9 @@ static void fsmc_write_buf_dma(struct mtd_info *mtd, const uint8_t *buf,
 }
 
 /* fsmc_select_chip - assert or deassert nCE */
-static void fsmc_select_chip(struct mtd_info *mtd, int chipnr)
+static void fsmc_select_chip(struct nand_chip *chip, int chipnr)
 {
-	struct fsmc_nand_data *host = mtd_to_fsmc(mtd);
+	struct fsmc_nand_data *host = mtd_to_fsmc(nand_to_mtd(chip));
 	u32 pc;
 
 	/* Support only one CS */
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index bd1b8445b358..f5f1aebf0d64 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -825,9 +825,8 @@ static int gpmi_dev_ready(struct mtd_info *mtd)
 	return gpmi_is_ready(this, this->current_chip);
 }
 
-static void gpmi_select_chip(struct mtd_info *mtd, int chipnr)
+static void gpmi_select_chip(struct nand_chip *chip, int chipnr)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct gpmi_nand_data *this = nand_get_controller_data(chip);
 	int ret;
 
@@ -1552,7 +1551,7 @@ static int gpmi_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	int column, page, chipnr;
 
 	chipnr = (int)(ofs >> chip->chip_shift);
-	chip->select_chip(mtd, chipnr);
+	chip->select_chip(chip, chipnr);
 
 	column = !GPMI_IS_MX23(this) ? mtd->writesize : 0;
 
@@ -1565,7 +1564,7 @@ static int gpmi_block_markbad(struct mtd_info *mtd, loff_t ofs)
 
 	ret = nand_prog_page_op(chip, page, column, block_mark, 1);
 
-	chip->select_chip(mtd, -1);
+	chip->select_chip(chip, -1);
 
 	return ret;
 }
@@ -1602,7 +1601,6 @@ static int mx23_check_transcription_stamp(struct gpmi_nand_data *this)
 	struct boot_rom_geometry *rom_geo = &this->rom_geometry;
 	struct device *dev = this->dev;
 	struct nand_chip *chip = &this->nand;
-	struct mtd_info *mtd = nand_to_mtd(chip);
 	unsigned int search_area_size_in_strides;
 	unsigned int stride;
 	unsigned int page;
@@ -1614,7 +1612,7 @@ static int mx23_check_transcription_stamp(struct gpmi_nand_data *this)
 	search_area_size_in_strides = 1 << rom_geo->search_area_stride_exponent;
 
 	saved_chip_number = this->current_chip;
-	chip->select_chip(mtd, 0);
+	chip->select_chip(chip, 0);
 
 	/*
 	 * Loop through the first search area, looking for the NCB fingerprint.
@@ -1642,7 +1640,7 @@ static int mx23_check_transcription_stamp(struct gpmi_nand_data *this)
 
 	}
 
-	chip->select_chip(mtd, saved_chip_number);
+	chip->select_chip(chip, saved_chip_number);
 
 	if (found_an_ncb_fingerprint)
 		dev_dbg(dev, "\tFound a fingerprint\n");
@@ -1685,7 +1683,7 @@ static int mx23_write_transcription_stamp(struct gpmi_nand_data *this)
 
 	/* Select chip 0. */
 	saved_chip_number = this->current_chip;
-	chip->select_chip(mtd, 0);
+	chip->select_chip(chip, 0);
 
 	/* Loop over blocks in the first search area, erasing them. */
 	dev_dbg(dev, "Erasing the search area...\n");
@@ -1717,7 +1715,7 @@ static int mx23_write_transcription_stamp(struct gpmi_nand_data *this)
 	}
 
 	/* Deselect chip 0. */
-	chip->select_chip(mtd, saved_chip_number);
+	chip->select_chip(chip, saved_chip_number);
 	return 0;
 }
 
@@ -1766,10 +1764,10 @@ static int mx23_boot_init(struct gpmi_nand_data  *this)
 		byte = block <<  chip->phys_erase_shift;
 
 		/* Send the command to read the conventional block mark. */
-		chip->select_chip(mtd, chipnr);
+		chip->select_chip(chip, chipnr);
 		nand_read_page_op(chip, page, mtd->writesize, NULL, 0);
 		block_mark = chip->read_byte(chip);
-		chip->select_chip(mtd, -1);
+		chip->select_chip(chip, -1);
 
 		/*
 		 * Check if the block is marked bad. If so, we need to mark it
diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c
index b4e5bfd30022..86dd7b54159d 100644
--- a/drivers/mtd/nand/raw/hisi504_nand.c
+++ b/drivers/mtd/nand/raw/hisi504_nand.c
@@ -353,9 +353,8 @@ static int hisi_nfc_send_cmd_reset(struct hinfc_host *host, int chipselect)
 	return 0;
 }
 
-static void hisi_nfc_select_chip(struct mtd_info *mtd, int chipselect)
+static void hisi_nfc_select_chip(struct nand_chip *chip, int chipselect)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct hinfc_host *host = nand_get_controller_data(chip);
 
 	if (chipselect < 0)
diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c
index e926ed6ed296..b6e68048b83d 100644
--- a/drivers/mtd/nand/raw/jz4740_nand.c
+++ b/drivers/mtd/nand/raw/jz4740_nand.c
@@ -78,10 +78,9 @@ static inline struct jz_nand *mtd_to_jz_nand(struct mtd_info *mtd)
 	return container_of(mtd_to_nand(mtd), struct jz_nand, chip);
 }
 
-static void jz_nand_select_chip(struct mtd_info *mtd, int chipnr)
+static void jz_nand_select_chip(struct nand_chip *chip, int chipnr)
 {
-	struct jz_nand *nand = mtd_to_jz_nand(mtd);
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct jz_nand *nand = mtd_to_jz_nand(nand_to_mtd(chip));
 	uint32_t ctrl;
 	int banknr;
 
@@ -336,14 +335,14 @@ static int jz_nand_detect_bank(struct platform_device *pdev,
 			goto notfound_id;
 
 		/* Retrieve the IDs from the first chip. */
-		chip->select_chip(mtd, 0);
+		chip->select_chip(chip, 0);
 		nand_reset_op(chip);
 		nand_readid_op(chip, 0, id, sizeof(id));
 		*nand_maf_id = id[0];
 		*nand_dev_id = id[1];
 	} else {
 		/* Detect additional chip. */
-		chip->select_chip(mtd, chipnr);
+		chip->select_chip(chip, chipnr);
 		nand_reset_op(chip);
 		nand_readid_op(chip, 0, id, sizeof(id));
 		if (*nand_maf_id != id[0] || *nand_dev_id != id[1]) {
diff --git a/drivers/mtd/nand/raw/jz4780_nand.c b/drivers/mtd/nand/raw/jz4780_nand.c
index 42c5dcdea4a9..29e597b0ca59 100644
--- a/drivers/mtd/nand/raw/jz4780_nand.c
+++ b/drivers/mtd/nand/raw/jz4780_nand.c
@@ -71,9 +71,9 @@ static inline struct jz4780_nand_controller
 	return container_of(ctrl, struct jz4780_nand_controller, controller);
 }
 
-static void jz4780_nand_select_chip(struct mtd_info *mtd, int chipnr)
+static void jz4780_nand_select_chip(struct nand_chip *chip, int chipnr)
 {
-	struct jz4780_nand_chip *nand = to_jz4780_nand_chip(mtd);
+	struct jz4780_nand_chip *nand = to_jz4780_nand_chip(nand_to_mtd(chip));
 	struct jz4780_nand_controller *nfc = to_jz4780_nand_controller(nand->chip.controller);
 	struct jz4780_nand_cs *cs;
 
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index 5f5709c2e58e..1f4e75f8fa3e 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -701,9 +701,8 @@ static int marvell_nfc_wait_op(struct nand_chip *chip, unsigned int timeout_ms)
 	return 0;
 }
 
-static void marvell_nfc_select_chip(struct mtd_info *mtd, int die_nr)
+static void marvell_nfc_select_chip(struct nand_chip *chip, int die_nr)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct marvell_nand_chip *marvell_nand = to_marvell_nand(chip);
 	struct marvell_nfc *nfc = to_marvell_nfc(chip->controller);
 	u32 ndcr_generic;
diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c
index dc573a0b5fe1..c2002c4d467b 100644
--- a/drivers/mtd/nand/raw/mpc5121_nfc.c
+++ b/drivers/mtd/nand/raw/mpc5121_nfc.c
@@ -263,8 +263,10 @@ static void mpc5121_nfc_addr_cycle(struct mtd_info *mtd, int column, int page)
 }
 
 /* Control chip select signals */
-static void mpc5121_nfc_select_chip(struct mtd_info *mtd, int chip)
+static void mpc5121_nfc_select_chip(struct nand_chip *nand, int chip)
 {
+	struct mtd_info *mtd = nand_to_mtd(nand);
+
 	if (chip < 0) {
 		nfc_clear(mtd, NFC_CONFIG1, NFC_CE);
 		return;
@@ -299,9 +301,9 @@ static int ads5121_chipselect_init(struct mtd_info *mtd)
 }
 
 /* Control chips select signal on ADS5121 board */
-static void ads5121_select_chip(struct mtd_info *mtd, int chip)
+static void ads5121_select_chip(struct nand_chip *nand, int chip)
 {
-	struct nand_chip *nand = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(nand);
 	struct mpc5121_nfc_prv *prv = nand_get_controller_data(nand);
 	u8 v;
 
@@ -309,10 +311,10 @@ static void ads5121_select_chip(struct mtd_info *mtd, int chip)
 	v |= 0x0F;
 
 	if (chip >= 0) {
-		mpc5121_nfc_select_chip(mtd, 0);
+		mpc5121_nfc_select_chip(nand, 0);
 		v &= ~(1 << chip);
 	} else
-		mpc5121_nfc_select_chip(mtd, -1);
+		mpc5121_nfc_select_chip(nand, -1);
 
 	out_8(prv->csreg, v);
 }
diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c
index bd2002a1fabd..6e5d4afd6b1a 100644
--- a/drivers/mtd/nand/raw/mtk_nand.c
+++ b/drivers/mtd/nand/raw/mtk_nand.c
@@ -389,16 +389,15 @@ static int mtk_nfc_hw_runtime_config(struct mtd_info *mtd)
 	return 0;
 }
 
-static void mtk_nfc_select_chip(struct mtd_info *mtd, int chip)
+static void mtk_nfc_select_chip(struct nand_chip *nand, int chip)
 {
-	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct mtk_nfc *nfc = nand_get_controller_data(nand);
 	struct mtk_nfc_nand_chip *mtk_nand = to_mtk_nand(nand);
 
 	if (chip < 0)
 		return;
 
-	mtk_nfc_hw_runtime_config(mtd);
+	mtk_nfc_hw_runtime_config(nand_to_mtd(nand));
 
 	nfi_writel(nfc, mtk_nand->sels[chip], NFI_CSEL);
 }
diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index d5d8f8c16b60..d070ce461b69 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -136,7 +136,7 @@ struct mxc_nand_devtype_data {
 	void (*irq_control)(struct mxc_nand_host *, int);
 	u32 (*get_ecc_status)(struct mxc_nand_host *);
 	const struct mtd_ooblayout_ops *ooblayout;
-	void (*select_chip)(struct mtd_info *mtd, int chip);
+	void (*select_chip)(struct nand_chip *chip, int cs);
 	int (*setup_data_interface)(struct mtd_info *mtd, int csline,
 				    const struct nand_data_interface *conf);
 	void (*enable_hwecc)(struct nand_chip *chip, bool enable);
@@ -957,9 +957,8 @@ static void mxc_nand_read_buf(struct nand_chip *nand_chip, u_char *buf,
 
 /* This function is used by upper layer for select and
  * deselect of the NAND chip */
-static void mxc_nand_select_chip_v1_v3(struct mtd_info *mtd, int chip)
+static void mxc_nand_select_chip_v1_v3(struct nand_chip *nand_chip, int chip)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
 	struct mxc_nand_host *host = nand_get_controller_data(nand_chip);
 
 	if (chip == -1) {
@@ -978,9 +977,8 @@ static void mxc_nand_select_chip_v1_v3(struct mtd_info *mtd, int chip)
 	}
 }
 
-static void mxc_nand_select_chip_v2(struct mtd_info *mtd, int chip)
+static void mxc_nand_select_chip_v2(struct nand_chip *nand_chip, int chip)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
 	struct mxc_nand_host *host = nand_get_controller_data(nand_chip);
 
 	if (chip == -1) {
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 6c20c0b805a3..3d3e3c704a5a 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -277,18 +277,17 @@ static uint8_t nand_read_byte16(struct nand_chip *chip)
 
 /**
  * nand_select_chip - [DEFAULT] control CE line
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @chipnr: chipnumber to select, -1 for deselect
  *
  * Default select function for 1 chip devices.
  */
-static void nand_select_chip(struct mtd_info *mtd, int chipnr)
+static void nand_select_chip(struct nand_chip *chip, int chipnr)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
 	switch (chipnr) {
 	case -1:
-		chip->cmd_ctrl(mtd, NAND_CMD_NONE, 0 | NAND_CTRL_CHANGE);
+		chip->cmd_ctrl(nand_to_mtd(chip), NAND_CMD_NONE,
+			       0 | NAND_CTRL_CHANGE);
 		break;
 	case 0:
 		break;
@@ -1261,10 +1260,10 @@ static int nand_setup_data_interface(struct nand_chip *chip, int chipnr)
 
 	/* Change the mode on the chip side (if supported by the NAND chip) */
 	if (nand_supports_set_features(chip, ONFI_FEATURE_ADDR_TIMING_MODE)) {
-		chip->select_chip(mtd, chipnr);
+		chip->select_chip(chip, chipnr);
 		ret = nand_set_features(chip, ONFI_FEATURE_ADDR_TIMING_MODE,
 					tmode_param);
-		chip->select_chip(mtd, -1);
+		chip->select_chip(chip, -1);
 		if (ret)
 			return ret;
 	}
@@ -1279,10 +1278,10 @@ static int nand_setup_data_interface(struct nand_chip *chip, int chipnr)
 		return 0;
 
 	memset(tmode_param, 0, ONFI_SUBFEATURE_PARAM_LEN);
-	chip->select_chip(mtd, chipnr);
+	chip->select_chip(chip, chipnr);
 	ret = nand_get_features(chip, ONFI_FEATURE_ADDR_TIMING_MODE,
 				tmode_param);
-	chip->select_chip(mtd, -1);
+	chip->select_chip(chip, -1);
 	if (ret)
 		goto err_reset_chip;
 
@@ -1300,9 +1299,9 @@ err_reset_chip:
 	 * timing mode.
 	 */
 	nand_reset_data_interface(chip, chipnr);
-	chip->select_chip(mtd, chipnr);
+	chip->select_chip(chip, chipnr);
 	nand_reset_op(chip);
-	chip->select_chip(mtd, -1);
+	chip->select_chip(chip, -1);
 
 	return ret;
 }
@@ -2794,7 +2793,6 @@ EXPORT_SYMBOL_GPL(nand_subop_get_data_len);
  */
 int nand_reset(struct nand_chip *chip, int chipnr)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct nand_data_interface saved_data_intf = chip->data_interface;
 	int ret;
 
@@ -2806,9 +2804,9 @@ int nand_reset(struct nand_chip *chip, int chipnr)
 	 * The CS line has to be released before we can apply the new NAND
 	 * interface settings, hence this weird ->select_chip() dance.
 	 */
-	chip->select_chip(mtd, chipnr);
+	chip->select_chip(chip, chipnr);
 	ret = nand_reset_op(chip);
-	chip->select_chip(mtd, -1);
+	chip->select_chip(chip, -1);
 	if (ret)
 		return ret;
 
@@ -3553,7 +3551,7 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
 	bool ecc_fail = false;
 
 	chipnr = (int)(from >> chip->chip_shift);
-	chip->select_chip(mtd, chipnr);
+	chip->select_chip(chip, chipnr);
 
 	realpage = (int)(from >> chip->page_shift);
 	page = realpage & chip->pagemask;
@@ -3684,11 +3682,11 @@ read_retry:
 		/* Check, if we cross a chip boundary */
 		if (!page) {
 			chipnr++;
-			chip->select_chip(mtd, -1);
-			chip->select_chip(mtd, chipnr);
+			chip->select_chip(chip, -1);
+			chip->select_chip(chip, chipnr);
 		}
 	}
-	chip->select_chip(mtd, -1);
+	chip->select_chip(chip, -1);
 
 	ops->retlen = ops->len - (size_t) readlen;
 	if (oob)
@@ -3887,7 +3885,7 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from,
 	len = mtd_oobavail(mtd, ops);
 
 	chipnr = (int)(from >> chip->chip_shift);
-	chip->select_chip(mtd, chipnr);
+	chip->select_chip(chip, chipnr);
 
 	/* Shift to get page */
 	realpage = (int)(from >> chip->page_shift);
@@ -3920,11 +3918,11 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from,
 		/* Check, if we cross a chip boundary */
 		if (!page) {
 			chipnr++;
-			chip->select_chip(mtd, -1);
-			chip->select_chip(mtd, chipnr);
+			chip->select_chip(chip, -1);
+			chip->select_chip(chip, chipnr);
 		}
 	}
-	chip->select_chip(mtd, -1);
+	chip->select_chip(chip, -1);
 
 	ops->oobretlen = ops->ooblen - readlen;
 
@@ -4406,7 +4404,7 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 	column = to & (mtd->writesize - 1);
 
 	chipnr = (int)(to >> chip->chip_shift);
-	chip->select_chip(mtd, chipnr);
+	chip->select_chip(chip, chipnr);
 
 	/* Check, if it is write protected */
 	if (nand_check_wp(mtd)) {
@@ -4482,8 +4480,8 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 		/* Check, if we cross a chip boundary */
 		if (!page) {
 			chipnr++;
-			chip->select_chip(mtd, -1);
-			chip->select_chip(mtd, chipnr);
+			chip->select_chip(chip, -1);
+			chip->select_chip(chip, chipnr);
 		}
 	}
 
@@ -4492,7 +4490,7 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 		ops->oobretlen = ops->ooblen;
 
 err_out:
-	chip->select_chip(mtd, -1);
+	chip->select_chip(chip, -1);
 	return ret;
 }
 
@@ -4518,7 +4516,7 @@ static int panic_nand_write(struct mtd_info *mtd, loff_t to, size_t len,
 	/* Grab the device */
 	panic_nand_get_device(chip, mtd, FL_WRITING);
 
-	chip->select_chip(mtd, chipnr);
+	chip->select_chip(chip, chipnr);
 
 	/* Wait for the device to get ready */
 	panic_nand_wait(mtd, chip, 400);
@@ -4570,14 +4568,14 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to,
 	 */
 	nand_reset(chip, chipnr);
 
-	chip->select_chip(mtd, chipnr);
+	chip->select_chip(chip, chipnr);
 
 	/* Shift to get page */
 	page = (int)(to >> chip->page_shift);
 
 	/* Check, if it is write protected */
 	if (nand_check_wp(mtd)) {
-		chip->select_chip(mtd, -1);
+		chip->select_chip(chip, -1);
 		return -EROFS;
 	}
 
@@ -4592,7 +4590,7 @@ static int nand_do_write_oob(struct mtd_info *mtd, loff_t to,
 	else
 		status = chip->ecc.write_oob(chip, page & chip->pagemask);
 
-	chip->select_chip(mtd, -1);
+	chip->select_chip(chip, -1);
 
 	if (status)
 		return status;
@@ -4700,7 +4698,7 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 	pages_per_block = 1 << (chip->phys_erase_shift - chip->page_shift);
 
 	/* Select the NAND device */
-	chip->select_chip(mtd, chipnr);
+	chip->select_chip(chip, chipnr);
 
 	/* Check, if it is write protected */
 	if (nand_check_wp(mtd)) {
@@ -4750,8 +4748,8 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 		/* Check, if we cross a chip boundary */
 		if (len && !(page & chip->pagemask)) {
 			chipnr++;
-			chip->select_chip(mtd, -1);
-			chip->select_chip(mtd, chipnr);
+			chip->select_chip(chip, -1);
+			chip->select_chip(chip, chipnr);
 		}
 	}
 
@@ -4759,7 +4757,7 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 erase_exit:
 
 	/* Deselect and wake up anyone waiting on the device */
-	chip->select_chip(mtd, -1);
+	chip->select_chip(chip, -1);
 	nand_release_device(mtd);
 
 	/* Return more or less happy */
@@ -4795,11 +4793,11 @@ static int nand_block_isbad(struct mtd_info *mtd, loff_t offs)
 
 	/* Select the NAND device */
 	nand_get_device(mtd, FL_READING);
-	chip->select_chip(mtd, chipnr);
+	chip->select_chip(chip, chipnr);
 
 	ret = nand_block_checkbad(mtd, offs, 0);
 
-	chip->select_chip(mtd, -1);
+	chip->select_chip(chip, -1);
 	nand_release_device(mtd);
 
 	return ret;
@@ -5626,7 +5624,7 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
 		return ret;
 
 	/* Select the device */
-	chip->select_chip(mtd, 0);
+	chip->select_chip(chip, 0);
 
 	/* Send the command for reading device ID */
 	ret = nand_readid_op(chip, 0, id_data, 2);
@@ -5986,14 +5984,14 @@ static int nand_scan_ident(struct nand_chip *chip, int maxchips,
 	if (ret) {
 		if (!(chip->options & NAND_SCAN_SILENT_NODEV))
 			pr_warn("No NAND device found\n");
-		chip->select_chip(mtd, -1);
+		chip->select_chip(chip, -1);
 		return ret;
 	}
 
 	nand_maf_id = chip->id.data[0];
 	nand_dev_id = chip->id.data[1];
 
-	chip->select_chip(mtd, -1);
+	chip->select_chip(chip, -1);
 
 	/* Check for a chip array */
 	for (i = 1; i < maxchips; i++) {
@@ -6002,15 +6000,15 @@ static int nand_scan_ident(struct nand_chip *chip, int maxchips,
 		/* See comment in nand_get_flash_type for reset */
 		nand_reset(chip, i);
 
-		chip->select_chip(mtd, i);
+		chip->select_chip(chip, i);
 		/* Send the command for reading device ID */
 		nand_readid_op(chip, 0, id, sizeof(id));
 		/* Read manufacturer and device IDs */
 		if (nand_maf_id != id[0] || nand_dev_id != id[1]) {
-			chip->select_chip(mtd, -1);
+			chip->select_chip(chip, -1);
 			break;
 		}
-		chip->select_chip(mtd, -1);
+		chip->select_chip(chip, -1);
 	}
 	if (i > 1)
 		pr_info("%d chips detected\n", i);
@@ -6432,9 +6430,9 @@ static int nand_scan_tail(struct nand_chip *chip)
 	 * to explictly select the relevant die when interacting with the NAND
 	 * chip.
 	 */
-	chip->select_chip(mtd, 0);
+	chip->select_chip(chip, 0);
 	ret = nand_manufacturer_init(chip);
-	chip->select_chip(mtd, -1);
+	chip->select_chip(chip, -1);
 	if (ret)
 		goto err_free_buf;
 
diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index 02b102addeb5..addcc736ae1d 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -44,10 +44,9 @@ struct ndfc_controller {
 
 static struct ndfc_controller ndfc_ctrl[NDFC_MAX_CS];
 
-static void ndfc_select_chip(struct mtd_info *mtd, int chip)
+static void ndfc_select_chip(struct nand_chip *nchip, int chip)
 {
 	uint32_t ccr;
-	struct nand_chip *nchip = mtd_to_nand(mtd);
 	struct ndfc_controller *ndfc = nand_get_controller_data(nchip);
 
 	ccr = in_be32(ndfc->ndfcbase + NDFC_CCR);
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index adfc3f50e8d5..dd9e241b7584 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -37,13 +37,6 @@ static int plat_nand_dev_ready(struct mtd_info *mtd)
 	return pdata->ctrl.dev_ready(mtd_to_nand(mtd));
 }
 
-static void plat_nand_select_chip(struct mtd_info *mtd, int cs)
-{
-	struct platform_nand_data *pdata = dev_get_platdata(mtd->dev.parent);
-
-	pdata->ctrl.select_chip(mtd_to_nand(mtd), cs);
-}
-
 /*
  * Probe for the NAND device.
  */
@@ -90,9 +83,7 @@ static int plat_nand_probe(struct platform_device *pdev)
 	if (pdata->ctrl.dev_ready)
 		data->chip.dev_ready = plat_nand_dev_ready;
 
-	if (pdata->ctrl.select_chip)
-		data->chip.select_chip = plat_nand_select_chip;
-
+	data->chip.select_chip = pdata->ctrl.select_chip;
 	data->chip.write_buf = pdata->ctrl.write_buf;
 	data->chip.read_buf = pdata->ctrl.read_buf;
 	data->chip.chip_delay = pdata->chip.chip_delay;
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index af4908e26766..626c9ab8c8db 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -2324,9 +2324,8 @@ static void qcom_nandc_write_buf(struct nand_chip *chip, const uint8_t *buf,
 }
 
 /* we support only one external chip for now */
-static void qcom_nandc_select_chip(struct mtd_info *mtd, int chipnr)
+static void qcom_nandc_select_chip(struct nand_chip *chip, int chipnr)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
 
 	if (chipnr <= 0)
diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c
index 19f49ca1ed5b..312a971aa456 100644
--- a/drivers/mtd/nand/raw/r852.c
+++ b/drivers/mtd/nand/raw/r852.c
@@ -1026,7 +1026,6 @@ static int r852_suspend(struct device *device)
 static int r852_resume(struct device *device)
 {
 	struct r852_device *dev = pci_get_drvdata(to_pci_dev(device));
-	struct mtd_info *mtd = nand_to_mtd(dev->chip);
 
 	r852_disable_irqs(dev);
 	r852_card_update_present(dev);
@@ -1046,9 +1045,9 @@ static int r852_resume(struct device *device)
 	/* Otherwise, initialize the card */
 	if (dev->card_registred) {
 		r852_engine_enable(dev);
-		dev->chip->select_chip(mtd, 0);
+		dev->chip->select_chip(dev->chip, 0);
 		nand_reset_op(dev->chip);
-		dev->chip->select_chip(mtd, -1);
+		dev->chip->select_chip(dev->chip, -1);
 	}
 
 	/* Program card detection IRQ */
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index a420a84eaf51..353011e7fb79 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -404,7 +404,7 @@ static int s3c2410_nand_inithw(struct s3c2410_nand_info *info)
 
 /**
  * s3c2410_nand_select_chip - select the given nand chip
- * @mtd: The MTD instance for this chip.
+ * @this: NAND chip object.
  * @chip: The chip number.
  *
  * This is called by the MTD layer to either select a given chip for the
@@ -415,11 +415,10 @@ static int s3c2410_nand_inithw(struct s3c2410_nand_info *info)
  * platform specific selection code is called to route nFCE to the specific
  * chip.
  */
-static void s3c2410_nand_select_chip(struct mtd_info *mtd, int chip)
+static void s3c2410_nand_select_chip(struct nand_chip *this, int chip)
 {
 	struct s3c2410_nand_info *info;
 	struct s3c2410_nand_mtd *nmtd;
-	struct nand_chip *this = mtd_to_nand(mtd);
 	unsigned long cur;
 
 	nmtd = nand_get_controller_data(this);
diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c
index 742b7eb82ab7..e2a4939971b5 100644
--- a/drivers/mtd/nand/raw/sh_flctl.c
+++ b/drivers/mtd/nand/raw/sh_flctl.c
@@ -926,9 +926,9 @@ runtime_exit:
 	return;
 }
 
-static void flctl_select_chip(struct mtd_info *mtd, int chipnr)
+static void flctl_select_chip(struct nand_chip *chip, int chipnr)
 {
-	struct sh_flctl *flctl = mtd_to_flctl(mtd);
+	struct sh_flctl *flctl = mtd_to_flctl(nand_to_mtd(chip));
 	int ret;
 
 	switch (chipnr) {
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index 80d9d2f8f5de..97a0666df615 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -420,9 +420,9 @@ static int sunxi_nfc_dev_ready(struct mtd_info *mtd)
 	return !!(readl(nfc->regs + NFC_REG_ST) & mask);
 }
 
-static void sunxi_nfc_select_chip(struct mtd_info *mtd, int chip)
+static void sunxi_nfc_select_chip(struct nand_chip *nand, int chip)
 {
-	struct nand_chip *nand = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(nand);
 	struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand);
 	struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller);
 	struct sunxi_nand_chip_sel *sel;
diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c
index 7fc95c6980a7..5e0bc2993e5d 100644
--- a/drivers/mtd/nand/raw/tango_nand.c
+++ b/drivers/mtd/nand/raw/tango_nand.c
@@ -156,9 +156,8 @@ static void tango_write_buf(struct nand_chip *chip, const u8 *buf, int len)
 	iowrite8_rep(tchip->base + PBUS_DATA, buf, len);
 }
 
-static void tango_select_chip(struct mtd_info *mtd, int idx)
+static void tango_select_chip(struct nand_chip *chip, int idx)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct tango_nfc *nfc = to_tango_nfc(chip->controller);
 	struct tango_chip *tchip = to_tango_chip(chip);
 
diff --git a/drivers/mtd/nand/raw/tegra_nand.c b/drivers/mtd/nand/raw/tegra_nand.c
index df8e78814a08..1088741eed1d 100644
--- a/drivers/mtd/nand/raw/tegra_nand.c
+++ b/drivers/mtd/nand/raw/tegra_nand.c
@@ -462,9 +462,8 @@ static int tegra_nand_exec_op(struct nand_chip *chip,
 				      check_only);
 }
 
-static void tegra_nand_select_chip(struct mtd_info *mtd, int die_nr)
+static void tegra_nand_select_chip(struct nand_chip *chip, int die_nr)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct tegra_nand_chip *nand = to_tegra_chip(chip);
 	struct tegra_nand_controller *ctrl = to_tegra_ctrl(chip->controller);
 
diff --git a/drivers/mtd/nand/raw/vf610_nfc.c b/drivers/mtd/nand/raw/vf610_nfc.c
index bce6f6769cd6..9814fd4a84cf 100644
--- a/drivers/mtd/nand/raw/vf610_nfc.c
+++ b/drivers/mtd/nand/raw/vf610_nfc.c
@@ -498,9 +498,9 @@ static int vf610_nfc_exec_op(struct nand_chip *chip,
 /*
  * This function supports Vybrid only (MPC5125 would have full RB and four CS)
  */
-static void vf610_nfc_select_chip(struct mtd_info *mtd, int chip)
+static void vf610_nfc_select_chip(struct nand_chip *chip, int cs)
 {
-	struct vf610_nfc *nfc = mtd_to_nfc(mtd);
+	struct vf610_nfc *nfc = mtd_to_nfc(nand_to_mtd(chip));
 	u32 tmp = vf610_nfc_read(nfc, NFC_ROW_ADDR);
 
 	/* Vybrid only (MPC5125 would have full RB and four CS) */
@@ -509,9 +509,9 @@ static void vf610_nfc_select_chip(struct mtd_info *mtd, int chip)
 
 	tmp &= ~(ROW_ADDR_CHIP_SEL_RB_MASK | ROW_ADDR_CHIP_SEL_MASK);
 
-	if (chip >= 0) {
+	if (cs >= 0) {
 		tmp |= 1 << ROW_ADDR_CHIP_SEL_RB_SHIFT;
-		tmp |= BIT(chip) << ROW_ADDR_CHIP_SEL_SHIFT;
+		tmp |= BIT(cs) << ROW_ADDR_CHIP_SEL_SHIFT;
 	}
 
 	vf610_nfc_write(nfc, NFC_ROW_ADDR, tmp);
diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c
index 77759f27d154..a6388fa1dce7 100644
--- a/drivers/mtd/nand/raw/xway_nand.c
+++ b/drivers/mtd/nand/raw/xway_nand.c
@@ -85,9 +85,8 @@ static void xway_writeb(struct mtd_info *mtd, int op, u8 value)
 	writeb(value, data->nandaddr + op);
 }
 
-static void xway_select_chip(struct mtd_info *mtd, int select)
+static void xway_select_chip(struct nand_chip *chip, int select)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct xway_nand_data *data = nand_get_controller_data(chip);
 
 	switch (select) {
diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c
index 7e9ee17a389b..c0df8b6ab19b 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.c
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c
@@ -681,7 +681,7 @@ static int spinand_read_page_hwecc(struct nand_chip *chip, u8 *buf,
 }
 #endif
 
-static void spinand_select_chip(struct mtd_info *mtd, int dev)
+static void spinand_select_chip(struct nand_chip *chip, int dev)
 {
 }
 
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index cd94cb3b9c2e..65a25e89b426 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1287,7 +1287,7 @@ struct nand_chip {
 	void (*write_byte)(struct nand_chip *chip, uint8_t byte);
 	void (*write_buf)(struct nand_chip *chip, const uint8_t *buf, int len);
 	void (*read_buf)(struct nand_chip *chip, uint8_t *buf, int len);
-	void (*select_chip)(struct mtd_info *mtd, int chip);
+	void (*select_chip)(struct nand_chip *chip, int cs);
 	int (*block_bad)(struct mtd_info *mtd, loff_t ofs);
 	int (*block_markbad)(struct mtd_info *mtd, loff_t ofs);
 	void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl);
-- 
cgit v1.2.3


From c17556f545c0283f53561c8a38d5cd4e91a35fe5 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:25 +0200
Subject: mtd: rawnand: Pass a nand_chip object to chip->block_xxx() hooks

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one or
remove the mtd_info object when both are passed.

Let's tackle all chip->block_xxx() hooks at once.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/cafe_nand.c           |  2 +-
 drivers/mtd/nand/raw/diskonchip.c          |  2 +-
 drivers/mtd/nand/raw/docg4.c               |  6 +++---
 drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c |  6 +++---
 drivers/mtd/nand/raw/nand_base.c           | 16 ++++++++--------
 drivers/mtd/nand/raw/nand_bbt.c            |  3 +--
 drivers/mtd/nand/raw/qcom_nandc.c          |  7 +++----
 drivers/mtd/nand/raw/sm_common.c           |  3 ++-
 include/linux/mtd/rawnand.h                |  4 ++--
 9 files changed, 24 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index e70a47aad538..af6870269f9c 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -546,7 +546,7 @@ static int cafe_nand_write_page_lowlevel(struct nand_chip *chip,
 	return nand_prog_page_end_op(chip);
 }
 
-static int cafe_nand_block_bad(struct mtd_info *mtd, loff_t ofs)
+static int cafe_nand_block_bad(struct nand_chip *chip, loff_t ofs)
 {
 	return 0;
 }
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 4d7b00d066fe..9cbcf020cabe 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -777,7 +777,7 @@ static int doc200x_dev_ready(struct mtd_info *mtd)
 	}
 }
 
-static int doc200x_block_bad(struct mtd_info *mtd, loff_t ofs)
+static int doc200x_block_bad(struct nand_chip *this, loff_t ofs)
 {
 	/* This is our last resort if we couldn't find or create a BBT.  Just
 	   pretend all blocks are good. */
diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c
index 78c1d6fd42b2..9e6255408d49 100644
--- a/drivers/mtd/nand/raw/docg4.c
+++ b/drivers/mtd/nand/raw/docg4.c
@@ -1102,7 +1102,7 @@ static int __init read_factory_bbt(struct mtd_info *mtd)
 	return 0;
 }
 
-static int docg4_block_markbad(struct mtd_info *mtd, loff_t ofs)
+static int docg4_block_markbad(struct nand_chip *nand, loff_t ofs)
 {
 	/*
 	 * Mark a block as bad.  Bad blocks are marked in the oob area of the
@@ -1115,7 +1115,7 @@ static int docg4_block_markbad(struct mtd_info *mtd, loff_t ofs)
 
 	int ret, i;
 	uint8_t *buf;
-	struct nand_chip *nand = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(nand);
 	struct docg4_priv *doc = nand_get_controller_data(nand);
 	struct nand_bbt_descr *bbtd = nand->badblock_pattern;
 	int page = (int)(ofs >> nand->page_shift);
@@ -1147,7 +1147,7 @@ static int docg4_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	return ret;
 }
 
-static int docg4_block_neverbad(struct mtd_info *mtd, loff_t ofs)
+static int docg4_block_neverbad(struct nand_chip *nand, loff_t ofs)
 {
 	/* only called when module_param ignore_badblocks is set */
 	return 0;
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index f5f1aebf0d64..2dce9b62ebe7 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -1542,9 +1542,9 @@ static int gpmi_ecc_write_oob_raw(struct nand_chip *chip, int page)
 	return gpmi_ecc_write_page_raw(chip, NULL, 1, page);
 }
 
-static int gpmi_block_markbad(struct mtd_info *mtd, loff_t ofs)
+static int gpmi_block_markbad(struct nand_chip *chip, loff_t ofs)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct gpmi_nand_data *this = nand_get_controller_data(chip);
 	int ret = 0;
 	uint8_t *block_mark;
@@ -1776,7 +1776,7 @@ static int mx23_boot_init(struct gpmi_nand_data  *this)
 		 */
 		if (block_mark != 0xff) {
 			dev_dbg(dev, "Transcribing mark in block %u\n", block);
-			ret = chip->block_markbad(mtd, byte);
+			ret = chip->block_markbad(chip, byte);
 			if (ret)
 				dev_err(dev,
 					"Failed to mark block bad with ret %d\n",
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 3d3e3c704a5a..add85235497e 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -398,15 +398,15 @@ static void nand_read_buf16(struct nand_chip *chip, uint8_t *buf, int len)
 
 /**
  * nand_block_bad - [DEFAULT] Read bad block marker from the chip
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @ofs: offset from device start
  *
  * Check, if the block is bad.
  */
-static int nand_block_bad(struct mtd_info *mtd, loff_t ofs)
+static int nand_block_bad(struct nand_chip *chip, loff_t ofs)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int page, page_end, res;
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	u8 bad;
 
 	if (chip->bbt_options & NAND_BBT_SCANLASTPAGE)
@@ -435,16 +435,16 @@ static int nand_block_bad(struct mtd_info *mtd, loff_t ofs)
 
 /**
  * nand_default_block_markbad - [DEFAULT] mark a block bad via bad block marker
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @ofs: offset from device start
  *
  * This is the default implementation, which can be overridden by a hardware
  * specific driver. It provides the details for writing a bad block marker to a
  * block.
  */
-static int nand_default_block_markbad(struct mtd_info *mtd, loff_t ofs)
+static int nand_default_block_markbad(struct nand_chip *chip, loff_t ofs)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct mtd_oob_ops ops;
 	uint8_t buf[2] = { 0, 0 };
 	int ret = 0, res, i = 0;
@@ -510,7 +510,7 @@ static int nand_block_markbad_lowlevel(struct mtd_info *mtd, loff_t ofs)
 
 		/* Write bad block marker to OOB */
 		nand_get_device(mtd, FL_WRITING);
-		ret = chip->block_markbad(mtd, ofs);
+		ret = chip->block_markbad(chip, ofs);
 		nand_release_device(mtd);
 	}
 
@@ -583,7 +583,7 @@ static int nand_block_checkbad(struct mtd_info *mtd, loff_t ofs, int allowbbt)
 	struct nand_chip *chip = mtd_to_nand(mtd);
 
 	if (!chip->bbt)
-		return chip->block_bad(mtd, ofs);
+		return chip->block_bad(chip, ofs);
 
 	/* Return info from the table */
 	return nand_isbad_bbt(mtd, ofs, allowbbt);
diff --git a/drivers/mtd/nand/raw/nand_bbt.c b/drivers/mtd/nand/raw/nand_bbt.c
index 39db352f8757..76849a441518 100644
--- a/drivers/mtd/nand/raw/nand_bbt.c
+++ b/drivers/mtd/nand/raw/nand_bbt.c
@@ -683,14 +683,13 @@ static void mark_bbt_block_bad(struct nand_chip *this,
 			       struct nand_bbt_descr *td,
 			       int chip, int block)
 {
-	struct mtd_info *mtd = nand_to_mtd(this);
 	loff_t to;
 	int res;
 
 	bbt_mark_entry(this, block, BBT_BLOCK_WORN);
 
 	to = (loff_t)block << this->bbt_erase_shift;
-	res = this->block_markbad(mtd, to);
+	res = this->block_markbad(this, to);
 	if (res)
 		pr_warn("nand_bbt: error %d while marking block %d bad\n",
 			res, block);
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index 626c9ab8c8db..c6eb205e0f76 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -2196,9 +2196,9 @@ static int qcom_nandc_write_oob(struct nand_chip *chip, int page)
 	return nand_prog_page_end_op(chip);
 }
 
-static int qcom_nandc_block_bad(struct mtd_info *mtd, loff_t ofs)
+static int qcom_nandc_block_bad(struct nand_chip *chip, loff_t ofs)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct qcom_nand_host *host = to_qcom_nand_host(chip);
 	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
@@ -2234,9 +2234,8 @@ err:
 	return bad;
 }
 
-static int qcom_nandc_block_markbad(struct mtd_info *mtd, loff_t ofs)
+static int qcom_nandc_block_markbad(struct nand_chip *chip, loff_t ofs)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct qcom_nand_host *host = to_qcom_nand_host(chip);
 	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
diff --git a/drivers/mtd/nand/raw/sm_common.c b/drivers/mtd/nand/raw/sm_common.c
index 02ac6e9b2d16..bf143e0db787 100644
--- a/drivers/mtd/nand/raw/sm_common.c
+++ b/drivers/mtd/nand/raw/sm_common.c
@@ -99,8 +99,9 @@ static const struct mtd_ooblayout_ops oob_sm_small_ops = {
 	.free = oob_sm_small_ooblayout_free,
 };
 
-static int sm_block_markbad(struct mtd_info *mtd, loff_t ofs)
+static int sm_block_markbad(struct nand_chip *chip, loff_t ofs)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct mtd_oob_ops ops;
 	struct sm_oob oob;
 	int ret;
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 65a25e89b426..0d8e2708e125 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1288,8 +1288,8 @@ struct nand_chip {
 	void (*write_buf)(struct nand_chip *chip, const uint8_t *buf, int len);
 	void (*read_buf)(struct nand_chip *chip, uint8_t *buf, int len);
 	void (*select_chip)(struct nand_chip *chip, int cs);
-	int (*block_bad)(struct mtd_info *mtd, loff_t ofs);
-	int (*block_markbad)(struct mtd_info *mtd, loff_t ofs);
+	int (*block_bad)(struct nand_chip *chip, loff_t ofs);
+	int (*block_markbad)(struct nand_chip *chip, loff_t ofs);
 	void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl);
 	int (*dev_ready)(struct mtd_info *mtd);
 	void (*cmdfunc)(struct mtd_info *mtd, unsigned command, int column,
-- 
cgit v1.2.3


From 0f808c1602bc75c74399989d47842197118f7e72 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:26 +0200
Subject: mtd: rawnand: Pass a nand_chip object to chip->cmd_ctrl()

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one or
remove the mtd_info object when both are passed.

Let's tackle the chip->cmd_ctrl() hook.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/ams-delta.c                 |  4 +-
 drivers/mtd/nand/raw/atmel/nand-controller.c     |  6 +--
 drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c |  7 ++--
 drivers/mtd/nand/raw/brcmnand/brcmnand.c         |  4 +-
 drivers/mtd/nand/raw/cmx270_nand.c               |  3 +-
 drivers/mtd/nand/raw/cs553x_nand.c               |  3 +-
 drivers/mtd/nand/raw/davinci_nand.c              |  5 +--
 drivers/mtd/nand/raw/denali.c                    |  4 +-
 drivers/mtd/nand/raw/diskonchip.c                | 22 +++++------
 drivers/mtd/nand/raw/fsl_upm.c                   | 10 ++---
 drivers/mtd/nand/raw/gpio.c                      |  5 ++-
 drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c       |  3 +-
 drivers/mtd/nand/raw/jz4740_nand.c               |  6 +--
 drivers/mtd/nand/raw/jz4780_nand.c               |  4 +-
 drivers/mtd/nand/raw/lpc32xx_mlc.c               |  3 +-
 drivers/mtd/nand/raw/lpc32xx_slc.c               |  5 +--
 drivers/mtd/nand/raw/mtk_nand.c                  |  5 ++-
 drivers/mtd/nand/raw/nand_base.c                 | 47 ++++++++++++------------
 drivers/mtd/nand/raw/nandsim.c                   |  3 +-
 drivers/mtd/nand/raw/ndfc.c                      |  3 +-
 drivers/mtd/nand/raw/omap2.c                     |  6 +--
 drivers/mtd/nand/raw/orion_nand.c                |  4 +-
 drivers/mtd/nand/raw/oxnas_nand.c                |  3 +-
 drivers/mtd/nand/raw/pasemi_nand.c               |  4 +-
 drivers/mtd/nand/raw/plat_nand.c                 | 11 +-----
 drivers/mtd/nand/raw/r852.c                      |  4 +-
 drivers/mtd/nand/raw/s3c2410.c                   |  6 ++-
 drivers/mtd/nand/raw/sharpsl.c                   |  5 +--
 drivers/mtd/nand/raw/socrates_nand.c             |  5 +--
 drivers/mtd/nand/raw/sunxi_nand.c                |  3 +-
 drivers/mtd/nand/raw/tango_nand.c                |  4 +-
 drivers/mtd/nand/raw/tmio_nand.c                 |  7 ++--
 drivers/mtd/nand/raw/txx9ndfmc.c                 |  3 +-
 drivers/mtd/nand/raw/xway_nand.c                 |  4 +-
 include/linux/mtd/rawnand.h                      |  2 +-
 35 files changed, 98 insertions(+), 125 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c
index d742b9444429..8121d26194cf 100644
--- a/drivers/mtd/nand/raw/ams-delta.c
+++ b/drivers/mtd/nand/raw/ams-delta.c
@@ -113,7 +113,7 @@ static void ams_delta_read_buf(struct nand_chip *this, u_char *buf, int len)
  * NAND_CLE: bit 1 -> bit 7
  * NAND_ALE: bit 2 -> bit 6
  */
-static void ams_delta_hwcontrol(struct mtd_info *mtd, int cmd,
+static void ams_delta_hwcontrol(struct nand_chip *this, int cmd,
 				unsigned int ctrl)
 {
 
@@ -127,7 +127,7 @@ static void ams_delta_hwcontrol(struct mtd_info *mtd, int cmd,
 	}
 
 	if (cmd != NAND_CMD_NONE)
-		ams_delta_write_byte(mtd_to_nand(mtd), cmd);
+		ams_delta_write_byte(this, cmd);
 }
 
 static int ams_delta_nand_ready(struct mtd_info *mtd)
diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index 5c8ef476ed47..f088bff06723 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -594,10 +594,9 @@ static int atmel_nfc_exec_op(struct atmel_hsmc_nand_controller *nc, bool poll)
 	return ret;
 }
 
-static void atmel_hsmc_nand_cmd_ctrl(struct mtd_info *mtd, int dat,
+static void atmel_hsmc_nand_cmd_ctrl(struct nand_chip *chip, int dat,
 				     unsigned int ctrl)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct atmel_nand *nand = to_atmel_nand(chip);
 	struct atmel_hsmc_nand_controller *nc;
 
@@ -621,10 +620,9 @@ static void atmel_hsmc_nand_cmd_ctrl(struct mtd_info *mtd, int dat,
 	}
 }
 
-static void atmel_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
+static void atmel_nand_cmd_ctrl(struct nand_chip *chip, int cmd,
 				unsigned int ctrl)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct atmel_nand *nand = to_atmel_nand(chip);
 	struct atmel_nand_controller *nc;
 
diff --git a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
index c8e30b0308bc..d326f9d3648b 100644
--- a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
@@ -170,10 +170,9 @@ static void bcm47xxnflash_ops_bcm4706_write(struct mtd_info *mtd,
  * NAND chip ops
  **************************************************/
 
-static void bcm47xxnflash_ops_bcm4706_cmd_ctrl(struct mtd_info *mtd, int cmd,
-					       unsigned int ctrl)
+static void bcm47xxnflash_ops_bcm4706_cmd_ctrl(struct nand_chip *nand_chip,
+					       int cmd, unsigned int ctrl)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
 	struct bcm47xxnflash *b47n = nand_get_controller_data(nand_chip);
 	u32 code = 0;
 
@@ -229,7 +228,7 @@ static void bcm47xxnflash_ops_bcm4706_cmdfunc(struct mtd_info *mtd,
 
 	switch (command) {
 	case NAND_CMD_RESET:
-		nand_chip->cmd_ctrl(mtd, command, NAND_CTRL_CLE);
+		nand_chip->cmd_ctrl(nand_chip, command, NAND_CTRL_CLE);
 
 		ndelay(100);
 		nand_wait_ready(nand_chip);
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index e24e77b27618..80f5b4b9ee75 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -1231,8 +1231,8 @@ static void brcmnand_send_cmd(struct brcmnand_host *host, int cmd)
  * NAND MTD API: read/program/erase
  ***********************************************************************/
 
-static void brcmnand_cmd_ctrl(struct mtd_info *mtd, int dat,
-	unsigned int ctrl)
+static void brcmnand_cmd_ctrl(struct nand_chip *chip, int dat,
+			      unsigned int ctrl)
 {
 	/* intentionally left blank */
 }
diff --git a/drivers/mtd/nand/raw/cmx270_nand.c b/drivers/mtd/nand/raw/cmx270_nand.c
index 4e5c8b7721ab..a0f0ad2da6f1 100644
--- a/drivers/mtd/nand/raw/cmx270_nand.c
+++ b/drivers/mtd/nand/raw/cmx270_nand.c
@@ -86,10 +86,9 @@ static void nand_cs_off(void)
 /*
  *	hardware specific access to control-lines
  */
-static void cmx270_hwcontrol(struct mtd_info *mtd, int dat,
+static void cmx270_hwcontrol(struct nand_chip *this, int dat,
 			     unsigned int ctrl)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	unsigned int nandaddr = (unsigned int)this->IO_ADDR_W;
 
 	dsb();
diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c
index 442fa583db44..b7432f086f9b 100644
--- a/drivers/mtd/nand/raw/cs553x_nand.c
+++ b/drivers/mtd/nand/raw/cs553x_nand.c
@@ -129,10 +129,9 @@ static void cs553x_write_byte(struct nand_chip *this, u_char byte)
 	writeb(byte, this->IO_ADDR_W + 0x801);
 }
 
-static void cs553x_hwcontrol(struct mtd_info *mtd, int cmd,
+static void cs553x_hwcontrol(struct nand_chip *this, int cmd,
 			     unsigned int ctrl)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	void __iomem *mmio_base = this->IO_ADDR_R;
 	if (ctrl & NAND_CTRL_CHANGE) {
 		unsigned char ctl = (ctrl & ~NAND_CTRL_CHANGE ) ^ 0x01;
diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index 85bc801424b0..c2a3ad10610c 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -97,12 +97,11 @@ static inline void davinci_nand_writel(struct davinci_nand_info *info,
  * Access to hardware control lines:  ALE, CLE, secondary chipselect.
  */
 
-static void nand_davinci_hwcontrol(struct mtd_info *mtd, int cmd,
+static void nand_davinci_hwcontrol(struct nand_chip *nand, int cmd,
 				   unsigned int ctrl)
 {
-	struct davinci_nand_info	*info = to_davinci_nand(mtd);
+	struct davinci_nand_info *info = to_davinci_nand(nand_to_mtd(nand));
 	void __iomem			*addr = info->current_cs;
-	struct nand_chip		*nand = mtd_to_nand(mtd);
 
 	/* Did the control lines change? */
 	if (ctrl & NAND_CTRL_CHANGE) {
diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index e29ec95f24de..6529780e31a4 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -274,9 +274,9 @@ static void denali_write_byte(struct nand_chip *chip, uint8_t byte)
 	denali_write_buf(chip, &byte, 1);
 }
 
-static void denali_cmd_ctrl(struct mtd_info *mtd, int dat, unsigned int ctrl)
+static void denali_cmd_ctrl(struct nand_chip *chip, int dat, unsigned int ctrl)
 {
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	struct denali_nand_info *denali = mtd_to_denali(nand_to_mtd(chip));
 	uint32_t type;
 
 	if (ctrl & NAND_CLE)
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 9cbcf020cabe..16498b277764 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -83,7 +83,7 @@ static u_char empty_write_ecc[6] = { 0x4b, 0x00, 0xe2, 0x0e, 0x93, 0xf7 };
 #define DoC_is_Millennium(doc) ((doc)->ChipID == DOC_ChipID_DocMil)
 #define DoC_is_2000(doc) ((doc)->ChipID == DOC_ChipID_Doc2k)
 
-static void doc200x_hwcontrol(struct mtd_info *mtd, int cmd,
+static void doc200x_hwcontrol(struct nand_chip *this, int cmd,
 			      unsigned int bitmask);
 static void doc200x_select_chip(struct nand_chip *this, int chip);
 
@@ -372,10 +372,10 @@ static uint16_t __init doc200x_ident_chip(struct mtd_info *mtd, int nr)
 	uint16_t ret;
 
 	doc200x_select_chip(this, nr);
-	doc200x_hwcontrol(mtd, NAND_CMD_READID,
+	doc200x_hwcontrol(this, NAND_CMD_READID,
 			  NAND_CTRL_CLE | NAND_CTRL_CHANGE);
-	doc200x_hwcontrol(mtd, 0, NAND_CTRL_ALE | NAND_CTRL_CHANGE);
-	doc200x_hwcontrol(mtd, NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE);
+	doc200x_hwcontrol(this, 0, NAND_CTRL_ALE | NAND_CTRL_CHANGE);
+	doc200x_hwcontrol(this, NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE);
 
 	/* We can't use dev_ready here, but at least we wait for the
 	 * command to complete
@@ -393,10 +393,10 @@ static uint16_t __init doc200x_ident_chip(struct mtd_info *mtd, int nr)
 		} ident;
 		void __iomem *docptr = doc->virtadr;
 
-		doc200x_hwcontrol(mtd, NAND_CMD_READID,
+		doc200x_hwcontrol(this, NAND_CMD_READID,
 				  NAND_CTRL_CLE | NAND_CTRL_CHANGE);
-		doc200x_hwcontrol(mtd, 0, NAND_CTRL_ALE | NAND_CTRL_CHANGE);
-		doc200x_hwcontrol(mtd, NAND_CMD_NONE,
+		doc200x_hwcontrol(this, 0, NAND_CTRL_ALE | NAND_CTRL_CHANGE);
+		doc200x_hwcontrol(this, NAND_CMD_NONE,
 				  NAND_NCE | NAND_CTRL_CHANGE);
 
 		udelay(50);
@@ -587,7 +587,6 @@ static void doc2001plus_select_chip(struct nand_chip *this, int chip)
 
 static void doc200x_select_chip(struct nand_chip *this, int chip)
 {
-	struct mtd_info *mtd = nand_to_mtd(this);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 	int floor = 0;
@@ -602,12 +601,12 @@ static void doc200x_select_chip(struct nand_chip *this, int chip)
 	chip -= (floor * doc->chips_per_floor);
 
 	/* 11.4.4 -- deassert CE before changing chip */
-	doc200x_hwcontrol(mtd, NAND_CMD_NONE, 0 | NAND_CTRL_CHANGE);
+	doc200x_hwcontrol(this, NAND_CMD_NONE, 0 | NAND_CTRL_CHANGE);
 
 	WriteDOC(floor, docptr, FloorSelect);
 	WriteDOC(chip, docptr, CDSNDeviceSelect);
 
-	doc200x_hwcontrol(mtd, NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE);
+	doc200x_hwcontrol(this, NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE);
 
 	doc->curchip = chip;
 	doc->curfloor = floor;
@@ -615,10 +614,9 @@ static void doc200x_select_chip(struct nand_chip *this, int chip)
 
 #define CDSN_CTRL_MSK (CDSN_CTRL_CE | CDSN_CTRL_CLE | CDSN_CTRL_ALE)
 
-static void doc200x_hwcontrol(struct mtd_info *mtd, int cmd,
+static void doc200x_hwcontrol(struct nand_chip *this, int cmd,
 			      unsigned int ctrl)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 
diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c
index ec3553cb737a..7a2488c6c212 100644
--- a/drivers/mtd/nand/raw/fsl_upm.c
+++ b/drivers/mtd/nand/raw/fsl_upm.c
@@ -78,10 +78,9 @@ static void fun_wait_rnb(struct fsl_upm_nand *fun)
 	}
 }
 
-static void fun_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+static void fun_cmd_ctrl(struct nand_chip *chip, int cmd, unsigned int ctrl)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
-	struct fsl_upm_nand *fun = to_fsl_upm_nand(mtd);
+	struct fsl_upm_nand *fun = to_fsl_upm_nand(nand_to_mtd(chip));
 	u32 mar;
 
 	if (!(ctrl & fun->last_ctrl)) {
@@ -110,11 +109,10 @@ static void fun_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
 
 static void fun_select_chip(struct nand_chip *chip, int mchip_nr)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
-	struct fsl_upm_nand *fun = to_fsl_upm_nand(mtd);
+	struct fsl_upm_nand *fun = to_fsl_upm_nand(nand_to_mtd(chip));
 
 	if (mchip_nr == -1) {
-		chip->cmd_ctrl(mtd, NAND_CMD_NONE, 0 | NAND_CTRL_CHANGE);
+		chip->cmd_ctrl(chip, NAND_CMD_NONE, 0 | NAND_CTRL_CHANGE);
 	} else if (mchip_nr >= 0 && mchip_nr < NAND_MAX_CHIPS) {
 		fun->mchip_number = mchip_nr;
 		chip->IO_ADDR_R = fun->io_base + fun->mchip_offsets[mchip_nr];
diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c
index 0e7d00faf33c..722a930ac836 100644
--- a/drivers/mtd/nand/raw/gpio.c
+++ b/drivers/mtd/nand/raw/gpio.c
@@ -73,9 +73,10 @@ static void gpio_nand_dosync(struct gpiomtd *gpiomtd)
 static inline void gpio_nand_dosync(struct gpiomtd *gpiomtd) {}
 #endif
 
-static void gpio_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+static void gpio_nand_cmd_ctrl(struct nand_chip *chip, int cmd,
+			       unsigned int ctrl)
 {
-	struct gpiomtd *gpiomtd = gpio_nand_getpriv(mtd);
+	struct gpiomtd *gpiomtd = gpio_nand_getpriv(nand_to_mtd(chip));
 
 	gpio_nand_dosync(gpiomtd);
 
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index 2dce9b62ebe7..460f2f77a424 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -783,9 +783,8 @@ error_alloc:
 	return -ENOMEM;
 }
 
-static void gpmi_cmd_ctrl(struct mtd_info *mtd, int data, unsigned int ctrl)
+static void gpmi_cmd_ctrl(struct nand_chip *chip, int data, unsigned int ctrl)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct gpmi_nand_data *this = nand_get_controller_data(chip);
 	int ret;
 
diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c
index b6e68048b83d..7999e691e636 100644
--- a/drivers/mtd/nand/raw/jz4740_nand.c
+++ b/drivers/mtd/nand/raw/jz4740_nand.c
@@ -99,10 +99,10 @@ static void jz_nand_select_chip(struct nand_chip *chip, int chipnr)
 	nand->selected_bank = banknr;
 }
 
-static void jz_nand_cmd_ctrl(struct mtd_info *mtd, int dat, unsigned int ctrl)
+static void jz_nand_cmd_ctrl(struct nand_chip *chip, int dat,
+			     unsigned int ctrl)
 {
-	struct jz_nand *nand = mtd_to_jz_nand(mtd);
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct jz_nand *nand = mtd_to_jz_nand(nand_to_mtd(chip));
 	uint32_t reg;
 	void __iomem *bank_base = nand->bank_base[nand->selected_bank];
 
diff --git a/drivers/mtd/nand/raw/jz4780_nand.c b/drivers/mtd/nand/raw/jz4780_nand.c
index 29e597b0ca59..1d2cba546258 100644
--- a/drivers/mtd/nand/raw/jz4780_nand.c
+++ b/drivers/mtd/nand/raw/jz4780_nand.c
@@ -86,10 +86,10 @@ static void jz4780_nand_select_chip(struct nand_chip *chip, int chipnr)
 	nfc->selected = chipnr;
 }
 
-static void jz4780_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
+static void jz4780_nand_cmd_ctrl(struct nand_chip *chip, int cmd,
 				 unsigned int ctrl)
 {
-	struct jz4780_nand_chip *nand = to_jz4780_nand_chip(mtd);
+	struct jz4780_nand_chip *nand = to_jz4780_nand_chip(nand_to_mtd(chip));
 	struct jz4780_nand_controller *nfc = to_jz4780_nand_controller(nand->chip.controller);
 	struct jz4780_nand_cs *cs;
 
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index 79a02acb0517..0e989d944ddb 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -286,10 +286,9 @@ static void lpc32xx_nand_setup(struct lpc32xx_nand_host *host)
 /*
  * Hardware specific access to control lines
  */
-static void lpc32xx_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
+static void lpc32xx_nand_cmd_ctrl(struct nand_chip *nand_chip, int cmd,
 				  unsigned int ctrl)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
 	struct lpc32xx_nand_host *host = nand_get_controller_data(nand_chip);
 
 	if (cmd != NAND_CMD_NONE) {
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index d04b30989041..e42584de875c 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -278,11 +278,10 @@ static void lpc32xx_nand_setup(struct lpc32xx_nand_host *host)
 /*
  * Hardware specific access to control lines
  */
-static void lpc32xx_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
-	unsigned int ctrl)
+static void lpc32xx_nand_cmd_ctrl(struct nand_chip *chip, int cmd,
+				  unsigned int ctrl)
 {
 	uint32_t tmp;
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct lpc32xx_nand_host *host = nand_get_controller_data(chip);
 
 	/* Does CE state need to be changed? */
diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c
index 6e5d4afd6b1a..6baa41483931 100644
--- a/drivers/mtd/nand/raw/mtk_nand.c
+++ b/drivers/mtd/nand/raw/mtk_nand.c
@@ -412,9 +412,10 @@ static int mtk_nfc_dev_ready(struct mtd_info *mtd)
 	return 1;
 }
 
-static void mtk_nfc_cmd_ctrl(struct mtd_info *mtd, int dat, unsigned int ctrl)
+static void mtk_nfc_cmd_ctrl(struct nand_chip *chip, int dat,
+			     unsigned int ctrl)
 {
-	struct mtk_nfc *nfc = nand_get_controller_data(mtd_to_nand(mtd));
+	struct mtk_nfc *nfc = nand_get_controller_data(chip);
 
 	if (ctrl & NAND_ALE) {
 		mtk_nfc_send_address(nfc, dat);
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index add85235497e..f0d70164a2f1 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -286,8 +286,7 @@ static void nand_select_chip(struct nand_chip *chip, int chipnr)
 {
 	switch (chipnr) {
 	case -1:
-		chip->cmd_ctrl(nand_to_mtd(chip), NAND_CMD_NONE,
-			       0 | NAND_CTRL_CHANGE);
+		chip->cmd_ctrl(chip, NAND_CMD_NONE, 0 | NAND_CTRL_CHANGE);
 		break;
 	case 0:
 		break;
@@ -760,11 +759,11 @@ static void nand_command(struct mtd_info *mtd, unsigned int command,
 			column -= 256;
 			readcmd = NAND_CMD_READ1;
 		}
-		chip->cmd_ctrl(mtd, readcmd, ctrl);
+		chip->cmd_ctrl(chip, readcmd, ctrl);
 		ctrl &= ~NAND_CTRL_CHANGE;
 	}
 	if (command != NAND_CMD_NONE)
-		chip->cmd_ctrl(mtd, command, ctrl);
+		chip->cmd_ctrl(chip, command, ctrl);
 
 	/* Address cycle, when necessary */
 	ctrl = NAND_CTRL_ALE | NAND_CTRL_CHANGE;
@@ -774,17 +773,17 @@ static void nand_command(struct mtd_info *mtd, unsigned int command,
 		if (chip->options & NAND_BUSWIDTH_16 &&
 				!nand_opcode_8bits(command))
 			column >>= 1;
-		chip->cmd_ctrl(mtd, column, ctrl);
+		chip->cmd_ctrl(chip, column, ctrl);
 		ctrl &= ~NAND_CTRL_CHANGE;
 	}
 	if (page_addr != -1) {
-		chip->cmd_ctrl(mtd, page_addr, ctrl);
+		chip->cmd_ctrl(chip, page_addr, ctrl);
 		ctrl &= ~NAND_CTRL_CHANGE;
-		chip->cmd_ctrl(mtd, page_addr >> 8, ctrl);
+		chip->cmd_ctrl(chip, page_addr >> 8, ctrl);
 		if (chip->options & NAND_ROW_ADDR_3)
-			chip->cmd_ctrl(mtd, page_addr >> 16, ctrl);
+			chip->cmd_ctrl(chip, page_addr >> 16, ctrl);
 	}
-	chip->cmd_ctrl(mtd, NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE);
+	chip->cmd_ctrl(chip, NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE);
 
 	/*
 	 * Program and erase have their own busy handlers status and sequential
@@ -806,9 +805,9 @@ static void nand_command(struct mtd_info *mtd, unsigned int command,
 		if (chip->dev_ready)
 			break;
 		udelay(chip->chip_delay);
-		chip->cmd_ctrl(mtd, NAND_CMD_STATUS,
+		chip->cmd_ctrl(chip, NAND_CMD_STATUS,
 			       NAND_CTRL_CLE | NAND_CTRL_CHANGE);
-		chip->cmd_ctrl(mtd,
+		chip->cmd_ctrl(chip,
 			       NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE);
 		/* EZ-NAND can take upto 250ms as per ONFi v4.0 */
 		nand_wait_status_ready(mtd, 250);
@@ -887,7 +886,7 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
 
 	/* Command latch cycle */
 	if (command != NAND_CMD_NONE)
-		chip->cmd_ctrl(mtd, command,
+		chip->cmd_ctrl(chip, command,
 			       NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
 
 	if (column != -1 || page_addr != -1) {
@@ -899,23 +898,23 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
 			if (chip->options & NAND_BUSWIDTH_16 &&
 					!nand_opcode_8bits(command))
 				column >>= 1;
-			chip->cmd_ctrl(mtd, column, ctrl);
+			chip->cmd_ctrl(chip, column, ctrl);
 			ctrl &= ~NAND_CTRL_CHANGE;
 
 			/* Only output a single addr cycle for 8bits opcodes. */
 			if (!nand_opcode_8bits(command))
-				chip->cmd_ctrl(mtd, column >> 8, ctrl);
+				chip->cmd_ctrl(chip, column >> 8, ctrl);
 		}
 		if (page_addr != -1) {
-			chip->cmd_ctrl(mtd, page_addr, ctrl);
-			chip->cmd_ctrl(mtd, page_addr >> 8,
+			chip->cmd_ctrl(chip, page_addr, ctrl);
+			chip->cmd_ctrl(chip, page_addr >> 8,
 				       NAND_NCE | NAND_ALE);
 			if (chip->options & NAND_ROW_ADDR_3)
-				chip->cmd_ctrl(mtd, page_addr >> 16,
+				chip->cmd_ctrl(chip, page_addr >> 16,
 					       NAND_NCE | NAND_ALE);
 		}
 	}
-	chip->cmd_ctrl(mtd, NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE);
+	chip->cmd_ctrl(chip, NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE);
 
 	/*
 	 * Program and erase have their own busy handlers status, sequential
@@ -942,9 +941,9 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
 		if (chip->dev_ready)
 			break;
 		udelay(chip->chip_delay);
-		chip->cmd_ctrl(mtd, NAND_CMD_STATUS,
+		chip->cmd_ctrl(chip, NAND_CMD_STATUS,
 			       NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
-		chip->cmd_ctrl(mtd, NAND_CMD_NONE,
+		chip->cmd_ctrl(chip, NAND_CMD_NONE,
 			       NAND_NCE | NAND_CTRL_CHANGE);
 		/* EZ-NAND can take upto 250ms as per ONFi v4.0 */
 		nand_wait_status_ready(mtd, 250);
@@ -952,9 +951,9 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
 
 	case NAND_CMD_RNDOUT:
 		/* No ready / busy check necessary */
-		chip->cmd_ctrl(mtd, NAND_CMD_RNDOUTSTART,
+		chip->cmd_ctrl(chip, NAND_CMD_RNDOUTSTART,
 			       NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
-		chip->cmd_ctrl(mtd, NAND_CMD_NONE,
+		chip->cmd_ctrl(chip, NAND_CMD_NONE,
 			       NAND_NCE | NAND_CTRL_CHANGE);
 
 		nand_ccs_delay(chip);
@@ -970,9 +969,9 @@ static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
 		if (column == -1 && page_addr == -1)
 			return;
 
-		chip->cmd_ctrl(mtd, NAND_CMD_READSTART,
+		chip->cmd_ctrl(chip, NAND_CMD_READSTART,
 			       NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
-		chip->cmd_ctrl(mtd, NAND_CMD_NONE,
+		chip->cmd_ctrl(chip, NAND_CMD_NONE,
 			       NAND_NCE | NAND_CTRL_CHANGE);
 
 		/* This applies to read commands */
diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c
index 880ba12e07ba..a6b626c935a8 100644
--- a/drivers/mtd/nand/raw/nandsim.c
+++ b/drivers/mtd/nand/raw/nandsim.c
@@ -2087,9 +2087,8 @@ static void ns_nand_write_byte(struct nand_chip *chip, u_char byte)
 	return;
 }
 
-static void ns_hwcontrol(struct mtd_info *mtd, int cmd, unsigned int bitmask)
+static void ns_hwcontrol(struct nand_chip *chip, int cmd, unsigned int bitmask)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct nandsim *ns = nand_get_controller_data(chip);
 
 	ns->lines.cle = bitmask & NAND_CLE ? 1 : 0;
diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index addcc736ae1d..05ac7bf94874 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -58,9 +58,8 @@ static void ndfc_select_chip(struct nand_chip *nchip, int chip)
 	out_be32(ndfc->ndfcbase + NDFC_CCR, ccr);
 }
 
-static void ndfc_hwcontrol(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+static void ndfc_hwcontrol(struct nand_chip *chip, int cmd, unsigned int ctrl)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct ndfc_controller *ndfc = nand_get_controller_data(chip);
 
 	if (cmd == NAND_CMD_NONE)
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index 5a2bf1ed9c86..4bae782cd877 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -240,7 +240,7 @@ static int omap_prefetch_reset(int cs, struct omap_nand_info *info)
 
 /**
  * omap_hwcontrol - hardware specific access to control-lines
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @cmd: command to device
  * @ctrl:
  * NAND_NCE: bit 0 -> don't care
@@ -249,9 +249,9 @@ static int omap_prefetch_reset(int cs, struct omap_nand_info *info)
  *
  * NOTE: boards may use different bits for these!!
  */
-static void omap_hwcontrol(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+static void omap_hwcontrol(struct nand_chip *chip, int cmd, unsigned int ctrl)
 {
-	struct omap_nand_info *info = mtd_to_omap(mtd);
+	struct omap_nand_info *info = mtd_to_omap(nand_to_mtd(chip));
 
 	if (cmd != NAND_CMD_NONE) {
 		if (ctrl & NAND_CLE)
diff --git a/drivers/mtd/nand/raw/orion_nand.c b/drivers/mtd/nand/raw/orion_nand.c
index 870eabe6fff8..92d8f249ee97 100644
--- a/drivers/mtd/nand/raw/orion_nand.c
+++ b/drivers/mtd/nand/raw/orion_nand.c
@@ -26,9 +26,9 @@ struct orion_nand_info {
 	struct clk *clk;
 };
 
-static void orion_nand_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+static void orion_nand_cmd_ctrl(struct nand_chip *nc, int cmd,
+				unsigned int ctrl)
 {
-	struct nand_chip *nc = mtd_to_nand(mtd);
 	struct orion_nand_data *board = nand_get_controller_data(nc);
 	u32 offs;
 
diff --git a/drivers/mtd/nand/raw/oxnas_nand.c b/drivers/mtd/nand/raw/oxnas_nand.c
index 93c04bec471d..ab32df146505 100644
--- a/drivers/mtd/nand/raw/oxnas_nand.c
+++ b/drivers/mtd/nand/raw/oxnas_nand.c
@@ -61,10 +61,9 @@ static void oxnas_nand_write_buf(struct nand_chip *chip, const u8 *buf,
 }
 
 /* Single CS command control */
-static void oxnas_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
+static void oxnas_nand_cmd_ctrl(struct nand_chip *chip, int cmd,
 				unsigned int ctrl)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct oxnas_nand_ctrl *oxnas = nand_get_controller_data(chip);
 
 	if (ctrl & NAND_CLE)
diff --git a/drivers/mtd/nand/raw/pasemi_nand.c b/drivers/mtd/nand/raw/pasemi_nand.c
index 70aff4180ab7..661ba57f2934 100644
--- a/drivers/mtd/nand/raw/pasemi_nand.c
+++ b/drivers/mtd/nand/raw/pasemi_nand.c
@@ -64,11 +64,9 @@ static void pasemi_write_buf(struct nand_chip *chip, const u_char *buf,
 	memcpy_toio(chip->IO_ADDR_R, buf, len);
 }
 
-static void pasemi_hwcontrol(struct mtd_info *mtd, int cmd,
+static void pasemi_hwcontrol(struct nand_chip *chip, int cmd,
 			     unsigned int ctrl)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
 	if (cmd == NAND_CMD_NONE)
 		return;
 
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index dd9e241b7584..bfb5d8e7b00b 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -23,13 +23,6 @@ struct plat_nand_data {
 	void __iomem		*io_base;
 };
 
-static void plat_nand_cmd_ctrl(struct mtd_info *mtd, int dat, unsigned int ctrl)
-{
-	struct platform_nand_data *pdata = dev_get_platdata(mtd->dev.parent);
-
-	pdata->ctrl.cmd_ctrl(mtd_to_nand(mtd), dat, ctrl);
-}
-
 static int plat_nand_dev_ready(struct mtd_info *mtd)
 {
 	struct platform_nand_data *pdata = dev_get_platdata(mtd->dev.parent);
@@ -76,9 +69,7 @@ static int plat_nand_probe(struct platform_device *pdev)
 
 	data->chip.IO_ADDR_R = data->io_base;
 	data->chip.IO_ADDR_W = data->io_base;
-
-	if (pdata->ctrl.cmd_ctrl)
-		data->chip.cmd_ctrl = plat_nand_cmd_ctrl;
+	data->chip.cmd_ctrl = pdata->ctrl.cmd_ctrl;
 
 	if (pdata->ctrl.dev_ready)
 		data->chip.dev_ready = plat_nand_dev_ready;
diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c
index 312a971aa456..e90549e031a7 100644
--- a/drivers/mtd/nand/raw/r852.c
+++ b/drivers/mtd/nand/raw/r852.c
@@ -317,9 +317,9 @@ static uint8_t r852_read_byte(struct nand_chip *chip)
 /*
  * Control several chip lines & send commands
  */
-static void r852_cmdctl(struct mtd_info *mtd, int dat, unsigned int ctrl)
+static void r852_cmdctl(struct nand_chip *chip, int dat, unsigned int ctrl)
 {
-	struct r852_device *dev = r852_get_dev(mtd);
+	struct r852_device *dev = r852_get_dev(nand_to_mtd(chip));
 
 	if (dev->card_unstable)
 		return;
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index 353011e7fb79..98ba94936631 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -456,9 +456,10 @@ static void s3c2410_nand_select_chip(struct nand_chip *this, int chip)
  * Issue command and address cycles to the chip
 */
 
-static void s3c2410_nand_hwcontrol(struct mtd_info *mtd, int cmd,
+static void s3c2410_nand_hwcontrol(struct nand_chip *chip, int cmd,
 				   unsigned int ctrl)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
 
 	if (cmd == NAND_CMD_NONE)
@@ -472,9 +473,10 @@ static void s3c2410_nand_hwcontrol(struct mtd_info *mtd, int cmd,
 
 /* command and control functions */
 
-static void s3c2440_nand_hwcontrol(struct mtd_info *mtd, int cmd,
+static void s3c2440_nand_hwcontrol(struct nand_chip *chip, int cmd,
 				   unsigned int ctrl)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
 
 	if (cmd == NAND_CMD_NONE)
diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c
index 4d931ce71af5..7486a00b1ae5 100644
--- a/drivers/mtd/nand/raw/sharpsl.c
+++ b/drivers/mtd/nand/raw/sharpsl.c
@@ -59,11 +59,10 @@ static inline struct sharpsl_nand *mtd_to_sharpsl(struct mtd_info *mtd)
  *	NAND_ALE: bit 2 -> bit 2
  *
  */
-static void sharpsl_nand_hwcontrol(struct mtd_info *mtd, int cmd,
+static void sharpsl_nand_hwcontrol(struct nand_chip *chip, int cmd,
 				   unsigned int ctrl)
 {
-	struct sharpsl_nand *sharpsl = mtd_to_sharpsl(mtd);
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct sharpsl_nand *sharpsl = mtd_to_sharpsl(nand_to_mtd(chip));
 
 	if (ctrl & NAND_CTRL_CHANGE) {
 		unsigned char bits = ctrl & 0x07;
diff --git a/drivers/mtd/nand/raw/socrates_nand.c b/drivers/mtd/nand/raw/socrates_nand.c
index deedc1cd4dee..c44b19fc1350 100644
--- a/drivers/mtd/nand/raw/socrates_nand.c
+++ b/drivers/mtd/nand/raw/socrates_nand.c
@@ -87,10 +87,9 @@ static uint8_t socrates_nand_read_byte(struct nand_chip *this)
 /*
  * Hardware specific access to control-lines
  */
-static void socrates_nand_cmd_ctrl(struct mtd_info *mtd, int cmd,
-		unsigned int ctrl)
+static void socrates_nand_cmd_ctrl(struct nand_chip *nand_chip, int cmd,
+				   unsigned int ctrl)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
 	struct socrates_nand_host *host = nand_get_controller_data(nand_chip);
 	uint32_t val;
 
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index 97a0666df615..1d85ff02afdb 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -547,10 +547,9 @@ static uint8_t sunxi_nfc_read_byte(struct nand_chip *nand)
 	return ret;
 }
 
-static void sunxi_nfc_cmd_ctrl(struct mtd_info *mtd, int dat,
+static void sunxi_nfc_cmd_ctrl(struct nand_chip *nand, int dat,
 			       unsigned int ctrl)
 {
-	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand);
 	struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller);
 	int ret;
diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c
index 5e0bc2993e5d..c8fb03f71a3b 100644
--- a/drivers/mtd/nand/raw/tango_nand.c
+++ b/drivers/mtd/nand/raw/tango_nand.c
@@ -116,9 +116,9 @@ struct tango_chip {
 
 #define TIMING(t0, t1, t2, t3) ((t0) << 24 | (t1) << 16 | (t2) << 8 | (t3))
 
-static void tango_cmd_ctrl(struct mtd_info *mtd, int dat, unsigned int ctrl)
+static void tango_cmd_ctrl(struct nand_chip *chip, int dat, unsigned int ctrl)
 {
-	struct tango_chip *tchip = to_tango_chip(mtd_to_nand(mtd));
+	struct tango_chip *tchip = to_tango_chip(chip);
 
 	if (ctrl & NAND_CLE)
 		writeb_relaxed(dat, tchip->base + PBUS_CMD);
diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index d627d855b254..1221353b11a7 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -126,11 +126,10 @@ static inline struct tmio_nand *mtd_to_tmio(struct mtd_info *mtd)
 
 /*--------------------------------------------------------------------------*/
 
-static void tmio_nand_hwcontrol(struct mtd_info *mtd, int cmd,
-				   unsigned int ctrl)
+static void tmio_nand_hwcontrol(struct nand_chip *chip, int cmd,
+				unsigned int ctrl)
 {
-	struct tmio_nand *tmio = mtd_to_tmio(mtd);
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct tmio_nand *tmio = mtd_to_tmio(nand_to_mtd(chip));
 
 	if (ctrl & NAND_CTRL_CHANGE) {
 		u8 mode;
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index b7ff8eca441b..f3bce6fb1fac 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -131,10 +131,9 @@ static void txx9ndfmc_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 		*buf++ = __raw_readl(ndfdtr);
 }
 
-static void txx9ndfmc_cmd_ctrl(struct mtd_info *mtd, int cmd,
+static void txx9ndfmc_cmd_ctrl(struct nand_chip *chip, int cmd,
 			       unsigned int ctrl)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct txx9ndfmc_priv *txx9_priv = nand_get_controller_data(chip);
 	struct platform_device *dev = txx9_priv->dev;
 	struct txx9ndfmc_platform_data *plat = dev_get_platdata(&dev->dev);
diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c
index a6388fa1dce7..3b38d31c59c6 100644
--- a/drivers/mtd/nand/raw/xway_nand.c
+++ b/drivers/mtd/nand/raw/xway_nand.c
@@ -105,8 +105,10 @@ static void xway_select_chip(struct nand_chip *chip, int select)
 	}
 }
 
-static void xway_cmd_ctrl(struct mtd_info *mtd, int cmd, unsigned int ctrl)
+static void xway_cmd_ctrl(struct nand_chip *chip, int cmd, unsigned int ctrl)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
+
 	if (cmd == NAND_CMD_NONE)
 		return;
 
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 0d8e2708e125..b53ccc7139c2 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1290,7 +1290,7 @@ struct nand_chip {
 	void (*select_chip)(struct nand_chip *chip, int cs);
 	int (*block_bad)(struct nand_chip *chip, loff_t ofs);
 	int (*block_markbad)(struct nand_chip *chip, loff_t ofs);
-	void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl);
+	void (*cmd_ctrl)(struct nand_chip *chip, int dat, unsigned int ctrl);
 	int (*dev_ready)(struct mtd_info *mtd);
 	void (*cmdfunc)(struct mtd_info *mtd, unsigned command, int column,
 			int page_addr);
-- 
cgit v1.2.3


From 50a487e7719caa07e951dfcfd983b2c5517e2f76 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:27 +0200
Subject: mtd: rawnand: Pass a nand_chip object to chip->dev_ready()

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one or
remove the mtd_info object when both are passed.

Let's tackle the chip->dev_ready() hook.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/ams-delta.c                 |  2 +-
 drivers/mtd/nand/raw/atmel/nand-controller.c     |  6 ++----
 drivers/mtd/nand/raw/au1550nd.c                  |  6 +++---
 drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c |  3 +--
 drivers/mtd/nand/raw/cafe_nand.c                 |  3 +--
 drivers/mtd/nand/raw/cmx270_nand.c               |  2 +-
 drivers/mtd/nand/raw/cs553x_nand.c               |  3 +--
 drivers/mtd/nand/raw/davinci_nand.c              |  4 ++--
 drivers/mtd/nand/raw/denali.c                    |  4 ++--
 drivers/mtd/nand/raw/diskonchip.c                |  5 ++---
 drivers/mtd/nand/raw/fsl_upm.c                   |  6 +++---
 drivers/mtd/nand/raw/gpio.c                      |  4 ++--
 drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c       |  3 +--
 drivers/mtd/nand/raw/jz4740_nand.c               |  4 ++--
 drivers/mtd/nand/raw/jz4780_nand.c               |  4 ++--
 drivers/mtd/nand/raw/lpc32xx_mlc.c               |  3 +--
 drivers/mtd/nand/raw/lpc32xx_slc.c               |  3 +--
 drivers/mtd/nand/raw/mpc5121_nfc.c               |  2 +-
 drivers/mtd/nand/raw/mtk_nand.c                  |  4 ++--
 drivers/mtd/nand/raw/mxc_nand.c                  |  2 +-
 drivers/mtd/nand/raw/nand_base.c                 | 10 +++++-----
 drivers/mtd/nand/raw/nandsim.c                   |  2 +-
 drivers/mtd/nand/raw/ndfc.c                      |  3 +--
 drivers/mtd/nand/raw/nuc900_nand.c               |  6 +++---
 drivers/mtd/nand/raw/omap2.c                     |  4 ++--
 drivers/mtd/nand/raw/pasemi_nand.c               |  2 +-
 drivers/mtd/nand/raw/plat_nand.c                 | 12 +-----------
 drivers/mtd/nand/raw/r852.c                      |  6 +++---
 drivers/mtd/nand/raw/s3c2410.c                   |  9 ++++++---
 drivers/mtd/nand/raw/sharpsl.c                   |  4 ++--
 drivers/mtd/nand/raw/socrates_nand.c             |  3 +--
 drivers/mtd/nand/raw/sunxi_nand.c                |  3 +--
 drivers/mtd/nand/raw/tango_nand.c                |  3 +--
 drivers/mtd/nand/raw/tmio_nand.c                 |  8 ++++----
 drivers/mtd/nand/raw/txx9ndfmc.c                 |  4 ++--
 drivers/mtd/nand/raw/xway_nand.c                 |  2 +-
 include/linux/mtd/rawnand.h                      |  2 +-
 37 files changed, 68 insertions(+), 88 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c
index 8121d26194cf..48413203dbc2 100644
--- a/drivers/mtd/nand/raw/ams-delta.c
+++ b/drivers/mtd/nand/raw/ams-delta.c
@@ -130,7 +130,7 @@ static void ams_delta_hwcontrol(struct nand_chip *this, int cmd,
 		ams_delta_write_byte(this, cmd);
 }
 
-static int ams_delta_nand_ready(struct mtd_info *mtd)
+static int ams_delta_nand_ready(struct nand_chip *this)
 {
 	return gpio_get_value(AMS_DELTA_GPIO_PIN_NAND_RB);
 }
diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index f088bff06723..2dcd8aa0ce0b 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -475,9 +475,8 @@ static void atmel_nand_write_buf(struct nand_chip *chip, const u8 *buf, int len)
 		iowrite8_rep(nand->activecs->io.virt, buf, len);
 }
 
-static int atmel_nand_dev_ready(struct mtd_info *mtd)
+static int atmel_nand_dev_ready(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct atmel_nand *nand = to_atmel_nand(chip);
 
 	return gpiod_get_value(nand->activecs->rb.gpio);
@@ -499,9 +498,8 @@ static void atmel_nand_select_chip(struct nand_chip *chip, int cs)
 		chip->dev_ready = atmel_nand_dev_ready;
 }
 
-static int atmel_hsmc_nand_dev_ready(struct mtd_info *mtd)
+static int atmel_hsmc_nand_dev_ready(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct atmel_nand *nand = to_atmel_nand(chip);
 	struct atmel_hsmc_nand_controller *nc;
 	u32 status;
diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c
index 1bae3b2779aa..1f0fba8d87c6 100644
--- a/drivers/mtd/nand/raw/au1550nd.c
+++ b/drivers/mtd/nand/raw/au1550nd.c
@@ -213,7 +213,7 @@ static void au1550_hwcontrol(struct mtd_info *mtd, int cmd)
 	wmb(); /* Drain the writebuffer */
 }
 
-int au1550_device_ready(struct mtd_info *mtd)
+int au1550_device_ready(struct nand_chip *this)
 {
 	return (alchemy_rdsmem(AU1000_MEM_STSTAT) & 0x1) ? 1 : 0;
 }
@@ -341,7 +341,7 @@ static void au1550_command(struct mtd_info *mtd, unsigned command, int column, i
 		/* Apply a short delay always to ensure that we do wait tWB. */
 		ndelay(100);
 		/* Wait for a chip to become ready... */
-		for (i = this->chip_delay; !this->dev_ready(mtd) && i > 0; --i)
+		for (i = this->chip_delay; !this->dev_ready(this) && i > 0; --i)
 			udelay(1);
 
 		/* Release -CE and re-enable interrupts. */
@@ -352,7 +352,7 @@ static void au1550_command(struct mtd_info *mtd, unsigned command, int column, i
 	/* Apply this short delay always to ensure that we do wait tWB. */
 	ndelay(100);
 
-	while(!this->dev_ready(mtd));
+	while(!this->dev_ready(this));
 }
 
 static int find_nand_cs(unsigned long nand_base)
diff --git a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
index d326f9d3648b..f6f694b3cd8e 100644
--- a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
@@ -196,9 +196,8 @@ static void bcm47xxnflash_ops_bcm4706_select_chip(struct nand_chip *chip,
 	return;
 }
 
-static int bcm47xxnflash_ops_bcm4706_dev_ready(struct mtd_info *mtd)
+static int bcm47xxnflash_ops_bcm4706_dev_ready(struct nand_chip *nand_chip)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
 	struct bcm47xxnflash *b47n = nand_get_controller_data(nand_chip);
 
 	return !!(bcma_cc_read32(b47n->cc, BCMA_CC_NFLASH_CTL) & NCTL_READY);
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index af6870269f9c..60a2eecc2b2a 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -100,9 +100,8 @@ static const char *part_probes[] = { "cmdlinepart", "RedBoot", NULL };
 #define cafe_readl(cafe, addr)			readl((cafe)->mmio + CAFE_##addr)
 #define cafe_writel(cafe, datum, addr)		writel(datum, (cafe)->mmio + CAFE_##addr)
 
-static int cafe_device_ready(struct mtd_info *mtd)
+static int cafe_device_ready(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct cafe_priv *cafe = nand_get_controller_data(chip);
 	int result = !!(cafe_readl(cafe, NAND_STATUS) & 0x40000000);
 	uint32_t irqs = cafe_readl(cafe, NAND_IRQ);
diff --git a/drivers/mtd/nand/raw/cmx270_nand.c b/drivers/mtd/nand/raw/cmx270_nand.c
index a0f0ad2da6f1..e8458036419b 100644
--- a/drivers/mtd/nand/raw/cmx270_nand.c
+++ b/drivers/mtd/nand/raw/cmx270_nand.c
@@ -119,7 +119,7 @@ static void cmx270_hwcontrol(struct nand_chip *this, int dat,
 /*
  *	read device ready pin
  */
-static int cmx270_device_ready(struct mtd_info *mtd)
+static int cmx270_device_ready(struct nand_chip *this)
 {
 	dsb();
 
diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c
index b7432f086f9b..c1628c03282a 100644
--- a/drivers/mtd/nand/raw/cs553x_nand.c
+++ b/drivers/mtd/nand/raw/cs553x_nand.c
@@ -141,9 +141,8 @@ static void cs553x_hwcontrol(struct nand_chip *this, int cmd,
 		cs553x_write_byte(this, cmd);
 }
 
-static int cs553x_device_ready(struct mtd_info *mtd)
+static int cs553x_device_ready(struct nand_chip *this)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	void __iomem *mmio_base = this->IO_ADDR_R;
 	unsigned char foo = readb(mmio_base + MM_NAND_STS);
 
diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index c2a3ad10610c..4b261c73b240 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -460,9 +460,9 @@ static void nand_davinci_write_buf(struct nand_chip *chip, const uint8_t *buf,
  * Check hardware register for wait status. Returns 1 if device is ready,
  * 0 if it is still busy.
  */
-static int nand_davinci_dev_ready(struct mtd_info *mtd)
+static int nand_davinci_dev_ready(struct nand_chip *chip)
 {
-	struct davinci_nand_info *info = to_davinci_nand(mtd);
+	struct davinci_nand_info *info = to_davinci_nand(nand_to_mtd(chip));
 
 	return davinci_nand_readl(info, NANDFSR_OFFSET) & BIT(0);
 }
diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index 6529780e31a4..7258dd13b3f9 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -296,9 +296,9 @@ static void denali_cmd_ctrl(struct nand_chip *chip, int dat, unsigned int ctrl)
 	denali->host_write(denali, DENALI_BANK(denali) | type, dat);
 }
 
-static int denali_dev_ready(struct mtd_info *mtd)
+static int denali_dev_ready(struct nand_chip *chip)
 {
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	struct denali_nand_info *denali = mtd_to_denali(nand_to_mtd(chip));
 
 	return !!(denali_check_irq(denali) & INTR__INT_ACT);
 }
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 16498b277764..e40a4e120c7b 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -739,12 +739,11 @@ static void doc2001plus_command(struct mtd_info *mtd, unsigned command, int colu
 	 * any case on any machine. */
 	ndelay(100);
 	/* wait until command is processed */
-	while (!this->dev_ready(mtd)) ;
+	while (!this->dev_ready(this)) ;
 }
 
-static int doc200x_dev_ready(struct mtd_info *mtd)
+static int doc200x_dev_ready(struct nand_chip *this)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 
diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c
index 7a2488c6c212..48c5215f9a0e 100644
--- a/drivers/mtd/nand/raw/fsl_upm.c
+++ b/drivers/mtd/nand/raw/fsl_upm.c
@@ -52,9 +52,9 @@ static inline struct fsl_upm_nand *to_fsl_upm_nand(struct mtd_info *mtdinfo)
 			    chip);
 }
 
-static int fun_chip_ready(struct mtd_info *mtd)
+static int fun_chip_ready(struct nand_chip *chip)
 {
-	struct fsl_upm_nand *fun = to_fsl_upm_nand(mtd);
+	struct fsl_upm_nand *fun = to_fsl_upm_nand(nand_to_mtd(chip));
 
 	if (gpio_get_value(fun->rnb_gpio[fun->mchip_number]))
 		return 1;
@@ -69,7 +69,7 @@ static void fun_wait_rnb(struct fsl_upm_nand *fun)
 		struct mtd_info *mtd = nand_to_mtd(&fun->chip);
 		int cnt = 1000000;
 
-		while (--cnt && !fun_chip_ready(mtd))
+		while (--cnt && !fun_chip_ready(&fun->chip))
 			cpu_relax();
 		if (!cnt)
 			dev_err(fun->dev, "tired waiting for RNB\n");
diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c
index 722a930ac836..273437c1ae6c 100644
--- a/drivers/mtd/nand/raw/gpio.c
+++ b/drivers/mtd/nand/raw/gpio.c
@@ -94,9 +94,9 @@ static void gpio_nand_cmd_ctrl(struct nand_chip *chip, int cmd,
 	gpio_nand_dosync(gpiomtd);
 }
 
-static int gpio_nand_devready(struct mtd_info *mtd)
+static int gpio_nand_devready(struct nand_chip *chip)
 {
-	struct gpiomtd *gpiomtd = gpio_nand_getpriv(mtd);
+	struct gpiomtd *gpiomtd = gpio_nand_getpriv(nand_to_mtd(chip));
 
 	return gpiod_get_value(gpiomtd->rdy);
 }
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index 460f2f77a424..1ed594a155ed 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -816,9 +816,8 @@ static void gpmi_cmd_ctrl(struct nand_chip *chip, int data, unsigned int ctrl)
 	this->command_length = 0;
 }
 
-static int gpmi_dev_ready(struct mtd_info *mtd)
+static int gpmi_dev_ready(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct gpmi_nand_data *this = nand_get_controller_data(chip);
 
 	return gpmi_is_ready(this, this->current_chip);
diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c
index 7999e691e636..946a71cf816d 100644
--- a/drivers/mtd/nand/raw/jz4740_nand.c
+++ b/drivers/mtd/nand/raw/jz4740_nand.c
@@ -127,9 +127,9 @@ static void jz_nand_cmd_ctrl(struct nand_chip *chip, int dat,
 		writeb(dat, chip->IO_ADDR_W);
 }
 
-static int jz_nand_dev_ready(struct mtd_info *mtd)
+static int jz_nand_dev_ready(struct nand_chip *chip)
 {
-	struct jz_nand *nand = mtd_to_jz_nand(mtd);
+	struct jz_nand *nand = mtd_to_jz_nand(nand_to_mtd(chip));
 	return gpiod_get_value_cansleep(nand->busy_gpio);
 }
 
diff --git a/drivers/mtd/nand/raw/jz4780_nand.c b/drivers/mtd/nand/raw/jz4780_nand.c
index 1d2cba546258..d54b2774f7f9 100644
--- a/drivers/mtd/nand/raw/jz4780_nand.c
+++ b/drivers/mtd/nand/raw/jz4780_nand.c
@@ -109,9 +109,9 @@ static void jz4780_nand_cmd_ctrl(struct nand_chip *chip, int cmd,
 		writeb(cmd, cs->base + OFFSET_CMD);
 }
 
-static int jz4780_nand_dev_ready(struct mtd_info *mtd)
+static int jz4780_nand_dev_ready(struct nand_chip *chip)
 {
-	struct jz4780_nand_chip *nand = to_jz4780_nand_chip(mtd);
+	struct jz4780_nand_chip *nand = to_jz4780_nand_chip(nand_to_mtd(chip));
 
 	return !gpiod_get_value_cansleep(nand->busy_gpio);
 }
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index 0e989d944ddb..726cd8868ac3 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -302,9 +302,8 @@ static void lpc32xx_nand_cmd_ctrl(struct nand_chip *nand_chip, int cmd,
 /*
  * Read Device Ready (NAND device _and_ controller ready)
  */
-static int lpc32xx_nand_device_ready(struct mtd_info *mtd)
+static int lpc32xx_nand_device_ready(struct nand_chip *nand_chip)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
 	struct lpc32xx_nand_host *host = nand_get_controller_data(nand_chip);
 
 	if ((readb(MLC_ISR(host->io_base)) &
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index e42584de875c..26d27a81f814 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -303,9 +303,8 @@ static void lpc32xx_nand_cmd_ctrl(struct nand_chip *chip, int cmd,
 /*
  * Read the Device Ready pin
  */
-static int lpc32xx_nand_device_ready(struct mtd_info *mtd)
+static int lpc32xx_nand_device_ready(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct lpc32xx_nand_host *host = nand_get_controller_data(chip);
 	int rdy = 0;
 
diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c
index c2002c4d467b..ba7af061c0eb 100644
--- a/drivers/mtd/nand/raw/mpc5121_nfc.c
+++ b/drivers/mtd/nand/raw/mpc5121_nfc.c
@@ -320,7 +320,7 @@ static void ads5121_select_chip(struct nand_chip *nand, int chip)
 }
 
 /* Read NAND Ready/Busy signal */
-static int mpc5121_nfc_dev_ready(struct mtd_info *mtd)
+static int mpc5121_nfc_dev_ready(struct nand_chip *nand)
 {
 	/*
 	 * NFC handles ready/busy signal internally. Therefore, this function
diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c
index 6baa41483931..cf8c42fb8feb 100644
--- a/drivers/mtd/nand/raw/mtk_nand.c
+++ b/drivers/mtd/nand/raw/mtk_nand.c
@@ -402,9 +402,9 @@ static void mtk_nfc_select_chip(struct nand_chip *nand, int chip)
 	nfi_writel(nfc, mtk_nand->sels[chip], NFI_CSEL);
 }
 
-static int mtk_nfc_dev_ready(struct mtd_info *mtd)
+static int mtk_nfc_dev_ready(struct nand_chip *nand)
 {
-	struct mtk_nfc *nfc = nand_get_controller_data(mtd_to_nand(mtd));
+	struct mtk_nfc *nfc = nand_get_controller_data(nand);
 
 	if (nfi_readl(nfc, NFI_STA) & STA_BUSY)
 		return 0;
diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index d070ce461b69..82e5b1864399 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -701,7 +701,7 @@ static void mxc_nand_enable_hwecc_v3(struct nand_chip *chip, bool enable)
 }
 
 /* This functions is used by upper layer to checks if device is ready */
-static int mxc_nand_dev_ready(struct mtd_info *mtd)
+static int mxc_nand_dev_ready(struct nand_chip *chip)
 {
 	/*
 	 * NFC handles R/B internally. Therefore, this function
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index f0d70164a2f1..66dae8b69fe8 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -603,7 +603,7 @@ static void panic_nand_wait_ready(struct mtd_info *mtd, unsigned long timeo)
 
 	/* Wait for the device to get ready */
 	for (i = 0; i < timeo; i++) {
-		if (chip->dev_ready(mtd))
+		if (chip->dev_ready(chip))
 			break;
 		touch_softlockup_watchdog();
 		mdelay(1);
@@ -627,12 +627,12 @@ void nand_wait_ready(struct nand_chip *chip)
 	/* Wait until command is processed or timeout occurs */
 	timeo = jiffies + msecs_to_jiffies(timeo);
 	do {
-		if (chip->dev_ready(mtd))
+		if (chip->dev_ready(chip))
 			return;
 		cond_resched();
 	} while (time_before(jiffies, timeo));
 
-	if (!chip->dev_ready(mtd))
+	if (!chip->dev_ready(chip))
 		pr_warn_ratelimited("timeout while waiting for chip to become ready\n");
 }
 EXPORT_SYMBOL_GPL(nand_wait_ready);
@@ -1068,7 +1068,7 @@ static void panic_nand_wait(struct mtd_info *mtd, struct nand_chip *chip,
 	int i;
 	for (i = 0; i < timeo; i++) {
 		if (chip->dev_ready) {
-			if (chip->dev_ready(mtd))
+			if (chip->dev_ready(chip))
 				break;
 		} else {
 			int ret;
@@ -1116,7 +1116,7 @@ static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip)
 		timeo = jiffies + msecs_to_jiffies(timeo);
 		do {
 			if (chip->dev_ready) {
-				if (chip->dev_ready(mtd))
+				if (chip->dev_ready(chip))
 					break;
 			} else {
 				ret = nand_read_data_op(chip, &status,
diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c
index a6b626c935a8..f750783d5d6a 100644
--- a/drivers/mtd/nand/raw/nandsim.c
+++ b/drivers/mtd/nand/raw/nandsim.c
@@ -2099,7 +2099,7 @@ static void ns_hwcontrol(struct nand_chip *chip, int cmd, unsigned int bitmask)
 		ns_nand_write_byte(chip, cmd);
 }
 
-static int ns_device_ready(struct mtd_info *mtd)
+static int ns_device_ready(struct nand_chip *chip)
 {
 	NS_DBG("device_ready\n");
 	return 1;
diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index 05ac7bf94874..b96070a3afff 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -71,9 +71,8 @@ static void ndfc_hwcontrol(struct nand_chip *chip, int cmd, unsigned int ctrl)
 		writel(cmd & 0xFF, ndfc->ndfcbase + NDFC_ALE);
 }
 
-static int ndfc_ready(struct mtd_info *mtd)
+static int ndfc_ready(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct ndfc_controller *ndfc = nand_get_controller_data(chip);
 
 	return in_be32(ndfc->ndfcbase + NDFC_STAT) & NDFC_STAT_IS_READY;
diff --git a/drivers/mtd/nand/raw/nuc900_nand.c b/drivers/mtd/nand/raw/nuc900_nand.c
index 357b3cf03195..4029b802243d 100644
--- a/drivers/mtd/nand/raw/nuc900_nand.c
+++ b/drivers/mtd/nand/raw/nuc900_nand.c
@@ -120,9 +120,9 @@ static int nuc900_check_rb(struct nuc900_nand *nand)
 	return val;
 }
 
-static int nuc900_nand_devready(struct mtd_info *mtd)
+static int nuc900_nand_devready(struct nand_chip *chip)
 {
-	struct nuc900_nand *nand = mtd_to_nuc900(mtd);
+	struct nuc900_nand *nand = mtd_to_nuc900(nand_to_mtd(chip));
 	int ready;
 
 	ready = (nuc900_check_rb(nand)) ? 1 : 0;
@@ -205,7 +205,7 @@ static void nuc900_nand_command_lp(struct mtd_info *mtd, unsigned int command,
 	 * any case on any machine. */
 	ndelay(100);
 
-	while (!chip->dev_ready(mtd))
+	while (!chip->dev_ready(chip))
 		;
 }
 
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index 4bae782cd877..eef9cbadd3c4 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -1021,9 +1021,9 @@ static int omap_wait(struct mtd_info *mtd, struct nand_chip *chip)
  *
  * Returns true if ready and false if busy.
  */
-static int omap_dev_ready(struct mtd_info *mtd)
+static int omap_dev_ready(struct nand_chip *chip)
 {
-	struct omap_nand_info *info = mtd_to_omap(mtd);
+	struct omap_nand_info *info = mtd_to_omap(nand_to_mtd(chip));
 
 	return gpiod_get_value(info->ready_gpiod);
 }
diff --git a/drivers/mtd/nand/raw/pasemi_nand.c b/drivers/mtd/nand/raw/pasemi_nand.c
index 661ba57f2934..a1e3bf7a276b 100644
--- a/drivers/mtd/nand/raw/pasemi_nand.c
+++ b/drivers/mtd/nand/raw/pasemi_nand.c
@@ -80,7 +80,7 @@ static void pasemi_hwcontrol(struct nand_chip *chip, int cmd,
 	inl(lpcctl);
 }
 
-int pasemi_device_ready(struct mtd_info *mtd)
+int pasemi_device_ready(struct nand_chip *chip)
 {
 	return !!(inl(lpcctl) & LBICTRL_LPCCTL_NR);
 }
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index bfb5d8e7b00b..d65e4084dea4 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -23,13 +23,6 @@ struct plat_nand_data {
 	void __iomem		*io_base;
 };
 
-static int plat_nand_dev_ready(struct mtd_info *mtd)
-{
-	struct platform_nand_data *pdata = dev_get_platdata(mtd->dev.parent);
-
-	return pdata->ctrl.dev_ready(mtd_to_nand(mtd));
-}
-
 /*
  * Probe for the NAND device.
  */
@@ -70,10 +63,7 @@ static int plat_nand_probe(struct platform_device *pdev)
 	data->chip.IO_ADDR_R = data->io_base;
 	data->chip.IO_ADDR_W = data->io_base;
 	data->chip.cmd_ctrl = pdata->ctrl.cmd_ctrl;
-
-	if (pdata->ctrl.dev_ready)
-		data->chip.dev_ready = plat_nand_dev_ready;
-
+	data->chip.dev_ready = pdata->ctrl.dev_ready;
 	data->chip.select_chip = pdata->ctrl.select_chip;
 	data->chip.write_buf = pdata->ctrl.write_buf;
 	data->chip.read_buf = pdata->ctrl.read_buf;
diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c
index e90549e031a7..4331ff856fa5 100644
--- a/drivers/mtd/nand/raw/r852.c
+++ b/drivers/mtd/nand/raw/r852.c
@@ -373,7 +373,7 @@ static int r852_wait(struct mtd_info *mtd, struct nand_chip *chip)
 		msecs_to_jiffies(400) : msecs_to_jiffies(20));
 
 	while (time_before(jiffies, timeout))
-		if (chip->dev_ready(mtd))
+		if (chip->dev_ready(chip))
 			break;
 
 	nand_status_op(chip, &status);
@@ -390,9 +390,9 @@ static int r852_wait(struct mtd_info *mtd, struct nand_chip *chip)
  * Check if card is ready
  */
 
-static int r852_ready(struct mtd_info *mtd)
+static int r852_ready(struct nand_chip *chip)
 {
-	struct r852_device *dev = r852_get_dev(mtd);
+	struct r852_device *dev = r852_get_dev(nand_to_mtd(chip));
 	return !(r852_read_reg(dev, R852_CARD_STA) & R852_CARD_STA_BUSY);
 }
 
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index 98ba94936631..1d549f5e53f5 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -493,20 +493,23 @@ static void s3c2440_nand_hwcontrol(struct nand_chip *chip, int cmd,
  * returns 0 if the nand is busy, 1 if it is ready
 */
 
-static int s3c2410_nand_devready(struct mtd_info *mtd)
+static int s3c2410_nand_devready(struct nand_chip *chip)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
 	return readb(info->regs + S3C2410_NFSTAT) & S3C2410_NFSTAT_BUSY;
 }
 
-static int s3c2440_nand_devready(struct mtd_info *mtd)
+static int s3c2440_nand_devready(struct nand_chip *chip)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
 	return readb(info->regs + S3C2440_NFSTAT) & S3C2440_NFSTAT_READY;
 }
 
-static int s3c2412_nand_devready(struct mtd_info *mtd)
+static int s3c2412_nand_devready(struct nand_chip *chip)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
 	return readb(info->regs + S3C2412_NFSTAT) & S3C2412_NFSTAT_READY;
 }
diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c
index 7486a00b1ae5..31abbe33798e 100644
--- a/drivers/mtd/nand/raw/sharpsl.c
+++ b/drivers/mtd/nand/raw/sharpsl.c
@@ -78,9 +78,9 @@ static void sharpsl_nand_hwcontrol(struct nand_chip *chip, int cmd,
 		writeb(cmd, chip->IO_ADDR_W);
 }
 
-static int sharpsl_nand_dev_ready(struct mtd_info *mtd)
+static int sharpsl_nand_dev_ready(struct nand_chip *chip)
 {
-	struct sharpsl_nand *sharpsl = mtd_to_sharpsl(mtd);
+	struct sharpsl_nand *sharpsl = mtd_to_sharpsl(nand_to_mtd(chip));
 	return !((readb(sharpsl->io + FLASHCTL) & FLRYBY) == 0);
 }
 
diff --git a/drivers/mtd/nand/raw/socrates_nand.c b/drivers/mtd/nand/raw/socrates_nand.c
index c44b19fc1350..64ea9a014054 100644
--- a/drivers/mtd/nand/raw/socrates_nand.c
+++ b/drivers/mtd/nand/raw/socrates_nand.c
@@ -112,9 +112,8 @@ static void socrates_nand_cmd_ctrl(struct nand_chip *nand_chip, int cmd,
 /*
  * Read the Device Ready pin.
  */
-static int socrates_nand_device_ready(struct mtd_info *mtd)
+static int socrates_nand_device_ready(struct nand_chip *nand_chip)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
 	struct socrates_nand_host *host = nand_get_controller_data(nand_chip);
 
 	if (in_be32(host->io_base) & FPGA_NAND_BUSY)
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index 1d85ff02afdb..fe30fb589ffb 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -400,9 +400,8 @@ static void sunxi_nfc_dma_op_cleanup(struct mtd_info *mtd,
 	       nfc->regs + NFC_REG_CTL);
 }
 
-static int sunxi_nfc_dev_ready(struct mtd_info *mtd)
+static int sunxi_nfc_dev_ready(struct nand_chip *nand)
 {
-	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct sunxi_nand_chip *sunxi_nand = to_sunxi_nand(nand);
 	struct sunxi_nfc *nfc = to_sunxi_nfc(sunxi_nand->nand.controller);
 	u32 mask;
diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c
index c8fb03f71a3b..cc719bc49b68 100644
--- a/drivers/mtd/nand/raw/tango_nand.c
+++ b/drivers/mtd/nand/raw/tango_nand.c
@@ -127,9 +127,8 @@ static void tango_cmd_ctrl(struct nand_chip *chip, int dat, unsigned int ctrl)
 		writeb_relaxed(dat, tchip->base + PBUS_ADDR);
 }
 
-static int tango_dev_ready(struct mtd_info *mtd)
+static int tango_dev_ready(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct tango_nfc *nfc = to_tango_nfc(chip->controller);
 
 	return readl_relaxed(nfc->pbus_base + PBUS_CS_CTRL) & PBUS_IORDY;
diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index 1221353b11a7..7096fa3d50ab 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -158,9 +158,9 @@ static void tmio_nand_hwcontrol(struct nand_chip *chip, int cmd,
 		tmio_iowrite8(cmd, chip->IO_ADDR_W);
 }
 
-static int tmio_nand_dev_ready(struct mtd_info *mtd)
+static int tmio_nand_dev_ready(struct nand_chip *chip)
 {
-	struct tmio_nand *tmio = mtd_to_tmio(mtd);
+	struct tmio_nand *tmio = mtd_to_tmio(nand_to_mtd(chip));
 
 	return !(tmio_ioread8(tmio->fcr + FCR_STATUS) & FCR_STATUS_BUSY);
 }
@@ -198,10 +198,10 @@ tmio_nand_wait(struct mtd_info *mtd, struct nand_chip *nand_chip)
 	tmio_iowrite8(0x81, tmio->fcr + FCR_IMR);
 
 	timeout = wait_event_timeout(nand_chip->controller->wq,
-		tmio_nand_dev_ready(mtd),
+		tmio_nand_dev_ready(nand_chip),
 		msecs_to_jiffies(nand_chip->state == FL_ERASING ? 400 : 20));
 
-	if (unlikely(!tmio_nand_dev_ready(mtd))) {
+	if (unlikely(!tmio_nand_dev_ready(nand_chip))) {
 		tmio_iowrite8(0x00, tmio->fcr + FCR_IMR);
 		dev_warn(&tmio->dev->dev, "still busy with %s after %d ms\n",
 			nand_chip->state == FL_ERASING ? "erase" : "program",
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index f3bce6fb1fac..c84b2ad84cf7 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -162,9 +162,9 @@ static void txx9ndfmc_cmd_ctrl(struct nand_chip *chip, int cmd,
 	mmiowb();
 }
 
-static int txx9ndfmc_dev_ready(struct mtd_info *mtd)
+static int txx9ndfmc_dev_ready(struct nand_chip *chip)
 {
-	struct platform_device *dev = mtd_to_platdev(mtd);
+	struct platform_device *dev = mtd_to_platdev(nand_to_mtd(chip));
 
 	return !(txx9ndfmc_read(dev, TXX9_NDFSR) & TXX9_NDFSR_BUSY);
 }
diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c
index 3b38d31c59c6..3d91e98df5a8 100644
--- a/drivers/mtd/nand/raw/xway_nand.c
+++ b/drivers/mtd/nand/raw/xway_nand.c
@@ -121,7 +121,7 @@ static void xway_cmd_ctrl(struct nand_chip *chip, int cmd, unsigned int ctrl)
 		;
 }
 
-static int xway_dev_ready(struct mtd_info *mtd)
+static int xway_dev_ready(struct nand_chip *chip)
 {
 	return ltq_ebu_r32(EBU_NAND_WAIT) & NAND_WAIT_RD;
 }
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index b53ccc7139c2..404ac7d4b279 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1291,7 +1291,7 @@ struct nand_chip {
 	int (*block_bad)(struct nand_chip *chip, loff_t ofs);
 	int (*block_markbad)(struct nand_chip *chip, loff_t ofs);
 	void (*cmd_ctrl)(struct nand_chip *chip, int dat, unsigned int ctrl);
-	int (*dev_ready)(struct mtd_info *mtd);
+	int (*dev_ready)(struct nand_chip *chip);
 	void (*cmdfunc)(struct mtd_info *mtd, unsigned command, int column,
 			int page_addr);
 	int(*waitfunc)(struct mtd_info *mtd, struct nand_chip *this);
-- 
cgit v1.2.3


From 5295cf2e047cf60ac1e14d4789cdf698af45cf2f Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:28 +0200
Subject: mtd: rawnand: Pass a nand_chip object to chip->cmdfunc()

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one or
remove the mtd_info object when both are passed.

Let's tackle the chip->cmdfunc() hook.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/au1550nd.c                  |  7 ++--
 drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c |  4 +--
 drivers/mtd/nand/raw/brcmnand/brcmnand.c         |  4 +--
 drivers/mtd/nand/raw/cafe_nand.c                 |  4 +--
 drivers/mtd/nand/raw/diskonchip.c                |  5 +--
 drivers/mtd/nand/raw/docg4.c                     |  4 +--
 drivers/mtd/nand/raw/fsl_elbc_nand.c             |  4 +--
 drivers/mtd/nand/raw/fsl_ifc_nand.c              |  6 ++--
 drivers/mtd/nand/raw/hisi504_nand.c              |  6 ++--
 drivers/mtd/nand/raw/mpc5121_nfc.c               |  8 ++---
 drivers/mtd/nand/raw/mxc_nand.c                  |  6 ++--
 drivers/mtd/nand/raw/nand_base.c                 | 46 ++++++++++++------------
 drivers/mtd/nand/raw/nand_hynix.c                |  7 ++--
 drivers/mtd/nand/raw/nuc900_nand.c               |  5 +--
 drivers/mtd/nand/raw/qcom_nandc.c                |  3 +-
 drivers/mtd/nand/raw/sh_flctl.c                  |  3 +-
 drivers/staging/mt29f_spinand/mt29f_spinand.c    |  4 +--
 include/linux/mtd/rawnand.h                      |  2 +-
 18 files changed, 64 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c
index 1f0fba8d87c6..d0ec8606e769 100644
--- a/drivers/mtd/nand/raw/au1550nd.c
+++ b/drivers/mtd/nand/raw/au1550nd.c
@@ -236,14 +236,15 @@ static void au1550_select_chip(struct nand_chip *this, int chip)
 
 /**
  * au1550_command - Send command to NAND device
- * @mtd:	MTD device structure
+ * @this:	NAND chip object
  * @command:	the command to be sent
  * @column:	the column address for this command, -1 if none
  * @page_addr:	the page address for this command, -1 if none
  */
-static void au1550_command(struct mtd_info *mtd, unsigned command, int column, int page_addr)
+static void au1550_command(struct nand_chip *this, unsigned command,
+			   int column, int page_addr)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(this);
 	struct au1550nd_ctx *ctx = container_of(this, struct au1550nd_ctx,
 						chip);
 	int ce_override = 0, i;
diff --git a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
index f6f694b3cd8e..59e1b88aae38 100644
--- a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
@@ -210,11 +210,11 @@ static int bcm47xxnflash_ops_bcm4706_dev_ready(struct nand_chip *nand_chip)
  * registers of ChipCommon core. Hacking cmd_ctrl to understand and convert
  * standard commands would be much more complicated.
  */
-static void bcm47xxnflash_ops_bcm4706_cmdfunc(struct mtd_info *mtd,
+static void bcm47xxnflash_ops_bcm4706_cmdfunc(struct nand_chip *nand_chip,
 					      unsigned command, int column,
 					      int page_addr)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(nand_chip);
 	struct bcm47xxnflash *b47n = nand_get_controller_data(nand_chip);
 	struct bcma_drv_cc *cc = b47n->cc;
 	u32 ctlcode;
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index 80f5b4b9ee75..4b814a39b24f 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -1310,10 +1310,10 @@ static int brcmnand_low_level_op(struct brcmnand_host *host,
 	return brcmnand_waitfunc(mtd, chip);
 }
 
-static void brcmnand_cmdfunc(struct mtd_info *mtd, unsigned command,
+static void brcmnand_cmdfunc(struct nand_chip *chip, unsigned command,
 			     int column, int page_addr)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct brcmnand_host *host = nand_get_controller_data(chip);
 	struct brcmnand_controller *ctrl = host->ctrl;
 	u64 addr = (u64)page_addr << chip->page_shift;
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index 60a2eecc2b2a..801045d77872 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -156,10 +156,10 @@ static uint8_t cafe_read_byte(struct nand_chip *chip)
 	return d;
 }
 
-static void cafe_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
+static void cafe_nand_cmdfunc(struct nand_chip *chip, unsigned command,
 			      int column, int page_addr)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct cafe_priv *cafe = nand_get_controller_data(chip);
 	int adrbytes = 0;
 	uint32_t ctl1;
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index e40a4e120c7b..64bf0624343d 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -637,9 +637,10 @@ static void doc200x_hwcontrol(struct nand_chip *this, int cmd,
 	}
 }
 
-static void doc2001plus_command(struct mtd_info *mtd, unsigned command, int column, int page_addr)
+static void doc2001plus_command(struct nand_chip *this, unsigned command,
+				int column, int page_addr)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(this);
 	struct doc_priv *doc = nand_get_controller_data(this);
 	void __iomem *docptr = doc->virtadr;
 
diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c
index 9e6255408d49..ba3b949369cb 100644
--- a/drivers/mtd/nand/raw/docg4.c
+++ b/drivers/mtd/nand/raw/docg4.c
@@ -705,12 +705,12 @@ static uint32_t mtd_to_docg4_address(int page, int column)
 	return (g4_page << 16) | g4_index;	      /* pack */
 }
 
-static void docg4_command(struct mtd_info *mtd, unsigned command, int column,
+static void docg4_command(struct nand_chip *nand, unsigned command, int column,
 			  int page_addr)
 {
 	/* handle standard nand commands */
 
-	struct nand_chip *nand = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(nand);
 	struct docg4_priv *doc = nand_get_controller_data(nand);
 	uint32_t g4_addr = mtd_to_docg4_address(page_addr, column);
 
diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
index 74b804a61f2d..93b82af3e518 100644
--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
@@ -317,10 +317,10 @@ static void fsl_elbc_do_read(struct nand_chip *chip, int oob)
 }
 
 /* cmdfunc send commands to the FCM */
-static void fsl_elbc_cmdfunc(struct mtd_info *mtd, unsigned int command,
+static void fsl_elbc_cmdfunc(struct nand_chip *chip, unsigned int command,
                              int column, int page_addr)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct fsl_elbc_mtd *priv = nand_get_controller_data(chip);
 	struct fsl_lbc_ctrl *ctrl = priv->ctrl;
 	struct fsl_elbc_fcm_ctrl *elbc_fcm_ctrl = ctrl->nand;
diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index da846ffa3e5c..34962da03238 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -301,9 +301,9 @@ static void fsl_ifc_do_read(struct nand_chip *chip,
 }
 
 /* cmdfunc send commands to the IFC NAND Machine */
-static void fsl_ifc_cmdfunc(struct mtd_info *mtd, unsigned int command,
-			     int column, int page_addr) {
-	struct nand_chip *chip = mtd_to_nand(mtd);
+static void fsl_ifc_cmdfunc(struct nand_chip *chip, unsigned int command,
+			    int column, int page_addr) {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct fsl_ifc_mtd *priv = nand_get_controller_data(chip);
 	struct fsl_ifc_ctrl *ctrl = priv->ctrl;
 	struct fsl_ifc_runtime __iomem *ifc = ctrl->rregs;
diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c
index 86dd7b54159d..928a320c8517 100644
--- a/drivers/mtd/nand/raw/hisi504_nand.c
+++ b/drivers/mtd/nand/raw/hisi504_nand.c
@@ -429,10 +429,10 @@ static void set_addr(struct mtd_info *mtd, int column, int page_addr)
 	}
 }
 
-static void hisi_nfc_cmdfunc(struct mtd_info *mtd, unsigned command, int column,
-		int page_addr)
+static void hisi_nfc_cmdfunc(struct nand_chip *chip, unsigned command,
+			     int column, int page_addr)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct hinfc_host *host = nand_get_controller_data(chip);
 	int is_cache_invalid = 1;
 	unsigned int flag = 0;
diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c
index ba7af061c0eb..bd027674898d 100644
--- a/drivers/mtd/nand/raw/mpc5121_nfc.c
+++ b/drivers/mtd/nand/raw/mpc5121_nfc.c
@@ -330,10 +330,10 @@ static int mpc5121_nfc_dev_ready(struct nand_chip *nand)
 }
 
 /* Write command to NAND flash */
-static void mpc5121_nfc_command(struct mtd_info *mtd, unsigned command,
-							int column, int page)
+static void mpc5121_nfc_command(struct nand_chip *chip, unsigned command,
+				int column, int page)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct mpc5121_nfc_prv *prv = nand_get_controller_data(chip);
 
 	prv->column = (column >= 0) ? column : 0;
@@ -364,7 +364,7 @@ static void mpc5121_nfc_command(struct mtd_info *mtd, unsigned command,
 		break;
 
 	case NAND_CMD_SEQIN:
-		mpc5121_nfc_command(mtd, NAND_CMD_READ0, column, page);
+		mpc5121_nfc_command(chip, NAND_CMD_READ0, column, page);
 		column = 0;
 		break;
 
diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index 82e5b1864399..a03a33656cf4 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -1333,10 +1333,10 @@ static void preset_v3(struct mtd_info *mtd)
 
 /* Used by the upper layer to write command to NAND Flash for
  * different operations to be carried out on NAND Flash */
-static void mxc_nand_command(struct mtd_info *mtd, unsigned command,
-				int column, int page_addr)
+static void mxc_nand_command(struct nand_chip *nand_chip, unsigned command,
+			     int column, int page_addr)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(nand_chip);
 	struct mxc_nand_host *host = nand_get_controller_data(nand_chip);
 
 	dev_dbg(host->dev, "mxc_nand_command (cmd = 0x%x, col = 0x%x, page = 0x%x)\n",
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 66dae8b69fe8..a74264f36a70 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -730,7 +730,7 @@ EXPORT_SYMBOL_GPL(nand_soft_waitrdy);
 
 /**
  * nand_command - [DEFAULT] Send command to NAND device
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @command: the command to be sent
  * @column: the column address for this command, -1 if none
  * @page_addr: the page address for this command, -1 if none
@@ -738,10 +738,10 @@ EXPORT_SYMBOL_GPL(nand_soft_waitrdy);
  * Send command to NAND device. This function is used for small page devices
  * (512 Bytes per page).
  */
-static void nand_command(struct mtd_info *mtd, unsigned int command,
+static void nand_command(struct nand_chip *chip, unsigned int command,
 			 int column, int page_addr)
 {
-	register struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ctrl = NAND_CTRL_CLE | NAND_CTRL_CHANGE;
 
 	/* Write out the command to the device */
@@ -864,7 +864,7 @@ static void nand_ccs_delay(struct nand_chip *chip)
 
 /**
  * nand_command_lp - [DEFAULT] Send command to NAND large page device
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @command: the command to be sent
  * @column: the column address for this command, -1 if none
  * @page_addr: the page address for this command, -1 if none
@@ -873,10 +873,10 @@ static void nand_ccs_delay(struct nand_chip *chip)
  * devices. We don't have the separate regions as we have in the small page
  * devices. We must emulate NAND_CMD_READOOB to keep the code compatible.
  */
-static void nand_command_lp(struct mtd_info *mtd, unsigned int command,
+static void nand_command_lp(struct nand_chip *chip, unsigned int command,
 			    int column, int page_addr)
 {
-	register struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 
 	/* Emulate NAND_CMD_READOOB */
 	if (command == NAND_CMD_READOOB) {
@@ -1530,7 +1530,7 @@ int nand_read_page_op(struct nand_chip *chip, unsigned int page,
 						 buf, len);
 	}
 
-	chip->cmdfunc(mtd, NAND_CMD_READ0, offset_in_page, page);
+	chip->cmdfunc(chip, NAND_CMD_READ0, offset_in_page, page);
 	if (len)
 		chip->read_buf(chip, buf, len);
 
@@ -1579,7 +1579,7 @@ static int nand_read_param_page_op(struct nand_chip *chip, u8 page, void *buf,
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(mtd, NAND_CMD_PARAM, page, -1);
+	chip->cmdfunc(chip, NAND_CMD_PARAM, page, -1);
 	for (i = 0; i < len; i++)
 		p[i] = chip->read_byte(chip);
 
@@ -1642,7 +1642,7 @@ int nand_change_read_column_op(struct nand_chip *chip,
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(mtd, NAND_CMD_RNDOUT, offset_in_page, -1);
+	chip->cmdfunc(chip, NAND_CMD_RNDOUT, offset_in_page, -1);
 	if (len)
 		chip->read_buf(chip, buf, len);
 
@@ -1679,7 +1679,7 @@ int nand_read_oob_op(struct nand_chip *chip, unsigned int page,
 					 mtd->writesize + offset_in_oob,
 					 buf, len);
 
-	chip->cmdfunc(mtd, NAND_CMD_READOOB, offset_in_oob, page);
+	chip->cmdfunc(chip, NAND_CMD_READOOB, offset_in_oob, page);
 	if (len)
 		chip->read_buf(chip, buf, len);
 
@@ -1791,7 +1791,7 @@ int nand_prog_page_begin_op(struct nand_chip *chip, unsigned int page,
 		return nand_exec_prog_page_op(chip, page, offset_in_page, buf,
 					      len, false);
 
-	chip->cmdfunc(mtd, NAND_CMD_SEQIN, offset_in_page, page);
+	chip->cmdfunc(chip, NAND_CMD_SEQIN, offset_in_page, page);
 
 	if (buf)
 		chip->write_buf(chip, buf, len);
@@ -1833,7 +1833,7 @@ int nand_prog_page_end_op(struct nand_chip *chip)
 		if (ret)
 			return ret;
 	} else {
-		chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+		chip->cmdfunc(chip, NAND_CMD_PAGEPROG, -1, -1);
 		ret = chip->waitfunc(mtd, chip);
 		if (ret < 0)
 			return ret;
@@ -1878,9 +1878,9 @@ int nand_prog_page_op(struct nand_chip *chip, unsigned int page,
 		status = nand_exec_prog_page_op(chip, page, offset_in_page, buf,
 						len, true);
 	} else {
-		chip->cmdfunc(mtd, NAND_CMD_SEQIN, offset_in_page, page);
+		chip->cmdfunc(chip, NAND_CMD_SEQIN, offset_in_page, page);
 		chip->write_buf(chip, buf, len);
-		chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
+		chip->cmdfunc(chip, NAND_CMD_PAGEPROG, -1, -1);
 		status = chip->waitfunc(mtd, chip);
 	}
 
@@ -1946,7 +1946,7 @@ int nand_change_write_column_op(struct nand_chip *chip,
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(mtd, NAND_CMD_RNDIN, offset_in_page, -1);
+	chip->cmdfunc(chip, NAND_CMD_RNDIN, offset_in_page, -1);
 	if (len)
 		chip->write_buf(chip, buf, len);
 
@@ -1994,7 +1994,7 @@ int nand_readid_op(struct nand_chip *chip, u8 addr, void *buf,
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(mtd, NAND_CMD_READID, addr, -1);
+	chip->cmdfunc(chip, NAND_CMD_READID, addr, -1);
 
 	for (i = 0; i < len; i++)
 		id[i] = chip->read_byte(chip);
@@ -2034,7 +2034,7 @@ int nand_status_op(struct nand_chip *chip, u8 *status)
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1);
+	chip->cmdfunc(chip, NAND_CMD_STATUS, -1, -1);
 	if (status)
 		*status = chip->read_byte(chip);
 
@@ -2066,7 +2066,7 @@ int nand_exit_status_op(struct nand_chip *chip)
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(mtd, NAND_CMD_READ0, -1, -1);
+	chip->cmdfunc(chip, NAND_CMD_READ0, -1, -1);
 
 	return 0;
 }
@@ -2115,8 +2115,8 @@ int nand_erase_op(struct nand_chip *chip, unsigned int eraseblock)
 		if (ret)
 			return ret;
 	} else {
-		chip->cmdfunc(mtd, NAND_CMD_ERASE1, -1, page);
-		chip->cmdfunc(mtd, NAND_CMD_ERASE2, -1, -1);
+		chip->cmdfunc(chip, NAND_CMD_ERASE1, -1, page);
+		chip->cmdfunc(chip, NAND_CMD_ERASE2, -1, -1);
 
 		ret = chip->waitfunc(mtd, chip);
 		if (ret < 0)
@@ -2166,7 +2166,7 @@ static int nand_set_features_op(struct nand_chip *chip, u8 feature,
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(mtd, NAND_CMD_SET_FEATURES, feature, -1);
+	chip->cmdfunc(chip, NAND_CMD_SET_FEATURES, feature, -1);
 	for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i)
 		chip->write_byte(chip, params[i]);
 
@@ -2215,7 +2215,7 @@ static int nand_get_features_op(struct nand_chip *chip, u8 feature,
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(mtd, NAND_CMD_GET_FEATURES, feature, -1);
+	chip->cmdfunc(chip, NAND_CMD_GET_FEATURES, feature, -1);
 	for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i)
 		params[i] = chip->read_byte(chip);
 
@@ -2270,7 +2270,7 @@ int nand_reset_op(struct nand_chip *chip)
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(mtd, NAND_CMD_RESET, -1, -1);
+	chip->cmdfunc(chip, NAND_CMD_RESET, -1, -1);
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/raw/nand_hynix.c b/drivers/mtd/nand/raw/nand_hynix.c
index 197256c2e1ee..fa873e517131 100644
--- a/drivers/mtd/nand/raw/nand_hynix.c
+++ b/drivers/mtd/nand/raw/nand_hynix.c
@@ -79,8 +79,6 @@ static bool hynix_nand_has_valid_jedecid(struct nand_chip *chip)
 
 static int hynix_nand_cmd_op(struct nand_chip *chip, u8 cmd)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
-
 	if (chip->exec_op) {
 		struct nand_op_instr instrs[] = {
 			NAND_OP_CMD(cmd, 0),
@@ -90,14 +88,13 @@ static int hynix_nand_cmd_op(struct nand_chip *chip, u8 cmd)
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(mtd, cmd, -1, -1);
+	chip->cmdfunc(chip, cmd, -1, -1);
 
 	return 0;
 }
 
 static int hynix_nand_reg_write_op(struct nand_chip *chip, u8 addr, u8 val)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
 	u16 column = ((u16)addr << 8) | addr;
 
 	if (chip->exec_op) {
@@ -110,7 +107,7 @@ static int hynix_nand_reg_write_op(struct nand_chip *chip, u8 addr, u8 val)
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(mtd, NAND_CMD_NONE, column, -1);
+	chip->cmdfunc(chip, NAND_CMD_NONE, column, -1);
 	chip->write_byte(chip, val);
 
 	return 0;
diff --git a/drivers/mtd/nand/raw/nuc900_nand.c b/drivers/mtd/nand/raw/nuc900_nand.c
index 4029b802243d..3aae5fda5399 100644
--- a/drivers/mtd/nand/raw/nuc900_nand.c
+++ b/drivers/mtd/nand/raw/nuc900_nand.c
@@ -129,10 +129,11 @@ static int nuc900_nand_devready(struct nand_chip *chip)
 	return ready;
 }
 
-static void nuc900_nand_command_lp(struct mtd_info *mtd, unsigned int command,
+static void nuc900_nand_command_lp(struct nand_chip *chip,
+				   unsigned int command,
 				   int column, int page_addr)
 {
-	register struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct nuc900_nand *nand = mtd_to_nuc900(mtd);
 
 	if (command == NAND_CMD_READOOB) {
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index c6eb205e0f76..9037dddff99a 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -1440,10 +1440,9 @@ static void post_command(struct qcom_nand_host *host, int command)
  * NAND_CMD_READOOB would never be called because we have our own versions
  * of read_oob ops for nand_ecc_ctrl.
  */
-static void qcom_nandc_command(struct mtd_info *mtd, unsigned int command,
+static void qcom_nandc_command(struct nand_chip *chip, unsigned int command,
 			       int column, int page_addr)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct qcom_nand_host *host = to_qcom_nand_host(chip);
 	struct nand_ecc_ctrl *ecc = &chip->ecc;
 	struct qcom_nand_controller *nandc = get_qcom_nand_controller(chip);
diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c
index e2a4939971b5..4b1c7e435937 100644
--- a/drivers/mtd/nand/raw/sh_flctl.c
+++ b/drivers/mtd/nand/raw/sh_flctl.c
@@ -750,9 +750,10 @@ static void execmd_write_oob(struct mtd_info *mtd)
 	}
 }
 
-static void flctl_cmdfunc(struct mtd_info *mtd, unsigned int command,
+static void flctl_cmdfunc(struct nand_chip *chip, unsigned int command,
 			int column, int page_addr)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct sh_flctl *flctl = mtd_to_flctl(mtd);
 	uint32_t read_cmd = 0;
 
diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c
index c0df8b6ab19b..724e66c92fd2 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.c
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c
@@ -759,10 +759,10 @@ static void spinand_reset(struct spi_device *spi_nand)
 		dev_err(&spi_nand->dev, "wait timedout!\n");
 }
 
-static void spinand_cmdfunc(struct mtd_info *mtd, unsigned int command,
+static void spinand_cmdfunc(struct nand_chip *chip, unsigned int command,
 			    int column, int page)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct spinand_info *info = nand_get_controller_data(chip);
 	struct spinand_state *state = info->priv;
 
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 404ac7d4b279..2a74de9012c4 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1292,7 +1292,7 @@ struct nand_chip {
 	int (*block_markbad)(struct nand_chip *chip, loff_t ofs);
 	void (*cmd_ctrl)(struct nand_chip *chip, int dat, unsigned int ctrl);
 	int (*dev_ready)(struct nand_chip *chip);
-	void (*cmdfunc)(struct mtd_info *mtd, unsigned command, int column,
+	void (*cmdfunc)(struct nand_chip *chip, unsigned command, int column,
 			int page_addr);
 	int(*waitfunc)(struct mtd_info *mtd, struct nand_chip *this);
 	int (*exec_op)(struct nand_chip *chip,
-- 
cgit v1.2.3


From f1d46942e823ffdd8532409c9b1c1f87314bf20f Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:29 +0200
Subject: mtd: rawnand: Pass a nand_chip object to chip->waitfunc()

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one or
remove the mtd_info object when both are passed.

Let's tackle the chip->waitfunc() hook.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/atmel/nand-controller.c  |  2 +-
 drivers/mtd/nand/raw/brcmnand/brcmnand.c      | 12 +++++------
 drivers/mtd/nand/raw/denali.c                 |  4 ++--
 drivers/mtd/nand/raw/diskonchip.c             |  2 +-
 drivers/mtd/nand/raw/docg4.c                  |  4 ++--
 drivers/mtd/nand/raw/fsl_elbc_nand.c          |  4 ++--
 drivers/mtd/nand/raw/fsl_ifc_nand.c           |  3 ++-
 drivers/mtd/nand/raw/lpc32xx_mlc.c            | 17 ++++++++--------
 drivers/mtd/nand/raw/nand_base.c              | 29 ++++++++-------------------
 drivers/mtd/nand/raw/omap2.c                  |  8 +++-----
 drivers/mtd/nand/raw/r852.c                   |  2 +-
 drivers/mtd/nand/raw/tango_nand.c             |  2 +-
 drivers/mtd/nand/raw/tmio_nand.c              |  5 ++---
 drivers/staging/mt29f_spinand/mt29f_spinand.c |  3 ++-
 include/linux/mtd/rawnand.h                   |  2 +-
 15 files changed, 42 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index 2dcd8aa0ce0b..d5939114f999 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -945,7 +945,7 @@ static int atmel_hsmc_nand_pmecc_write_pg(struct nand_chip *chip,
 		dev_err(nc->base.dev, "Failed to program NAND page (err = %d)\n",
 			ret);
 
-	status = chip->waitfunc(mtd, chip);
+	status = chip->waitfunc(chip);
 	if (status & NAND_STATUS_FAIL)
 		return -EIO;
 
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index 4b814a39b24f..fee40a3ce5d2 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -1237,9 +1237,8 @@ static void brcmnand_cmd_ctrl(struct nand_chip *chip, int dat,
 	/* intentionally left blank */
 }
 
-static int brcmnand_waitfunc(struct mtd_info *mtd, struct nand_chip *this)
+static int brcmnand_waitfunc(struct nand_chip *chip)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct brcmnand_host *host = nand_get_controller_data(chip);
 	struct brcmnand_controller *ctrl = host->ctrl;
 	unsigned long timeo = msecs_to_jiffies(100);
@@ -1274,7 +1273,6 @@ static int brcmnand_low_level_op(struct brcmnand_host *host,
 				 enum brcmnand_llop_type type, u32 data,
 				 bool last_op)
 {
-	struct mtd_info *mtd = nand_to_mtd(&host->chip);
 	struct nand_chip *chip = &host->chip;
 	struct brcmnand_controller *ctrl = host->ctrl;
 	u32 tmp;
@@ -1307,7 +1305,7 @@ static int brcmnand_low_level_op(struct brcmnand_host *host,
 	(void)brcmnand_read_reg(ctrl, BRCMNAND_LL_OP);
 
 	brcmnand_send_cmd(host, CMD_LOW_LEVEL_OP);
-	return brcmnand_waitfunc(mtd, chip);
+	return brcmnand_waitfunc(chip);
 }
 
 static void brcmnand_cmdfunc(struct nand_chip *chip, unsigned command,
@@ -1383,7 +1381,7 @@ static void brcmnand_cmdfunc(struct nand_chip *chip, unsigned command,
 	(void)brcmnand_read_reg(ctrl, BRCMNAND_CMD_ADDRESS);
 
 	brcmnand_send_cmd(host, native_cmd);
-	brcmnand_waitfunc(mtd, chip);
+	brcmnand_waitfunc(chip);
 
 	if (native_cmd == CMD_PARAMETER_READ ||
 			native_cmd == CMD_PARAMETER_CHANGE_COL) {
@@ -1615,7 +1613,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip,
 		(void)brcmnand_read_reg(ctrl, BRCMNAND_CMD_ADDRESS);
 		/* SPARE_AREA_READ does not use ECC, so just use PAGE_READ */
 		brcmnand_send_cmd(host, CMD_PAGE_READ);
-		brcmnand_waitfunc(mtd, chip);
+		brcmnand_waitfunc(chip);
 
 		if (likely(buf)) {
 			brcmnand_soc_data_bus_prepare(ctrl->soc, false);
@@ -1893,7 +1891,7 @@ static int brcmnand_write(struct mtd_info *mtd, struct nand_chip *chip,
 
 		/* we cannot use SPARE_AREA_PROGRAM when PARTIAL_PAGE_EN=0 */
 		brcmnand_send_cmd(host, CMD_PROGRAM_PAGE);
-		status = brcmnand_waitfunc(mtd, chip);
+		status = brcmnand_waitfunc(chip);
 
 		if (status & NAND_STATUS_FAIL) {
 			dev_info(ctrl->dev, "program failed at %llx\n",
diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index 7258dd13b3f9..0c3fff9d65af 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -904,9 +904,9 @@ static void denali_select_chip(struct nand_chip *chip, int cs)
 	denali->active_bank = cs;
 }
 
-static int denali_waitfunc(struct mtd_info *mtd, struct nand_chip *chip)
+static int denali_waitfunc(struct nand_chip *chip)
 {
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	struct denali_nand_info *denali = mtd_to_denali(nand_to_mtd(chip));
 	uint32_t irq_status;
 
 	/* R/B# pin transitioned from low to high? */
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 64bf0624343d..0b305c19a9a3 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -433,7 +433,7 @@ static void __init doc2000_count_chips(struct mtd_info *mtd)
 	pr_debug("Detected %d chips per floor.\n", i);
 }
 
-static int doc200x_wait(struct mtd_info *mtd, struct nand_chip *this)
+static int doc200x_wait(struct nand_chip *this)
 {
 	struct doc_priv *doc = nand_get_controller_data(this);
 
diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c
index ba3b949369cb..ae20172f1b60 100644
--- a/drivers/mtd/nand/raw/docg4.c
+++ b/drivers/mtd/nand/raw/docg4.c
@@ -315,7 +315,7 @@ static int poll_status(struct docg4_priv *doc)
 }
 
 
-static int docg4_wait(struct mtd_info *mtd, struct nand_chip *nand)
+static int docg4_wait(struct nand_chip *nand)
 {
 
 	struct docg4_priv *doc = nand_get_controller_data(nand);
@@ -938,7 +938,7 @@ static int docg4_erase_block(struct mtd_info *mtd, int page)
 	poll_status(doc);
 	write_nop(docptr);
 
-	status = nand->waitfunc(mtd, nand);
+	status = nand->waitfunc(nand);
 	if (status < 0)
 		return status;
 
diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
index 93b82af3e518..98da5f9f04ac 100644
--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
@@ -621,7 +621,7 @@ static void fsl_elbc_read_buf(struct nand_chip *chip, u8 *buf, int len)
 /* This function is called after Program and Erase Operations to
  * check for success or failure.
  */
-static int fsl_elbc_wait(struct mtd_info *mtd, struct nand_chip *chip)
+static int fsl_elbc_wait(struct nand_chip *chip)
 {
 	struct fsl_elbc_mtd *priv = nand_get_controller_data(chip);
 	struct fsl_elbc_fcm_ctrl *elbc_fcm_ctrl = priv->ctrl->nand;
@@ -720,7 +720,7 @@ static int fsl_elbc_read_page(struct nand_chip *chip, uint8_t *buf,
 	if (oob_required)
 		fsl_elbc_read_buf(chip, chip->oob_poi, mtd->oobsize);
 
-	if (fsl_elbc_wait(mtd, chip) & NAND_STATUS_FAIL)
+	if (fsl_elbc_wait(chip) & NAND_STATUS_FAIL)
 		mtd->ecc_stats.failed++;
 
 	return elbc_fcm_ctrl->max_bitflips;
diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index 34962da03238..cdcd82d1f8bc 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -614,8 +614,9 @@ static void fsl_ifc_read_buf(struct nand_chip *chip, u8 *buf, int len)
  * This function is called after Program and Erase Operations to
  * check for success or failure.
  */
-static int fsl_ifc_wait(struct mtd_info *mtd, struct nand_chip *chip)
+static int fsl_ifc_wait(struct nand_chip *chip)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct fsl_ifc_mtd *priv = nand_get_controller_data(chip);
 	struct fsl_ifc_ctrl *ctrl = priv->ctrl;
 	struct fsl_ifc_runtime __iomem *ifc = ctrl->rregs;
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index 726cd8868ac3..ae31f6ccbeb3 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -328,8 +328,9 @@ static irqreturn_t lpc3xxx_nand_irq(int irq, struct lpc32xx_nand_host *host)
 	return IRQ_HANDLED;
 }
 
-static int lpc32xx_waitfunc_nand(struct mtd_info *mtd, struct nand_chip *chip)
+static int lpc32xx_waitfunc_nand(struct nand_chip *chip)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct lpc32xx_nand_host *host = nand_get_controller_data(chip);
 
 	if (readb(MLC_ISR(host->io_base)) & MLCISR_NAND_READY)
@@ -347,9 +348,9 @@ exit:
 	return NAND_STATUS_READY;
 }
 
-static int lpc32xx_waitfunc_controller(struct mtd_info *mtd,
-				       struct nand_chip *chip)
+static int lpc32xx_waitfunc_controller(struct nand_chip *chip)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct lpc32xx_nand_host *host = nand_get_controller_data(chip);
 
 	if (readb(MLC_ISR(host->io_base)) & MLCISR_CONTROLLER_READY)
@@ -367,10 +368,10 @@ exit:
 	return NAND_STATUS_READY;
 }
 
-static int lpc32xx_waitfunc(struct mtd_info *mtd, struct nand_chip *chip)
+static int lpc32xx_waitfunc(struct nand_chip *chip)
 {
-	lpc32xx_waitfunc_nand(mtd, chip);
-	lpc32xx_waitfunc_controller(mtd, chip);
+	lpc32xx_waitfunc_nand(chip);
+	lpc32xx_waitfunc_controller(chip);
 
 	return NAND_STATUS_READY;
 }
@@ -469,7 +470,7 @@ static int lpc32xx_read_page(struct nand_chip *chip, uint8_t *buf,
 		writeb(0x00, MLC_ECC_AUTO_DEC_REG(host->io_base));
 
 		/* Wait for Controller Ready */
-		lpc32xx_waitfunc_controller(mtd, chip);
+		lpc32xx_waitfunc_controller(chip);
 
 		/* Check ECC Error status */
 		mlc_isr = readl(MLC_ISR(host->io_base));
@@ -550,7 +551,7 @@ static int lpc32xx_write_page_lowlevel(struct nand_chip *chip,
 		writeb(0x00, MLC_ECC_AUTO_ENC_REG(host->io_base));
 
 		/* Wait for Controller Ready */
-		lpc32xx_waitfunc_controller(mtd, chip);
+		lpc32xx_waitfunc_controller(chip);
 	}
 
 	return nand_prog_page_end_op(chip);
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index a74264f36a70..9be0f98c1244 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -1062,8 +1062,7 @@ retry:
  * we are in interrupt context. May happen when in panic and trying to write
  * an oops through mtdoops.
  */
-static void panic_nand_wait(struct mtd_info *mtd, struct nand_chip *chip,
-			    unsigned long timeo)
+static void panic_nand_wait(struct nand_chip *chip, unsigned long timeo)
 {
 	int i;
 	for (i = 0; i < timeo; i++) {
@@ -1093,7 +1092,7 @@ static void panic_nand_wait(struct mtd_info *mtd, struct nand_chip *chip,
  *
  * Wait for command done. This applies to erase and program only.
  */
-static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip)
+static int nand_wait(struct nand_chip *chip)
 {
 
 	unsigned long timeo = 400;
@@ -1111,7 +1110,7 @@ static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip)
 		return ret;
 
 	if (in_interrupt() || oops_in_progress)
-		panic_nand_wait(mtd, chip, timeo);
+		panic_nand_wait(chip, timeo);
 	else {
 		timeo = jiffies + msecs_to_jiffies(timeo);
 		do {
@@ -1553,7 +1552,6 @@ EXPORT_SYMBOL_GPL(nand_read_page_op);
 static int nand_read_param_page_op(struct nand_chip *chip, u8 page, void *buf,
 				   unsigned int len)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
 	unsigned int i;
 	u8 *p = buf;
 
@@ -1811,7 +1809,6 @@ EXPORT_SYMBOL_GPL(nand_prog_page_begin_op);
  */
 int nand_prog_page_end_op(struct nand_chip *chip)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 	u8 status;
 
@@ -1834,7 +1831,7 @@ int nand_prog_page_end_op(struct nand_chip *chip)
 			return ret;
 	} else {
 		chip->cmdfunc(chip, NAND_CMD_PAGEPROG, -1, -1);
-		ret = chip->waitfunc(mtd, chip);
+		ret = chip->waitfunc(chip);
 		if (ret < 0)
 			return ret;
 
@@ -1881,7 +1878,7 @@ int nand_prog_page_op(struct nand_chip *chip, unsigned int page,
 		chip->cmdfunc(chip, NAND_CMD_SEQIN, offset_in_page, page);
 		chip->write_buf(chip, buf, len);
 		chip->cmdfunc(chip, NAND_CMD_PAGEPROG, -1, -1);
-		status = chip->waitfunc(mtd, chip);
+		status = chip->waitfunc(chip);
 	}
 
 	if (status & NAND_STATUS_FAIL)
@@ -1970,7 +1967,6 @@ EXPORT_SYMBOL_GPL(nand_change_write_column_op);
 int nand_readid_op(struct nand_chip *chip, u8 addr, void *buf,
 		   unsigned int len)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
 	unsigned int i;
 	u8 *id = buf;
 
@@ -2016,8 +2012,6 @@ EXPORT_SYMBOL_GPL(nand_readid_op);
  */
 int nand_status_op(struct nand_chip *chip, u8 *status)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
-
 	if (chip->exec_op) {
 		const struct nand_sdr_timings *sdr =
 			nand_get_sdr_timings(&chip->data_interface);
@@ -2055,8 +2049,6 @@ EXPORT_SYMBOL_GPL(nand_status_op);
  */
 int nand_exit_status_op(struct nand_chip *chip)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
-
 	if (chip->exec_op) {
 		struct nand_op_instr instrs[] = {
 			NAND_OP_CMD(NAND_CMD_READ0, 0),
@@ -2085,7 +2077,6 @@ EXPORT_SYMBOL_GPL(nand_exit_status_op);
  */
 int nand_erase_op(struct nand_chip *chip, unsigned int eraseblock)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
 	unsigned int page = eraseblock <<
 			    (chip->phys_erase_shift - chip->page_shift);
 	int ret;
@@ -2118,7 +2109,7 @@ int nand_erase_op(struct nand_chip *chip, unsigned int eraseblock)
 		chip->cmdfunc(chip, NAND_CMD_ERASE1, -1, page);
 		chip->cmdfunc(chip, NAND_CMD_ERASE2, -1, -1);
 
-		ret = chip->waitfunc(mtd, chip);
+		ret = chip->waitfunc(chip);
 		if (ret < 0)
 			return ret;
 
@@ -2147,7 +2138,6 @@ EXPORT_SYMBOL_GPL(nand_erase_op);
 static int nand_set_features_op(struct nand_chip *chip, u8 feature,
 				const void *data)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
 	const u8 *params = data;
 	int i, ret;
 
@@ -2170,7 +2160,7 @@ static int nand_set_features_op(struct nand_chip *chip, u8 feature,
 	for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i)
 		chip->write_byte(chip, params[i]);
 
-	ret = chip->waitfunc(mtd, chip);
+	ret = chip->waitfunc(chip);
 	if (ret < 0)
 		return ret;
 
@@ -2195,7 +2185,6 @@ static int nand_set_features_op(struct nand_chip *chip, u8 feature,
 static int nand_get_features_op(struct nand_chip *chip, u8 feature,
 				void *data)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
 	u8 *params = data;
 	int i;
 
@@ -2256,8 +2245,6 @@ static int nand_wait_rdy_op(struct nand_chip *chip, unsigned int timeout_ms,
  */
 int nand_reset_op(struct nand_chip *chip)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
-
 	if (chip->exec_op) {
 		const struct nand_sdr_timings *sdr =
 			nand_get_sdr_timings(&chip->data_interface);
@@ -4518,7 +4505,7 @@ static int panic_nand_write(struct mtd_info *mtd, loff_t to, size_t len,
 	chip->select_chip(chip, chipnr);
 
 	/* Wait for the device to get ready */
-	panic_nand_wait(mtd, chip, 400);
+	panic_nand_wait(chip, 400);
 
 	memset(&ops, 0, sizeof(ops));
 	ops.len = len;
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index eef9cbadd3c4..6f0fec3596cc 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -981,8 +981,7 @@ static void omap_enable_hwecc(struct nand_chip *chip, int mode)
 
 /**
  * omap_wait - wait until the command is done
- * @mtd: MTD device structure
- * @chip: NAND Chip structure
+ * @this: NAND Chip structure
  *
  * Wait function is called during Program and erase operations and
  * the way it is called from MTD layer, we should wait till the NAND
@@ -991,10 +990,9 @@ static void omap_enable_hwecc(struct nand_chip *chip, int mode)
  * Erase can take up to 400ms and program up to 20ms according to
  * general NAND and SmartMedia specs
  */
-static int omap_wait(struct mtd_info *mtd, struct nand_chip *chip)
+static int omap_wait(struct nand_chip *this)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
-	struct omap_nand_info *info = mtd_to_omap(mtd);
+	struct omap_nand_info *info = mtd_to_omap(nand_to_mtd(this));
 	unsigned long timeo = jiffies;
 	int status, state = this->state;
 
diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c
index 4331ff856fa5..2c30e97ab2a4 100644
--- a/drivers/mtd/nand/raw/r852.c
+++ b/drivers/mtd/nand/raw/r852.c
@@ -362,7 +362,7 @@ static void r852_cmdctl(struct nand_chip *chip, int dat, unsigned int ctrl)
  * Wait till card is ready.
  * based on nand_wait, but returns errors on DMA error
  */
-static int r852_wait(struct mtd_info *mtd, struct nand_chip *chip)
+static int r852_wait(struct nand_chip *chip)
 {
 	struct r852_device *dev = nand_get_controller_data(chip);
 
diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c
index cc719bc49b68..c21a0f2d26fc 100644
--- a/drivers/mtd/nand/raw/tango_nand.c
+++ b/drivers/mtd/nand/raw/tango_nand.c
@@ -314,7 +314,7 @@ static int tango_write_page(struct nand_chip *chip, const u8 *buf,
 	if (err)
 		return err;
 
-	status = chip->waitfunc(mtd, chip);
+	status = chip->waitfunc(chip);
 	if (status & NAND_STATUS_FAIL)
 		return -EIO;
 
diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index 7096fa3d50ab..f44621672779 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -186,10 +186,9 @@ static irqreturn_t tmio_irq(int irq, void *__tmio)
   *erase and write, we enable it to wake us up.  The irq handler
   *disables the interrupt.
  */
-static int
-tmio_nand_wait(struct mtd_info *mtd, struct nand_chip *nand_chip)
+static int tmio_nand_wait(struct nand_chip *nand_chip)
 {
-	struct tmio_nand *tmio = mtd_to_tmio(mtd);
+	struct tmio_nand *tmio = mtd_to_tmio(nand_to_mtd(nand_chip));
 	long timeout;
 	u8 status;
 
diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c
index 724e66c92fd2..f2e14f972319 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.c
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c
@@ -695,8 +695,9 @@ static u8 spinand_read_byte(struct nand_chip *chip)
 	return data;
 }
 
-static int spinand_wait(struct mtd_info *mtd, struct nand_chip *chip)
+static int spinand_wait(struct nand_chip *chip)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct spinand_info *info = nand_get_controller_data(chip);
 
 	unsigned long timeo = jiffies;
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 2a74de9012c4..c00e571d09ca 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1294,7 +1294,7 @@ struct nand_chip {
 	int (*dev_ready)(struct nand_chip *chip);
 	void (*cmdfunc)(struct nand_chip *chip, unsigned command, int column,
 			int page_addr);
-	int(*waitfunc)(struct mtd_info *mtd, struct nand_chip *this);
+	int (*waitfunc)(struct nand_chip *chip);
 	int (*exec_op)(struct nand_chip *chip,
 		       const struct nand_operation *op,
 		       bool check_only);
-- 
cgit v1.2.3


From a2098a9e4f6704471c32230d48b905e51f0cba32 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:30 +0200
Subject: mtd: rawnand: Pass a nand_chip object to chip->erase()

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one or
remove the mtd_info object when both are passed.

Let's tackle the chip->erase() hook.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/denali.c    | 4 ++--
 drivers/mtd/nand/raw/docg4.c     | 4 ++--
 drivers/mtd/nand/raw/nand_base.c | 7 +++----
 include/linux/mtd/rawnand.h      | 2 +-
 4 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index 0c3fff9d65af..bb4ad3b822ad 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -915,9 +915,9 @@ static int denali_waitfunc(struct nand_chip *chip)
 	return irq_status & INTR__INT_ACT ? 0 : NAND_STATUS_FAIL;
 }
 
-static int denali_erase(struct mtd_info *mtd, int page)
+static int denali_erase(struct nand_chip *chip, int page)
 {
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	struct denali_nand_info *denali = mtd_to_denali(nand_to_mtd(chip));
 	uint32_t irq_status;
 
 	denali_reset_irq(denali);
diff --git a/drivers/mtd/nand/raw/docg4.c b/drivers/mtd/nand/raw/docg4.c
index ae20172f1b60..49500cae3d2f 100644
--- a/drivers/mtd/nand/raw/docg4.c
+++ b/drivers/mtd/nand/raw/docg4.c
@@ -892,9 +892,9 @@ static int docg4_read_oob(struct nand_chip *nand, int page)
 	return 0;
 }
 
-static int docg4_erase_block(struct mtd_info *mtd, int page)
+static int docg4_erase_block(struct nand_chip *nand, int page)
 {
-	struct nand_chip *nand = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(nand);
 	struct docg4_priv *doc = nand_get_controller_data(nand);
 	void __iomem *docptr = doc->virtadr;
 	uint16_t g4_page;
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 9be0f98c1244..26be436eb8f1 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -4623,14 +4623,13 @@ out:
 
 /**
  * single_erase - [GENERIC] NAND standard block erase command function
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @page: the page address of the block which will be erased
  *
  * Standard erase command for NAND chips. Returns NAND status.
  */
-static int single_erase(struct mtd_info *mtd, int page)
+static int single_erase(struct nand_chip *chip, int page)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	unsigned int eraseblock;
 
 	/* Send commands to erase a block */
@@ -4715,7 +4714,7 @@ int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 		    (page + pages_per_block))
 			chip->pagebuf = -1;
 
-		status = chip->erase(mtd, page & chip->pagemask);
+		status = chip->erase(chip, page & chip->pagemask);
 
 		/* See if block erase succeeded */
 		if (status) {
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index c00e571d09ca..8c8315d977de 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1298,7 +1298,7 @@ struct nand_chip {
 	int (*exec_op)(struct nand_chip *chip,
 		       const struct nand_operation *op,
 		       bool check_only);
-	int (*erase)(struct mtd_info *mtd, int page);
+	int (*erase)(struct nand_chip *chip, int page);
 	int (*set_features)(struct mtd_info *mtd, struct nand_chip *chip,
 			    int feature_addr, uint8_t *subfeature_para);
 	int (*get_features)(struct mtd_info *mtd, struct nand_chip *chip,
-- 
cgit v1.2.3


From aa36ff25ffdea656c3b748a5cf141bc884e6275c Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:31 +0200
Subject: mtd: rawnand: Pass a nand_chip object to chip->{get, set}_features()

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one or
remove the mtd_info object when both are passed.

Let's tackle the chip->{get,set}_features() hooks.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/mxc_nand.c  | 16 ++++++++--------
 drivers/mtd/nand/raw/nand_base.c | 21 ++++++---------------
 include/linux/mtd/rawnand.h      | 12 ++++++------
 3 files changed, 20 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index a03a33656cf4..ec150e19a368 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -1393,11 +1393,11 @@ static void mxc_nand_command(struct nand_chip *nand_chip, unsigned command,
 	}
 }
 
-static int mxc_nand_set_features(struct mtd_info *mtd, struct nand_chip *chip,
-				 int addr, u8 *subfeature_param)
+static int mxc_nand_set_features(struct nand_chip *chip, int addr,
+				 u8 *subfeature_param)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct mxc_nand_host *host = nand_get_controller_data(nand_chip);
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct mxc_nand_host *host = nand_get_controller_data(chip);
 	int i;
 
 	host->buf_start = 0;
@@ -1413,11 +1413,11 @@ static int mxc_nand_set_features(struct mtd_info *mtd, struct nand_chip *chip,
 	return 0;
 }
 
-static int mxc_nand_get_features(struct mtd_info *mtd, struct nand_chip *chip,
-				 int addr, u8 *subfeature_param)
+static int mxc_nand_get_features(struct nand_chip *chip, int addr,
+				 u8 *subfeature_param)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct mxc_nand_host *host = nand_get_controller_data(nand_chip);
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct mxc_nand_host *host = nand_get_controller_data(chip);
 	int i;
 
 	host->devtype_data->send_cmd(host, NAND_CMD_GET_FEATURES, false);
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 26be436eb8f1..0ae597ced5b4 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -1163,12 +1163,10 @@ static bool nand_supports_set_features(struct nand_chip *chip, int addr)
 int nand_get_features(struct nand_chip *chip, int addr,
 		      u8 *subfeature_param)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
-
 	if (!nand_supports_get_features(chip, addr))
 		return -ENOTSUPP;
 
-	return chip->get_features(mtd, chip, addr, subfeature_param);
+	return chip->get_features(chip, addr, subfeature_param);
 }
 EXPORT_SYMBOL_GPL(nand_get_features);
 
@@ -1184,12 +1182,10 @@ EXPORT_SYMBOL_GPL(nand_get_features);
 int nand_set_features(struct nand_chip *chip, int addr,
 		      u8 *subfeature_param)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
-
 	if (!nand_supports_set_features(chip, addr))
 		return -ENOTSUPP;
 
-	return chip->set_features(mtd, chip, addr, subfeature_param);
+	return chip->set_features(chip, addr, subfeature_param);
 }
 EXPORT_SYMBOL_GPL(nand_set_features);
 
@@ -4846,13 +4842,11 @@ static int nand_max_bad_blocks(struct mtd_info *mtd, loff_t ofs, size_t len)
 
 /**
  * nand_default_set_features- [REPLACEABLE] set NAND chip features
- * @mtd: MTD device structure
  * @chip: nand chip info structure
  * @addr: feature address.
  * @subfeature_param: the subfeature parameters, a four bytes array.
  */
-static int nand_default_set_features(struct mtd_info *mtd,
-				     struct nand_chip *chip, int addr,
+static int nand_default_set_features(struct nand_chip *chip, int addr,
 				     uint8_t *subfeature_param)
 {
 	return nand_set_features_op(chip, addr, subfeature_param);
@@ -4860,13 +4854,11 @@ static int nand_default_set_features(struct mtd_info *mtd,
 
 /**
  * nand_default_get_features- [REPLACEABLE] get NAND chip features
- * @mtd: MTD device structure
  * @chip: nand chip info structure
  * @addr: feature address.
  * @subfeature_param: the subfeature parameters, a four bytes array.
  */
-static int nand_default_get_features(struct mtd_info *mtd,
-				     struct nand_chip *chip, int addr,
+static int nand_default_get_features(struct nand_chip *chip, int addr,
 				     uint8_t *subfeature_param)
 {
 	return nand_get_features_op(chip, addr, subfeature_param);
@@ -4874,7 +4866,6 @@ static int nand_default_get_features(struct mtd_info *mtd,
 
 /**
  * nand_get_set_features_notsupp - set/get features stub returning -ENOTSUPP
- * @mtd: MTD device structure
  * @chip: nand chip info structure
  * @addr: feature address.
  * @subfeature_param: the subfeature parameters, a four bytes array.
@@ -4882,8 +4873,8 @@ static int nand_default_get_features(struct mtd_info *mtd,
  * Should be used by NAND controller drivers that do not support the SET/GET
  * FEATURES operations.
  */
-int nand_get_set_features_notsupp(struct mtd_info *mtd, struct nand_chip *chip,
-				  int addr, u8 *subfeature_param)
+int nand_get_set_features_notsupp(struct nand_chip *chip, int addr,
+				  u8 *subfeature_param)
 {
 	return -ENOTSUPP;
 }
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 8c8315d977de..7c639070c512 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1299,10 +1299,10 @@ struct nand_chip {
 		       const struct nand_operation *op,
 		       bool check_only);
 	int (*erase)(struct nand_chip *chip, int page);
-	int (*set_features)(struct mtd_info *mtd, struct nand_chip *chip,
-			    int feature_addr, uint8_t *subfeature_para);
-	int (*get_features)(struct mtd_info *mtd, struct nand_chip *chip,
-			    int feature_addr, uint8_t *subfeature_para);
+	int (*set_features)(struct nand_chip *chip, int feature_addr,
+			    uint8_t *subfeature_para);
+	int (*get_features)(struct nand_chip *chip, int feature_addr,
+			    uint8_t *subfeature_para);
 	int (*setup_read_retry)(struct mtd_info *mtd, int retry_mode);
 	int (*setup_data_interface)(struct mtd_info *mtd, int chipnr,
 				    const struct nand_data_interface *conf);
@@ -1681,8 +1681,8 @@ int nand_read_oob_syndrome(struct nand_chip *chip, int page);
 int nand_get_features(struct nand_chip *chip, int addr, u8 *subfeature_param);
 int nand_set_features(struct nand_chip *chip, int addr, u8 *subfeature_param);
 /* Stub used by drivers that do not support GET/SET FEATURES operations */
-int nand_get_set_features_notsupp(struct mtd_info *mtd, struct nand_chip *chip,
-				  int addr, u8 *subfeature_param);
+int nand_get_set_features_notsupp(struct nand_chip *chip, int addr,
+				  u8 *subfeature_param);
 
 /* Default read_page_raw implementation */
 int nand_read_page_raw(struct nand_chip *chip, uint8_t *buf, int oob_required,
-- 
cgit v1.2.3


From 2e7f1cec271c209128d0535e0cc1c49d3cf03624 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:32 +0200
Subject: mtd: rawnand: Pass a nand_chip object to chip->setup_read_retry()

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one or
remove the mtd_info object when both are passed.

Let's tackle the chip->setup_read_retry() hook.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c   | 12 +++++-------
 drivers/mtd/nand/raw/nand_hynix.c  |  3 +--
 drivers/mtd/nand/raw/nand_micron.c |  3 +--
 include/linux/mtd/rawnand.h        |  2 +-
 4 files changed, 8 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 0ae597ced5b4..a7575aa68c48 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -3475,17 +3475,15 @@ static uint8_t *nand_transfer_oob(struct mtd_info *mtd, uint8_t *oob,
 
 /**
  * nand_setup_read_retry - [INTERN] Set the READ RETRY mode
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @retry_mode: the retry mode to use
  *
  * Some vendors supply a special command to shift the Vt threshold, to be used
  * when there are too many bitflips in a page (i.e., ECC error). After setting
  * a new threshold, the host should retry reading the page.
  */
-static int nand_setup_read_retry(struct mtd_info *mtd, int retry_mode)
+static int nand_setup_read_retry(struct nand_chip *chip, int retry_mode)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
-
 	pr_debug("setting READ RETRY mode %d\n", retry_mode);
 
 	if (retry_mode >= chip->read_retries)
@@ -3494,7 +3492,7 @@ static int nand_setup_read_retry(struct mtd_info *mtd, int retry_mode)
 	if (!chip->setup_read_retry)
 		return -EOPNOTSUPP;
 
-	return chip->setup_read_retry(mtd, retry_mode);
+	return chip->setup_read_retry(chip, retry_mode);
 }
 
 static void nand_wait_readrdy(struct nand_chip *chip)
@@ -3619,7 +3617,7 @@ read_retry:
 			if (mtd->ecc_stats.failed - ecc_failures) {
 				if (retry_mode + 1 < chip->read_retries) {
 					retry_mode++;
-					ret = nand_setup_read_retry(mtd,
+					ret = nand_setup_read_retry(chip,
 							retry_mode);
 					if (ret < 0)
 						break;
@@ -3646,7 +3644,7 @@ read_retry:
 
 		/* Reset to retry mode 0 */
 		if (retry_mode) {
-			ret = nand_setup_read_retry(mtd, 0);
+			ret = nand_setup_read_retry(chip, 0);
 			if (ret < 0)
 				break;
 			retry_mode = 0;
diff --git a/drivers/mtd/nand/raw/nand_hynix.c b/drivers/mtd/nand/raw/nand_hynix.c
index fa873e517131..bb1c4f8ce785 100644
--- a/drivers/mtd/nand/raw/nand_hynix.c
+++ b/drivers/mtd/nand/raw/nand_hynix.c
@@ -113,9 +113,8 @@ static int hynix_nand_reg_write_op(struct nand_chip *chip, u8 addr, u8 val)
 	return 0;
 }
 
-static int hynix_nand_setup_read_retry(struct mtd_info *mtd, int retry_mode)
+static int hynix_nand_setup_read_retry(struct nand_chip *chip, int retry_mode)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct hynix_nand *hynix = nand_get_manufacturer_data(chip);
 	const u8 *values;
 	int i, ret;
diff --git a/drivers/mtd/nand/raw/nand_micron.c b/drivers/mtd/nand/raw/nand_micron.c
index 2f26dbeb5428..1a5505ccbe54 100644
--- a/drivers/mtd/nand/raw/nand_micron.c
+++ b/drivers/mtd/nand/raw/nand_micron.c
@@ -74,9 +74,8 @@ struct micron_nand {
 	struct micron_on_die_ecc ecc;
 };
 
-static int micron_nand_setup_read_retry(struct mtd_info *mtd, int retry_mode)
+static int micron_nand_setup_read_retry(struct nand_chip *chip, int retry_mode)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	u8 feature[ONFI_SUBFEATURE_PARAM_LEN] = {retry_mode};
 
 	return nand_set_features(chip, ONFI_FEATURE_ADDR_READ_RETRY, feature);
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 7c639070c512..14ce2b078206 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1303,7 +1303,7 @@ struct nand_chip {
 			    uint8_t *subfeature_para);
 	int (*get_features)(struct nand_chip *chip, int feature_addr,
 			    uint8_t *subfeature_para);
-	int (*setup_read_retry)(struct mtd_info *mtd, int retry_mode);
+	int (*setup_read_retry)(struct nand_chip *chip, int retry_mode);
 	int (*setup_data_interface)(struct mtd_info *mtd, int chipnr,
 				    const struct nand_data_interface *conf);
 
-- 
cgit v1.2.3


From 858838b87ef542c35b5401a6469d162d103d1d8f Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:33 +0200
Subject: mtd: rawnand: Pass a nand_chip object to chip->setup_data_interface()

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one or
remove the mtd_info object when both are passed.

Let's tackle the chip->setup_data_interface() hook.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/atmel/nand-controller.c | 3 +--
 drivers/mtd/nand/raw/denali.c                | 4 ++--
 drivers/mtd/nand/raw/fsmc_nand.c             | 3 +--
 drivers/mtd/nand/raw/gpmi-nand/gpmi-lib.c    | 3 +--
 drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.h   | 2 +-
 drivers/mtd/nand/raw/marvell_nand.c          | 3 +--
 drivers/mtd/nand/raw/mtk_nand.c              | 4 ++--
 drivers/mtd/nand/raw/mxc_nand.c              | 7 +++----
 drivers/mtd/nand/raw/nand_base.c             | 9 +++------
 drivers/mtd/nand/raw/s3c2410.c               | 3 ++-
 drivers/mtd/nand/raw/sunxi_nand.c            | 3 +--
 drivers/mtd/nand/raw/tango_nand.c            | 3 +--
 drivers/mtd/nand/raw/tegra_nand.c            | 3 +--
 include/linux/mtd/rawnand.h                  | 2 +-
 14 files changed, 21 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index d5939114f999..a38633a67ead 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -1448,10 +1448,9 @@ static int atmel_hsmc_nand_setup_data_interface(struct atmel_nand *nand,
 	return 0;
 }
 
-static int atmel_nand_setup_data_interface(struct mtd_info *mtd, int csline,
+static int atmel_nand_setup_data_interface(struct nand_chip *chip, int csline,
 					const struct nand_data_interface *conf)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct atmel_nand *nand = to_atmel_nand(chip);
 	struct atmel_nand_controller *nc;
 
diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index bb4ad3b822ad..c5e35461d6b3 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -932,10 +932,10 @@ static int denali_erase(struct nand_chip *chip, int page)
 	return irq_status & INTR__ERASE_COMP ? 0 : -EIO;
 }
 
-static int denali_setup_data_interface(struct mtd_info *mtd, int chipnr,
+static int denali_setup_data_interface(struct nand_chip *chip, int chipnr,
 				       const struct nand_data_interface *conf)
 {
-	struct denali_nand_info *denali = mtd_to_denali(mtd);
+	struct denali_nand_info *denali = mtd_to_denali(nand_to_mtd(chip));
 	const struct nand_sdr_timings *timings;
 	unsigned long t_x, mult_x;
 	int acc_clks, re_2_we, re_2_re, we_2_re, addr_2_data;
diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c
index 15bf533c907a..5e06fce4b295 100644
--- a/drivers/mtd/nand/raw/fsmc_nand.c
+++ b/drivers/mtd/nand/raw/fsmc_nand.c
@@ -340,10 +340,9 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host,
 	return 0;
 }
 
-static int fsmc_setup_data_interface(struct mtd_info *mtd, int csline,
+static int fsmc_setup_data_interface(struct nand_chip *nand, int csline,
 				     const struct nand_data_interface *conf)
 {
-	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct fsmc_nand_data *host = nand_get_controller_data(nand);
 	struct fsmc_nand_timings tims;
 	const struct nand_sdr_timings *sdrt;
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-lib.c
index 88ea2203e263..bd4cfac6b5aa 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-lib.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-lib.c
@@ -471,10 +471,9 @@ void gpmi_nfc_apply_timings(struct gpmi_nand_data *this)
 	udelay(dll_wait_time_us);
 }
 
-int gpmi_setup_data_interface(struct mtd_info *mtd, int chipnr,
+int gpmi_setup_data_interface(struct nand_chip *chip, int chipnr,
 			      const struct nand_data_interface *conf)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct gpmi_nand_data *this = nand_get_controller_data(chip);
 	const struct nand_sdr_timings *sdr;
 
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.h
index 69cd0cbde4f2..d0b79bac2728 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.h
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.h
@@ -178,7 +178,7 @@ int gpmi_is_ready(struct gpmi_nand_data *, unsigned chip);
 int gpmi_send_command(struct gpmi_nand_data *);
 int gpmi_enable_clk(struct gpmi_nand_data *this);
 int gpmi_disable_clk(struct gpmi_nand_data *this);
-int gpmi_setup_data_interface(struct mtd_info *mtd, int chipnr,
+int gpmi_setup_data_interface(struct nand_chip *chip, int chipnr,
 			      const struct nand_data_interface *conf);
 void gpmi_nfc_apply_timings(struct gpmi_nand_data *this);
 int gpmi_read_data(struct gpmi_nand_data *, void *buf, int len);
diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c
index 1f4e75f8fa3e..22481e9bfefc 100644
--- a/drivers/mtd/nand/raw/marvell_nand.c
+++ b/drivers/mtd/nand/raw/marvell_nand.c
@@ -2250,11 +2250,10 @@ static struct nand_bbt_descr bbt_mirror_descr = {
 	.pattern = bbt_mirror_pattern
 };
 
-static int marvell_nfc_setup_data_interface(struct mtd_info *mtd, int chipnr,
+static int marvell_nfc_setup_data_interface(struct nand_chip *chip, int chipnr,
 					    const struct nand_data_interface
 					    *conf)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct marvell_nand_chip *marvell_nand = to_marvell_nand(chip);
 	struct marvell_nfc *nfc = to_marvell_nfc(chip->controller);
 	unsigned int period_ns = 1000000000 / clk_get_rate(nfc->core_clk) * 2;
diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c
index cf8c42fb8feb..42f9dc2cd172 100644
--- a/drivers/mtd/nand/raw/mtk_nand.c
+++ b/drivers/mtd/nand/raw/mtk_nand.c
@@ -503,10 +503,10 @@ static void mtk_nfc_write_buf(struct nand_chip *chip, const u8 *buf, int len)
 		mtk_nfc_write_byte(chip, buf[i]);
 }
 
-static int mtk_nfc_setup_data_interface(struct mtd_info *mtd, int csline,
+static int mtk_nfc_setup_data_interface(struct nand_chip *chip, int csline,
 					const struct nand_data_interface *conf)
 {
-	struct mtk_nfc *nfc = nand_get_controller_data(mtd_to_nand(mtd));
+	struct mtk_nfc *nfc = nand_get_controller_data(chip);
 	const struct nand_sdr_timings *timings;
 	u32 rate, tpoecs, tprecs, tc2r, tw2r, twh, twst, trlt;
 
diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index ec150e19a368..895f85ee29db 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -137,7 +137,7 @@ struct mxc_nand_devtype_data {
 	u32 (*get_ecc_status)(struct mxc_nand_host *);
 	const struct mtd_ooblayout_ops *ooblayout;
 	void (*select_chip)(struct nand_chip *chip, int cs);
-	int (*setup_data_interface)(struct mtd_info *mtd, int csline,
+	int (*setup_data_interface)(struct nand_chip *chip, int csline,
 				    const struct nand_data_interface *conf);
 	void (*enable_hwecc)(struct nand_chip *chip, bool enable);
 
@@ -1139,11 +1139,10 @@ static void preset_v1(struct mtd_info *mtd)
 	writew(0x4, NFC_V1_V2_WRPROT);
 }
 
-static int mxc_nand_v2_setup_data_interface(struct mtd_info *mtd, int csline,
+static int mxc_nand_v2_setup_data_interface(struct nand_chip *chip, int csline,
 					const struct nand_data_interface *conf)
 {
-	struct nand_chip *nand_chip = mtd_to_nand(mtd);
-	struct mxc_nand_host *host = nand_get_controller_data(nand_chip);
+	struct mxc_nand_host *host = nand_get_controller_data(chip);
 	int tRC_min_ns, tRC_ps, ret;
 	unsigned long rate, rate_round;
 	const struct nand_sdr_timings *timings;
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index a7575aa68c48..0a89ab663728 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -1200,7 +1200,6 @@ EXPORT_SYMBOL_GPL(nand_set_features);
  */
 static int nand_reset_data_interface(struct nand_chip *chip, int chipnr)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 
 	if (!chip->setup_data_interface)
@@ -1221,7 +1220,7 @@ static int nand_reset_data_interface(struct nand_chip *chip, int chipnr)
 	 */
 
 	onfi_fill_data_interface(chip, NAND_SDR_IFACE, 0);
-	ret = chip->setup_data_interface(mtd, chipnr, &chip->data_interface);
+	ret = chip->setup_data_interface(chip, chipnr, &chip->data_interface);
 	if (ret)
 		pr_err("Failed to configure data interface to SDR timing mode 0\n");
 
@@ -1243,7 +1242,6 @@ static int nand_reset_data_interface(struct nand_chip *chip, int chipnr)
  */
 static int nand_setup_data_interface(struct nand_chip *chip, int chipnr)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
 	u8 tmode_param[ONFI_SUBFEATURE_PARAM_LEN] = {
 		chip->onfi_timing_mode_default,
 	};
@@ -1263,7 +1261,7 @@ static int nand_setup_data_interface(struct nand_chip *chip, int chipnr)
 	}
 
 	/* Change the mode on the controller side */
-	ret = chip->setup_data_interface(mtd, chipnr, &chip->data_interface);
+	ret = chip->setup_data_interface(chip, chipnr, &chip->data_interface);
 	if (ret)
 		return ret;
 
@@ -1316,7 +1314,6 @@ err_reset_chip:
  */
 static int nand_init_data_interface(struct nand_chip *chip)
 {
-	struct mtd_info *mtd = nand_to_mtd(chip);
 	int modes, mode, ret;
 
 	if (!chip->setup_data_interface)
@@ -1345,7 +1342,7 @@ static int nand_init_data_interface(struct nand_chip *chip)
 		 * Pass NAND_DATA_IFACE_CHECK_ONLY to only check if the
 		 * controller supports the requested timings.
 		 */
-		ret = chip->setup_data_interface(mtd,
+		ret = chip->setup_data_interface(chip,
 						 NAND_DATA_IFACE_CHECK_ONLY,
 						 &chip->data_interface);
 		if (!ret) {
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index 1d549f5e53f5..1f70eb35320b 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -820,9 +820,10 @@ static int s3c2410_nand_add_partition(struct s3c2410_nand_info *info,
 	return -ENODEV;
 }
 
-static int s3c2410_nand_setup_data_interface(struct mtd_info *mtd, int csline,
+static int s3c2410_nand_setup_data_interface(struct nand_chip *chip, int csline,
 					const struct nand_data_interface *conf)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	struct s3c2410_nand_info *info = s3c2410_nand_mtd_toinfo(mtd);
 	struct s3c2410_platform_nand *pdata = info->platform;
 	const struct nand_sdr_timings *timings;
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index fe30fb589ffb..a3700b79bdeb 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -1468,10 +1468,9 @@ static int _sunxi_nand_lookup_timing(const s32 *lut, int lut_size, u32 duration,
 #define sunxi_nand_lookup_timing(l, p, c) \
 			_sunxi_nand_lookup_timing(l, ARRAY_SIZE(l), p, c)
 
-static int sunxi_nfc_setup_data_interface(struct mtd_info *mtd, int csline,
+static int sunxi_nfc_setup_data_interface(struct nand_chip *nand, int csline,
 					const struct nand_data_interface *conf)
 {
-	struct nand_chip *nand = mtd_to_nand(mtd);
 	struct sunxi_nand_chip *chip = to_sunxi_nand(nand);
 	struct sunxi_nfc *nfc = to_sunxi_nfc(chip->nand.controller);
 	const struct nand_sdr_timings *timings;
diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c
index c21a0f2d26fc..bf7012099790 100644
--- a/drivers/mtd/nand/raw/tango_nand.c
+++ b/drivers/mtd/nand/raw/tango_nand.c
@@ -479,11 +479,10 @@ static u32 to_ticks(int kHz, int ps)
 	return DIV_ROUND_UP_ULL((u64)kHz * ps, NSEC_PER_SEC);
 }
 
-static int tango_set_timings(struct mtd_info *mtd, int csline,
+static int tango_set_timings(struct nand_chip *chip, int csline,
 			     const struct nand_data_interface *conf)
 {
 	const struct nand_sdr_timings *sdr = nand_get_sdr_timings(conf);
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct tango_nfc *nfc = to_tango_nfc(chip->controller);
 	struct tango_chip *tchip = to_tango_chip(chip);
 	u32 Trdy, Textw, Twc, Twpw, Tacc, Thold, Trpw, Textr;
diff --git a/drivers/mtd/nand/raw/tegra_nand.c b/drivers/mtd/nand/raw/tegra_nand.c
index 1088741eed1d..9767e29d74e2 100644
--- a/drivers/mtd/nand/raw/tegra_nand.c
+++ b/drivers/mtd/nand/raw/tegra_nand.c
@@ -814,10 +814,9 @@ static void tegra_nand_setup_timing(struct tegra_nand_controller *ctrl,
 	writel_relaxed(reg, ctrl->regs + TIMING_2);
 }
 
-static int tegra_nand_setup_data_interface(struct mtd_info *mtd, int csline,
+static int tegra_nand_setup_data_interface(struct nand_chip *chip, int csline,
 					const struct nand_data_interface *conf)
 {
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	struct tegra_nand_controller *ctrl = to_tegra_ctrl(chip->controller);
 	const struct nand_sdr_timings *timings;
 
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 14ce2b078206..5bfb9d543a8a 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1304,7 +1304,7 @@ struct nand_chip {
 	int (*get_features)(struct nand_chip *chip, int feature_addr,
 			    uint8_t *subfeature_para);
 	int (*setup_read_retry)(struct nand_chip *chip, int retry_mode);
-	int (*setup_data_interface)(struct mtd_info *mtd, int chipnr,
+	int (*setup_data_interface)(struct nand_chip *chip, int chipnr,
 				    const struct nand_data_interface *conf);
 
 	int chip_delay;
-- 
cgit v1.2.3


From 5740d4c4f9bbc97270993147b4756587f92d44c3 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:34 +0200
Subject: mtd: rawnand: Pass a nand_chip object to all nand_xxx_bbt() helpers

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one or
remove the mtd_info object when both are passed.

Let's tackle the nand_xxx_bbt() helpers.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c |  6 +++---
 drivers/mtd/nand/raw/nand_bbt.c  | 16 +++++++---------
 include/linux/mtd/rawnand.h      |  6 +++---
 3 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 0a89ab663728..074de0c8c9dc 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -515,7 +515,7 @@ static int nand_block_markbad_lowlevel(struct mtd_info *mtd, loff_t ofs)
 
 	/* Mark block bad in BBT */
 	if (chip->bbt) {
-		res = nand_markbad_bbt(mtd, ofs);
+		res = nand_markbad_bbt(chip, ofs);
 		if (!ret)
 			ret = res;
 	}
@@ -565,7 +565,7 @@ static int nand_block_isreserved(struct mtd_info *mtd, loff_t ofs)
 	if (!chip->bbt)
 		return 0;
 	/* Return info from the table */
-	return nand_isreserved_bbt(mtd, ofs);
+	return nand_isreserved_bbt(chip, ofs);
 }
 
 /**
@@ -585,7 +585,7 @@ static int nand_block_checkbad(struct mtd_info *mtd, loff_t ofs, int allowbbt)
 		return chip->block_bad(chip, ofs);
 
 	/* Return info from the table */
-	return nand_isbad_bbt(mtd, ofs, allowbbt);
+	return nand_isbad_bbt(chip, ofs, allowbbt);
 }
 
 /**
diff --git a/drivers/mtd/nand/raw/nand_bbt.c b/drivers/mtd/nand/raw/nand_bbt.c
index 76849a441518..7424be0547f8 100644
--- a/drivers/mtd/nand/raw/nand_bbt.c
+++ b/drivers/mtd/nand/raw/nand_bbt.c
@@ -1387,12 +1387,11 @@ EXPORT_SYMBOL(nand_create_bbt);
 
 /**
  * nand_isreserved_bbt - [NAND Interface] Check if a block is reserved
- * @mtd: MTD device structure
+ * @this: NAND chip object
  * @offs: offset in the device
  */
-int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs)
+int nand_isreserved_bbt(struct nand_chip *this, loff_t offs)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	int block;
 
 	block = (int)(offs >> this->bbt_erase_shift);
@@ -1401,13 +1400,12 @@ int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs)
 
 /**
  * nand_isbad_bbt - [NAND Interface] Check if a block is bad
- * @mtd: MTD device structure
+ * @this: NAND chip object
  * @offs: offset in the device
  * @allowbbt: allow access to bad block table region
  */
-int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt)
+int nand_isbad_bbt(struct nand_chip *this, loff_t offs, int allowbbt)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
 	int block, res;
 
 	block = (int)(offs >> this->bbt_erase_shift);
@@ -1429,12 +1427,12 @@ int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt)
 
 /**
  * nand_markbad_bbt - [NAND Interface] Mark a block bad in the BBT
- * @mtd: MTD device structure
+ * @this: NAND chip object
  * @offs: offset of the bad block
  */
-int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs)
+int nand_markbad_bbt(struct nand_chip *this, loff_t offs)
 {
-	struct nand_chip *this = mtd_to_nand(mtd);
+	struct mtd_info *mtd = nand_to_mtd(this);
 	int block, ret = 0;
 
 	block = (int)(offs >> this->bbt_erase_shift);
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 5bfb9d543a8a..e0d98ca7cb45 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1545,9 +1545,9 @@ extern const struct nand_manufacturer_ops amd_nand_manuf_ops;
 extern const struct nand_manufacturer_ops macronix_nand_manuf_ops;
 
 int nand_create_bbt(struct nand_chip *chip);
-int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs);
-int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs);
-int nand_isbad_bbt(struct mtd_info *mtd, loff_t offs, int allowbbt);
+int nand_markbad_bbt(struct nand_chip *chip, loff_t offs);
+int nand_isreserved_bbt(struct nand_chip *chip, loff_t offs);
+int nand_isbad_bbt(struct nand_chip *chip, loff_t offs, int allowbbt);
 int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
 		    int allowbbt);
 
-- 
cgit v1.2.3


From e4cdf9cb3254b5b4d9b1064c275d8c40f2d82e03 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Thu, 6 Sep 2018 14:05:35 +0200
Subject: mtd: rawnand: Pass a nand_chip object nand_erase_nand()

Let's make the raw NAND API consistent by patching all helpers and
hooks to take a nand_chip object instead of an mtd_info one or
remove the mtd_info object when both are passed.

Let's tackle the nand_erase_nand() helper.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 10 +++++-----
 drivers/mtd/nand/raw/nand_bbt.c  |  2 +-
 include/linux/mtd/rawnand.h      |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 074de0c8c9dc..ef4d90ed896d 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -505,7 +505,7 @@ static int nand_block_markbad_lowlevel(struct mtd_info *mtd, loff_t ofs)
 		memset(&einfo, 0, sizeof(einfo));
 		einfo.addr = ofs;
 		einfo.len = 1ULL << chip->phys_erase_shift;
-		nand_erase_nand(mtd, &einfo, 0);
+		nand_erase_nand(chip, &einfo, 0);
 
 		/* Write bad block marker to OOB */
 		nand_get_device(mtd, FL_WRITING);
@@ -4638,22 +4638,22 @@ static int single_erase(struct nand_chip *chip, int page)
  */
 static int nand_erase(struct mtd_info *mtd, struct erase_info *instr)
 {
-	return nand_erase_nand(mtd, instr, 0);
+	return nand_erase_nand(mtd_to_nand(mtd), instr, 0);
 }
 
 /**
  * nand_erase_nand - [INTERN] erase block(s)
- * @mtd: MTD device structure
+ * @chip: NAND chip object
  * @instr: erase instruction
  * @allowbbt: allow erasing the bbt area
  *
  * Erase one ore more blocks.
  */
-int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
+int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
 		    int allowbbt)
 {
+	struct mtd_info *mtd = nand_to_mtd(chip);
 	int page, status, pages_per_block, ret, chipnr;
-	struct nand_chip *chip = mtd_to_nand(mtd);
 	loff_t len;
 
 	pr_debug("%s: start = 0x%012llx, len = %llu\n",
diff --git a/drivers/mtd/nand/raw/nand_bbt.c b/drivers/mtd/nand/raw/nand_bbt.c
index 7424be0547f8..9d73e086c5de 100644
--- a/drivers/mtd/nand/raw/nand_bbt.c
+++ b/drivers/mtd/nand/raw/nand_bbt.c
@@ -853,7 +853,7 @@ static int write_bbt(struct mtd_info *mtd, uint8_t *buf,
 		memset(&einfo, 0, sizeof(einfo));
 		einfo.addr = to;
 		einfo.len = 1 << this->bbt_erase_shift;
-		res = nand_erase_nand(mtd, &einfo, 1);
+		res = nand_erase_nand(this, &einfo, 1);
 		if (res < 0) {
 			pr_warn("nand_bbt: error while erasing BBT block %d\n",
 				res);
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index e0d98ca7cb45..e6360e1156e7 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1548,7 +1548,7 @@ int nand_create_bbt(struct nand_chip *chip);
 int nand_markbad_bbt(struct nand_chip *chip, loff_t offs);
 int nand_isreserved_bbt(struct nand_chip *chip, loff_t offs);
 int nand_isbad_bbt(struct nand_chip *chip, loff_t offs, int allowbbt);
-int nand_erase_nand(struct mtd_info *mtd, struct erase_info *instr,
+int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
 		    int allowbbt);
 
 /**
-- 
cgit v1.2.3


From 871a4073f438c61cc4cf7471476d9a8a29ccf620 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Sat, 4 Aug 2018 22:59:22 +0200
Subject: mtd: rawnand: Make maxchips an unsigned int

There's no good reason to make maxchips a signed integer, since only
positive values are valid. Make it an unsigned int.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 7 ++++---
 include/linux/mtd/rawnand.h      | 4 ++--
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index ef4d90ed896d..a7e85127c131 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5915,11 +5915,12 @@ static int nand_dt_init(struct nand_chip *chip)
  * prevented dynamic allocations during this phase which was unconvenient and
  * as been banned for the benefit of the ->init_ecc()/cleanup_ecc() hooks.
  */
-static int nand_scan_ident(struct nand_chip *chip, int maxchips,
+static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips,
 			   struct nand_flash_dev *table)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
-	int i, nand_maf_id, nand_dev_id;
+	int nand_maf_id, nand_dev_id;
+	unsigned int i;
 	int ret;
 
 	/* Enforce the right timings for reset/detection */
@@ -6730,7 +6731,7 @@ static void nand_detach(struct nand_chip *chip)
  * The flash ID is read and the mtd/chip structures are filled with the
  * appropriate values.
  */
-int nand_scan_with_ids(struct nand_chip *chip, int maxchips,
+int nand_scan_with_ids(struct nand_chip *chip, unsigned int maxchips,
 		       struct nand_flash_dev *ids)
 {
 	int ret;
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index e6360e1156e7..e3a96ee7e531 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -28,10 +28,10 @@ struct nand_chip;
 struct nand_flash_dev;
 
 /* Scan and identify a NAND device */
-int nand_scan_with_ids(struct nand_chip *chip, int max_chips,
+int nand_scan_with_ids(struct nand_chip *chip, unsigned int max_chips,
 		       struct nand_flash_dev *ids);
 
-static inline int nand_scan(struct nand_chip *chip, int max_chips)
+static inline int nand_scan(struct nand_chip *chip, unsigned int max_chips)
 {
 	return nand_scan_with_ids(chip, max_chips, NULL);
 }
-- 
cgit v1.2.3


From 82fc5099744e5f30cd8c9ee13075f28fb37e9518 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 7 Sep 2018 00:38:34 +0200
Subject: mtd: rawnand: Create a legacy struct and move ->IO_ADDR_{R, W} there

We regularly have new NAND controller drivers that are making use of
fields/hooks that we want to get rid of but can't because of all the
legacy drivers that we might break if we do.

So, instead of removing those fields/hooks, let's move them to a
sub-struct which is clearly documented as deprecated.

We start with the ->IO_ADDR_{R,W] fields.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 Documentation/driver-api/mtdnand.rst | 24 ++++++++++++------------
 arch/arm/mach-ep93xx/snappercl15.c   |  5 +++--
 arch/arm/mach-ep93xx/ts72xx.c        |  6 +++---
 arch/arm/mach-imx/mach-qong.c        |  4 ++--
 arch/arm/mach-ixp4xx/ixdp425-setup.c |  2 +-
 arch/arm/mach-omap1/board-nand.c     |  2 +-
 arch/arm/mach-orion5x/ts78xx-setup.c |  6 +++---
 arch/arm/mach-pxa/balloon3.c         |  2 +-
 arch/arm/mach-pxa/em-x270.c          |  6 +++---
 arch/arm/mach-pxa/palmtx.c           |  2 +-
 arch/mips/alchemy/devboards/db1200.c |  6 +++---
 arch/mips/alchemy/devboards/db1300.c |  6 +++---
 arch/mips/alchemy/devboards/db1550.c |  6 +++---
 arch/mips/pnx833x/common/platform.c  |  2 +-
 arch/mips/rb532/devices.c            |  2 +-
 arch/sh/boards/mach-migor/setup.c    |  6 +++---
 drivers/mtd/nand/raw/ams-delta.c     |  8 ++++----
 drivers/mtd/nand/raw/au1550nd.c      | 26 +++++++++++++-------------
 drivers/mtd/nand/raw/cmx270_nand.c   | 16 ++++++++--------
 drivers/mtd/nand/raw/cs553x_nand.c   | 30 +++++++++++++++---------------
 drivers/mtd/nand/raw/davinci_nand.c  | 24 ++++++++++++------------
 drivers/mtd/nand/raw/fsl_upm.c       | 16 ++++++++--------
 drivers/mtd/nand/raw/gpio.c          | 10 +++++-----
 drivers/mtd/nand/raw/jz4740_nand.c   |  8 ++++----
 drivers/mtd/nand/raw/jz4780_nand.c   |  4 ++--
 drivers/mtd/nand/raw/lpc32xx_mlc.c   |  4 ++--
 drivers/mtd/nand/raw/lpc32xx_slc.c   |  4 ++--
 drivers/mtd/nand/raw/nand_base.c     | 12 ++++++------
 drivers/mtd/nand/raw/ndfc.c          |  4 ++--
 drivers/mtd/nand/raw/omap2.c         | 30 +++++++++++++++---------------
 drivers/mtd/nand/raw/orion_nand.c    |  6 +++---
 drivers/mtd/nand/raw/pasemi_nand.c   | 22 +++++++++++-----------
 drivers/mtd/nand/raw/plat_nand.c     |  4 ++--
 drivers/mtd/nand/raw/s3c2410.c       | 12 ++++++------
 drivers/mtd/nand/raw/sharpsl.c       |  6 +++---
 drivers/mtd/nand/raw/tmio_nand.c     |  6 +++---
 include/linux/mtd/rawnand.h          | 26 ++++++++++++++++++++------
 37 files changed, 190 insertions(+), 175 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/mtdnand.rst b/Documentation/driver-api/mtdnand.rst
index 5470a3d6bd9e..1d2403f1d8c5 100644
--- a/Documentation/driver-api/mtdnand.rst
+++ b/Documentation/driver-api/mtdnand.rst
@@ -180,10 +180,10 @@ by a chip select decoder.
     {
         struct nand_chip *this = mtd_to_nand(mtd);
         switch(cmd){
-            case NAND_CTL_SETCLE: this->IO_ADDR_W |= CLE_ADRR_BIT;  break;
-            case NAND_CTL_CLRCLE: this->IO_ADDR_W &= ~CLE_ADRR_BIT; break;
-            case NAND_CTL_SETALE: this->IO_ADDR_W |= ALE_ADRR_BIT;  break;
-            case NAND_CTL_CLRALE: this->IO_ADDR_W &= ~ALE_ADRR_BIT; break;
+            case NAND_CTL_SETCLE: this->legacy.IO_ADDR_W |= CLE_ADRR_BIT;  break;
+            case NAND_CTL_CLRCLE: this->legacy.IO_ADDR_W &= ~CLE_ADRR_BIT; break;
+            case NAND_CTL_SETALE: this->legacy.IO_ADDR_W |= ALE_ADRR_BIT;  break;
+            case NAND_CTL_CLRALE: this->legacy.IO_ADDR_W &= ~ALE_ADRR_BIT; break;
         }
     }
 
@@ -235,8 +235,8 @@ necessary information about the device.
         }
 
         /* Set address of NAND IO lines */
-        this->IO_ADDR_R = baseaddr;
-        this->IO_ADDR_W = baseaddr;
+        this->legacy.IO_ADDR_R = baseaddr;
+        this->legacy.IO_ADDR_W = baseaddr;
         /* Reference hardware control function */
         this->hwcontrol = board_hwcontrol;
         /* Set command delay time, see datasheet for correct value */
@@ -336,17 +336,17 @@ connected to an address decoder.
         struct nand_chip *this = mtd_to_nand(mtd);
 
         /* Deselect all chips */
-        this->IO_ADDR_R &= ~BOARD_NAND_ADDR_MASK;
-        this->IO_ADDR_W &= ~BOARD_NAND_ADDR_MASK;
+        this->legacy.IO_ADDR_R &= ~BOARD_NAND_ADDR_MASK;
+        this->legacy.IO_ADDR_W &= ~BOARD_NAND_ADDR_MASK;
         switch (chip) {
         case 0:
-            this->IO_ADDR_R |= BOARD_NAND_ADDR_CHIP0;
-            this->IO_ADDR_W |= BOARD_NAND_ADDR_CHIP0;
+            this->legacy.IO_ADDR_R |= BOARD_NAND_ADDR_CHIP0;
+            this->legacy.IO_ADDR_W |= BOARD_NAND_ADDR_CHIP0;
             break;
         ....
         case n:
-            this->IO_ADDR_R |= BOARD_NAND_ADDR_CHIPn;
-            this->IO_ADDR_W |= BOARD_NAND_ADDR_CHIPn;
+            this->legacy.IO_ADDR_R |= BOARD_NAND_ADDR_CHIPn;
+            this->legacy.IO_ADDR_W |= BOARD_NAND_ADDR_CHIPn;
             break;
         }
     }
diff --git a/arch/arm/mach-ep93xx/snappercl15.c b/arch/arm/mach-ep93xx/snappercl15.c
index aa03ea79c5f5..1dad83a0bc5b 100644
--- a/arch/arm/mach-ep93xx/snappercl15.c
+++ b/arch/arm/mach-ep93xx/snappercl15.c
@@ -43,7 +43,7 @@
 #define SNAPPERCL15_NAND_CEN	(1 << 11) /* Chip enable (active low) */
 #define SNAPPERCL15_NAND_RDY	(1 << 14) /* Device ready */
 
-#define NAND_CTRL_ADDR(chip) 	(chip->IO_ADDR_W + 0x40)
+#define NAND_CTRL_ADDR(chip) 	(chip->legacy.IO_ADDR_W + 0x40)
 
 static void snappercl15_nand_cmd_ctrl(struct nand_chip *chip, int cmd,
 				      unsigned int ctrl)
@@ -69,7 +69,8 @@ static void snappercl15_nand_cmd_ctrl(struct nand_chip *chip, int cmd,
 	}
 
 	if (cmd != NAND_CMD_NONE)
-		__raw_writew((cmd & 0xff) | nand_state, chip->IO_ADDR_W);
+		__raw_writew((cmd & 0xff) | nand_state,
+			     chip->legacy.IO_ADDR_W);
 }
 
 static int snappercl15_nand_dev_ready(struct nand_chip *chip)
diff --git a/arch/arm/mach-ep93xx/ts72xx.c b/arch/arm/mach-ep93xx/ts72xx.c
index 26259dd9e951..188bf02595c5 100644
--- a/arch/arm/mach-ep93xx/ts72xx.c
+++ b/arch/arm/mach-ep93xx/ts72xx.c
@@ -80,7 +80,7 @@ static void ts72xx_nand_hwcontrol(struct nand_chip *chip,
 				  int cmd, unsigned int ctrl)
 {
 	if (ctrl & NAND_CTRL_CHANGE) {
-		void __iomem *addr = chip->IO_ADDR_R;
+		void __iomem *addr = chip->legacy.IO_ADDR_R;
 		unsigned char bits;
 
 		addr += (1 << TS72XX_NAND_CONTROL_ADDR_LINE);
@@ -94,12 +94,12 @@ static void ts72xx_nand_hwcontrol(struct nand_chip *chip,
 	}
 
 	if (cmd != NAND_CMD_NONE)
-		__raw_writeb(cmd, chip->IO_ADDR_W);
+		__raw_writeb(cmd, chip->legacy.IO_ADDR_W);
 }
 
 static int ts72xx_nand_device_ready(struct nand_chip *chip)
 {
-	void __iomem *addr = chip->IO_ADDR_R;
+	void __iomem *addr = chip->legacy.IO_ADDR_R;
 
 	addr += (1 << TS72XX_NAND_BUSY_ADDR_LINE);
 
diff --git a/arch/arm/mach-imx/mach-qong.c b/arch/arm/mach-imx/mach-qong.c
index ff015f603ac9..48972944bb95 100644
--- a/arch/arm/mach-imx/mach-qong.c
+++ b/arch/arm/mach-imx/mach-qong.c
@@ -136,9 +136,9 @@ static void qong_nand_cmd_ctrl(struct nand_chip *nand_chip, int cmd,
 		return;
 
 	if (ctrl & NAND_CLE)
-		writeb(cmd, nand_chip->IO_ADDR_W + (1 << 24));
+		writeb(cmd, nand_chip->legacy.IO_ADDR_W + (1 << 24));
 	else
-		writeb(cmd, nand_chip->IO_ADDR_W + (1 << 23));
+		writeb(cmd, nand_chip->legacy.IO_ADDR_W + (1 << 23));
 }
 
 /*
diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c
index 7c39edc121ba..797e7edc7124 100644
--- a/arch/arm/mach-ixp4xx/ixdp425-setup.c
+++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c
@@ -92,7 +92,7 @@ ixdp425_flash_nand_cmd_ctrl(struct nand_chip *this, int cmd, unsigned int ctrl)
 	}
 
 	if (cmd != NAND_CMD_NONE)
-		writeb(cmd, this->IO_ADDR_W + offset);
+		writeb(cmd, this->legacy.IO_ADDR_W + offset);
 }
 
 static struct platform_nand_data ixdp425_flash_nand_data = {
diff --git a/arch/arm/mach-omap1/board-nand.c b/arch/arm/mach-omap1/board-nand.c
index 59d56a30bc63..20923eb2d9b6 100644
--- a/arch/arm/mach-omap1/board-nand.c
+++ b/arch/arm/mach-omap1/board-nand.c
@@ -31,6 +31,6 @@ void omap1_nand_cmd_ctl(struct nand_chip *this, int cmd, unsigned int ctrl)
 	if (ctrl & NAND_ALE)
 		mask |= 0x04;
 
-	writeb(cmd, this->IO_ADDR_W + mask);
+	writeb(cmd, this->legacy.IO_ADDR_W + mask);
 }
 
diff --git a/arch/arm/mach-orion5x/ts78xx-setup.c b/arch/arm/mach-orion5x/ts78xx-setup.c
index 48d85ddf7c31..aac2c6eb35e2 100644
--- a/arch/arm/mach-orion5x/ts78xx-setup.c
+++ b/arch/arm/mach-orion5x/ts78xx-setup.c
@@ -145,7 +145,7 @@ static void ts78xx_ts_nand_cmd_ctrl(struct nand_chip *this, int cmd,
 	}
 
 	if (cmd != NAND_CMD_NONE)
-		writeb(cmd, this->IO_ADDR_W);
+		writeb(cmd, this->legacy.IO_ADDR_W);
 }
 
 static int ts78xx_ts_nand_dev_ready(struct nand_chip *chip)
@@ -156,7 +156,7 @@ static int ts78xx_ts_nand_dev_ready(struct nand_chip *chip)
 static void ts78xx_ts_nand_write_buf(struct nand_chip *chip,
 				     const uint8_t *buf, int len)
 {
-	void __iomem *io_base = chip->IO_ADDR_W;
+	void __iomem *io_base = chip->legacy.IO_ADDR_W;
 	unsigned long off = ((unsigned long)buf & 3);
 	int sz;
 
@@ -182,7 +182,7 @@ static void ts78xx_ts_nand_write_buf(struct nand_chip *chip,
 static void ts78xx_ts_nand_read_buf(struct nand_chip *chip,
 				    uint8_t *buf, int len)
 {
-	void __iomem *io_base = chip->IO_ADDR_R;
+	void __iomem *io_base = chip->legacy.IO_ADDR_R;
 	unsigned long off = ((unsigned long)buf & 3);
 	int sz;
 
diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
index 71fda90b9599..256e60c38a6d 100644
--- a/arch/arm/mach-pxa/balloon3.c
+++ b/arch/arm/mach-pxa/balloon3.c
@@ -597,7 +597,7 @@ static void balloon3_nand_cmd_ctl(struct nand_chip *this, int cmd,
 	}
 
 	if (cmd != NAND_CMD_NONE)
-		writeb(cmd, this->IO_ADDR_W);
+		writeb(cmd, this->legacy.IO_ADDR_W);
 }
 
 static void balloon3_nand_select_chip(struct nand_chip *this, int chip)
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index ba1ec9992830..3acb945a2628 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -288,7 +288,7 @@ static void nand_cs_off(void)
 static void em_x270_nand_cmd_ctl(struct nand_chip *this, int dat,
 				 unsigned int ctrl)
 {
-	unsigned long nandaddr = (unsigned long)this->IO_ADDR_W;
+	unsigned long nandaddr = (unsigned long)this->legacy.IO_ADDR_W;
 
 	dsb();
 
@@ -308,9 +308,9 @@ static void em_x270_nand_cmd_ctl(struct nand_chip *this, int dat,
 	}
 
 	dsb();
-	this->IO_ADDR_W = (void __iomem *)nandaddr;
+	this->legacy.IO_ADDR_W = (void __iomem *)nandaddr;
 	if (dat != NAND_CMD_NONE)
-		writel(dat, this->IO_ADDR_W);
+		writel(dat, this->legacy.IO_ADDR_W);
 
 	dsb();
 }
diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c
index ed9661e70b83..36ea32c1bbcc 100644
--- a/arch/arm/mach-pxa/palmtx.c
+++ b/arch/arm/mach-pxa/palmtx.c
@@ -250,7 +250,7 @@ static inline void palmtx_keys_init(void) {}
 static void palmtx_nand_cmd_ctl(struct nand_chip *this, int cmd,
 				unsigned int ctrl)
 {
-	char __iomem *nandaddr = this->IO_ADDR_W;
+	char __iomem *nandaddr = this->legacy.IO_ADDR_W;
 
 	if (cmd == NAND_CMD_NONE)
 		return;
diff --git a/arch/mips/alchemy/devboards/db1200.c b/arch/mips/alchemy/devboards/db1200.c
index f043615c1a99..97dc74f7f41a 100644
--- a/arch/mips/alchemy/devboards/db1200.c
+++ b/arch/mips/alchemy/devboards/db1200.c
@@ -200,7 +200,7 @@ static struct i2c_board_info db1200_i2c_devs[] __initdata = {
 static void au1200_nand_cmd_ctrl(struct nand_chip *this, int cmd,
 				 unsigned int ctrl)
 {
-	unsigned long ioaddr = (unsigned long)this->IO_ADDR_W;
+	unsigned long ioaddr = (unsigned long)this->legacy.IO_ADDR_W;
 
 	ioaddr &= 0xffffff00;
 
@@ -212,9 +212,9 @@ static void au1200_nand_cmd_ctrl(struct nand_chip *this, int cmd,
 		/* assume we want to r/w real data  by default */
 		ioaddr += MEM_STNAND_DATA;
 	}
-	this->IO_ADDR_R = this->IO_ADDR_W = (void __iomem *)ioaddr;
+	this->legacy.IO_ADDR_R = this->legacy.IO_ADDR_W = (void __iomem *)ioaddr;
 	if (cmd != NAND_CMD_NONE) {
-		__raw_writeb(cmd, this->IO_ADDR_W);
+		__raw_writeb(cmd, this->legacy.IO_ADDR_W);
 		wmb();
 	}
 }
diff --git a/arch/mips/alchemy/devboards/db1300.c b/arch/mips/alchemy/devboards/db1300.c
index 1201fa655e78..b813dc1c1682 100644
--- a/arch/mips/alchemy/devboards/db1300.c
+++ b/arch/mips/alchemy/devboards/db1300.c
@@ -152,7 +152,7 @@ static void __init db1300_gpio_config(void)
 static void au1300_nand_cmd_ctrl(struct nand_chip *this, int cmd,
 				 unsigned int ctrl)
 {
-	unsigned long ioaddr = (unsigned long)this->IO_ADDR_W;
+	unsigned long ioaddr = (unsigned long)this->legacy.IO_ADDR_W;
 
 	ioaddr &= 0xffffff00;
 
@@ -164,9 +164,9 @@ static void au1300_nand_cmd_ctrl(struct nand_chip *this, int cmd,
 		/* assume we want to r/w real data  by default */
 		ioaddr += MEM_STNAND_DATA;
 	}
-	this->IO_ADDR_R = this->IO_ADDR_W = (void __iomem *)ioaddr;
+	this->legacy.IO_ADDR_R = this->legacy.IO_ADDR_W = (void __iomem *)ioaddr;
 	if (cmd != NAND_CMD_NONE) {
-		__raw_writeb(cmd, this->IO_ADDR_W);
+		__raw_writeb(cmd, this->legacy.IO_ADDR_W);
 		wmb();
 	}
 }
diff --git a/arch/mips/alchemy/devboards/db1550.c b/arch/mips/alchemy/devboards/db1550.c
index cae39cde5de6..65f6b7184fbe 100644
--- a/arch/mips/alchemy/devboards/db1550.c
+++ b/arch/mips/alchemy/devboards/db1550.c
@@ -129,7 +129,7 @@ static struct i2c_board_info db1550_i2c_devs[] __initdata = {
 static void au1550_nand_cmd_ctrl(struct nand_chip *this, int cmd,
 				 unsigned int ctrl)
 {
-	unsigned long ioaddr = (unsigned long)this->IO_ADDR_W;
+	unsigned long ioaddr = (unsigned long)this->legacy.IO_ADDR_W;
 
 	ioaddr &= 0xffffff00;
 
@@ -141,9 +141,9 @@ static void au1550_nand_cmd_ctrl(struct nand_chip *this, int cmd,
 		/* assume we want to r/w real data  by default */
 		ioaddr += MEM_STNAND_DATA;
 	}
-	this->IO_ADDR_R = this->IO_ADDR_W = (void __iomem *)ioaddr;
+	this->legacy.IO_ADDR_R = this->legacy.IO_ADDR_W = (void __iomem *)ioaddr;
 	if (cmd != NAND_CMD_NONE) {
-		__raw_writeb(cmd, this->IO_ADDR_W);
+		__raw_writeb(cmd, this->legacy.IO_ADDR_W);
 		wmb();
 	}
 }
diff --git a/arch/mips/pnx833x/common/platform.c b/arch/mips/pnx833x/common/platform.c
index ca8a2889431e..33d0f070b33d 100644
--- a/arch/mips/pnx833x/common/platform.c
+++ b/arch/mips/pnx833x/common/platform.c
@@ -180,7 +180,7 @@ static struct platform_device pnx833x_sata_device = {
 static void
 pnx833x_flash_nand_cmd_ctrl(struct nand_chip *this, int cmd, unsigned int ctrl)
 {
-	unsigned long nandaddr = (unsigned long)this->IO_ADDR_W;
+	unsigned long nandaddr = (unsigned long)this->legacy.IO_ADDR_W;
 
 	if (cmd == NAND_CMD_NONE)
 		return;
diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c
index 9173949892ed..02a9e042fb44 100644
--- a/arch/mips/rb532/devices.c
+++ b/arch/mips/rb532/devices.c
@@ -160,7 +160,7 @@ static void rb532_cmd_ctrl(struct nand_chip *chip, int cmd, unsigned int ctrl)
 		set_latch_u5(orbits, nandbits);
 	}
 	if (cmd != NAND_CMD_NONE)
-		writeb(cmd, chip->IO_ADDR_W);
+		writeb(cmd, chip->legacy.IO_ADDR_W);
 }
 
 static struct resource nand_slot0_res[] = {
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index 833f3e49027b..ebcc4d5a67ce 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -172,11 +172,11 @@ static void migor_nand_flash_cmd_ctl(struct nand_chip *chip, int cmd,
 		return;
 
 	if (ctrl & NAND_CLE)
-		writeb(cmd, chip->IO_ADDR_W + 0x00400000);
+		writeb(cmd, chip->legacy.IO_ADDR_W + 0x00400000);
 	else if (ctrl & NAND_ALE)
-		writeb(cmd, chip->IO_ADDR_W + 0x00800000);
+		writeb(cmd, chip->legacy.IO_ADDR_W + 0x00800000);
 	else
-		writeb(cmd, chip->IO_ADDR_W);
+		writeb(cmd, chip->legacy.IO_ADDR_W);
 }
 
 static int migor_nand_flash_ready(struct nand_chip *chip)
diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c
index 48413203dbc2..5bc8b29faf6d 100644
--- a/drivers/mtd/nand/raw/ams-delta.c
+++ b/drivers/mtd/nand/raw/ams-delta.c
@@ -68,7 +68,7 @@ static void ams_delta_write_byte(struct nand_chip *this, u_char byte)
 	void __iomem *io_base = (void __iomem *)nand_get_controller_data(this);
 
 	writew(0, io_base + OMAP_MPUIO_IO_CNTL);
-	writew(byte, this->IO_ADDR_W);
+	writew(byte, this->legacy.IO_ADDR_W);
 	gpio_set_value(AMS_DELTA_GPIO_PIN_NAND_NWE, 0);
 	ndelay(40);
 	gpio_set_value(AMS_DELTA_GPIO_PIN_NAND_NWE, 1);
@@ -82,7 +82,7 @@ static u_char ams_delta_read_byte(struct nand_chip *this)
 	gpio_set_value(AMS_DELTA_GPIO_PIN_NAND_NRE, 0);
 	ndelay(40);
 	writew(~0, io_base + OMAP_MPUIO_IO_CNTL);
-	res = readw(this->IO_ADDR_R);
+	res = readw(this->legacy.IO_ADDR_R);
 	gpio_set_value(AMS_DELTA_GPIO_PIN_NAND_NRE, 1);
 
 	return res;
@@ -208,8 +208,8 @@ static int ams_delta_init(struct platform_device *pdev)
 	nand_set_controller_data(this, (void *)io_base);
 
 	/* Set address of NAND IO lines */
-	this->IO_ADDR_R = io_base + OMAP_MPUIO_INPUT_LATCH;
-	this->IO_ADDR_W = io_base + OMAP_MPUIO_OUTPUT;
+	this->legacy.IO_ADDR_R = io_base + OMAP_MPUIO_INPUT_LATCH;
+	this->legacy.IO_ADDR_W = io_base + OMAP_MPUIO_OUTPUT;
 	this->read_byte = ams_delta_read_byte;
 	this->write_buf = ams_delta_write_buf;
 	this->read_buf = ams_delta_read_buf;
diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c
index d0ec8606e769..b7bb2b2af4ef 100644
--- a/drivers/mtd/nand/raw/au1550nd.c
+++ b/drivers/mtd/nand/raw/au1550nd.c
@@ -35,7 +35,7 @@ struct au1550nd_ctx {
  */
 static u_char au_read_byte(struct nand_chip *this)
 {
-	u_char ret = readb(this->IO_ADDR_R);
+	u_char ret = readb(this->legacy.IO_ADDR_R);
 	wmb(); /* drain writebuffer */
 	return ret;
 }
@@ -49,7 +49,7 @@ static u_char au_read_byte(struct nand_chip *this)
  */
 static void au_write_byte(struct nand_chip *this, u_char byte)
 {
-	writeb(byte, this->IO_ADDR_W);
+	writeb(byte, this->legacy.IO_ADDR_W);
 	wmb(); /* drain writebuffer */
 }
 
@@ -61,7 +61,7 @@ static void au_write_byte(struct nand_chip *this, u_char byte)
  */
 static u_char au_read_byte16(struct nand_chip *this)
 {
-	u_char ret = (u_char) cpu_to_le16(readw(this->IO_ADDR_R));
+	u_char ret = (u_char) cpu_to_le16(readw(this->legacy.IO_ADDR_R));
 	wmb(); /* drain writebuffer */
 	return ret;
 }
@@ -75,7 +75,7 @@ static u_char au_read_byte16(struct nand_chip *this)
  */
 static void au_write_byte16(struct nand_chip *this, u_char byte)
 {
-	writew(le16_to_cpu((u16) byte), this->IO_ADDR_W);
+	writew(le16_to_cpu((u16) byte), this->legacy.IO_ADDR_W);
 	wmb(); /* drain writebuffer */
 }
 
@@ -92,7 +92,7 @@ static void au_write_buf(struct nand_chip *this, const u_char *buf, int len)
 	int i;
 
 	for (i = 0; i < len; i++) {
-		writeb(buf[i], this->IO_ADDR_W);
+		writeb(buf[i], this->legacy.IO_ADDR_W);
 		wmb(); /* drain writebuffer */
 	}
 }
@@ -110,7 +110,7 @@ static void au_read_buf(struct nand_chip *this, u_char *buf, int len)
 	int i;
 
 	for (i = 0; i < len; i++) {
-		buf[i] = readb(this->IO_ADDR_R);
+		buf[i] = readb(this->legacy.IO_ADDR_R);
 		wmb(); /* drain writebuffer */
 	}
 }
@@ -130,7 +130,7 @@ static void au_write_buf16(struct nand_chip *this, const u_char *buf, int len)
 	len >>= 1;
 
 	for (i = 0; i < len; i++) {
-		writew(p[i], this->IO_ADDR_W);
+		writew(p[i], this->legacy.IO_ADDR_W);
 		wmb(); /* drain writebuffer */
 	}
 
@@ -152,7 +152,7 @@ static void au_read_buf16(struct mtd_info *mtd, u_char *buf, int len)
 	len >>= 1;
 
 	for (i = 0; i < len; i++) {
-		p[i] = readw(this->IO_ADDR_R);
+		p[i] = readw(this->legacy.IO_ADDR_R);
 		wmb(); /* drain writebuffer */
 	}
 }
@@ -179,19 +179,19 @@ static void au1550_hwcontrol(struct mtd_info *mtd, int cmd)
 	switch (cmd) {
 
 	case NAND_CTL_SETCLE:
-		this->IO_ADDR_W = ctx->base + MEM_STNAND_CMD;
+		this->legacy.IO_ADDR_W = ctx->base + MEM_STNAND_CMD;
 		break;
 
 	case NAND_CTL_CLRCLE:
-		this->IO_ADDR_W = ctx->base + MEM_STNAND_DATA;
+		this->legacy.IO_ADDR_W = ctx->base + MEM_STNAND_DATA;
 		break;
 
 	case NAND_CTL_SETALE:
-		this->IO_ADDR_W = ctx->base + MEM_STNAND_ADDR;
+		this->legacy.IO_ADDR_W = ctx->base + MEM_STNAND_ADDR;
 		break;
 
 	case NAND_CTL_CLRALE:
-		this->IO_ADDR_W = ctx->base + MEM_STNAND_DATA;
+		this->legacy.IO_ADDR_W = ctx->base + MEM_STNAND_DATA;
 		/* FIXME: Nobody knows why this is necessary,
 		 * but it works only that way */
 		udelay(1);
@@ -208,7 +208,7 @@ static void au1550_hwcontrol(struct mtd_info *mtd, int cmd)
 		break;
 	}
 
-	this->IO_ADDR_R = this->IO_ADDR_W;
+	this->legacy.IO_ADDR_R = this->legacy.IO_ADDR_W;
 
 	wmb(); /* Drain the writebuffer */
 }
diff --git a/drivers/mtd/nand/raw/cmx270_nand.c b/drivers/mtd/nand/raw/cmx270_nand.c
index e8458036419b..b4ed69815bed 100644
--- a/drivers/mtd/nand/raw/cmx270_nand.c
+++ b/drivers/mtd/nand/raw/cmx270_nand.c
@@ -51,7 +51,7 @@ static const struct mtd_partition partition_info[] = {
 
 static u_char cmx270_read_byte(struct nand_chip *this)
 {
-	return (readl(this->IO_ADDR_R) >> 16);
+	return (readl(this->legacy.IO_ADDR_R) >> 16);
 }
 
 static void cmx270_write_buf(struct nand_chip *this, const u_char *buf,
@@ -60,7 +60,7 @@ static void cmx270_write_buf(struct nand_chip *this, const u_char *buf,
 	int i;
 
 	for (i=0; i<len; i++)
-		writel((*buf++ << 16), this->IO_ADDR_W);
+		writel((*buf++ << 16), this->legacy.IO_ADDR_W);
 }
 
 static void cmx270_read_buf(struct nand_chip *this, u_char *buf, int len)
@@ -68,7 +68,7 @@ static void cmx270_read_buf(struct nand_chip *this, u_char *buf, int len)
 	int i;
 
 	for (i=0; i<len; i++)
-		*buf++ = readl(this->IO_ADDR_R) >> 16;
+		*buf++ = readl(this->legacy.IO_ADDR_R) >> 16;
 }
 
 static inline void nand_cs_on(void)
@@ -89,7 +89,7 @@ static void nand_cs_off(void)
 static void cmx270_hwcontrol(struct nand_chip *this, int dat,
 			     unsigned int ctrl)
 {
-	unsigned int nandaddr = (unsigned int)this->IO_ADDR_W;
+	unsigned int nandaddr = (unsigned int)this->legacy.IO_ADDR_W;
 
 	dsb();
 
@@ -109,9 +109,9 @@ static void cmx270_hwcontrol(struct nand_chip *this, int dat,
 	}
 
 	dsb();
-	this->IO_ADDR_W = (void __iomem*)nandaddr;
+	this->legacy.IO_ADDR_W = (void __iomem*)nandaddr;
 	if (dat != NAND_CMD_NONE)
-		writel((dat << 16), this->IO_ADDR_W);
+		writel((dat << 16), this->legacy.IO_ADDR_W);
 
 	dsb();
 }
@@ -173,8 +173,8 @@ static int __init cmx270_init(void)
 	cmx270_nand_mtd->owner = THIS_MODULE;
 
 	/* insert callbacks */
-	this->IO_ADDR_R = cmx270_nand_io;
-	this->IO_ADDR_W = cmx270_nand_io;
+	this->legacy.IO_ADDR_R = cmx270_nand_io;
+	this->legacy.IO_ADDR_W = cmx270_nand_io;
 	this->cmd_ctrl = cmx270_hwcontrol;
 	this->dev_ready = cmx270_device_ready;
 
diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c
index c1628c03282a..b03fb36e9e69 100644
--- a/drivers/mtd/nand/raw/cs553x_nand.c
+++ b/drivers/mtd/nand/raw/cs553x_nand.c
@@ -96,43 +96,43 @@
 static void cs553x_read_buf(struct nand_chip *this, u_char *buf, int len)
 {
 	while (unlikely(len > 0x800)) {
-		memcpy_fromio(buf, this->IO_ADDR_R, 0x800);
+		memcpy_fromio(buf, this->legacy.IO_ADDR_R, 0x800);
 		buf += 0x800;
 		len -= 0x800;
 	}
-	memcpy_fromio(buf, this->IO_ADDR_R, len);
+	memcpy_fromio(buf, this->legacy.IO_ADDR_R, len);
 }
 
 static void cs553x_write_buf(struct nand_chip *this, const u_char *buf, int len)
 {
 	while (unlikely(len > 0x800)) {
-		memcpy_toio(this->IO_ADDR_R, buf, 0x800);
+		memcpy_toio(this->legacy.IO_ADDR_R, buf, 0x800);
 		buf += 0x800;
 		len -= 0x800;
 	}
-	memcpy_toio(this->IO_ADDR_R, buf, len);
+	memcpy_toio(this->legacy.IO_ADDR_R, buf, len);
 }
 
 static unsigned char cs553x_read_byte(struct nand_chip *this)
 {
-	return readb(this->IO_ADDR_R);
+	return readb(this->legacy.IO_ADDR_R);
 }
 
 static void cs553x_write_byte(struct nand_chip *this, u_char byte)
 {
 	int i = 100000;
 
-	while (i && readb(this->IO_ADDR_R + MM_NAND_STS) & CS_NAND_CTLR_BUSY) {
+	while (i && readb(this->legacy.IO_ADDR_R + MM_NAND_STS) & CS_NAND_CTLR_BUSY) {
 		udelay(1);
 		i--;
 	}
-	writeb(byte, this->IO_ADDR_W + 0x801);
+	writeb(byte, this->legacy.IO_ADDR_W + 0x801);
 }
 
 static void cs553x_hwcontrol(struct nand_chip *this, int cmd,
 			     unsigned int ctrl)
 {
-	void __iomem *mmio_base = this->IO_ADDR_R;
+	void __iomem *mmio_base = this->legacy.IO_ADDR_R;
 	if (ctrl & NAND_CTRL_CHANGE) {
 		unsigned char ctl = (ctrl & ~NAND_CTRL_CHANGE ) ^ 0x01;
 		writeb(ctl, mmio_base + MM_NAND_CTL);
@@ -143,7 +143,7 @@ static void cs553x_hwcontrol(struct nand_chip *this, int cmd,
 
 static int cs553x_device_ready(struct nand_chip *this)
 {
-	void __iomem *mmio_base = this->IO_ADDR_R;
+	void __iomem *mmio_base = this->legacy.IO_ADDR_R;
 	unsigned char foo = readb(mmio_base + MM_NAND_STS);
 
 	return (foo & CS_NAND_STS_FLASH_RDY) && !(foo & CS_NAND_CTLR_BUSY);
@@ -151,7 +151,7 @@ static int cs553x_device_ready(struct nand_chip *this)
 
 static void cs_enable_hwecc(struct nand_chip *this, int mode)
 {
-	void __iomem *mmio_base = this->IO_ADDR_R;
+	void __iomem *mmio_base = this->legacy.IO_ADDR_R;
 
 	writeb(0x07, mmio_base + MM_NAND_ECC_CTL);
 }
@@ -160,7 +160,7 @@ static int cs_calculate_ecc(struct nand_chip *this, const u_char *dat,
 			    u_char *ecc_code)
 {
 	uint32_t ecc;
-	void __iomem *mmio_base = this->IO_ADDR_R;
+	void __iomem *mmio_base = this->legacy.IO_ADDR_R;
 
 	ecc = readl(mmio_base + MM_NAND_STS);
 
@@ -199,8 +199,8 @@ static int __init cs553x_init_one(int cs, int mmio, unsigned long adr)
 	new_mtd->owner = THIS_MODULE;
 
 	/* map physical address */
-	this->IO_ADDR_R = this->IO_ADDR_W = ioremap(adr, 4096);
-	if (!this->IO_ADDR_R) {
+	this->legacy.IO_ADDR_R = this->legacy.IO_ADDR_W = ioremap(adr, 4096);
+	if (!this->legacy.IO_ADDR_R) {
 		pr_warn("ioremap cs553x NAND @0x%08lx failed\n", adr);
 		err = -EIO;
 		goto out_mtd;
@@ -242,7 +242,7 @@ static int __init cs553x_init_one(int cs, int mmio, unsigned long adr)
 out_free:
 	kfree(new_mtd->name);
 out_ior:
-	iounmap(this->IO_ADDR_R);
+	iounmap(this->legacy.IO_ADDR_R);
 out_mtd:
 	kfree(this);
 out:
@@ -324,7 +324,7 @@ static void __exit cs553x_cleanup(void)
 			continue;
 
 		this = mtd_to_nand(mtd);
-		mmio_base = this->IO_ADDR_R;
+		mmio_base = this->legacy.IO_ADDR_R;
 
 		/* Release resources, unregister device */
 		nand_release(this);
diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index 4b261c73b240..1204b5120176 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -110,11 +110,11 @@ static void nand_davinci_hwcontrol(struct nand_chip *nand, int cmd,
 		else if ((ctrl & NAND_CTRL_ALE) == NAND_CTRL_ALE)
 			addr += info->mask_ale;
 
-		nand->IO_ADDR_W = addr;
+		nand->legacy.IO_ADDR_W = addr;
 	}
 
 	if (cmd != NAND_CMD_NONE)
-		iowrite8(cmd, nand->IO_ADDR_W);
+		iowrite8(cmd, nand->legacy.IO_ADDR_W);
 }
 
 static void nand_davinci_select_chip(struct nand_chip *nand, int chip)
@@ -127,8 +127,8 @@ static void nand_davinci_select_chip(struct nand_chip *nand, int chip)
 	if (chip > 0)
 		info->current_cs += info->mask_chipsel;
 
-	info->chip.IO_ADDR_W = info->current_cs;
-	info->chip.IO_ADDR_R = info->chip.IO_ADDR_W;
+	info->chip.legacy.IO_ADDR_W = info->current_cs;
+	info->chip.legacy.IO_ADDR_R = info->chip.legacy.IO_ADDR_W;
 }
 
 /*----------------------------------------------------------------------*/
@@ -438,22 +438,22 @@ static void nand_davinci_read_buf(struct nand_chip *chip, uint8_t *buf,
 				  int len)
 {
 	if ((0x03 & ((uintptr_t)buf)) == 0 && (0x03 & len) == 0)
-		ioread32_rep(chip->IO_ADDR_R, buf, len >> 2);
+		ioread32_rep(chip->legacy.IO_ADDR_R, buf, len >> 2);
 	else if ((0x01 & ((uintptr_t)buf)) == 0 && (0x01 & len) == 0)
-		ioread16_rep(chip->IO_ADDR_R, buf, len >> 1);
+		ioread16_rep(chip->legacy.IO_ADDR_R, buf, len >> 1);
 	else
-		ioread8_rep(chip->IO_ADDR_R, buf, len);
+		ioread8_rep(chip->legacy.IO_ADDR_R, buf, len);
 }
 
 static void nand_davinci_write_buf(struct nand_chip *chip, const uint8_t *buf,
 				   int len)
 {
 	if ((0x03 & ((uintptr_t)buf)) == 0 && (0x03 & len) == 0)
-		iowrite32_rep(chip->IO_ADDR_R, buf, len >> 2);
+		iowrite32_rep(chip->legacy.IO_ADDR_R, buf, len >> 2);
 	else if ((0x01 & ((uintptr_t)buf)) == 0 && (0x01 & len) == 0)
-		iowrite16_rep(chip->IO_ADDR_R, buf, len >> 1);
+		iowrite16_rep(chip->legacy.IO_ADDR_R, buf, len >> 1);
 	else
-		iowrite8_rep(chip->IO_ADDR_R, buf, len);
+		iowrite8_rep(chip->legacy.IO_ADDR_R, buf, len);
 }
 
 /*
@@ -759,8 +759,8 @@ static int nand_davinci_probe(struct platform_device *pdev)
 	mtd->dev.parent		= &pdev->dev;
 	nand_set_flash_node(&info->chip, pdev->dev.of_node);
 
-	info->chip.IO_ADDR_R	= vaddr;
-	info->chip.IO_ADDR_W	= vaddr;
+	info->chip.legacy.IO_ADDR_R	= vaddr;
+	info->chip.legacy.IO_ADDR_W	= vaddr;
 	info->chip.chip_delay	= 0;
 	info->chip.select_chip	= nand_davinci_select_chip;
 
diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c
index 48c5215f9a0e..f59fd57fc529 100644
--- a/drivers/mtd/nand/raw/fsl_upm.c
+++ b/drivers/mtd/nand/raw/fsl_upm.c
@@ -101,7 +101,7 @@ static void fun_cmd_ctrl(struct nand_chip *chip, int cmd, unsigned int ctrl)
 
 	mar = (cmd << (32 - fun->upm.width)) |
 		fun->mchip_offsets[fun->mchip_number];
-	fsl_upm_run_pattern(&fun->upm, chip->IO_ADDR_R, mar);
+	fsl_upm_run_pattern(&fun->upm, chip->legacy.IO_ADDR_R, mar);
 
 	if (fun->wait_flags & FSL_UPM_WAIT_RUN_PATTERN)
 		fun_wait_rnb(fun);
@@ -115,8 +115,8 @@ static void fun_select_chip(struct nand_chip *chip, int mchip_nr)
 		chip->cmd_ctrl(chip, NAND_CMD_NONE, 0 | NAND_CTRL_CHANGE);
 	} else if (mchip_nr >= 0 && mchip_nr < NAND_MAX_CHIPS) {
 		fun->mchip_number = mchip_nr;
-		chip->IO_ADDR_R = fun->io_base + fun->mchip_offsets[mchip_nr];
-		chip->IO_ADDR_W = chip->IO_ADDR_R;
+		chip->legacy.IO_ADDR_R = fun->io_base + fun->mchip_offsets[mchip_nr];
+		chip->legacy.IO_ADDR_W = chip->legacy.IO_ADDR_R;
 	} else {
 		BUG();
 	}
@@ -126,7 +126,7 @@ static uint8_t fun_read_byte(struct nand_chip *chip)
 {
 	struct fsl_upm_nand *fun = to_fsl_upm_nand(nand_to_mtd(chip));
 
-	return in_8(fun->chip.IO_ADDR_R);
+	return in_8(fun->chip.legacy.IO_ADDR_R);
 }
 
 static void fun_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
@@ -135,7 +135,7 @@ static void fun_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 	int i;
 
 	for (i = 0; i < len; i++)
-		buf[i] = in_8(fun->chip.IO_ADDR_R);
+		buf[i] = in_8(fun->chip.legacy.IO_ADDR_R);
 }
 
 static void fun_write_buf(struct nand_chip *chip, const uint8_t *buf, int len)
@@ -144,7 +144,7 @@ static void fun_write_buf(struct nand_chip *chip, const uint8_t *buf, int len)
 	int i;
 
 	for (i = 0; i < len; i++) {
-		out_8(fun->chip.IO_ADDR_W, buf[i]);
+		out_8(fun->chip.legacy.IO_ADDR_W, buf[i]);
 		if (fun->wait_flags & FSL_UPM_WAIT_WRITE_BYTE)
 			fun_wait_rnb(fun);
 	}
@@ -160,8 +160,8 @@ static int fun_chip_init(struct fsl_upm_nand *fun,
 	int ret;
 	struct device_node *flash_np;
 
-	fun->chip.IO_ADDR_R = fun->io_base;
-	fun->chip.IO_ADDR_W = fun->io_base;
+	fun->chip.legacy.IO_ADDR_R = fun->io_base;
+	fun->chip.legacy.IO_ADDR_W = fun->io_base;
 	fun->chip.cmd_ctrl = fun_cmd_ctrl;
 	fun->chip.chip_delay = fun->chip_delay;
 	fun->chip.read_byte = fun_read_byte;
diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c
index 273437c1ae6c..bb43fad65362 100644
--- a/drivers/mtd/nand/raw/gpio.c
+++ b/drivers/mtd/nand/raw/gpio.c
@@ -90,7 +90,7 @@ static void gpio_nand_cmd_ctrl(struct nand_chip *chip, int cmd,
 	if (cmd == NAND_CMD_NONE)
 		return;
 
-	writeb(cmd, gpiomtd->nand_chip.IO_ADDR_W);
+	writeb(cmd, gpiomtd->nand_chip.legacy.IO_ADDR_W);
 	gpio_nand_dosync(gpiomtd);
 }
 
@@ -225,9 +225,9 @@ static int gpio_nand_probe(struct platform_device *pdev)
 	chip = &gpiomtd->nand_chip;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	chip->IO_ADDR_R = devm_ioremap_resource(dev, res);
-	if (IS_ERR(chip->IO_ADDR_R))
-		return PTR_ERR(chip->IO_ADDR_R);
+	chip->legacy.IO_ADDR_R = devm_ioremap_resource(dev, res);
+	if (IS_ERR(chip->legacy.IO_ADDR_R))
+		return PTR_ERR(chip->legacy.IO_ADDR_R);
 
 	res = gpio_nand_get_io_sync(pdev);
 	if (res) {
@@ -274,7 +274,7 @@ static int gpio_nand_probe(struct platform_device *pdev)
 		chip->dev_ready = gpio_nand_devready;
 
 	nand_set_flash_node(chip, pdev->dev.of_node);
-	chip->IO_ADDR_W		= chip->IO_ADDR_R;
+	chip->legacy.IO_ADDR_W	= chip->legacy.IO_ADDR_R;
 	chip->ecc.mode		= NAND_ECC_SOFT;
 	chip->ecc.algo		= NAND_ECC_HAMMING;
 	chip->options		= gpiomtd->plat.options;
diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c
index 946a71cf816d..449180de92e2 100644
--- a/drivers/mtd/nand/raw/jz4740_nand.c
+++ b/drivers/mtd/nand/raw/jz4740_nand.c
@@ -91,8 +91,8 @@ static void jz_nand_select_chip(struct nand_chip *chip, int chipnr)
 		banknr = -1;
 	} else {
 		banknr = nand->banks[chipnr] - 1;
-		chip->IO_ADDR_R = nand->bank_base[banknr];
-		chip->IO_ADDR_W = nand->bank_base[banknr];
+		chip->legacy.IO_ADDR_R = nand->bank_base[banknr];
+		chip->legacy.IO_ADDR_W = nand->bank_base[banknr];
 	}
 	writel(ctrl, nand->base + JZ_REG_NAND_CTRL);
 
@@ -114,7 +114,7 @@ static void jz_nand_cmd_ctrl(struct nand_chip *chip, int dat,
 			bank_base += JZ_NAND_MEM_ADDR_OFFSET;
 		else if (ctrl & NAND_CLE)
 			bank_base += JZ_NAND_MEM_CMD_OFFSET;
-		chip->IO_ADDR_W = bank_base;
+		chip->legacy.IO_ADDR_W = bank_base;
 
 		reg = readl(nand->base + JZ_REG_NAND_CTRL);
 		if (ctrl & NAND_NCE)
@@ -124,7 +124,7 @@ static void jz_nand_cmd_ctrl(struct nand_chip *chip, int dat,
 		writel(reg, nand->base + JZ_REG_NAND_CTRL);
 	}
 	if (dat != NAND_CMD_NONE)
-		writeb(dat, chip->IO_ADDR_W);
+		writeb(dat, chip->legacy.IO_ADDR_W);
 }
 
 static int jz_nand_dev_ready(struct nand_chip *chip)
diff --git a/drivers/mtd/nand/raw/jz4780_nand.c b/drivers/mtd/nand/raw/jz4780_nand.c
index d54b2774f7f9..89909a17242d 100644
--- a/drivers/mtd/nand/raw/jz4780_nand.c
+++ b/drivers/mtd/nand/raw/jz4780_nand.c
@@ -275,8 +275,8 @@ static int jz4780_nand_init_chip(struct platform_device *pdev,
 		return -ENOMEM;
 	mtd->dev.parent = dev;
 
-	chip->IO_ADDR_R = cs->base + OFFSET_DATA;
-	chip->IO_ADDR_W = cs->base + OFFSET_DATA;
+	chip->legacy.IO_ADDR_R = cs->base + OFFSET_DATA;
+	chip->legacy.IO_ADDR_W = cs->base + OFFSET_DATA;
 	chip->chip_delay = RB_DELAY_US;
 	chip->options = NAND_NO_SUBPAGE_WRITE;
 	chip->select_chip = jz4780_nand_select_chip;
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index ae31f6ccbeb3..cc1c6e6c59e1 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -742,8 +742,8 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
 	nand_chip->cmd_ctrl = lpc32xx_nand_cmd_ctrl;
 	nand_chip->dev_ready = lpc32xx_nand_device_ready;
 	nand_chip->chip_delay = 25; /* us */
-	nand_chip->IO_ADDR_R = MLC_DATA(host->io_base);
-	nand_chip->IO_ADDR_W = MLC_DATA(host->io_base);
+	nand_chip->legacy.IO_ADDR_R = MLC_DATA(host->io_base);
+	nand_chip->legacy.IO_ADDR_W = MLC_DATA(host->io_base);
 
 	/* Init NAND controller */
 	lpc32xx_nand_setup(host);
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index 26d27a81f814..8a6f109c43af 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -878,8 +878,8 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
 		goto enable_wp;
 
 	/* Set NAND IO addresses and command/ready functions */
-	chip->IO_ADDR_R = SLC_DATA(host->io_base);
-	chip->IO_ADDR_W = SLC_DATA(host->io_base);
+	chip->legacy.IO_ADDR_R = SLC_DATA(host->io_base);
+	chip->legacy.IO_ADDR_W = SLC_DATA(host->io_base);
 	chip->cmd_ctrl = lpc32xx_nand_cmd_ctrl;
 	chip->dev_ready = lpc32xx_nand_device_ready;
 	chip->chip_delay = 20; /* 20us command delay time */
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index e63dfad8235b..1edaa9fdbce9 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -260,7 +260,7 @@ static void nand_release_device(struct mtd_info *mtd)
  */
 static uint8_t nand_read_byte(struct nand_chip *chip)
 {
-	return readb(chip->IO_ADDR_R);
+	return readb(chip->legacy.IO_ADDR_R);
 }
 
 /**
@@ -272,7 +272,7 @@ static uint8_t nand_read_byte(struct nand_chip *chip)
  */
 static uint8_t nand_read_byte16(struct nand_chip *chip)
 {
-	return (uint8_t) cpu_to_le16(readw(chip->IO_ADDR_R));
+	return (uint8_t) cpu_to_le16(readw(chip->legacy.IO_ADDR_R));
 }
 
 /**
@@ -348,7 +348,7 @@ static void nand_write_byte16(struct nand_chip *chip, uint8_t byte)
  */
 static void nand_write_buf(struct nand_chip *chip, const uint8_t *buf, int len)
 {
-	iowrite8_rep(chip->IO_ADDR_W, buf, len);
+	iowrite8_rep(chip->legacy.IO_ADDR_W, buf, len);
 }
 
 /**
@@ -361,7 +361,7 @@ static void nand_write_buf(struct nand_chip *chip, const uint8_t *buf, int len)
  */
 static void nand_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 {
-	ioread8_rep(chip->IO_ADDR_R, buf, len);
+	ioread8_rep(chip->legacy.IO_ADDR_R, buf, len);
 }
 
 /**
@@ -377,7 +377,7 @@ static void nand_write_buf16(struct nand_chip *chip, const uint8_t *buf,
 {
 	u16 *p = (u16 *) buf;
 
-	iowrite16_rep(chip->IO_ADDR_W, p, len >> 1);
+	iowrite16_rep(chip->legacy.IO_ADDR_W, p, len >> 1);
 }
 
 /**
@@ -392,7 +392,7 @@ static void nand_read_buf16(struct nand_chip *chip, uint8_t *buf, int len)
 {
 	u16 *p = (u16 *) buf;
 
-	ioread16_rep(chip->IO_ADDR_R, p, len >> 1);
+	ioread16_rep(chip->legacy.IO_ADDR_R, p, len >> 1);
 }
 
 /**
diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index b96070a3afff..adc4060c65ad 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -142,8 +142,8 @@ static int ndfc_chip_init(struct ndfc_controller *ndfc,
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int ret;
 
-	chip->IO_ADDR_R = ndfc->ndfcbase + NDFC_DATA;
-	chip->IO_ADDR_W = ndfc->ndfcbase + NDFC_DATA;
+	chip->legacy.IO_ADDR_R = ndfc->ndfcbase + NDFC_DATA;
+	chip->legacy.IO_ADDR_W = ndfc->ndfcbase + NDFC_DATA;
 	chip->cmd_ctrl = ndfc_hwcontrol;
 	chip->dev_ready = ndfc_ready;
 	chip->select_chip = ndfc_select_chip;
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index 6f0fec3596cc..627048886c95 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -275,7 +275,7 @@ static void omap_read_buf8(struct mtd_info *mtd, u_char *buf, int len)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
 
-	ioread8_rep(nand->IO_ADDR_R, buf, len);
+	ioread8_rep(nand->legacy.IO_ADDR_R, buf, len);
 }
 
 /**
@@ -291,7 +291,7 @@ static void omap_write_buf8(struct mtd_info *mtd, const u_char *buf, int len)
 	bool status;
 
 	while (len--) {
-		iowrite8(*p++, info->nand.IO_ADDR_W);
+		iowrite8(*p++, info->nand.legacy.IO_ADDR_W);
 		/* wait until buffer is available for write */
 		do {
 			status = info->ops->nand_writebuffer_empty();
@@ -309,7 +309,7 @@ static void omap_read_buf16(struct mtd_info *mtd, u_char *buf, int len)
 {
 	struct nand_chip *nand = mtd_to_nand(mtd);
 
-	ioread16_rep(nand->IO_ADDR_R, buf, len / 2);
+	ioread16_rep(nand->legacy.IO_ADDR_R, buf, len / 2);
 }
 
 /**
@@ -327,7 +327,7 @@ static void omap_write_buf16(struct mtd_info *mtd, const u_char * buf, int len)
 	len >>= 1;
 
 	while (len--) {
-		iowrite16(*p++, info->nand.IO_ADDR_W);
+		iowrite16(*p++, info->nand.legacy.IO_ADDR_W);
 		/* wait until buffer is available for write */
 		do {
 			status = info->ops->nand_writebuffer_empty();
@@ -373,7 +373,7 @@ static void omap_read_buf_pref(struct nand_chip *chip, u_char *buf, int len)
 			r_count = readl(info->reg.gpmc_prefetch_status);
 			r_count = PREFETCH_STATUS_FIFO_CNT(r_count);
 			r_count = r_count >> 2;
-			ioread32_rep(info->nand.IO_ADDR_R, p, r_count);
+			ioread32_rep(info->nand.legacy.IO_ADDR_R, p, r_count);
 			p += r_count;
 			len -= r_count << 2;
 		} while (len);
@@ -401,7 +401,7 @@ static void omap_write_buf_pref(struct nand_chip *chip, const u_char *buf,
 
 	/* take care of subpage writes */
 	if (len % 2 != 0) {
-		writeb(*buf, info->nand.IO_ADDR_W);
+		writeb(*buf, info->nand.legacy.IO_ADDR_W);
 		p = (u16 *)(buf + 1);
 		len--;
 	}
@@ -421,7 +421,7 @@ static void omap_write_buf_pref(struct nand_chip *chip, const u_char *buf,
 			w_count = PREFETCH_STATUS_FIFO_CNT(w_count);
 			w_count = w_count >> 1;
 			for (i = 0; (i < w_count) && len; i++, len -= 2)
-				iowrite16(*p++, info->nand.IO_ADDR_W);
+				iowrite16(*p++, info->nand.legacy.IO_ADDR_W);
 		}
 		/* wait for data to flushed-out before reset the prefetch */
 		tim = 0;
@@ -585,14 +585,14 @@ static irqreturn_t omap_nand_irq(int this_irq, void *dev)
 			bytes = info->buf_len;
 		else if (!info->buf_len)
 			bytes = 0;
-		iowrite32_rep(info->nand.IO_ADDR_W,
-						(u32 *)info->buf, bytes >> 2);
+		iowrite32_rep(info->nand.legacy.IO_ADDR_W, (u32 *)info->buf,
+			      bytes >> 2);
 		info->buf = info->buf + bytes;
 		info->buf_len -= bytes;
 
 	} else {
-		ioread32_rep(info->nand.IO_ADDR_R,
-						(u32 *)info->buf, bytes >> 2);
+		ioread32_rep(info->nand.legacy.IO_ADDR_R, (u32 *)info->buf,
+			     bytes >> 2);
 		info->buf = info->buf + bytes;
 
 		if (this_irq == info->gpmc_irq_count)
@@ -2221,15 +2221,15 @@ static int omap_nand_probe(struct platform_device *pdev)
 	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	nand_chip->IO_ADDR_R = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(nand_chip->IO_ADDR_R))
-		return PTR_ERR(nand_chip->IO_ADDR_R);
+	nand_chip->legacy.IO_ADDR_R = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(nand_chip->legacy.IO_ADDR_R))
+		return PTR_ERR(nand_chip->legacy.IO_ADDR_R);
 
 	info->phys_base = res->start;
 
 	nand_chip->controller = &omap_gpmc_controller;
 
-	nand_chip->IO_ADDR_W = nand_chip->IO_ADDR_R;
+	nand_chip->legacy.IO_ADDR_W = nand_chip->legacy.IO_ADDR_R;
 	nand_chip->cmd_ctrl  = omap_hwcontrol;
 
 	info->ready_gpiod = devm_gpiod_get_optional(&pdev->dev, "rb",
diff --git a/drivers/mtd/nand/raw/orion_nand.c b/drivers/mtd/nand/raw/orion_nand.c
index 92d8f249ee97..d73e3c7a3f3a 100644
--- a/drivers/mtd/nand/raw/orion_nand.c
+++ b/drivers/mtd/nand/raw/orion_nand.c
@@ -45,12 +45,12 @@ static void orion_nand_cmd_ctrl(struct nand_chip *nc, int cmd,
 	if (nc->options & NAND_BUSWIDTH_16)
 		offs <<= 1;
 
-	writeb(cmd, nc->IO_ADDR_W + offs);
+	writeb(cmd, nc->legacy.IO_ADDR_W + offs);
 }
 
 static void orion_nand_read_buf(struct nand_chip *chip, uint8_t *buf, int len)
 {
-	void __iomem *io_base = chip->IO_ADDR_R;
+	void __iomem *io_base = chip->legacy.IO_ADDR_R;
 #if defined(__LINUX_ARM_ARCH__) && __LINUX_ARM_ARCH__ >= 5
 	uint64_t *buf64;
 #endif
@@ -136,7 +136,7 @@ static int __init orion_nand_probe(struct platform_device *pdev)
 
 	nand_set_controller_data(nc, board);
 	nand_set_flash_node(nc, pdev->dev.of_node);
-	nc->IO_ADDR_R = nc->IO_ADDR_W = io_base;
+	nc->legacy.IO_ADDR_R = nc->legacy.IO_ADDR_W = io_base;
 	nc->cmd_ctrl = orion_nand_cmd_ctrl;
 	nc->read_buf = orion_nand_read_buf;
 	nc->ecc.mode = NAND_ECC_SOFT;
diff --git a/drivers/mtd/nand/raw/pasemi_nand.c b/drivers/mtd/nand/raw/pasemi_nand.c
index a1e3bf7a276b..1b367bf9ef53 100644
--- a/drivers/mtd/nand/raw/pasemi_nand.c
+++ b/drivers/mtd/nand/raw/pasemi_nand.c
@@ -46,22 +46,22 @@ static const char driver_name[] = "pasemi-nand";
 static void pasemi_read_buf(struct nand_chip *chip, u_char *buf, int len)
 {
 	while (len > 0x800) {
-		memcpy_fromio(buf, chip->IO_ADDR_R, 0x800);
+		memcpy_fromio(buf, chip->legacy.IO_ADDR_R, 0x800);
 		buf += 0x800;
 		len -= 0x800;
 	}
-	memcpy_fromio(buf, chip->IO_ADDR_R, len);
+	memcpy_fromio(buf, chip->legacy.IO_ADDR_R, len);
 }
 
 static void pasemi_write_buf(struct nand_chip *chip, const u_char *buf,
 			     int len)
 {
 	while (len > 0x800) {
-		memcpy_toio(chip->IO_ADDR_R, buf, 0x800);
+		memcpy_toio(chip->legacy.IO_ADDR_R, buf, 0x800);
 		buf += 0x800;
 		len -= 0x800;
 	}
-	memcpy_toio(chip->IO_ADDR_R, buf, len);
+	memcpy_toio(chip->legacy.IO_ADDR_R, buf, len);
 }
 
 static void pasemi_hwcontrol(struct nand_chip *chip, int cmd,
@@ -71,9 +71,9 @@ static void pasemi_hwcontrol(struct nand_chip *chip, int cmd,
 		return;
 
 	if (ctrl & NAND_CLE)
-		out_8(chip->IO_ADDR_W + (1 << CLE_PIN_CTL), cmd);
+		out_8(chip->legacy.IO_ADDR_W + (1 << CLE_PIN_CTL), cmd);
 	else
-		out_8(chip->IO_ADDR_W + (1 << ALE_PIN_CTL), cmd);
+		out_8(chip->legacy.IO_ADDR_W + (1 << ALE_PIN_CTL), cmd);
 
 	/* Push out posted writes */
 	eieio();
@@ -117,10 +117,10 @@ static int pasemi_nand_probe(struct platform_device *ofdev)
 	/* Link the private data with the MTD structure */
 	pasemi_nand_mtd->dev.parent = dev;
 
-	chip->IO_ADDR_R = of_iomap(np, 0);
-	chip->IO_ADDR_W = chip->IO_ADDR_R;
+	chip->legacy.IO_ADDR_R = of_iomap(np, 0);
+	chip->legacy.IO_ADDR_W = chip->legacy.IO_ADDR_R;
 
-	if (!chip->IO_ADDR_R) {
+	if (!chip->legacy.IO_ADDR_R) {
 		err = -EIO;
 		goto out_mtd;
 	}
@@ -169,7 +169,7 @@ static int pasemi_nand_probe(struct platform_device *ofdev)
  out_lpc:
 	release_region(lpcctl, 4);
  out_ior:
-	iounmap(chip->IO_ADDR_R);
+	iounmap(chip->legacy.IO_ADDR_R);
  out_mtd:
 	kfree(chip);
  out:
@@ -190,7 +190,7 @@ static int pasemi_nand_remove(struct platform_device *ofdev)
 
 	release_region(lpcctl, 4);
 
-	iounmap(chip->IO_ADDR_R);
+	iounmap(chip->legacy.IO_ADDR_R);
 
 	/* Free the MTD device structure */
 	kfree(chip);
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index d65e4084dea4..c06347531d26 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -60,8 +60,8 @@ static int plat_nand_probe(struct platform_device *pdev)
 	mtd = nand_to_mtd(&data->chip);
 	mtd->dev.parent = &pdev->dev;
 
-	data->chip.IO_ADDR_R = data->io_base;
-	data->chip.IO_ADDR_W = data->io_base;
+	data->chip.legacy.IO_ADDR_R = data->io_base;
+	data->chip.legacy.IO_ADDR_W = data->io_base;
 	data->chip.cmd_ctrl = pdata->ctrl.cmd_ctrl;
 	data->chip.dev_ready = pdata->ctrl.dev_ready;
 	data->chip.select_chip = pdata->ctrl.select_chip;
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index 1f70eb35320b..473abf10eeec 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -681,7 +681,7 @@ static int s3c2440_nand_calculate_ecc(struct nand_chip *chip,
 
 static void s3c2410_nand_read_buf(struct nand_chip *this, u_char *buf, int len)
 {
-	readsb(this->IO_ADDR_R, buf, len);
+	readsb(this->legacy.IO_ADDR_R, buf, len);
 }
 
 static void s3c2440_nand_read_buf(struct nand_chip *this, u_char *buf, int len)
@@ -703,7 +703,7 @@ static void s3c2440_nand_read_buf(struct nand_chip *this, u_char *buf, int len)
 static void s3c2410_nand_write_buf(struct nand_chip *this, const u_char *buf,
 				   int len)
 {
-	writesb(this->IO_ADDR_W, buf, len);
+	writesb(this->legacy.IO_ADDR_W, buf, len);
 }
 
 static void s3c2440_nand_write_buf(struct nand_chip *this, const u_char *buf,
@@ -881,7 +881,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 
 	switch (info->cpu_type) {
 	case TYPE_S3C2410:
-		chip->IO_ADDR_W = regs + S3C2410_NFDATA;
+		chip->legacy.IO_ADDR_W = regs + S3C2410_NFDATA;
 		info->sel_reg   = regs + S3C2410_NFCONF;
 		info->sel_bit	= S3C2410_NFCONF_nFCE;
 		chip->cmd_ctrl  = s3c2410_nand_hwcontrol;
@@ -889,7 +889,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 		break;
 
 	case TYPE_S3C2440:
-		chip->IO_ADDR_W = regs + S3C2440_NFDATA;
+		chip->legacy.IO_ADDR_W = regs + S3C2440_NFDATA;
 		info->sel_reg   = regs + S3C2440_NFCONT;
 		info->sel_bit	= S3C2440_NFCONT_nFCE;
 		chip->cmd_ctrl  = s3c2440_nand_hwcontrol;
@@ -899,7 +899,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 		break;
 
 	case TYPE_S3C2412:
-		chip->IO_ADDR_W = regs + S3C2440_NFDATA;
+		chip->legacy.IO_ADDR_W = regs + S3C2440_NFDATA;
 		info->sel_reg   = regs + S3C2440_NFCONT;
 		info->sel_bit	= S3C2412_NFCONT_nFCE0;
 		chip->cmd_ctrl  = s3c2440_nand_hwcontrol;
@@ -911,7 +911,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 		break;
 	}
 
-	chip->IO_ADDR_R = chip->IO_ADDR_W;
+	chip->legacy.IO_ADDR_R = chip->legacy.IO_ADDR_W;
 
 	nmtd->info	   = info;
 	nmtd->set	   = set;
diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c
index 31abbe33798e..d9cdd11fbd3a 100644
--- a/drivers/mtd/nand/raw/sharpsl.c
+++ b/drivers/mtd/nand/raw/sharpsl.c
@@ -75,7 +75,7 @@ static void sharpsl_nand_hwcontrol(struct nand_chip *chip, int cmd,
 	}
 
 	if (cmd != NAND_CMD_NONE)
-		writeb(cmd, chip->IO_ADDR_W);
+		writeb(cmd, chip->legacy.IO_ADDR_W);
 }
 
 static int sharpsl_nand_dev_ready(struct nand_chip *chip)
@@ -153,8 +153,8 @@ static int sharpsl_nand_probe(struct platform_device *pdev)
 	writeb(readb(sharpsl->io + FLASHCTL) | FLWP, sharpsl->io + FLASHCTL);
 
 	/* Set address of NAND IO lines */
-	this->IO_ADDR_R = sharpsl->io + FLASHIO;
-	this->IO_ADDR_W = sharpsl->io + FLASHIO;
+	this->legacy.IO_ADDR_R = sharpsl->io + FLASHIO;
+	this->legacy.IO_ADDR_W = sharpsl->io + FLASHIO;
 	/* Set address of hardware control function */
 	this->cmd_ctrl = sharpsl_nand_hwcontrol;
 	this->dev_ready = sharpsl_nand_dev_ready;
diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index f44621672779..0b23587bf47c 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -155,7 +155,7 @@ static void tmio_nand_hwcontrol(struct nand_chip *chip, int cmd,
 	}
 
 	if (cmd != NAND_CMD_NONE)
-		tmio_iowrite8(cmd, chip->IO_ADDR_W);
+		tmio_iowrite8(cmd, chip->legacy.IO_ADDR_W);
 }
 
 static int tmio_nand_dev_ready(struct nand_chip *chip)
@@ -399,8 +399,8 @@ static int tmio_probe(struct platform_device *dev)
 		return retval;
 
 	/* Set address of NAND IO lines */
-	nand_chip->IO_ADDR_R = tmio->fcr;
-	nand_chip->IO_ADDR_W = tmio->fcr;
+	nand_chip->legacy.IO_ADDR_R = tmio->fcr;
+	nand_chip->legacy.IO_ADDR_W = tmio->fcr;
 
 	/* Set address of hardware control function */
 	nand_chip->cmd_ctrl = tmio_nand_hwcontrol;
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index e3a96ee7e531..6b1dc8fef89d 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1172,13 +1172,27 @@ int nand_op_parser_exec_op(struct nand_chip *chip,
 			   const struct nand_op_parser *parser,
 			   const struct nand_operation *op, bool check_only);
 
+/**
+ * struct nand_legacy - NAND chip legacy fields/hooks
+ * @IO_ADDR_R: address to read the 8 I/O lines of the flash device
+ * @IO_ADDR_W: address to write the 8 I/O lines of the flash device
+ *
+ * If you look at this structure you're already wrong. These fields/hooks are
+ * all deprecated.
+ */
+struct nand_legacy {
+	void __iomem *IO_ADDR_R;
+	void __iomem *IO_ADDR_W;
+};
+
 /**
  * struct nand_chip - NAND Private Flash Chip Data
  * @mtd:		MTD device registered to the MTD framework
- * @IO_ADDR_R:		[BOARDSPECIFIC] address to read the 8 I/O lines of the
- *			flash device
- * @IO_ADDR_W:		[BOARDSPECIFIC] address to write the 8 I/O lines of the
- *			flash device.
+ * @legacy:		All legacy fields/hooks. If you develop a new driver,
+ *			don't even try to use any of these fields/hooks, and if
+ *			you're modifying an existing driver that is using those
+ *			fields/hooks, you should consider reworking the driver
+ *			avoid using them.
  * @read_byte:		[REPLACEABLE] read one byte from the chip
  * @write_byte:		[REPLACEABLE] write a single byte to the chip on the
  *			low 8 I/O lines
@@ -1280,8 +1294,8 @@ int nand_op_parser_exec_op(struct nand_chip *chip,
 
 struct nand_chip {
 	struct mtd_info mtd;
-	void __iomem *IO_ADDR_R;
-	void __iomem *IO_ADDR_W;
+
+	struct nand_legacy legacy;
 
 	uint8_t (*read_byte)(struct nand_chip *chip);
 	void (*write_byte)(struct nand_chip *chip, uint8_t byte);
-- 
cgit v1.2.3


From 716bbbabcc68c2b0e1b805d369c0bd58f4fdea30 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 7 Sep 2018 00:38:35 +0200
Subject: mtd: rawnand: Deprecate ->{read, write}_{byte, buf}() hooks

All those hooks have been replaced by ->exec_op(). Move them to the
nand_legacy struct.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/ams-delta.c                 |  6 +--
 drivers/mtd/nand/raw/atmel/nand-controller.c     |  8 ++--
 drivers/mtd/nand/raw/au1550nd.c                  |  6 +--
 drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c |  6 +--
 drivers/mtd/nand/raw/brcmnand/brcmnand.c         |  6 +--
 drivers/mtd/nand/raw/cafe_nand.c                 | 10 ++---
 drivers/mtd/nand/raw/cmx270_nand.c               |  6 +--
 drivers/mtd/nand/raw/cs553x_nand.c               |  6 +--
 drivers/mtd/nand/raw/davinci_nand.c              |  4 +-
 drivers/mtd/nand/raw/denali.c                    | 12 +++---
 drivers/mtd/nand/raw/diskonchip.c                | 28 ++++++-------
 drivers/mtd/nand/raw/fsl_elbc_nand.c             |  6 +--
 drivers/mtd/nand/raw/fsl_ifc_nand.c              | 12 +++---
 drivers/mtd/nand/raw/fsl_upm.c                   |  6 +--
 drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c       | 14 +++----
 drivers/mtd/nand/raw/hisi504_nand.c              | 10 ++---
 drivers/mtd/nand/raw/lpc32xx_slc.c               | 16 ++++----
 drivers/mtd/nand/raw/mpc5121_nfc.c               |  6 +--
 drivers/mtd/nand/raw/mtk_nand.c                  |  8 ++--
 drivers/mtd/nand/raw/mxc_nand.c                  | 10 ++---
 drivers/mtd/nand/raw/nand_base.c                 | 50 ++++++++++++------------
 drivers/mtd/nand/raw/nand_hynix.c                |  2 +-
 drivers/mtd/nand/raw/nandsim.c                   |  8 ++--
 drivers/mtd/nand/raw/ndfc.c                      |  4 +-
 drivers/mtd/nand/raw/nuc900_nand.c               |  6 +--
 drivers/mtd/nand/raw/omap2.c                     | 22 +++++------
 drivers/mtd/nand/raw/orion_nand.c                |  2 +-
 drivers/mtd/nand/raw/oxnas_nand.c                |  6 +--
 drivers/mtd/nand/raw/pasemi_nand.c               |  4 +-
 drivers/mtd/nand/raw/plat_nand.c                 |  4 +-
 drivers/mtd/nand/raw/qcom_nandc.c                | 17 ++++----
 drivers/mtd/nand/raw/r852.c                      |  6 +--
 drivers/mtd/nand/raw/s3c2410.c                   |  8 ++--
 drivers/mtd/nand/raw/sh_flctl.c                  | 10 ++---
 drivers/mtd/nand/raw/socrates_nand.c             |  6 +--
 drivers/mtd/nand/raw/sunxi_nand.c                |  6 +--
 drivers/mtd/nand/raw/tango_nand.c                |  6 +--
 drivers/mtd/nand/raw/tmio_nand.c                 |  6 +--
 drivers/mtd/nand/raw/txx9ndfmc.c                 |  6 +--
 drivers/mtd/nand/raw/xway_nand.c                 |  6 +--
 drivers/staging/mt29f_spinand/mt29f_spinand.c    |  8 ++--
 include/linux/mtd/rawnand.h                      | 21 +++++-----
 42 files changed, 200 insertions(+), 200 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c
index 5bc8b29faf6d..6616f473aeb2 100644
--- a/drivers/mtd/nand/raw/ams-delta.c
+++ b/drivers/mtd/nand/raw/ams-delta.c
@@ -210,9 +210,9 @@ static int ams_delta_init(struct platform_device *pdev)
 	/* Set address of NAND IO lines */
 	this->legacy.IO_ADDR_R = io_base + OMAP_MPUIO_INPUT_LATCH;
 	this->legacy.IO_ADDR_W = io_base + OMAP_MPUIO_OUTPUT;
-	this->read_byte = ams_delta_read_byte;
-	this->write_buf = ams_delta_write_buf;
-	this->read_buf = ams_delta_read_buf;
+	this->legacy.read_byte = ams_delta_read_byte;
+	this->legacy.write_buf = ams_delta_write_buf;
+	this->legacy.read_buf = ams_delta_read_buf;
 	this->cmd_ctrl = ams_delta_hwcontrol;
 	if (gpio_request(AMS_DELTA_GPIO_PIN_NAND_RB, "nand_rdy") == 0) {
 		this->dev_ready = ams_delta_nand_ready;
diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index a38633a67ead..9b2876b5a9c2 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -1473,10 +1473,10 @@ static void atmel_nand_init(struct atmel_nand_controller *nc,
 	nand->base.controller = &nc->base;
 
 	chip->cmd_ctrl = atmel_nand_cmd_ctrl;
-	chip->read_byte = atmel_nand_read_byte;
-	chip->write_byte = atmel_nand_write_byte;
-	chip->read_buf = atmel_nand_read_buf;
-	chip->write_buf = atmel_nand_write_buf;
+	chip->legacy.read_byte = atmel_nand_read_byte;
+	chip->legacy.write_byte = atmel_nand_write_byte;
+	chip->legacy.read_buf = atmel_nand_read_buf;
+	chip->legacy.write_buf = atmel_nand_write_buf;
 	chip->select_chip = atmel_nand_select_chip;
 
 	if (nc->mck && nc->caps->ops->setup_data_interface)
diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c
index b7bb2b2af4ef..0db5dc61b155 100644
--- a/drivers/mtd/nand/raw/au1550nd.c
+++ b/drivers/mtd/nand/raw/au1550nd.c
@@ -440,10 +440,10 @@ static int au1550nd_probe(struct platform_device *pdev)
 	if (pd->devwidth)
 		this->options |= NAND_BUSWIDTH_16;
 
-	this->read_byte = (pd->devwidth) ? au_read_byte16 : au_read_byte;
+	this->legacy.read_byte = (pd->devwidth) ? au_read_byte16 : au_read_byte;
 	ctx->write_byte = (pd->devwidth) ? au_write_byte16 : au_write_byte;
-	this->write_buf = (pd->devwidth) ? au_write_buf16 : au_write_buf;
-	this->read_buf = (pd->devwidth) ? au_read_buf16 : au_read_buf;
+	this->legacy.write_buf = (pd->devwidth) ? au_write_buf16 : au_write_buf;
+	this->legacy.read_buf = (pd->devwidth) ? au_read_buf16 : au_read_buf;
 
 	ret = nand_scan(this, 1);
 	if (ret) {
diff --git a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
index 59e1b88aae38..ea41d1b95c81 100644
--- a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
@@ -387,9 +387,9 @@ int bcm47xxnflash_ops_bcm4706_init(struct bcm47xxnflash *b47n)
 	nand_chip->cmd_ctrl = bcm47xxnflash_ops_bcm4706_cmd_ctrl;
 	nand_chip->dev_ready = bcm47xxnflash_ops_bcm4706_dev_ready;
 	b47n->nand_chip.cmdfunc = bcm47xxnflash_ops_bcm4706_cmdfunc;
-	b47n->nand_chip.read_byte = bcm47xxnflash_ops_bcm4706_read_byte;
-	b47n->nand_chip.read_buf = bcm47xxnflash_ops_bcm4706_read_buf;
-	b47n->nand_chip.write_buf = bcm47xxnflash_ops_bcm4706_write_buf;
+	b47n->nand_chip.legacy.read_byte = bcm47xxnflash_ops_bcm4706_read_byte;
+	b47n->nand_chip.legacy.read_buf = bcm47xxnflash_ops_bcm4706_read_buf;
+	b47n->nand_chip.legacy.write_buf = bcm47xxnflash_ops_bcm4706_write_buf;
 	b47n->nand_chip.set_features = nand_get_set_features_notsupp;
 	b47n->nand_chip.get_features = nand_get_set_features_notsupp;
 
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index 851db4a7d3e4..4eb9244cc108 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -2273,9 +2273,9 @@ static int brcmnand_init_cs(struct brcmnand_host *host, struct device_node *dn)
 	chip->cmd_ctrl = brcmnand_cmd_ctrl;
 	chip->cmdfunc = brcmnand_cmdfunc;
 	chip->waitfunc = brcmnand_waitfunc;
-	chip->read_byte = brcmnand_read_byte;
-	chip->read_buf = brcmnand_read_buf;
-	chip->write_buf = brcmnand_write_buf;
+	chip->legacy.read_byte = brcmnand_read_byte;
+	chip->legacy.read_buf = brcmnand_read_buf;
+	chip->legacy.write_buf = brcmnand_write_buf;
 
 	chip->ecc.mode = NAND_ECC_HW;
 	chip->ecc.read_page = brcmnand_read_page;
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index 801045d77872..95754d1f12b8 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -378,7 +378,7 @@ static int cafe_nand_read_page(struct nand_chip *chip, uint8_t *buf,
 		     cafe_readl(cafe, NAND_ECC_SYN01));
 
 	nand_read_page_op(chip, page, 0, buf, mtd->writesize);
-	chip->read_buf(chip, chip->oob_poi, mtd->oobsize);
+	chip->legacy.read_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	if (checkecc && cafe_readl(cafe, NAND_ECC_RESULT) & (1<<18)) {
 		unsigned short syn[8], pat[4];
@@ -537,7 +537,7 @@ static int cafe_nand_write_page_lowlevel(struct nand_chip *chip,
 	struct cafe_priv *cafe = nand_get_controller_data(chip);
 
 	nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize);
-	chip->write_buf(chip, chip->oob_poi, mtd->oobsize);
+	chip->legacy.write_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	/* Set up ECC autogeneration */
 	cafe->ctl2 |= (1<<30);
@@ -705,9 +705,9 @@ static int cafe_nand_probe(struct pci_dev *pdev,
 
 	cafe->nand.cmdfunc = cafe_nand_cmdfunc;
 	cafe->nand.dev_ready = cafe_device_ready;
-	cafe->nand.read_byte = cafe_read_byte;
-	cafe->nand.read_buf = cafe_read_buf;
-	cafe->nand.write_buf = cafe_write_buf;
+	cafe->nand.legacy.read_byte = cafe_read_byte;
+	cafe->nand.legacy.read_buf = cafe_read_buf;
+	cafe->nand.legacy.write_buf = cafe_write_buf;
 	cafe->nand.select_chip = cafe_select_chip;
 	cafe->nand.set_features = nand_get_set_features_notsupp;
 	cafe->nand.get_features = nand_get_set_features_notsupp;
diff --git a/drivers/mtd/nand/raw/cmx270_nand.c b/drivers/mtd/nand/raw/cmx270_nand.c
index b4ed69815bed..18f10ae92dfc 100644
--- a/drivers/mtd/nand/raw/cmx270_nand.c
+++ b/drivers/mtd/nand/raw/cmx270_nand.c
@@ -184,9 +184,9 @@ static int __init cmx270_init(void)
 	this->ecc.algo = NAND_ECC_HAMMING;
 
 	/* read/write functions */
-	this->read_byte = cmx270_read_byte;
-	this->read_buf = cmx270_read_buf;
-	this->write_buf = cmx270_write_buf;
+	this->legacy.read_byte = cmx270_read_byte;
+	this->legacy.read_buf = cmx270_read_buf;
+	this->legacy.write_buf = cmx270_write_buf;
 
 	/* Scan to find existence of the device */
 	ret = nand_scan(this, 1);
diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c
index b03fb36e9e69..8a3230041678 100644
--- a/drivers/mtd/nand/raw/cs553x_nand.c
+++ b/drivers/mtd/nand/raw/cs553x_nand.c
@@ -208,9 +208,9 @@ static int __init cs553x_init_one(int cs, int mmio, unsigned long adr)
 
 	this->cmd_ctrl = cs553x_hwcontrol;
 	this->dev_ready = cs553x_device_ready;
-	this->read_byte = cs553x_read_byte;
-	this->read_buf = cs553x_read_buf;
-	this->write_buf = cs553x_write_buf;
+	this->legacy.read_byte = cs553x_read_byte;
+	this->legacy.read_buf = cs553x_read_buf;
+	this->legacy.write_buf = cs553x_write_buf;
 
 	this->chip_delay = 0;
 
diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index 1204b5120176..b7701caa5f94 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -785,8 +785,8 @@ static int nand_davinci_probe(struct platform_device *pdev)
 	info->chip.dev_ready	= nand_davinci_dev_ready;
 
 	/* Speed up buffer I/O */
-	info->chip.read_buf     = nand_davinci_read_buf;
-	info->chip.write_buf    = nand_davinci_write_buf;
+	info->chip.legacy.read_buf     = nand_davinci_read_buf;
+	info->chip.legacy.write_buf    = nand_davinci_write_buf;
 
 	/* Use board-specific ECC config */
 	info->chip.ecc.mode	= pdata->ecc_mode;
diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index c5e35461d6b3..2d963ed6643c 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -1262,11 +1262,11 @@ static int denali_attach_chip(struct nand_chip *chip)
 	mtd_set_ooblayout(mtd, &denali_ooblayout_ops);
 
 	if (chip->options & NAND_BUSWIDTH_16) {
-		chip->read_buf = denali_read_buf16;
-		chip->write_buf = denali_write_buf16;
+		chip->legacy.read_buf = denali_read_buf16;
+		chip->legacy.write_buf = denali_write_buf16;
 	} else {
-		chip->read_buf = denali_read_buf;
-		chip->write_buf = denali_write_buf;
+		chip->legacy.read_buf = denali_read_buf;
+		chip->legacy.write_buf = denali_write_buf;
 	}
 	chip->ecc.read_page = denali_read_page;
 	chip->ecc.read_page_raw = denali_read_page_raw;
@@ -1343,8 +1343,8 @@ int denali_init(struct denali_nand_info *denali)
 		mtd->name = "denali-nand";
 
 	chip->select_chip = denali_select_chip;
-	chip->read_byte = denali_read_byte;
-	chip->write_byte = denali_write_byte;
+	chip->legacy.read_byte = denali_read_byte;
+	chip->legacy.write_byte = denali_write_byte;
 	chip->cmd_ctrl = denali_cmd_ctrl;
 	chip->dev_ready = denali_dev_ready;
 	chip->waitfunc = denali_waitfunc;
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 0b305c19a9a3..ec3336f071e7 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -382,8 +382,8 @@ static uint16_t __init doc200x_ident_chip(struct mtd_info *mtd, int nr)
 	 */
 	udelay(50);
 
-	ret = this->read_byte(this) << 8;
-	ret |= this->read_byte(this);
+	ret = this->legacy.read_byte(this) << 8;
+	ret |= this->legacy.read_byte(this);
 
 	if (doc->ChipID == DOC_ChipID_Doc2k && try_dword && !nr) {
 		/* First chip probe. See if we get same results by 32-bit access */
@@ -404,7 +404,7 @@ static uint16_t __init doc200x_ident_chip(struct mtd_info *mtd, int nr)
 		ident.dword = readl(docptr + DoC_2k_CDSN_IO);
 		if (((ident.byte[0] << 8) | ident.byte[1]) == ret) {
 			pr_info("DiskOnChip 2000 responds to DWORD access\n");
-			this->read_buf = &doc2000_readbuf_dword;
+			this->legacy.read_buf = &doc2000_readbuf_dword;
 		}
 	}
 
@@ -442,7 +442,7 @@ static int doc200x_wait(struct nand_chip *this)
 	DoC_WaitReady(doc);
 	nand_status_op(this, NULL);
 	DoC_WaitReady(doc);
-	status = (int)this->read_byte(this);
+	status = (int)this->legacy.read_byte(this);
 
 	return status;
 }
@@ -721,7 +721,7 @@ static void doc2001plus_command(struct nand_chip *this, unsigned command,
 		WriteDOC(NAND_CMD_STATUS, docptr, Mplus_FlashCmd);
 		WriteDOC(0, docptr, Mplus_WritePipeTerm);
 		WriteDOC(0, docptr, Mplus_WritePipeTerm);
-		while (!(this->read_byte(this) & 0x40)) ;
+		while (!(this->legacy.read_byte(this) & 0x40)) ;
 		return;
 
 		/* This applies to read commands */
@@ -1339,9 +1339,9 @@ static inline int __init doc2000_init(struct mtd_info *mtd)
 	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 
-	this->read_byte = doc2000_read_byte;
-	this->write_buf = doc2000_writebuf;
-	this->read_buf = doc2000_readbuf;
+	this->legacy.read_byte = doc2000_read_byte;
+	this->legacy.write_buf = doc2000_writebuf;
+	this->legacy.read_buf = doc2000_readbuf;
 	doc->late_init = nftl_scan_bbt;
 
 	doc->CDSNControl = CDSN_CTRL_FLASH_IO | CDSN_CTRL_ECC_IO;
@@ -1355,9 +1355,9 @@ static inline int __init doc2001_init(struct mtd_info *mtd)
 	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 
-	this->read_byte = doc2001_read_byte;
-	this->write_buf = doc2001_writebuf;
-	this->read_buf = doc2001_readbuf;
+	this->legacy.read_byte = doc2001_read_byte;
+	this->legacy.write_buf = doc2001_writebuf;
+	this->legacy.read_buf = doc2001_readbuf;
 
 	ReadDOC(doc->virtadr, ChipID);
 	ReadDOC(doc->virtadr, ChipID);
@@ -1385,9 +1385,9 @@ static inline int __init doc2001plus_init(struct mtd_info *mtd)
 	struct nand_chip *this = mtd_to_nand(mtd);
 	struct doc_priv *doc = nand_get_controller_data(this);
 
-	this->read_byte = doc2001plus_read_byte;
-	this->write_buf = doc2001plus_writebuf;
-	this->read_buf = doc2001plus_readbuf;
+	this->legacy.read_byte = doc2001plus_read_byte;
+	this->legacy.write_buf = doc2001plus_writebuf;
+	this->legacy.read_buf = doc2001plus_readbuf;
 	doc->late_init = inftl_scan_bbt;
 	this->cmd_ctrl = NULL;
 	this->select_chip = doc2001plus_select_chip;
diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
index 98da5f9f04ac..5e7b912f63b3 100644
--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
@@ -776,9 +776,9 @@ static int fsl_elbc_chip_init(struct fsl_elbc_mtd *priv)
 
 	/* fill in nand_chip structure */
 	/* set up function call table */
-	chip->read_byte = fsl_elbc_read_byte;
-	chip->write_buf = fsl_elbc_write_buf;
-	chip->read_buf = fsl_elbc_read_buf;
+	chip->legacy.read_byte = fsl_elbc_read_byte;
+	chip->legacy.write_buf = fsl_elbc_write_buf;
+	chip->legacy.read_buf = fsl_elbc_read_buf;
 	chip->select_chip = fsl_elbc_select_chip;
 	chip->cmdfunc = fsl_elbc_cmdfunc;
 	chip->waitfunc = fsl_elbc_wait;
diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index cdcd82d1f8bc..ed3b90f2fb50 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -858,12 +858,12 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 	/* set up function call table */
 	if ((ifc_in32(&ifc_global->cspr_cs[priv->bank].cspr))
 		& CSPR_PORT_SIZE_16)
-		chip->read_byte = fsl_ifc_read_byte16;
+		chip->legacy.read_byte = fsl_ifc_read_byte16;
 	else
-		chip->read_byte = fsl_ifc_read_byte;
+		chip->legacy.read_byte = fsl_ifc_read_byte;
 
-	chip->write_buf = fsl_ifc_write_buf;
-	chip->read_buf = fsl_ifc_read_buf;
+	chip->legacy.write_buf = fsl_ifc_write_buf;
+	chip->legacy.read_buf = fsl_ifc_read_buf;
 	chip->select_chip = fsl_ifc_select_chip;
 	chip->cmdfunc = fsl_ifc_cmdfunc;
 	chip->waitfunc = fsl_ifc_wait;
@@ -881,10 +881,10 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 
 	if (ifc_in32(&ifc_global->cspr_cs[priv->bank].cspr)
 		& CSPR_PORT_SIZE_16) {
-		chip->read_byte = fsl_ifc_read_byte16;
+		chip->legacy.read_byte = fsl_ifc_read_byte16;
 		chip->options |= NAND_BUSWIDTH_16;
 	} else {
-		chip->read_byte = fsl_ifc_read_byte;
+		chip->legacy.read_byte = fsl_ifc_read_byte;
 	}
 
 	chip->controller = &ifc_nand_ctrl->controller;
diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c
index f59fd57fc529..db194ad12074 100644
--- a/drivers/mtd/nand/raw/fsl_upm.c
+++ b/drivers/mtd/nand/raw/fsl_upm.c
@@ -164,9 +164,9 @@ static int fun_chip_init(struct fsl_upm_nand *fun,
 	fun->chip.legacy.IO_ADDR_W = fun->io_base;
 	fun->chip.cmd_ctrl = fun_cmd_ctrl;
 	fun->chip.chip_delay = fun->chip_delay;
-	fun->chip.read_byte = fun_read_byte;
-	fun->chip.read_buf = fun_read_buf;
-	fun->chip.write_buf = fun_write_buf;
+	fun->chip.legacy.read_byte = fun_read_byte;
+	fun->chip.legacy.read_buf = fun_read_buf;
+	fun->chip.legacy.write_buf = fun_write_buf;
 	fun->chip.ecc.mode = NAND_ECC_SOFT;
 	fun->chip.ecc.algo = NAND_ECC_HAMMING;
 	if (fun->mchip_count > 1)
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index 1ed594a155ed..54f1a84c6520 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -1330,7 +1330,7 @@ static int gpmi_ecc_read_oob(struct nand_chip *chip, int page)
 
 	/* Read out the conventional OOB. */
 	nand_read_page_op(chip, page, mtd->writesize, NULL, 0);
-	chip->read_buf(chip, chip->oob_poi, mtd->oobsize);
+	chip->legacy.read_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	/*
 	 * Now, we want to make sure the block mark is correct. In the
@@ -1340,7 +1340,7 @@ static int gpmi_ecc_read_oob(struct nand_chip *chip, int page)
 	if (GPMI_IS_MX23(this)) {
 		/* Read the block mark into the first byte of the OOB buffer. */
 		nand_read_page_op(chip, page, 0, NULL, 0);
-		chip->oob_poi[0] = chip->read_byte(chip);
+		chip->oob_poi[0] = chip->legacy.read_byte(chip);
 	}
 
 	return 0;
@@ -1628,7 +1628,7 @@ static int mx23_check_transcription_stamp(struct gpmi_nand_data *this)
 		 * and starts in the 12th byte of the page.
 		 */
 		nand_read_page_op(chip, page, 12, NULL, 0);
-		chip->read_buf(chip, buffer, strlen(fingerprint));
+		chip->legacy.read_buf(chip, buffer, strlen(fingerprint));
 
 		/* Look for the fingerprint. */
 		if (!memcmp(buffer, fingerprint, strlen(fingerprint))) {
@@ -1764,7 +1764,7 @@ static int mx23_boot_init(struct gpmi_nand_data  *this)
 		/* Send the command to read the conventional block mark. */
 		chip->select_chip(chip, chipnr);
 		nand_read_page_op(chip, page, mtd->writesize, NULL, 0);
-		block_mark = chip->read_byte(chip);
+		block_mark = chip->legacy.read_byte(chip);
 		chip->select_chip(chip, -1);
 
 		/*
@@ -1904,9 +1904,9 @@ static int gpmi_nand_init(struct gpmi_nand_data *this)
 	chip->setup_data_interface = gpmi_setup_data_interface;
 	chip->cmd_ctrl		= gpmi_cmd_ctrl;
 	chip->dev_ready		= gpmi_dev_ready;
-	chip->read_byte		= gpmi_read_byte;
-	chip->read_buf		= gpmi_read_buf;
-	chip->write_buf		= gpmi_write_buf;
+	chip->legacy.read_byte	= gpmi_read_byte;
+	chip->legacy.read_buf	= gpmi_read_buf;
+	chip->legacy.write_buf	= gpmi_write_buf;
 	chip->badblock_pattern	= &gpmi_bbt_descr;
 	chip->block_markbad	= gpmi_block_markbad;
 	chip->options		|= NAND_NO_SUBPAGE_WRITE;
diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c
index 928a320c8517..1046f51bbcd2 100644
--- a/drivers/mtd/nand/raw/hisi504_nand.c
+++ b/drivers/mtd/nand/raw/hisi504_nand.c
@@ -533,7 +533,7 @@ static int hisi_nand_read_page_hwecc(struct nand_chip *chip, uint8_t *buf,
 	int stat_1, stat_2;
 
 	nand_read_page_op(chip, page, 0, buf, mtd->writesize);
-	chip->read_buf(chip, chip->oob_poi, mtd->oobsize);
+	chip->legacy.read_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	/* errors which can not be corrected by ECC */
 	if (host->irq_status & HINFC504_INTS_UE) {
@@ -581,7 +581,7 @@ static int hisi_nand_write_page_hwecc(struct nand_chip *chip,
 
 	nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize);
 	if (oob_required)
-		chip->write_buf(chip, chip->oob_poi, mtd->oobsize);
+		chip->legacy.write_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	return nand_prog_page_end_op(chip);
 }
@@ -784,9 +784,9 @@ static int hisi_nfc_probe(struct platform_device *pdev)
 	nand_set_flash_node(chip, np);
 	chip->cmdfunc		= hisi_nfc_cmdfunc;
 	chip->select_chip	= hisi_nfc_select_chip;
-	chip->read_byte		= hisi_nfc_read_byte;
-	chip->write_buf		= hisi_nfc_write_buf;
-	chip->read_buf		= hisi_nfc_read_buf;
+	chip->legacy.read_byte	= hisi_nfc_read_byte;
+	chip->legacy.write_buf	= hisi_nfc_write_buf;
+	chip->legacy.read_buf	= hisi_nfc_read_buf;
 	chip->chip_delay	= HINFC504_CHIP_DELAY;
 	chip->set_features	= nand_get_set_features_notsupp;
 	chip->get_features	= nand_get_set_features_notsupp;
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index 8a6f109c43af..6dd5348b8f03 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -624,7 +624,7 @@ static int lpc32xx_nand_read_page_syndrome(struct nand_chip *chip, uint8_t *buf,
 	status = lpc32xx_xfer(mtd, buf, chip->ecc.steps, 1);
 
 	/* Get OOB data */
-	chip->read_buf(chip, chip->oob_poi, mtd->oobsize);
+	chip->legacy.read_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	/* Convert to stored ECC format */
 	lpc32xx_slc_ecc_copy(tmpecc, (uint32_t *) host->ecc_buf, chip->ecc.steps);
@@ -665,8 +665,8 @@ static int lpc32xx_nand_read_page_raw_syndrome(struct nand_chip *chip,
 	nand_read_page_op(chip, page, 0, NULL, 0);
 
 	/* Raw reads can just use the FIFO interface */
-	chip->read_buf(chip, buf, chip->ecc.size * chip->ecc.steps);
-	chip->read_buf(chip, chip->oob_poi, mtd->oobsize);
+	chip->legacy.read_buf(chip, buf, chip->ecc.size * chip->ecc.steps);
+	chip->legacy.read_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	return 0;
 }
@@ -704,7 +704,7 @@ static int lpc32xx_nand_write_page_syndrome(struct nand_chip *chip,
 	lpc32xx_slc_ecc_copy(pb, (uint32_t *)host->ecc_buf, chip->ecc.steps);
 
 	/* Write ECC data to device */
-	chip->write_buf(chip, chip->oob_poi, mtd->oobsize);
+	chip->legacy.write_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	return nand_prog_page_end_op(chip);
 }
@@ -722,7 +722,7 @@ static int lpc32xx_nand_write_page_raw_syndrome(struct nand_chip *chip,
 	/* Raw writes can just use the FIFO interface */
 	nand_prog_page_begin_op(chip, page, 0, buf,
 				chip->ecc.size * chip->ecc.steps);
-	chip->write_buf(chip, chip->oob_poi, mtd->oobsize);
+	chip->legacy.write_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	return nand_prog_page_end_op(chip);
 }
@@ -891,9 +891,9 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
 
 	/* NAND callbacks for LPC32xx SLC hardware */
 	chip->ecc.mode = NAND_ECC_HW_SYNDROME;
-	chip->read_byte = lpc32xx_nand_read_byte;
-	chip->read_buf = lpc32xx_nand_read_buf;
-	chip->write_buf = lpc32xx_nand_write_buf;
+	chip->legacy.read_byte = lpc32xx_nand_read_byte;
+	chip->legacy.read_buf = lpc32xx_nand_read_buf;
+	chip->legacy.write_buf = lpc32xx_nand_write_buf;
 	chip->ecc.read_page_raw = lpc32xx_nand_read_page_raw_syndrome;
 	chip->ecc.read_page = lpc32xx_nand_read_page_syndrome;
 	chip->ecc.write_page_raw = lpc32xx_nand_write_page_raw_syndrome;
diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c
index bd027674898d..4c2925e819f1 100644
--- a/drivers/mtd/nand/raw/mpc5121_nfc.c
+++ b/drivers/mtd/nand/raw/mpc5121_nfc.c
@@ -694,9 +694,9 @@ static int mpc5121_nfc_probe(struct platform_device *op)
 	mtd->name = "MPC5121 NAND";
 	chip->dev_ready = mpc5121_nfc_dev_ready;
 	chip->cmdfunc = mpc5121_nfc_command;
-	chip->read_byte = mpc5121_nfc_read_byte;
-	chip->read_buf = mpc5121_nfc_read_buf;
-	chip->write_buf = mpc5121_nfc_write_buf;
+	chip->legacy.read_byte = mpc5121_nfc_read_byte;
+	chip->legacy.read_buf = mpc5121_nfc_read_buf;
+	chip->legacy.write_buf = mpc5121_nfc_write_buf;
 	chip->select_chip = mpc5121_nfc_select_chip;
 	chip->set_features	= nand_get_set_features_notsupp;
 	chip->get_features	= nand_get_set_features_notsupp;
diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c
index 42f9dc2cd172..fe150e993acf 100644
--- a/drivers/mtd/nand/raw/mtk_nand.c
+++ b/drivers/mtd/nand/raw/mtk_nand.c
@@ -1334,10 +1334,10 @@ static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc,
 	nand->options |= NAND_USE_BOUNCE_BUFFER | NAND_SUBPAGE_READ;
 	nand->dev_ready = mtk_nfc_dev_ready;
 	nand->select_chip = mtk_nfc_select_chip;
-	nand->write_byte = mtk_nfc_write_byte;
-	nand->write_buf = mtk_nfc_write_buf;
-	nand->read_byte = mtk_nfc_read_byte;
-	nand->read_buf = mtk_nfc_read_buf;
+	nand->legacy.write_byte = mtk_nfc_write_byte;
+	nand->legacy.write_buf = mtk_nfc_write_buf;
+	nand->legacy.read_byte = mtk_nfc_read_byte;
+	nand->legacy.read_buf = mtk_nfc_read_buf;
 	nand->cmd_ctrl = mtk_nfc_cmd_ctrl;
 	nand->setup_data_interface = mtk_nfc_setup_data_interface;
 
diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index 895f85ee29db..f7e439f578da 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -1402,7 +1402,7 @@ static int mxc_nand_set_features(struct nand_chip *chip, int addr,
 	host->buf_start = 0;
 
 	for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i)
-		chip->write_byte(chip, subfeature_param[i]);
+		chip->legacy.write_byte(chip, subfeature_param[i]);
 
 	memcpy32_toio(host->main_area0, host->data_buf, mtd->writesize);
 	host->devtype_data->send_cmd(host, NAND_CMD_SET_FEATURES, false);
@@ -1426,7 +1426,7 @@ static int mxc_nand_get_features(struct nand_chip *chip, int addr,
 	host->buf_start = 0;
 
 	for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i)
-		*subfeature_param++ = chip->read_byte(chip);
+		*subfeature_param++ = chip->legacy.read_byte(chip);
 
 	return 0;
 }
@@ -1775,9 +1775,9 @@ static int mxcnd_probe(struct platform_device *pdev)
 	nand_set_flash_node(this, pdev->dev.of_node),
 	this->dev_ready = mxc_nand_dev_ready;
 	this->cmdfunc = mxc_nand_command;
-	this->read_byte = mxc_nand_read_byte;
-	this->write_buf = mxc_nand_write_buf;
-	this->read_buf = mxc_nand_read_buf;
+	this->legacy.read_byte = mxc_nand_read_byte;
+	this->legacy.write_buf = mxc_nand_write_buf;
+	this->legacy.read_buf = mxc_nand_read_buf;
 	this->set_features = mxc_nand_set_features;
 	this->get_features = mxc_nand_get_features;
 
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 1edaa9fdbce9..016ef405474c 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -305,7 +305,7 @@ static void nand_select_chip(struct nand_chip *chip, int chipnr)
  */
 static void nand_write_byte(struct nand_chip *chip, uint8_t byte)
 {
-	chip->write_buf(chip, &byte, 1);
+	chip->legacy.write_buf(chip, &byte, 1);
 }
 
 /**
@@ -335,7 +335,7 @@ static void nand_write_byte16(struct nand_chip *chip, uint8_t byte)
 	 * neither an address nor a command transfer. Let's assume a 0 on the
 	 * upper I/O lines is OK.
 	 */
-	chip->write_buf(chip, (uint8_t *)&word, 2);
+	chip->legacy.write_buf(chip, (uint8_t *)&word, 2);
 }
 
 /**
@@ -1524,7 +1524,7 @@ int nand_read_page_op(struct nand_chip *chip, unsigned int page,
 
 	chip->cmdfunc(chip, NAND_CMD_READ0, offset_in_page, page);
 	if (len)
-		chip->read_buf(chip, buf, len);
+		chip->legacy.read_buf(chip, buf, len);
 
 	return 0;
 }
@@ -1572,7 +1572,7 @@ static int nand_read_param_page_op(struct nand_chip *chip, u8 page, void *buf,
 
 	chip->cmdfunc(chip, NAND_CMD_PARAM, page, -1);
 	for (i = 0; i < len; i++)
-		p[i] = chip->read_byte(chip);
+		p[i] = chip->legacy.read_byte(chip);
 
 	return 0;
 }
@@ -1635,7 +1635,7 @@ int nand_change_read_column_op(struct nand_chip *chip,
 
 	chip->cmdfunc(chip, NAND_CMD_RNDOUT, offset_in_page, -1);
 	if (len)
-		chip->read_buf(chip, buf, len);
+		chip->legacy.read_buf(chip, buf, len);
 
 	return 0;
 }
@@ -1672,7 +1672,7 @@ int nand_read_oob_op(struct nand_chip *chip, unsigned int page,
 
 	chip->cmdfunc(chip, NAND_CMD_READOOB, offset_in_oob, page);
 	if (len)
-		chip->read_buf(chip, buf, len);
+		chip->legacy.read_buf(chip, buf, len);
 
 	return 0;
 }
@@ -1785,7 +1785,7 @@ int nand_prog_page_begin_op(struct nand_chip *chip, unsigned int page,
 	chip->cmdfunc(chip, NAND_CMD_SEQIN, offset_in_page, page);
 
 	if (buf)
-		chip->write_buf(chip, buf, len);
+		chip->legacy.write_buf(chip, buf, len);
 
 	return 0;
 }
@@ -1869,7 +1869,7 @@ int nand_prog_page_op(struct nand_chip *chip, unsigned int page,
 						len, true);
 	} else {
 		chip->cmdfunc(chip, NAND_CMD_SEQIN, offset_in_page, page);
-		chip->write_buf(chip, buf, len);
+		chip->legacy.write_buf(chip, buf, len);
 		chip->cmdfunc(chip, NAND_CMD_PAGEPROG, -1, -1);
 		status = chip->waitfunc(chip);
 	}
@@ -1938,7 +1938,7 @@ int nand_change_write_column_op(struct nand_chip *chip,
 
 	chip->cmdfunc(chip, NAND_CMD_RNDIN, offset_in_page, -1);
 	if (len)
-		chip->write_buf(chip, buf, len);
+		chip->legacy.write_buf(chip, buf, len);
 
 	return 0;
 }
@@ -1986,7 +1986,7 @@ int nand_readid_op(struct nand_chip *chip, u8 addr, void *buf,
 	chip->cmdfunc(chip, NAND_CMD_READID, addr, -1);
 
 	for (i = 0; i < len; i++)
-		id[i] = chip->read_byte(chip);
+		id[i] = chip->legacy.read_byte(chip);
 
 	return 0;
 }
@@ -2023,7 +2023,7 @@ int nand_status_op(struct nand_chip *chip, u8 *status)
 
 	chip->cmdfunc(chip, NAND_CMD_STATUS, -1, -1);
 	if (status)
-		*status = chip->read_byte(chip);
+		*status = chip->legacy.read_byte(chip);
 
 	return 0;
 }
@@ -2151,7 +2151,7 @@ static int nand_set_features_op(struct nand_chip *chip, u8 feature,
 
 	chip->cmdfunc(chip, NAND_CMD_SET_FEATURES, feature, -1);
 	for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i)
-		chip->write_byte(chip, params[i]);
+		chip->legacy.write_byte(chip, params[i]);
 
 	ret = chip->waitfunc(chip);
 	if (ret < 0)
@@ -2199,7 +2199,7 @@ static int nand_get_features_op(struct nand_chip *chip, u8 feature,
 
 	chip->cmdfunc(chip, NAND_CMD_GET_FEATURES, feature, -1);
 	for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i)
-		params[i] = chip->read_byte(chip);
+		params[i] = chip->legacy.read_byte(chip);
 
 	return 0;
 }
@@ -2291,9 +2291,9 @@ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len,
 		unsigned int i;
 
 		for (i = 0; i < len; i++)
-			p[i] = chip->read_byte(chip);
+			p[i] = chip->legacy.read_byte(chip);
 	} else {
-		chip->read_buf(chip, buf, len);
+		chip->legacy.read_buf(chip, buf, len);
 	}
 
 	return 0;
@@ -2335,9 +2335,9 @@ int nand_write_data_op(struct nand_chip *chip, const void *buf,
 		unsigned int i;
 
 		for (i = 0; i < len; i++)
-			chip->write_byte(chip, p[i]);
+			chip->legacy.write_byte(chip, p[i]);
 	} else {
-		chip->write_buf(chip, buf, len);
+		chip->legacy.write_buf(chip, buf, len);
 	}
 
 	return 0;
@@ -4936,18 +4936,18 @@ static void nand_set_defaults(struct nand_chip *chip)
 		chip->get_features = nand_default_get_features;
 
 	/* If called twice, pointers that depend on busw may need to be reset */
-	if (!chip->read_byte || chip->read_byte == nand_read_byte)
-		chip->read_byte = busw ? nand_read_byte16 : nand_read_byte;
+	if (!chip->legacy.read_byte || chip->legacy.read_byte == nand_read_byte)
+		chip->legacy.read_byte = busw ? nand_read_byte16 : nand_read_byte;
 	if (!chip->block_bad)
 		chip->block_bad = nand_block_bad;
 	if (!chip->block_markbad)
 		chip->block_markbad = nand_default_block_markbad;
-	if (!chip->write_buf || chip->write_buf == nand_write_buf)
-		chip->write_buf = busw ? nand_write_buf16 : nand_write_buf;
-	if (!chip->write_byte || chip->write_byte == nand_write_byte)
-		chip->write_byte = busw ? nand_write_byte16 : nand_write_byte;
-	if (!chip->read_buf || chip->read_buf == nand_read_buf)
-		chip->read_buf = busw ? nand_read_buf16 : nand_read_buf;
+	if (!chip->legacy.write_buf || chip->legacy.write_buf == nand_write_buf)
+		chip->legacy.write_buf = busw ? nand_write_buf16 : nand_write_buf;
+	if (!chip->legacy.write_byte || chip->legacy.write_byte == nand_write_byte)
+		chip->legacy.write_byte = busw ? nand_write_byte16 : nand_write_byte;
+	if (!chip->legacy.read_buf || chip->legacy.read_buf == nand_read_buf)
+		chip->legacy.read_buf = busw ? nand_read_buf16 : nand_read_buf;
 
 	if (!chip->controller) {
 		chip->controller = &chip->dummy_controller;
diff --git a/drivers/mtd/nand/raw/nand_hynix.c b/drivers/mtd/nand/raw/nand_hynix.c
index bb1c4f8ce785..6a2f3efad153 100644
--- a/drivers/mtd/nand/raw/nand_hynix.c
+++ b/drivers/mtd/nand/raw/nand_hynix.c
@@ -108,7 +108,7 @@ static int hynix_nand_reg_write_op(struct nand_chip *chip, u8 addr, u8 val)
 	}
 
 	chip->cmdfunc(chip, NAND_CMD_NONE, column, -1);
-	chip->write_byte(chip, val);
+	chip->legacy.write_byte(chip, val);
 
 	return 0;
 }
diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c
index f750783d5d6a..f3219122e311 100644
--- a/drivers/mtd/nand/raw/nandsim.c
+++ b/drivers/mtd/nand/raw/nandsim.c
@@ -2156,7 +2156,7 @@ static void ns_nand_read_buf(struct nand_chip *chip, u_char *buf, int len)
 		int i;
 
 		for (i = 0; i < len; i++)
-			buf[i] = chip->read_byte(chip);
+			buf[i] = chip->legacy.read_byte(chip);
 
 		return;
 	}
@@ -2250,10 +2250,10 @@ static int __init ns_init_module(void)
 	 * Register simulator's callbacks.
 	 */
 	chip->cmd_ctrl	 = ns_hwcontrol;
-	chip->read_byte  = ns_nand_read_byte;
+	chip->legacy.read_byte  = ns_nand_read_byte;
 	chip->dev_ready  = ns_device_ready;
-	chip->write_buf  = ns_nand_write_buf;
-	chip->read_buf   = ns_nand_read_buf;
+	chip->legacy.write_buf  = ns_nand_write_buf;
+	chip->legacy.read_buf   = ns_nand_read_buf;
 	chip->ecc.mode   = NAND_ECC_SOFT;
 	chip->ecc.algo   = NAND_ECC_HAMMING;
 	/* The NAND_SKIP_BBTSCAN option is necessary for 'overridesize' */
diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index adc4060c65ad..7377dc752431 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -149,8 +149,8 @@ static int ndfc_chip_init(struct ndfc_controller *ndfc,
 	chip->select_chip = ndfc_select_chip;
 	chip->chip_delay = 50;
 	chip->controller = &ndfc->ndfc_control;
-	chip->read_buf = ndfc_read_buf;
-	chip->write_buf = ndfc_write_buf;
+	chip->legacy.read_buf = ndfc_read_buf;
+	chip->legacy.write_buf = ndfc_write_buf;
 	chip->ecc.correct = nand_correct_data;
 	chip->ecc.hwctl = ndfc_enable_hwecc;
 	chip->ecc.calculate = ndfc_calculate_ecc;
diff --git a/drivers/mtd/nand/raw/nuc900_nand.c b/drivers/mtd/nand/raw/nuc900_nand.c
index 3aae5fda5399..71b0a41bc497 100644
--- a/drivers/mtd/nand/raw/nuc900_nand.c
+++ b/drivers/mtd/nand/raw/nuc900_nand.c
@@ -256,9 +256,9 @@ static int nuc900_nand_probe(struct platform_device *pdev)
 
 	chip->cmdfunc		= nuc900_nand_command_lp;
 	chip->dev_ready		= nuc900_nand_devready;
-	chip->read_byte		= nuc900_nand_read_byte;
-	chip->write_buf		= nuc900_nand_write_buf;
-	chip->read_buf		= nuc900_nand_read_buf;
+	chip->legacy.read_byte	= nuc900_nand_read_byte;
+	chip->legacy.write_buf	= nuc900_nand_write_buf;
+	chip->legacy.read_buf	= nuc900_nand_read_buf;
 	chip->chip_delay	= 50;
 	chip->options		= 0;
 	chip->ecc.mode		= NAND_ECC_SOFT;
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index 627048886c95..3ab4a2c5fc2d 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -1539,7 +1539,7 @@ static int omap_write_page_bch(struct nand_chip *chip, const uint8_t *buf,
 	chip->ecc.hwctl(chip, NAND_ECC_WRITE);
 
 	/* Write data */
-	chip->write_buf(chip, buf, mtd->writesize);
+	chip->legacy.write_buf(chip, buf, mtd->writesize);
 
 	/* Update ecc vector from GPMC result registers */
 	omap_calculate_ecc_bch_multi(mtd, buf, &ecc_calc[0]);
@@ -1550,7 +1550,7 @@ static int omap_write_page_bch(struct nand_chip *chip, const uint8_t *buf,
 		return ret;
 
 	/* Write ecc vector to OOB area */
-	chip->write_buf(chip, chip->oob_poi, mtd->oobsize);
+	chip->legacy.write_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	return nand_prog_page_end_op(chip);
 }
@@ -1591,7 +1591,7 @@ static int omap_write_subpage_bch(struct nand_chip *chip, u32 offset,
 	chip->ecc.hwctl(chip, NAND_ECC_WRITE);
 
 	/* Write data */
-	chip->write_buf(chip, buf, mtd->writesize);
+	chip->legacy.write_buf(chip, buf, mtd->writesize);
 
 	for (step = 0; step < ecc_steps; step++) {
 		/* mask ECC of un-touched subpages by padding 0xFF */
@@ -1616,7 +1616,7 @@ static int omap_write_subpage_bch(struct nand_chip *chip, u32 offset,
 		return ret;
 
 	/* write OOB buffer to NAND device */
-	chip->write_buf(chip, chip->oob_poi, mtd->oobsize);
+	chip->legacy.write_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	return nand_prog_page_end_op(chip);
 }
@@ -1650,7 +1650,7 @@ static int omap_read_page_bch(struct nand_chip *chip, uint8_t *buf,
 	chip->ecc.hwctl(chip, NAND_ECC_READ);
 
 	/* Read data */
-	chip->read_buf(chip, buf, mtd->writesize);
+	chip->legacy.read_buf(chip, buf, mtd->writesize);
 
 	/* Read oob bytes */
 	nand_change_read_column_op(chip,
@@ -1933,8 +1933,8 @@ static int omap_nand_attach_chip(struct nand_chip *chip)
 	/* Re-populate low-level callbacks based on xfer modes */
 	switch (info->xfer_type) {
 	case NAND_OMAP_PREFETCH_POLLED:
-		chip->read_buf = omap_read_buf_pref;
-		chip->write_buf = omap_write_buf_pref;
+		chip->legacy.read_buf = omap_read_buf_pref;
+		chip->legacy.write_buf = omap_write_buf_pref;
 		break;
 
 	case NAND_OMAP_POLLED:
@@ -1966,8 +1966,8 @@ static int omap_nand_attach_chip(struct nand_chip *chip)
 					err);
 				return err;
 			}
-			chip->read_buf = omap_read_buf_dma_pref;
-			chip->write_buf = omap_write_buf_dma_pref;
+			chip->legacy.read_buf = omap_read_buf_dma_pref;
+			chip->legacy.write_buf = omap_write_buf_dma_pref;
 		}
 		break;
 
@@ -2002,8 +2002,8 @@ static int omap_nand_attach_chip(struct nand_chip *chip)
 			return err;
 		}
 
-		chip->read_buf = omap_read_buf_irq_pref;
-		chip->write_buf = omap_write_buf_irq_pref;
+		chip->legacy.read_buf = omap_read_buf_irq_pref;
+		chip->legacy.write_buf = omap_write_buf_irq_pref;
 
 		break;
 
diff --git a/drivers/mtd/nand/raw/orion_nand.c b/drivers/mtd/nand/raw/orion_nand.c
index d73e3c7a3f3a..f0d0054b4a7a 100644
--- a/drivers/mtd/nand/raw/orion_nand.c
+++ b/drivers/mtd/nand/raw/orion_nand.c
@@ -138,7 +138,7 @@ static int __init orion_nand_probe(struct platform_device *pdev)
 	nand_set_flash_node(nc, pdev->dev.of_node);
 	nc->legacy.IO_ADDR_R = nc->legacy.IO_ADDR_W = io_base;
 	nc->cmd_ctrl = orion_nand_cmd_ctrl;
-	nc->read_buf = orion_nand_read_buf;
+	nc->legacy.read_buf = orion_nand_read_buf;
 	nc->ecc.mode = NAND_ECC_SOFT;
 	nc->ecc.algo = NAND_ECC_HAMMING;
 
diff --git a/drivers/mtd/nand/raw/oxnas_nand.c b/drivers/mtd/nand/raw/oxnas_nand.c
index ab32df146505..16df274f4cd6 100644
--- a/drivers/mtd/nand/raw/oxnas_nand.c
+++ b/drivers/mtd/nand/raw/oxnas_nand.c
@@ -133,9 +133,9 @@ static int oxnas_nand_probe(struct platform_device *pdev)
 		mtd->priv = chip;
 
 		chip->cmd_ctrl = oxnas_nand_cmd_ctrl;
-		chip->read_buf = oxnas_nand_read_buf;
-		chip->read_byte = oxnas_nand_read_byte;
-		chip->write_buf = oxnas_nand_write_buf;
+		chip->legacy.read_buf = oxnas_nand_read_buf;
+		chip->legacy.read_byte = oxnas_nand_read_byte;
+		chip->legacy.write_buf = oxnas_nand_write_buf;
 		chip->chip_delay = 30;
 
 		/* Scan to find existence of the device */
diff --git a/drivers/mtd/nand/raw/pasemi_nand.c b/drivers/mtd/nand/raw/pasemi_nand.c
index 1b367bf9ef53..ca158dabe8b9 100644
--- a/drivers/mtd/nand/raw/pasemi_nand.c
+++ b/drivers/mtd/nand/raw/pasemi_nand.c
@@ -141,8 +141,8 @@ static int pasemi_nand_probe(struct platform_device *ofdev)
 
 	chip->cmd_ctrl = pasemi_hwcontrol;
 	chip->dev_ready = pasemi_device_ready;
-	chip->read_buf = pasemi_read_buf;
-	chip->write_buf = pasemi_write_buf;
+	chip->legacy.read_buf = pasemi_read_buf;
+	chip->legacy.write_buf = pasemi_write_buf;
 	chip->chip_delay = 0;
 	chip->ecc.mode = NAND_ECC_SOFT;
 	chip->ecc.algo = NAND_ECC_HAMMING;
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index c06347531d26..971e387a8a75 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -65,8 +65,8 @@ static int plat_nand_probe(struct platform_device *pdev)
 	data->chip.cmd_ctrl = pdata->ctrl.cmd_ctrl;
 	data->chip.dev_ready = pdata->ctrl.dev_ready;
 	data->chip.select_chip = pdata->ctrl.select_chip;
-	data->chip.write_buf = pdata->ctrl.write_buf;
-	data->chip.read_buf = pdata->ctrl.read_buf;
+	data->chip.legacy.write_buf = pdata->ctrl.write_buf;
+	data->chip.legacy.read_buf = pdata->ctrl.read_buf;
 	data->chip.chip_delay = pdata->chip.chip_delay;
 	data->chip.options |= pdata->chip.options;
 	data->chip.bbt_options |= pdata->chip.bbt_options;
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index 9037dddff99a..7b487a2ffa8e 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -349,7 +349,8 @@ struct nandc_regs {
  * @data_buffer:		our local DMA buffer for page read/writes,
  *				used when we can't use the buffer provided
  *				by upper layers directly
- * @buf_size/count/start:	markers for chip->read_buf/write_buf functions
+ * @buf_size/count/start:	markers for chip->legacy.read_buf/write_buf
+ *				functions
  * @reg_read_buf:		local buffer for reading back registers via DMA
  * @reg_read_dma:		contains dma address for register read buffer
  * @reg_read_pos:		marker for data read in reg_read_buf
@@ -2275,10 +2276,10 @@ static int qcom_nandc_block_markbad(struct nand_chip *chip, loff_t ofs)
 }
 
 /*
- * the three functions below implement chip->read_byte(), chip->read_buf()
- * and chip->write_buf() respectively. these aren't used for
- * reading/writing page data, they are used for smaller data like reading
- * id, status etc
+ * the three functions below implement chip->legacy.read_byte(),
+ * chip->legacy.read_buf() and chip->legacy.write_buf() respectively. these
+ * aren't used for reading/writing page data, they are used for smaller data
+ * like reading	id, status etc
  */
 static uint8_t qcom_nandc_read_byte(struct nand_chip *chip)
 {
@@ -2804,9 +2805,9 @@ static int qcom_nand_host_init_and_register(struct qcom_nand_controller *nandc,
 
 	chip->cmdfunc		= qcom_nandc_command;
 	chip->select_chip	= qcom_nandc_select_chip;
-	chip->read_byte		= qcom_nandc_read_byte;
-	chip->read_buf		= qcom_nandc_read_buf;
-	chip->write_buf		= qcom_nandc_write_buf;
+	chip->legacy.read_byte	= qcom_nandc_read_byte;
+	chip->legacy.read_buf	= qcom_nandc_read_buf;
+	chip->legacy.write_buf	= qcom_nandc_write_buf;
 	chip->set_features	= nand_get_set_features_notsupp;
 	chip->get_features	= nand_get_set_features_notsupp;
 
diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c
index 2c30e97ab2a4..05b669d34cec 100644
--- a/drivers/mtd/nand/raw/r852.c
+++ b/drivers/mtd/nand/raw/r852.c
@@ -858,9 +858,9 @@ static int  r852_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
 	chip->dev_ready = r852_ready;
 
 	/* I/O */
-	chip->read_byte = r852_read_byte;
-	chip->read_buf = r852_read_buf;
-	chip->write_buf = r852_write_buf;
+	chip->legacy.read_byte = r852_read_byte;
+	chip->legacy.read_buf = r852_read_buf;
+	chip->legacy.write_buf = r852_write_buf;
 
 	/* ecc */
 	chip->ecc.mode = NAND_ECC_HW_SYNDROME;
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index 473abf10eeec..d0670ebd5b68 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -864,8 +864,8 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 
 	nand_set_flash_node(chip, set->of_node);
 
-	chip->write_buf    = s3c2410_nand_write_buf;
-	chip->read_buf     = s3c2410_nand_read_buf;
+	chip->legacy.write_buf    = s3c2410_nand_write_buf;
+	chip->legacy.read_buf     = s3c2410_nand_read_buf;
 	chip->select_chip  = s3c2410_nand_select_chip;
 	chip->chip_delay   = 50;
 	nand_set_controller_data(chip, nmtd);
@@ -894,8 +894,8 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 		info->sel_bit	= S3C2440_NFCONT_nFCE;
 		chip->cmd_ctrl  = s3c2440_nand_hwcontrol;
 		chip->dev_ready = s3c2440_nand_devready;
-		chip->read_buf  = s3c2440_nand_read_buf;
-		chip->write_buf	= s3c2440_nand_write_buf;
+		chip->legacy.read_buf  = s3c2440_nand_read_buf;
+		chip->legacy.write_buf	= s3c2440_nand_write_buf;
 		break;
 
 	case TYPE_S3C2412:
diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c
index 4b1c7e435937..d83d53810590 100644
--- a/drivers/mtd/nand/raw/sh_flctl.c
+++ b/drivers/mtd/nand/raw/sh_flctl.c
@@ -618,7 +618,7 @@ static int flctl_read_page_hwecc(struct nand_chip *chip, uint8_t *buf,
 
 	nand_read_page_op(chip, page, 0, buf, mtd->writesize);
 	if (oob_required)
-		chip->read_buf(chip, chip->oob_poi, mtd->oobsize);
+		chip->legacy.read_buf(chip, chip->oob_poi, mtd->oobsize);
 	return 0;
 }
 
@@ -628,7 +628,7 @@ static int flctl_write_page_hwecc(struct nand_chip *chip, const uint8_t *buf,
 	struct mtd_info *mtd = nand_to_mtd(chip);
 
 	nand_prog_page_begin_op(chip, page, 0, buf, mtd->writesize);
-	chip->write_buf(chip, chip->oob_poi, mtd->oobsize);
+	chip->legacy.write_buf(chip, chip->oob_poi, mtd->oobsize);
 	return nand_prog_page_end_op(chip);
 }
 
@@ -1180,9 +1180,9 @@ static int flctl_probe(struct platform_device *pdev)
 	/* 20 us command delay time */
 	nand->chip_delay = 20;
 
-	nand->read_byte = flctl_read_byte;
-	nand->write_buf = flctl_write_buf;
-	nand->read_buf = flctl_read_buf;
+	nand->legacy.read_byte = flctl_read_byte;
+	nand->legacy.write_buf = flctl_write_buf;
+	nand->legacy.read_buf = flctl_read_buf;
 	nand->select_chip = flctl_select_chip;
 	nand->cmdfunc = flctl_cmdfunc;
 	nand->set_features = nand_get_set_features_notsupp;
diff --git a/drivers/mtd/nand/raw/socrates_nand.c b/drivers/mtd/nand/raw/socrates_nand.c
index aa42b4ea4d23..006224a40f3b 100644
--- a/drivers/mtd/nand/raw/socrates_nand.c
+++ b/drivers/mtd/nand/raw/socrates_nand.c
@@ -153,9 +153,9 @@ static int socrates_nand_probe(struct platform_device *ofdev)
 	mtd->dev.parent = &ofdev->dev;
 
 	nand_chip->cmd_ctrl = socrates_nand_cmd_ctrl;
-	nand_chip->read_byte = socrates_nand_read_byte;
-	nand_chip->write_buf = socrates_nand_write_buf;
-	nand_chip->read_buf = socrates_nand_read_buf;
+	nand_chip->legacy.read_byte = socrates_nand_read_byte;
+	nand_chip->legacy.write_buf = socrates_nand_write_buf;
+	nand_chip->legacy.read_buf = socrates_nand_read_buf;
 	nand_chip->dev_ready = socrates_nand_device_ready;
 
 	nand_chip->ecc.mode = NAND_ECC_SOFT;	/* enable ECC */
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index a3700b79bdeb..6e1317bde1b3 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -1924,9 +1924,9 @@ static int sunxi_nand_chip_init(struct device *dev, struct sunxi_nfc *nfc,
 	nand_set_flash_node(nand, np);
 	nand->select_chip = sunxi_nfc_select_chip;
 	nand->cmd_ctrl = sunxi_nfc_cmd_ctrl;
-	nand->read_buf = sunxi_nfc_read_buf;
-	nand->write_buf = sunxi_nfc_write_buf;
-	nand->read_byte = sunxi_nfc_read_byte;
+	nand->legacy.read_buf = sunxi_nfc_read_buf;
+	nand->legacy.write_buf = sunxi_nfc_write_buf;
+	nand->legacy.read_byte = sunxi_nfc_read_byte;
 	nand->setup_data_interface = sunxi_nfc_setup_data_interface;
 
 	mtd = nand_to_mtd(nand);
diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c
index bf7012099790..379f2ed284cb 100644
--- a/drivers/mtd/nand/raw/tango_nand.c
+++ b/drivers/mtd/nand/raw/tango_nand.c
@@ -564,9 +564,9 @@ static int chip_init(struct device *dev, struct device_node *np)
 	ecc = &chip->ecc;
 	mtd = nand_to_mtd(chip);
 
-	chip->read_byte = tango_read_byte;
-	chip->write_buf = tango_write_buf;
-	chip->read_buf = tango_read_buf;
+	chip->legacy.read_byte = tango_read_byte;
+	chip->legacy.write_buf = tango_write_buf;
+	chip->legacy.read_buf = tango_read_buf;
 	chip->select_chip = tango_select_chip;
 	chip->cmd_ctrl = tango_cmd_ctrl;
 	chip->dev_ready = tango_dev_ready;
diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index 0b23587bf47c..94e659158f16 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -405,9 +405,9 @@ static int tmio_probe(struct platform_device *dev)
 	/* Set address of hardware control function */
 	nand_chip->cmd_ctrl = tmio_nand_hwcontrol;
 	nand_chip->dev_ready = tmio_nand_dev_ready;
-	nand_chip->read_byte = tmio_nand_read_byte;
-	nand_chip->write_buf = tmio_nand_write_buf;
-	nand_chip->read_buf = tmio_nand_read_buf;
+	nand_chip->legacy.read_byte = tmio_nand_read_byte;
+	nand_chip->legacy.write_buf = tmio_nand_write_buf;
+	nand_chip->legacy.read_buf = tmio_nand_read_buf;
 
 	/* set eccmode using hardware ECC */
 	nand_chip->ecc.mode = NAND_ECC_HW;
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index c84b2ad84cf7..7fb1575c6e2b 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -324,9 +324,9 @@ static int __init txx9ndfmc_probe(struct platform_device *dev)
 		mtd = nand_to_mtd(chip);
 		mtd->dev.parent = &dev->dev;
 
-		chip->read_byte = txx9ndfmc_read_byte;
-		chip->read_buf = txx9ndfmc_read_buf;
-		chip->write_buf = txx9ndfmc_write_buf;
+		chip->legacy.read_byte = txx9ndfmc_read_byte;
+		chip->legacy.read_buf = txx9ndfmc_read_buf;
+		chip->legacy.write_buf = txx9ndfmc_write_buf;
 		chip->cmd_ctrl = txx9ndfmc_cmd_ctrl;
 		chip->dev_ready = txx9ndfmc_dev_ready;
 		chip->ecc.calculate = txx9ndfmc_calculate_ecc;
diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c
index 3d91e98df5a8..87ec034bdd3e 100644
--- a/drivers/mtd/nand/raw/xway_nand.c
+++ b/drivers/mtd/nand/raw/xway_nand.c
@@ -177,9 +177,9 @@ static int xway_nand_probe(struct platform_device *pdev)
 	data->chip.cmd_ctrl = xway_cmd_ctrl;
 	data->chip.dev_ready = xway_dev_ready;
 	data->chip.select_chip = xway_select_chip;
-	data->chip.write_buf = xway_write_buf;
-	data->chip.read_buf = xway_read_buf;
-	data->chip.read_byte = xway_read_byte;
+	data->chip.legacy.write_buf = xway_write_buf;
+	data->chip.legacy.read_buf = xway_read_buf;
+	data->chip.legacy.read_byte = xway_read_byte;
 	data->chip.chip_delay = 30;
 
 	data->chip.ecc.mode = NAND_ECC_SOFT;
diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c
index f2e14f972319..4856593b626e 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.c
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c
@@ -657,7 +657,7 @@ static int spinand_read_page_hwecc(struct nand_chip *chip, u8 *buf,
 
 	nand_read_page_op(chip, page, 0, p, eccsize * eccsteps);
 	if (oob_required)
-		chip->read_buf(chip, chip->oob_poi, mtd->oobsize);
+		chip->legacy.read_buf(chip, chip->oob_poi, mtd->oobsize);
 
 	while (1) {
 		retval = spinand_read_status(info->spi, &status);
@@ -915,9 +915,9 @@ static int spinand_probe(struct spi_device *spi_nand)
 
 	nand_set_flash_node(chip, spi_nand->dev.of_node);
 	nand_set_controller_data(chip, info);
-	chip->read_buf	= spinand_read_buf;
-	chip->write_buf	= spinand_write_buf;
-	chip->read_byte	= spinand_read_byte;
+	chip->legacy.read_buf	= spinand_read_buf;
+	chip->legacy.write_buf	= spinand_write_buf;
+	chip->legacy.read_byte	= spinand_read_byte;
 	chip->cmdfunc	= spinand_cmdfunc;
 	chip->waitfunc	= spinand_wait;
 	chip->options	|= NAND_CACHEPRG;
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 6b1dc8fef89d..f961efd2eacc 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1176,6 +1176,10 @@ int nand_op_parser_exec_op(struct nand_chip *chip,
  * struct nand_legacy - NAND chip legacy fields/hooks
  * @IO_ADDR_R: address to read the 8 I/O lines of the flash device
  * @IO_ADDR_W: address to write the 8 I/O lines of the flash device
+ * @read_byte: read one byte from the chip
+ * @write_byte: write a single byte to the chip on the low 8 I/O lines
+ * @write_buf: write data from the buffer to the chip
+ * @read_buf: read data from the chip into the buffer
  *
  * If you look at this structure you're already wrong. These fields/hooks are
  * all deprecated.
@@ -1183,6 +1187,10 @@ int nand_op_parser_exec_op(struct nand_chip *chip,
 struct nand_legacy {
 	void __iomem *IO_ADDR_R;
 	void __iomem *IO_ADDR_W;
+	u8 (*read_byte)(struct nand_chip *chip);
+	void (*write_byte)(struct nand_chip *chip, u8 byte);
+	void (*write_buf)(struct nand_chip *chip, const u8 *buf, int len);
+	void (*read_buf)(struct nand_chip *chip, u8 *buf, int len);
 };
 
 /**
@@ -1193,11 +1201,6 @@ struct nand_legacy {
  *			you're modifying an existing driver that is using those
  *			fields/hooks, you should consider reworking the driver
  *			avoid using them.
- * @read_byte:		[REPLACEABLE] read one byte from the chip
- * @write_byte:		[REPLACEABLE] write a single byte to the chip on the
- *			low 8 I/O lines
- * @write_buf:		[REPLACEABLE] write data from the buffer to the chip
- * @read_buf:		[REPLACEABLE] read data from the chip into the buffer
  * @select_chip:	[REPLACEABLE] select chip nr
  * @block_bad:		[REPLACEABLE] check if a block is bad, using OOB markers
  * @block_markbad:	[REPLACEABLE] mark a block bad
@@ -1213,8 +1216,8 @@ struct nand_legacy {
  *			ready.
  * @exec_op:		controller specific method to execute NAND operations.
  *			This method replaces ->cmdfunc(),
- *			->{read,write}_{buf,byte,word}(), ->dev_ready() and
- *			->waifunc().
+ *			->legacy.{read,write}_{buf,byte,word}(), ->dev_ready()
+ *			and ->waifunc().
  * @setup_read_retry:	[FLASHSPECIFIC] flash (vendor) specific function for
  *			setting the read-retry mode. Mostly needed for MLC NAND.
  * @ecc:		[BOARDSPECIFIC] ECC control structure
@@ -1297,10 +1300,6 @@ struct nand_chip {
 
 	struct nand_legacy legacy;
 
-	uint8_t (*read_byte)(struct nand_chip *chip);
-	void (*write_byte)(struct nand_chip *chip, uint8_t byte);
-	void (*write_buf)(struct nand_chip *chip, const uint8_t *buf, int len);
-	void (*read_buf)(struct nand_chip *chip, uint8_t *buf, int len);
 	void (*select_chip)(struct nand_chip *chip, int cs);
 	int (*block_bad)(struct nand_chip *chip, loff_t ofs);
 	int (*block_markbad)(struct nand_chip *chip, loff_t ofs);
-- 
cgit v1.2.3


From bf6065c6c08fa3ed7bdf8d28b8062ce8e58c1543 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 7 Sep 2018 00:38:36 +0200
Subject: mtd: rawnand: Deprecate ->cmd_ctrl() and ->cmdfunc()

Those hooks have been replaced by ->exec_op(). Move them to the
nand_legacy struct.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/ams-delta.c                 |   2 +-
 drivers/mtd/nand/raw/atmel/nand-controller.c     |   4 +-
 drivers/mtd/nand/raw/au1550nd.c                  |   2 +-
 drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c |   6 +-
 drivers/mtd/nand/raw/brcmnand/brcmnand.c         |   4 +-
 drivers/mtd/nand/raw/cafe_nand.c                 |   2 +-
 drivers/mtd/nand/raw/cmx270_nand.c               |   2 +-
 drivers/mtd/nand/raw/cs553x_nand.c               |   2 +-
 drivers/mtd/nand/raw/davinci_nand.c              |   2 +-
 drivers/mtd/nand/raw/denali.c                    |   2 +-
 drivers/mtd/nand/raw/diskonchip.c                |   6 +-
 drivers/mtd/nand/raw/fsl_elbc_nand.c             |   2 +-
 drivers/mtd/nand/raw/fsl_ifc_nand.c              |   2 +-
 drivers/mtd/nand/raw/fsl_upm.c                   |   4 +-
 drivers/mtd/nand/raw/gpio.c                      |   2 +-
 drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c       |   2 +-
 drivers/mtd/nand/raw/hisi504_nand.c              |   2 +-
 drivers/mtd/nand/raw/jz4740_nand.c               |   2 +-
 drivers/mtd/nand/raw/jz4780_nand.c               |   2 +-
 drivers/mtd/nand/raw/lpc32xx_mlc.c               |   2 +-
 drivers/mtd/nand/raw/lpc32xx_slc.c               |   2 +-
 drivers/mtd/nand/raw/mpc5121_nfc.c               |   2 +-
 drivers/mtd/nand/raw/mtk_nand.c                  |   2 +-
 drivers/mtd/nand/raw/mxc_nand.c                  |   2 +-
 drivers/mtd/nand/raw/nand_base.c                 | 138 ++++++++++++-----------
 drivers/mtd/nand/raw/nand_hynix.c                |   4 +-
 drivers/mtd/nand/raw/nandsim.c                   |   2 +-
 drivers/mtd/nand/raw/ndfc.c                      |   2 +-
 drivers/mtd/nand/raw/nuc900_nand.c               |   2 +-
 drivers/mtd/nand/raw/omap2.c                     |   2 +-
 drivers/mtd/nand/raw/orion_nand.c                |   2 +-
 drivers/mtd/nand/raw/oxnas_nand.c                |   2 +-
 drivers/mtd/nand/raw/pasemi_nand.c               |   2 +-
 drivers/mtd/nand/raw/plat_nand.c                 |   2 +-
 drivers/mtd/nand/raw/qcom_nandc.c                |  14 +--
 drivers/mtd/nand/raw/r852.c                      |   2 +-
 drivers/mtd/nand/raw/s3c2410.c                   |   6 +-
 drivers/mtd/nand/raw/sh_flctl.c                  |   2 +-
 drivers/mtd/nand/raw/sharpsl.c                   |   2 +-
 drivers/mtd/nand/raw/socrates_nand.c             |   2 +-
 drivers/mtd/nand/raw/sunxi_nand.c                |   2 +-
 drivers/mtd/nand/raw/tango_nand.c                |   2 +-
 drivers/mtd/nand/raw/tmio_nand.c                 |   2 +-
 drivers/mtd/nand/raw/txx9ndfmc.c                 |   2 +-
 drivers/mtd/nand/raw/xway_nand.c                 |   2 +-
 drivers/staging/mt29f_spinand/mt29f_spinand.c    |   2 +-
 include/linux/mtd/rawnand.h                      |  21 ++--
 47 files changed, 144 insertions(+), 137 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c
index 6616f473aeb2..756f6339d457 100644
--- a/drivers/mtd/nand/raw/ams-delta.c
+++ b/drivers/mtd/nand/raw/ams-delta.c
@@ -213,7 +213,7 @@ static int ams_delta_init(struct platform_device *pdev)
 	this->legacy.read_byte = ams_delta_read_byte;
 	this->legacy.write_buf = ams_delta_write_buf;
 	this->legacy.read_buf = ams_delta_read_buf;
-	this->cmd_ctrl = ams_delta_hwcontrol;
+	this->legacy.cmd_ctrl = ams_delta_hwcontrol;
 	if (gpio_request(AMS_DELTA_GPIO_PIN_NAND_RB, "nand_rdy") == 0) {
 		this->dev_ready = ams_delta_nand_ready;
 	} else {
diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index 9b2876b5a9c2..37f617ec178e 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -1472,7 +1472,7 @@ static void atmel_nand_init(struct atmel_nand_controller *nc,
 	mtd->dev.parent = nc->dev;
 	nand->base.controller = &nc->base;
 
-	chip->cmd_ctrl = atmel_nand_cmd_ctrl;
+	chip->legacy.cmd_ctrl = atmel_nand_cmd_ctrl;
 	chip->legacy.read_byte = atmel_nand_read_byte;
 	chip->legacy.write_byte = atmel_nand_write_byte;
 	chip->legacy.read_buf = atmel_nand_read_buf;
@@ -1524,7 +1524,7 @@ static void atmel_hsmc_nand_init(struct atmel_nand_controller *nc,
 	atmel_nand_init(nc, nand);
 
 	/* Overload some methods for the HSMC controller. */
-	chip->cmd_ctrl = atmel_hsmc_nand_cmd_ctrl;
+	chip->legacy.cmd_ctrl = atmel_hsmc_nand_cmd_ctrl;
 	chip->select_chip = atmel_hsmc_nand_select_chip;
 }
 
diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c
index 0db5dc61b155..5d45f13288fc 100644
--- a/drivers/mtd/nand/raw/au1550nd.c
+++ b/drivers/mtd/nand/raw/au1550nd.c
@@ -430,7 +430,7 @@ static int au1550nd_probe(struct platform_device *pdev)
 
 	this->dev_ready = au1550_device_ready;
 	this->select_chip = au1550_select_chip;
-	this->cmdfunc = au1550_command;
+	this->legacy.cmdfunc = au1550_command;
 
 	/* 30 us command delay time */
 	this->chip_delay = 30;
diff --git a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
index ea41d1b95c81..2bd389b49b4a 100644
--- a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
@@ -227,7 +227,7 @@ static void bcm47xxnflash_ops_bcm4706_cmdfunc(struct nand_chip *nand_chip,
 
 	switch (command) {
 	case NAND_CMD_RESET:
-		nand_chip->cmd_ctrl(nand_chip, command, NAND_CTRL_CLE);
+		nand_chip->legacy.cmd_ctrl(nand_chip, command, NAND_CTRL_CLE);
 
 		ndelay(100);
 		nand_wait_ready(nand_chip);
@@ -384,9 +384,9 @@ int bcm47xxnflash_ops_bcm4706_init(struct bcm47xxnflash *b47n)
 	u32 val;
 
 	b47n->nand_chip.select_chip = bcm47xxnflash_ops_bcm4706_select_chip;
-	nand_chip->cmd_ctrl = bcm47xxnflash_ops_bcm4706_cmd_ctrl;
+	nand_chip->legacy.cmd_ctrl = bcm47xxnflash_ops_bcm4706_cmd_ctrl;
 	nand_chip->dev_ready = bcm47xxnflash_ops_bcm4706_dev_ready;
-	b47n->nand_chip.cmdfunc = bcm47xxnflash_ops_bcm4706_cmdfunc;
+	b47n->nand_chip.legacy.cmdfunc = bcm47xxnflash_ops_bcm4706_cmdfunc;
 	b47n->nand_chip.legacy.read_byte = bcm47xxnflash_ops_bcm4706_read_byte;
 	b47n->nand_chip.legacy.read_buf = bcm47xxnflash_ops_bcm4706_read_buf;
 	b47n->nand_chip.legacy.write_buf = bcm47xxnflash_ops_bcm4706_write_buf;
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index 4eb9244cc108..162ec11ab251 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -2270,8 +2270,8 @@ static int brcmnand_init_cs(struct brcmnand_host *host, struct device_node *dn)
 	mtd->owner = THIS_MODULE;
 	mtd->dev.parent = &pdev->dev;
 
-	chip->cmd_ctrl = brcmnand_cmd_ctrl;
-	chip->cmdfunc = brcmnand_cmdfunc;
+	chip->legacy.cmd_ctrl = brcmnand_cmd_ctrl;
+	chip->legacy.cmdfunc = brcmnand_cmdfunc;
 	chip->waitfunc = brcmnand_waitfunc;
 	chip->legacy.read_byte = brcmnand_read_byte;
 	chip->legacy.read_buf = brcmnand_read_buf;
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index 95754d1f12b8..0f734442fd3d 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -703,7 +703,7 @@ static int cafe_nand_probe(struct pci_dev *pdev,
 		goto out_ior;
 	}
 
-	cafe->nand.cmdfunc = cafe_nand_cmdfunc;
+	cafe->nand.legacy.cmdfunc = cafe_nand_cmdfunc;
 	cafe->nand.dev_ready = cafe_device_ready;
 	cafe->nand.legacy.read_byte = cafe_read_byte;
 	cafe->nand.legacy.read_buf = cafe_read_buf;
diff --git a/drivers/mtd/nand/raw/cmx270_nand.c b/drivers/mtd/nand/raw/cmx270_nand.c
index 18f10ae92dfc..c543f073d971 100644
--- a/drivers/mtd/nand/raw/cmx270_nand.c
+++ b/drivers/mtd/nand/raw/cmx270_nand.c
@@ -175,7 +175,7 @@ static int __init cmx270_init(void)
 	/* insert callbacks */
 	this->legacy.IO_ADDR_R = cmx270_nand_io;
 	this->legacy.IO_ADDR_W = cmx270_nand_io;
-	this->cmd_ctrl = cmx270_hwcontrol;
+	this->legacy.cmd_ctrl = cmx270_hwcontrol;
 	this->dev_ready = cmx270_device_ready;
 
 	/* 15 us command delay time */
diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c
index 8a3230041678..bd75ec4e5508 100644
--- a/drivers/mtd/nand/raw/cs553x_nand.c
+++ b/drivers/mtd/nand/raw/cs553x_nand.c
@@ -206,7 +206,7 @@ static int __init cs553x_init_one(int cs, int mmio, unsigned long adr)
 		goto out_mtd;
 	}
 
-	this->cmd_ctrl = cs553x_hwcontrol;
+	this->legacy.cmd_ctrl = cs553x_hwcontrol;
 	this->dev_ready = cs553x_device_ready;
 	this->legacy.read_byte = cs553x_read_byte;
 	this->legacy.read_buf = cs553x_read_buf;
diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index b7701caa5f94..5fcd8c30293a 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -781,7 +781,7 @@ static int nand_davinci_probe(struct platform_device *pdev)
 	info->mask_cle		= pdata->mask_cle ? : MASK_CLE;
 
 	/* Set address of hardware control function */
-	info->chip.cmd_ctrl	= nand_davinci_hwcontrol;
+	info->chip.legacy.cmd_ctrl	= nand_davinci_hwcontrol;
 	info->chip.dev_ready	= nand_davinci_dev_ready;
 
 	/* Speed up buffer I/O */
diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index 2d963ed6643c..c11547bfb2e7 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -1345,7 +1345,7 @@ int denali_init(struct denali_nand_info *denali)
 	chip->select_chip = denali_select_chip;
 	chip->legacy.read_byte = denali_read_byte;
 	chip->legacy.write_byte = denali_write_byte;
-	chip->cmd_ctrl = denali_cmd_ctrl;
+	chip->legacy.cmd_ctrl = denali_cmd_ctrl;
 	chip->dev_ready = denali_dev_ready;
 	chip->waitfunc = denali_waitfunc;
 
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index ec3336f071e7..2604e80b5475 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -1389,9 +1389,9 @@ static inline int __init doc2001plus_init(struct mtd_info *mtd)
 	this->legacy.write_buf = doc2001plus_writebuf;
 	this->legacy.read_buf = doc2001plus_readbuf;
 	doc->late_init = inftl_scan_bbt;
-	this->cmd_ctrl = NULL;
+	this->legacy.cmd_ctrl = NULL;
 	this->select_chip = doc2001plus_select_chip;
-	this->cmdfunc = doc2001plus_command;
+	this->legacy.cmdfunc = doc2001plus_command;
 	this->ecc.hwctl = doc2001plus_enable_hwecc;
 
 	doc->chips_per_floor = 1;
@@ -1569,7 +1569,7 @@ static int __init doc_probe(unsigned long physadr)
 
 	nand_set_controller_data(nand, doc);
 	nand->select_chip	= doc200x_select_chip;
-	nand->cmd_ctrl		= doc200x_hwcontrol;
+	nand->legacy.cmd_ctrl		= doc200x_hwcontrol;
 	nand->dev_ready		= doc200x_dev_ready;
 	nand->waitfunc		= doc200x_wait;
 	nand->block_bad		= doc200x_block_bad;
diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
index 5e7b912f63b3..fa21d00c3443 100644
--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
@@ -780,7 +780,7 @@ static int fsl_elbc_chip_init(struct fsl_elbc_mtd *priv)
 	chip->legacy.write_buf = fsl_elbc_write_buf;
 	chip->legacy.read_buf = fsl_elbc_read_buf;
 	chip->select_chip = fsl_elbc_select_chip;
-	chip->cmdfunc = fsl_elbc_cmdfunc;
+	chip->legacy.cmdfunc = fsl_elbc_cmdfunc;
 	chip->waitfunc = fsl_elbc_wait;
 	chip->set_features = nand_get_set_features_notsupp;
 	chip->get_features = nand_get_set_features_notsupp;
diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index ed3b90f2fb50..20c86b6503a8 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -865,7 +865,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 	chip->legacy.write_buf = fsl_ifc_write_buf;
 	chip->legacy.read_buf = fsl_ifc_read_buf;
 	chip->select_chip = fsl_ifc_select_chip;
-	chip->cmdfunc = fsl_ifc_cmdfunc;
+	chip->legacy.cmdfunc = fsl_ifc_cmdfunc;
 	chip->waitfunc = fsl_ifc_wait;
 	chip->set_features = nand_get_set_features_notsupp;
 	chip->get_features = nand_get_set_features_notsupp;
diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c
index db194ad12074..23baef375bfb 100644
--- a/drivers/mtd/nand/raw/fsl_upm.c
+++ b/drivers/mtd/nand/raw/fsl_upm.c
@@ -112,7 +112,7 @@ static void fun_select_chip(struct nand_chip *chip, int mchip_nr)
 	struct fsl_upm_nand *fun = to_fsl_upm_nand(nand_to_mtd(chip));
 
 	if (mchip_nr == -1) {
-		chip->cmd_ctrl(chip, NAND_CMD_NONE, 0 | NAND_CTRL_CHANGE);
+		chip->legacy.cmd_ctrl(chip, NAND_CMD_NONE, 0 | NAND_CTRL_CHANGE);
 	} else if (mchip_nr >= 0 && mchip_nr < NAND_MAX_CHIPS) {
 		fun->mchip_number = mchip_nr;
 		chip->legacy.IO_ADDR_R = fun->io_base + fun->mchip_offsets[mchip_nr];
@@ -162,7 +162,7 @@ static int fun_chip_init(struct fsl_upm_nand *fun,
 
 	fun->chip.legacy.IO_ADDR_R = fun->io_base;
 	fun->chip.legacy.IO_ADDR_W = fun->io_base;
-	fun->chip.cmd_ctrl = fun_cmd_ctrl;
+	fun->chip.legacy.cmd_ctrl = fun_cmd_ctrl;
 	fun->chip.chip_delay = fun->chip_delay;
 	fun->chip.legacy.read_byte = fun_read_byte;
 	fun->chip.legacy.read_buf = fun_read_buf;
diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c
index bb43fad65362..1e6e81047978 100644
--- a/drivers/mtd/nand/raw/gpio.c
+++ b/drivers/mtd/nand/raw/gpio.c
@@ -279,7 +279,7 @@ static int gpio_nand_probe(struct platform_device *pdev)
 	chip->ecc.algo		= NAND_ECC_HAMMING;
 	chip->options		= gpiomtd->plat.options;
 	chip->chip_delay	= gpiomtd->plat.chip_delay;
-	chip->cmd_ctrl		= gpio_nand_cmd_ctrl;
+	chip->legacy.cmd_ctrl	= gpio_nand_cmd_ctrl;
 
 	mtd			= nand_to_mtd(chip);
 	mtd->dev.parent		= dev;
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index 54f1a84c6520..fa37d21e5f16 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -1902,7 +1902,7 @@ static int gpmi_nand_init(struct gpmi_nand_data *this)
 	nand_set_flash_node(chip, this->pdev->dev.of_node);
 	chip->select_chip	= gpmi_select_chip;
 	chip->setup_data_interface = gpmi_setup_data_interface;
-	chip->cmd_ctrl		= gpmi_cmd_ctrl;
+	chip->legacy.cmd_ctrl	= gpmi_cmd_ctrl;
 	chip->dev_ready		= gpmi_dev_ready;
 	chip->legacy.read_byte	= gpmi_read_byte;
 	chip->legacy.read_buf	= gpmi_read_buf;
diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c
index 1046f51bbcd2..6e17239983db 100644
--- a/drivers/mtd/nand/raw/hisi504_nand.c
+++ b/drivers/mtd/nand/raw/hisi504_nand.c
@@ -782,7 +782,7 @@ static int hisi_nfc_probe(struct platform_device *pdev)
 
 	nand_set_controller_data(chip, host);
 	nand_set_flash_node(chip, np);
-	chip->cmdfunc		= hisi_nfc_cmdfunc;
+	chip->legacy.cmdfunc	= hisi_nfc_cmdfunc;
 	chip->select_chip	= hisi_nfc_select_chip;
 	chip->legacy.read_byte	= hisi_nfc_read_byte;
 	chip->legacy.write_buf	= hisi_nfc_write_buf;
diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c
index 449180de92e2..ae0c268a5cb6 100644
--- a/drivers/mtd/nand/raw/jz4740_nand.c
+++ b/drivers/mtd/nand/raw/jz4740_nand.c
@@ -426,7 +426,7 @@ static int jz_nand_probe(struct platform_device *pdev)
 	chip->ecc.options	= NAND_ECC_GENERIC_ERASED_CHECK;
 
 	chip->chip_delay = 50;
-	chip->cmd_ctrl = jz_nand_cmd_ctrl;
+	chip->legacy.cmd_ctrl = jz_nand_cmd_ctrl;
 	chip->select_chip = jz_nand_select_chip;
 	chip->dummy_controller.ops = &jz_nand_controller_ops;
 
diff --git a/drivers/mtd/nand/raw/jz4780_nand.c b/drivers/mtd/nand/raw/jz4780_nand.c
index 89909a17242d..7faf5da6f5ea 100644
--- a/drivers/mtd/nand/raw/jz4780_nand.c
+++ b/drivers/mtd/nand/raw/jz4780_nand.c
@@ -280,7 +280,7 @@ static int jz4780_nand_init_chip(struct platform_device *pdev,
 	chip->chip_delay = RB_DELAY_US;
 	chip->options = NAND_NO_SUBPAGE_WRITE;
 	chip->select_chip = jz4780_nand_select_chip;
-	chip->cmd_ctrl = jz4780_nand_cmd_ctrl;
+	chip->legacy.cmd_ctrl = jz4780_nand_cmd_ctrl;
 	chip->ecc.mode = NAND_ECC_HW;
 	chip->controller = &nfc->controller;
 	nand_set_flash_node(chip, np);
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index cc1c6e6c59e1..bf3b7aa8a401 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -739,7 +739,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
 	if (res)
 		goto put_clk;
 
-	nand_chip->cmd_ctrl = lpc32xx_nand_cmd_ctrl;
+	nand_chip->legacy.cmd_ctrl = lpc32xx_nand_cmd_ctrl;
 	nand_chip->dev_ready = lpc32xx_nand_device_ready;
 	nand_chip->chip_delay = 25; /* us */
 	nand_chip->legacy.IO_ADDR_R = MLC_DATA(host->io_base);
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index 6dd5348b8f03..1c3f437c42a1 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -880,7 +880,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
 	/* Set NAND IO addresses and command/ready functions */
 	chip->legacy.IO_ADDR_R = SLC_DATA(host->io_base);
 	chip->legacy.IO_ADDR_W = SLC_DATA(host->io_base);
-	chip->cmd_ctrl = lpc32xx_nand_cmd_ctrl;
+	chip->legacy.cmd_ctrl = lpc32xx_nand_cmd_ctrl;
 	chip->dev_ready = lpc32xx_nand_device_ready;
 	chip->chip_delay = 20; /* 20us command delay time */
 
diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c
index 4c2925e819f1..1af4c777f887 100644
--- a/drivers/mtd/nand/raw/mpc5121_nfc.c
+++ b/drivers/mtd/nand/raw/mpc5121_nfc.c
@@ -693,7 +693,7 @@ static int mpc5121_nfc_probe(struct platform_device *op)
 
 	mtd->name = "MPC5121 NAND";
 	chip->dev_ready = mpc5121_nfc_dev_ready;
-	chip->cmdfunc = mpc5121_nfc_command;
+	chip->legacy.cmdfunc = mpc5121_nfc_command;
 	chip->legacy.read_byte = mpc5121_nfc_read_byte;
 	chip->legacy.read_buf = mpc5121_nfc_read_buf;
 	chip->legacy.write_buf = mpc5121_nfc_write_buf;
diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c
index fe150e993acf..0cfdca39a269 100644
--- a/drivers/mtd/nand/raw/mtk_nand.c
+++ b/drivers/mtd/nand/raw/mtk_nand.c
@@ -1338,7 +1338,7 @@ static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc,
 	nand->legacy.write_buf = mtk_nfc_write_buf;
 	nand->legacy.read_byte = mtk_nfc_read_byte;
 	nand->legacy.read_buf = mtk_nfc_read_buf;
-	nand->cmd_ctrl = mtk_nfc_cmd_ctrl;
+	nand->legacy.cmd_ctrl = mtk_nfc_cmd_ctrl;
 	nand->setup_data_interface = mtk_nfc_setup_data_interface;
 
 	/* set default mode in case dt entry is missing */
diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index f7e439f578da..146e95153289 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -1774,7 +1774,7 @@ static int mxcnd_probe(struct platform_device *pdev)
 	nand_set_controller_data(this, host);
 	nand_set_flash_node(this, pdev->dev.of_node),
 	this->dev_ready = mxc_nand_dev_ready;
-	this->cmdfunc = mxc_nand_command;
+	this->legacy.cmdfunc = mxc_nand_command;
 	this->legacy.read_byte = mxc_nand_read_byte;
 	this->legacy.write_buf = mxc_nand_write_buf;
 	this->legacy.read_buf = mxc_nand_read_buf;
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 016ef405474c..f3d6cd52f7eb 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -286,7 +286,8 @@ static void nand_select_chip(struct nand_chip *chip, int chipnr)
 {
 	switch (chipnr) {
 	case -1:
-		chip->cmd_ctrl(chip, NAND_CMD_NONE, 0 | NAND_CTRL_CHANGE);
+		chip->legacy.cmd_ctrl(chip, NAND_CMD_NONE,
+				      0 | NAND_CTRL_CHANGE);
 		break;
 	case 0:
 		break;
@@ -759,11 +760,11 @@ static void nand_command(struct nand_chip *chip, unsigned int command,
 			column -= 256;
 			readcmd = NAND_CMD_READ1;
 		}
-		chip->cmd_ctrl(chip, readcmd, ctrl);
+		chip->legacy.cmd_ctrl(chip, readcmd, ctrl);
 		ctrl &= ~NAND_CTRL_CHANGE;
 	}
 	if (command != NAND_CMD_NONE)
-		chip->cmd_ctrl(chip, command, ctrl);
+		chip->legacy.cmd_ctrl(chip, command, ctrl);
 
 	/* Address cycle, when necessary */
 	ctrl = NAND_CTRL_ALE | NAND_CTRL_CHANGE;
@@ -773,17 +774,18 @@ static void nand_command(struct nand_chip *chip, unsigned int command,
 		if (chip->options & NAND_BUSWIDTH_16 &&
 				!nand_opcode_8bits(command))
 			column >>= 1;
-		chip->cmd_ctrl(chip, column, ctrl);
+		chip->legacy.cmd_ctrl(chip, column, ctrl);
 		ctrl &= ~NAND_CTRL_CHANGE;
 	}
 	if (page_addr != -1) {
-		chip->cmd_ctrl(chip, page_addr, ctrl);
+		chip->legacy.cmd_ctrl(chip, page_addr, ctrl);
 		ctrl &= ~NAND_CTRL_CHANGE;
-		chip->cmd_ctrl(chip, page_addr >> 8, ctrl);
+		chip->legacy.cmd_ctrl(chip, page_addr >> 8, ctrl);
 		if (chip->options & NAND_ROW_ADDR_3)
-			chip->cmd_ctrl(chip, page_addr >> 16, ctrl);
+			chip->legacy.cmd_ctrl(chip, page_addr >> 16, ctrl);
 	}
-	chip->cmd_ctrl(chip, NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE);
+	chip->legacy.cmd_ctrl(chip, NAND_CMD_NONE,
+			      NAND_NCE | NAND_CTRL_CHANGE);
 
 	/*
 	 * Program and erase have their own busy handlers status and sequential
@@ -805,10 +807,10 @@ static void nand_command(struct nand_chip *chip, unsigned int command,
 		if (chip->dev_ready)
 			break;
 		udelay(chip->chip_delay);
-		chip->cmd_ctrl(chip, NAND_CMD_STATUS,
-			       NAND_CTRL_CLE | NAND_CTRL_CHANGE);
-		chip->cmd_ctrl(chip,
-			       NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE);
+		chip->legacy.cmd_ctrl(chip, NAND_CMD_STATUS,
+				      NAND_CTRL_CLE | NAND_CTRL_CHANGE);
+		chip->legacy.cmd_ctrl(chip, NAND_CMD_NONE,
+				      NAND_NCE | NAND_CTRL_CHANGE);
 		/* EZ-NAND can take upto 250ms as per ONFi v4.0 */
 		nand_wait_status_ready(mtd, 250);
 		return;
@@ -886,8 +888,8 @@ static void nand_command_lp(struct nand_chip *chip, unsigned int command,
 
 	/* Command latch cycle */
 	if (command != NAND_CMD_NONE)
-		chip->cmd_ctrl(chip, command,
-			       NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
+		chip->legacy.cmd_ctrl(chip, command,
+				      NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
 
 	if (column != -1 || page_addr != -1) {
 		int ctrl = NAND_CTRL_CHANGE | NAND_NCE | NAND_ALE;
@@ -898,23 +900,24 @@ static void nand_command_lp(struct nand_chip *chip, unsigned int command,
 			if (chip->options & NAND_BUSWIDTH_16 &&
 					!nand_opcode_8bits(command))
 				column >>= 1;
-			chip->cmd_ctrl(chip, column, ctrl);
+			chip->legacy.cmd_ctrl(chip, column, ctrl);
 			ctrl &= ~NAND_CTRL_CHANGE;
 
 			/* Only output a single addr cycle for 8bits opcodes. */
 			if (!nand_opcode_8bits(command))
-				chip->cmd_ctrl(chip, column >> 8, ctrl);
+				chip->legacy.cmd_ctrl(chip, column >> 8, ctrl);
 		}
 		if (page_addr != -1) {
-			chip->cmd_ctrl(chip, page_addr, ctrl);
-			chip->cmd_ctrl(chip, page_addr >> 8,
-				       NAND_NCE | NAND_ALE);
+			chip->legacy.cmd_ctrl(chip, page_addr, ctrl);
+			chip->legacy.cmd_ctrl(chip, page_addr >> 8,
+					     NAND_NCE | NAND_ALE);
 			if (chip->options & NAND_ROW_ADDR_3)
-				chip->cmd_ctrl(chip, page_addr >> 16,
-					       NAND_NCE | NAND_ALE);
+				chip->legacy.cmd_ctrl(chip, page_addr >> 16,
+						      NAND_NCE | NAND_ALE);
 		}
 	}
-	chip->cmd_ctrl(chip, NAND_CMD_NONE, NAND_NCE | NAND_CTRL_CHANGE);
+	chip->legacy.cmd_ctrl(chip, NAND_CMD_NONE,
+			      NAND_NCE | NAND_CTRL_CHANGE);
 
 	/*
 	 * Program and erase have their own busy handlers status, sequential
@@ -941,20 +944,20 @@ static void nand_command_lp(struct nand_chip *chip, unsigned int command,
 		if (chip->dev_ready)
 			break;
 		udelay(chip->chip_delay);
-		chip->cmd_ctrl(chip, NAND_CMD_STATUS,
-			       NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
-		chip->cmd_ctrl(chip, NAND_CMD_NONE,
-			       NAND_NCE | NAND_CTRL_CHANGE);
+		chip->legacy.cmd_ctrl(chip, NAND_CMD_STATUS,
+				      NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
+		chip->legacy.cmd_ctrl(chip, NAND_CMD_NONE,
+				      NAND_NCE | NAND_CTRL_CHANGE);
 		/* EZ-NAND can take upto 250ms as per ONFi v4.0 */
 		nand_wait_status_ready(mtd, 250);
 		return;
 
 	case NAND_CMD_RNDOUT:
 		/* No ready / busy check necessary */
-		chip->cmd_ctrl(chip, NAND_CMD_RNDOUTSTART,
-			       NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
-		chip->cmd_ctrl(chip, NAND_CMD_NONE,
-			       NAND_NCE | NAND_CTRL_CHANGE);
+		chip->legacy.cmd_ctrl(chip, NAND_CMD_RNDOUTSTART,
+				      NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
+		chip->legacy.cmd_ctrl(chip, NAND_CMD_NONE,
+				      NAND_NCE | NAND_CTRL_CHANGE);
 
 		nand_ccs_delay(chip);
 		return;
@@ -969,10 +972,10 @@ static void nand_command_lp(struct nand_chip *chip, unsigned int command,
 		if (column == -1 && page_addr == -1)
 			return;
 
-		chip->cmd_ctrl(chip, NAND_CMD_READSTART,
-			       NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
-		chip->cmd_ctrl(chip, NAND_CMD_NONE,
-			       NAND_NCE | NAND_CTRL_CHANGE);
+		chip->legacy.cmd_ctrl(chip, NAND_CMD_READSTART,
+				      NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
+		chip->legacy.cmd_ctrl(chip, NAND_CMD_NONE,
+				      NAND_NCE | NAND_CTRL_CHANGE);
 
 		/* This applies to read commands */
 	default:
@@ -1522,7 +1525,7 @@ int nand_read_page_op(struct nand_chip *chip, unsigned int page,
 						 buf, len);
 	}
 
-	chip->cmdfunc(chip, NAND_CMD_READ0, offset_in_page, page);
+	chip->legacy.cmdfunc(chip, NAND_CMD_READ0, offset_in_page, page);
 	if (len)
 		chip->legacy.read_buf(chip, buf, len);
 
@@ -1570,7 +1573,7 @@ static int nand_read_param_page_op(struct nand_chip *chip, u8 page, void *buf,
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(chip, NAND_CMD_PARAM, page, -1);
+	chip->legacy.cmdfunc(chip, NAND_CMD_PARAM, page, -1);
 	for (i = 0; i < len; i++)
 		p[i] = chip->legacy.read_byte(chip);
 
@@ -1633,7 +1636,7 @@ int nand_change_read_column_op(struct nand_chip *chip,
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(chip, NAND_CMD_RNDOUT, offset_in_page, -1);
+	chip->legacy.cmdfunc(chip, NAND_CMD_RNDOUT, offset_in_page, -1);
 	if (len)
 		chip->legacy.read_buf(chip, buf, len);
 
@@ -1670,7 +1673,7 @@ int nand_read_oob_op(struct nand_chip *chip, unsigned int page,
 					 mtd->writesize + offset_in_oob,
 					 buf, len);
 
-	chip->cmdfunc(chip, NAND_CMD_READOOB, offset_in_oob, page);
+	chip->legacy.cmdfunc(chip, NAND_CMD_READOOB, offset_in_oob, page);
 	if (len)
 		chip->legacy.read_buf(chip, buf, len);
 
@@ -1782,7 +1785,7 @@ int nand_prog_page_begin_op(struct nand_chip *chip, unsigned int page,
 		return nand_exec_prog_page_op(chip, page, offset_in_page, buf,
 					      len, false);
 
-	chip->cmdfunc(chip, NAND_CMD_SEQIN, offset_in_page, page);
+	chip->legacy.cmdfunc(chip, NAND_CMD_SEQIN, offset_in_page, page);
 
 	if (buf)
 		chip->legacy.write_buf(chip, buf, len);
@@ -1823,7 +1826,7 @@ int nand_prog_page_end_op(struct nand_chip *chip)
 		if (ret)
 			return ret;
 	} else {
-		chip->cmdfunc(chip, NAND_CMD_PAGEPROG, -1, -1);
+		chip->legacy.cmdfunc(chip, NAND_CMD_PAGEPROG, -1, -1);
 		ret = chip->waitfunc(chip);
 		if (ret < 0)
 			return ret;
@@ -1868,9 +1871,10 @@ int nand_prog_page_op(struct nand_chip *chip, unsigned int page,
 		status = nand_exec_prog_page_op(chip, page, offset_in_page, buf,
 						len, true);
 	} else {
-		chip->cmdfunc(chip, NAND_CMD_SEQIN, offset_in_page, page);
+		chip->legacy.cmdfunc(chip, NAND_CMD_SEQIN, offset_in_page,
+				     page);
 		chip->legacy.write_buf(chip, buf, len);
-		chip->cmdfunc(chip, NAND_CMD_PAGEPROG, -1, -1);
+		chip->legacy.cmdfunc(chip, NAND_CMD_PAGEPROG, -1, -1);
 		status = chip->waitfunc(chip);
 	}
 
@@ -1936,7 +1940,7 @@ int nand_change_write_column_op(struct nand_chip *chip,
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(chip, NAND_CMD_RNDIN, offset_in_page, -1);
+	chip->legacy.cmdfunc(chip, NAND_CMD_RNDIN, offset_in_page, -1);
 	if (len)
 		chip->legacy.write_buf(chip, buf, len);
 
@@ -1983,7 +1987,7 @@ int nand_readid_op(struct nand_chip *chip, u8 addr, void *buf,
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(chip, NAND_CMD_READID, addr, -1);
+	chip->legacy.cmdfunc(chip, NAND_CMD_READID, addr, -1);
 
 	for (i = 0; i < len; i++)
 		id[i] = chip->legacy.read_byte(chip);
@@ -2021,7 +2025,7 @@ int nand_status_op(struct nand_chip *chip, u8 *status)
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(chip, NAND_CMD_STATUS, -1, -1);
+	chip->legacy.cmdfunc(chip, NAND_CMD_STATUS, -1, -1);
 	if (status)
 		*status = chip->legacy.read_byte(chip);
 
@@ -2051,7 +2055,7 @@ int nand_exit_status_op(struct nand_chip *chip)
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(chip, NAND_CMD_READ0, -1, -1);
+	chip->legacy.cmdfunc(chip, NAND_CMD_READ0, -1, -1);
 
 	return 0;
 }
@@ -2099,8 +2103,8 @@ int nand_erase_op(struct nand_chip *chip, unsigned int eraseblock)
 		if (ret)
 			return ret;
 	} else {
-		chip->cmdfunc(chip, NAND_CMD_ERASE1, -1, page);
-		chip->cmdfunc(chip, NAND_CMD_ERASE2, -1, -1);
+		chip->legacy.cmdfunc(chip, NAND_CMD_ERASE1, -1, page);
+		chip->legacy.cmdfunc(chip, NAND_CMD_ERASE2, -1, -1);
 
 		ret = chip->waitfunc(chip);
 		if (ret < 0)
@@ -2149,7 +2153,7 @@ static int nand_set_features_op(struct nand_chip *chip, u8 feature,
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(chip, NAND_CMD_SET_FEATURES, feature, -1);
+	chip->legacy.cmdfunc(chip, NAND_CMD_SET_FEATURES, feature, -1);
 	for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i)
 		chip->legacy.write_byte(chip, params[i]);
 
@@ -2197,7 +2201,7 @@ static int nand_get_features_op(struct nand_chip *chip, u8 feature,
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(chip, NAND_CMD_GET_FEATURES, feature, -1);
+	chip->legacy.cmdfunc(chip, NAND_CMD_GET_FEATURES, feature, -1);
 	for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i)
 		params[i] = chip->legacy.read_byte(chip);
 
@@ -2250,7 +2254,7 @@ int nand_reset_op(struct nand_chip *chip)
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(chip, NAND_CMD_RESET, -1, -1);
+	chip->legacy.cmdfunc(chip, NAND_CMD_RESET, -1, -1);
 
 	return 0;
 }
@@ -4919,8 +4923,8 @@ static void nand_set_defaults(struct nand_chip *chip)
 		chip->chip_delay = 20;
 
 	/* check, if a user supplied command function given */
-	if (!chip->cmdfunc && !chip->exec_op)
-		chip->cmdfunc = nand_command;
+	if (!chip->legacy.cmdfunc && !chip->exec_op)
+		chip->legacy.cmdfunc = nand_command;
 
 	/* check, if a user supplied wait function given */
 	if (chip->waitfunc == NULL)
@@ -5213,11 +5217,13 @@ static int nand_flash_detect_onfi(struct nand_chip *chip)
 		/*
 		 * The nand_flash_detect_ext_param_page() uses the
 		 * Change Read Column command which maybe not supported
-		 * by the chip->cmdfunc. So try to update the chip->cmdfunc
-		 * now. We do not replace user supplied command function.
+		 * by the chip->legacy.cmdfunc. So try to update the
+		 * chip->legacy.cmdfunc now. We do not replace user supplied
+		 * command function.
 		 */
-		if (mtd->writesize > 512 && chip->cmdfunc == nand_command)
-			chip->cmdfunc = nand_command_lp;
+		if (mtd->writesize > 512 &&
+		    chip->legacy.cmdfunc == nand_command)
+			chip->legacy.cmdfunc = nand_command_lp;
 
 		/* The Extended Parameter Page is supported since ONFI 2.1. */
 		if (nand_flash_detect_ext_param_page(chip, p))
@@ -5736,8 +5742,8 @@ ident_done:
 	chip->erase = single_erase;
 
 	/* Do not replace user supplied command function! */
-	if (mtd->writesize > 512 && chip->cmdfunc == nand_command)
-		chip->cmdfunc = nand_command_lp;
+	if (mtd->writesize > 512 && chip->legacy.cmdfunc == nand_command)
+		chip->legacy.cmdfunc = nand_command_lp;
 
 	pr_info("device found, Manufacturer ID: 0x%02x, Chip ID: 0x%02x\n",
 		maf_id, dev_id);
@@ -5934,16 +5940,18 @@ static int nand_scan_ident(struct nand_chip *chip, unsigned int maxchips,
 		mtd->name = dev_name(mtd->dev.parent);
 
 	/*
-	 * ->cmdfunc() is legacy and will only be used if ->exec_op() is not
-	 * populated.
+	 * ->legacy.cmdfunc() is legacy and will only be used if ->exec_op() is
+	 * not populated.
 	 */
 	if (!chip->exec_op) {
 		/*
-		 * Default functions assigned for ->cmdfunc() and
-		 * ->select_chip() both expect ->cmd_ctrl() to be populated.
+		 * Default functions assigned for ->legacy.cmdfunc() and
+		 * ->select_chip() both expect ->legacy.cmd_ctrl() to be
+		 *  populated.
 		 */
-		if ((!chip->cmdfunc || !chip->select_chip) && !chip->cmd_ctrl) {
-			pr_err("->cmd_ctrl() should be provided\n");
+		if ((!chip->legacy.cmdfunc || !chip->select_chip) &&
+		    !chip->legacy.cmd_ctrl) {
+			pr_err("->legacy.cmd_ctrl() should be provided\n");
 			return -EINVAL;
 		}
 	}
diff --git a/drivers/mtd/nand/raw/nand_hynix.c b/drivers/mtd/nand/raw/nand_hynix.c
index 6a2f3efad153..7eec0d96909a 100644
--- a/drivers/mtd/nand/raw/nand_hynix.c
+++ b/drivers/mtd/nand/raw/nand_hynix.c
@@ -88,7 +88,7 @@ static int hynix_nand_cmd_op(struct nand_chip *chip, u8 cmd)
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(chip, cmd, -1, -1);
+	chip->legacy.cmdfunc(chip, cmd, -1, -1);
 
 	return 0;
 }
@@ -107,7 +107,7 @@ static int hynix_nand_reg_write_op(struct nand_chip *chip, u8 addr, u8 val)
 		return nand_exec_op(chip, &op);
 	}
 
-	chip->cmdfunc(chip, NAND_CMD_NONE, column, -1);
+	chip->legacy.cmdfunc(chip, NAND_CMD_NONE, column, -1);
 	chip->legacy.write_byte(chip, val);
 
 	return 0;
diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c
index f3219122e311..360c3a7c69d7 100644
--- a/drivers/mtd/nand/raw/nandsim.c
+++ b/drivers/mtd/nand/raw/nandsim.c
@@ -2249,7 +2249,7 @@ static int __init ns_init_module(void)
 	/*
 	 * Register simulator's callbacks.
 	 */
-	chip->cmd_ctrl	 = ns_hwcontrol;
+	chip->legacy.cmd_ctrl	 = ns_hwcontrol;
 	chip->legacy.read_byte  = ns_nand_read_byte;
 	chip->dev_ready  = ns_device_ready;
 	chip->legacy.write_buf  = ns_nand_write_buf;
diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index 7377dc752431..4e58b64ac690 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -144,7 +144,7 @@ static int ndfc_chip_init(struct ndfc_controller *ndfc,
 
 	chip->legacy.IO_ADDR_R = ndfc->ndfcbase + NDFC_DATA;
 	chip->legacy.IO_ADDR_W = ndfc->ndfcbase + NDFC_DATA;
-	chip->cmd_ctrl = ndfc_hwcontrol;
+	chip->legacy.cmd_ctrl = ndfc_hwcontrol;
 	chip->dev_ready = ndfc_ready;
 	chip->select_chip = ndfc_select_chip;
 	chip->chip_delay = 50;
diff --git a/drivers/mtd/nand/raw/nuc900_nand.c b/drivers/mtd/nand/raw/nuc900_nand.c
index 71b0a41bc497..32946dc5c4b8 100644
--- a/drivers/mtd/nand/raw/nuc900_nand.c
+++ b/drivers/mtd/nand/raw/nuc900_nand.c
@@ -254,7 +254,7 @@ static int nuc900_nand_probe(struct platform_device *pdev)
 		return -ENOENT;
 	clk_enable(nuc900_nand->clk);
 
-	chip->cmdfunc		= nuc900_nand_command_lp;
+	chip->legacy.cmdfunc	= nuc900_nand_command_lp;
 	chip->dev_ready		= nuc900_nand_devready;
 	chip->legacy.read_byte	= nuc900_nand_read_byte;
 	chip->legacy.write_buf	= nuc900_nand_write_buf;
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index 3ab4a2c5fc2d..af427a72c7ca 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -2230,7 +2230,7 @@ static int omap_nand_probe(struct platform_device *pdev)
 	nand_chip->controller = &omap_gpmc_controller;
 
 	nand_chip->legacy.IO_ADDR_W = nand_chip->legacy.IO_ADDR_R;
-	nand_chip->cmd_ctrl  = omap_hwcontrol;
+	nand_chip->legacy.cmd_ctrl  = omap_hwcontrol;
 
 	info->ready_gpiod = devm_gpiod_get_optional(&pdev->dev, "rb",
 						    GPIOD_IN);
diff --git a/drivers/mtd/nand/raw/orion_nand.c b/drivers/mtd/nand/raw/orion_nand.c
index f0d0054b4a7a..bf288c3c930b 100644
--- a/drivers/mtd/nand/raw/orion_nand.c
+++ b/drivers/mtd/nand/raw/orion_nand.c
@@ -137,7 +137,7 @@ static int __init orion_nand_probe(struct platform_device *pdev)
 	nand_set_controller_data(nc, board);
 	nand_set_flash_node(nc, pdev->dev.of_node);
 	nc->legacy.IO_ADDR_R = nc->legacy.IO_ADDR_W = io_base;
-	nc->cmd_ctrl = orion_nand_cmd_ctrl;
+	nc->legacy.cmd_ctrl = orion_nand_cmd_ctrl;
 	nc->legacy.read_buf = orion_nand_read_buf;
 	nc->ecc.mode = NAND_ECC_SOFT;
 	nc->ecc.algo = NAND_ECC_HAMMING;
diff --git a/drivers/mtd/nand/raw/oxnas_nand.c b/drivers/mtd/nand/raw/oxnas_nand.c
index 16df274f4cd6..fb0ebb296f6c 100644
--- a/drivers/mtd/nand/raw/oxnas_nand.c
+++ b/drivers/mtd/nand/raw/oxnas_nand.c
@@ -132,7 +132,7 @@ static int oxnas_nand_probe(struct platform_device *pdev)
 		mtd->dev.parent = &pdev->dev;
 		mtd->priv = chip;
 
-		chip->cmd_ctrl = oxnas_nand_cmd_ctrl;
+		chip->legacy.cmd_ctrl = oxnas_nand_cmd_ctrl;
 		chip->legacy.read_buf = oxnas_nand_read_buf;
 		chip->legacy.read_byte = oxnas_nand_read_byte;
 		chip->legacy.write_buf = oxnas_nand_write_buf;
diff --git a/drivers/mtd/nand/raw/pasemi_nand.c b/drivers/mtd/nand/raw/pasemi_nand.c
index ca158dabe8b9..d99d7b63e545 100644
--- a/drivers/mtd/nand/raw/pasemi_nand.c
+++ b/drivers/mtd/nand/raw/pasemi_nand.c
@@ -139,7 +139,7 @@ static int pasemi_nand_probe(struct platform_device *ofdev)
 		goto out_ior;
 	}
 
-	chip->cmd_ctrl = pasemi_hwcontrol;
+	chip->legacy.cmd_ctrl = pasemi_hwcontrol;
 	chip->dev_ready = pasemi_device_ready;
 	chip->legacy.read_buf = pasemi_read_buf;
 	chip->legacy.write_buf = pasemi_write_buf;
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index 971e387a8a75..c66c7f942179 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -62,7 +62,7 @@ static int plat_nand_probe(struct platform_device *pdev)
 
 	data->chip.legacy.IO_ADDR_R = data->io_base;
 	data->chip.legacy.IO_ADDR_W = data->io_base;
-	data->chip.cmd_ctrl = pdata->ctrl.cmd_ctrl;
+	data->chip.legacy.cmd_ctrl = pdata->ctrl.cmd_ctrl;
 	data->chip.dev_ready = pdata->ctrl.dev_ready;
 	data->chip.select_chip = pdata->ctrl.select_chip;
 	data->chip.legacy.write_buf = pdata->ctrl.write_buf;
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index 7b487a2ffa8e..bd187139416f 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -1155,8 +1155,8 @@ static void config_nand_cw_write(struct qcom_nand_controller *nandc)
 }
 
 /*
- * the following functions are used within chip->cmdfunc() to perform different
- * NAND_CMD_* commands
+ * the following functions are used within chip->legacy.cmdfunc() to
+ * perform different NAND_CMD_* commands
  */
 
 /* sets up descriptors for NAND_CMD_PARAM */
@@ -1436,10 +1436,10 @@ static void post_command(struct qcom_nand_host *host, int command)
 }
 
 /*
- * Implements chip->cmdfunc. It's  only used for a limited set of commands.
- * The rest of the commands wouldn't be called by upper layers. For example,
- * NAND_CMD_READOOB would never be called because we have our own versions
- * of read_oob ops for nand_ecc_ctrl.
+ * Implements chip->legacy.cmdfunc. It's  only used for a limited set of
+ * commands. The rest of the commands wouldn't be called by upper layers.
+ * For example, NAND_CMD_READOOB would never be called because we have our own
+ * versions of read_oob ops for nand_ecc_ctrl.
  */
 static void qcom_nandc_command(struct nand_chip *chip, unsigned int command,
 			       int column, int page_addr)
@@ -2803,7 +2803,7 @@ static int qcom_nand_host_init_and_register(struct qcom_nand_controller *nandc,
 	mtd->owner = THIS_MODULE;
 	mtd->dev.parent = dev;
 
-	chip->cmdfunc		= qcom_nandc_command;
+	chip->legacy.cmdfunc	= qcom_nandc_command;
 	chip->select_chip	= qcom_nandc_select_chip;
 	chip->legacy.read_byte	= qcom_nandc_read_byte;
 	chip->legacy.read_buf	= qcom_nandc_read_buf;
diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c
index 05b669d34cec..6e602586cb14 100644
--- a/drivers/mtd/nand/raw/r852.c
+++ b/drivers/mtd/nand/raw/r852.c
@@ -853,7 +853,7 @@ static int  r852_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
 		goto error4;
 
 	/* commands */
-	chip->cmd_ctrl = r852_cmdctl;
+	chip->legacy.cmd_ctrl = r852_cmdctl;
 	chip->waitfunc = r852_wait;
 	chip->dev_ready = r852_ready;
 
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index d0670ebd5b68..f232d683f32d 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -884,7 +884,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 		chip->legacy.IO_ADDR_W = regs + S3C2410_NFDATA;
 		info->sel_reg   = regs + S3C2410_NFCONF;
 		info->sel_bit	= S3C2410_NFCONF_nFCE;
-		chip->cmd_ctrl  = s3c2410_nand_hwcontrol;
+		chip->legacy.cmd_ctrl  = s3c2410_nand_hwcontrol;
 		chip->dev_ready = s3c2410_nand_devready;
 		break;
 
@@ -892,7 +892,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 		chip->legacy.IO_ADDR_W = regs + S3C2440_NFDATA;
 		info->sel_reg   = regs + S3C2440_NFCONT;
 		info->sel_bit	= S3C2440_NFCONT_nFCE;
-		chip->cmd_ctrl  = s3c2440_nand_hwcontrol;
+		chip->legacy.cmd_ctrl  = s3c2440_nand_hwcontrol;
 		chip->dev_ready = s3c2440_nand_devready;
 		chip->legacy.read_buf  = s3c2440_nand_read_buf;
 		chip->legacy.write_buf	= s3c2440_nand_write_buf;
@@ -902,7 +902,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 		chip->legacy.IO_ADDR_W = regs + S3C2440_NFDATA;
 		info->sel_reg   = regs + S3C2440_NFCONT;
 		info->sel_bit	= S3C2412_NFCONT_nFCE0;
-		chip->cmd_ctrl  = s3c2440_nand_hwcontrol;
+		chip->legacy.cmd_ctrl  = s3c2440_nand_hwcontrol;
 		chip->dev_ready = s3c2412_nand_devready;
 
 		if (readl(regs + S3C2410_NFCONF) & S3C2412_NFCONF_NANDBOOT)
diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c
index d83d53810590..71658fbd99a3 100644
--- a/drivers/mtd/nand/raw/sh_flctl.c
+++ b/drivers/mtd/nand/raw/sh_flctl.c
@@ -1184,7 +1184,7 @@ static int flctl_probe(struct platform_device *pdev)
 	nand->legacy.write_buf = flctl_write_buf;
 	nand->legacy.read_buf = flctl_read_buf;
 	nand->select_chip = flctl_select_chip;
-	nand->cmdfunc = flctl_cmdfunc;
+	nand->legacy.cmdfunc = flctl_cmdfunc;
 	nand->set_features = nand_get_set_features_notsupp;
 	nand->get_features = nand_get_set_features_notsupp;
 
diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c
index d9cdd11fbd3a..a626fb7af8d1 100644
--- a/drivers/mtd/nand/raw/sharpsl.c
+++ b/drivers/mtd/nand/raw/sharpsl.c
@@ -156,7 +156,7 @@ static int sharpsl_nand_probe(struct platform_device *pdev)
 	this->legacy.IO_ADDR_R = sharpsl->io + FLASHIO;
 	this->legacy.IO_ADDR_W = sharpsl->io + FLASHIO;
 	/* Set address of hardware control function */
-	this->cmd_ctrl = sharpsl_nand_hwcontrol;
+	this->legacy.cmd_ctrl = sharpsl_nand_hwcontrol;
 	this->dev_ready = sharpsl_nand_dev_ready;
 	/* 15 us command delay time */
 	this->chip_delay = 15;
diff --git a/drivers/mtd/nand/raw/socrates_nand.c b/drivers/mtd/nand/raw/socrates_nand.c
index 006224a40f3b..0ca81fa956b9 100644
--- a/drivers/mtd/nand/raw/socrates_nand.c
+++ b/drivers/mtd/nand/raw/socrates_nand.c
@@ -152,7 +152,7 @@ static int socrates_nand_probe(struct platform_device *ofdev)
 	mtd->name = "socrates_nand";
 	mtd->dev.parent = &ofdev->dev;
 
-	nand_chip->cmd_ctrl = socrates_nand_cmd_ctrl;
+	nand_chip->legacy.cmd_ctrl = socrates_nand_cmd_ctrl;
 	nand_chip->legacy.read_byte = socrates_nand_read_byte;
 	nand_chip->legacy.write_buf = socrates_nand_write_buf;
 	nand_chip->legacy.read_buf = socrates_nand_read_buf;
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index 6e1317bde1b3..48bb28872298 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -1923,7 +1923,7 @@ static int sunxi_nand_chip_init(struct device *dev, struct sunxi_nfc *nfc,
 	nand->ecc.mode = NAND_ECC_HW;
 	nand_set_flash_node(nand, np);
 	nand->select_chip = sunxi_nfc_select_chip;
-	nand->cmd_ctrl = sunxi_nfc_cmd_ctrl;
+	nand->legacy.cmd_ctrl = sunxi_nfc_cmd_ctrl;
 	nand->legacy.read_buf = sunxi_nfc_read_buf;
 	nand->legacy.write_buf = sunxi_nfc_write_buf;
 	nand->legacy.read_byte = sunxi_nfc_read_byte;
diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c
index 379f2ed284cb..f0285c0b3089 100644
--- a/drivers/mtd/nand/raw/tango_nand.c
+++ b/drivers/mtd/nand/raw/tango_nand.c
@@ -568,7 +568,7 @@ static int chip_init(struct device *dev, struct device_node *np)
 	chip->legacy.write_buf = tango_write_buf;
 	chip->legacy.read_buf = tango_read_buf;
 	chip->select_chip = tango_select_chip;
-	chip->cmd_ctrl = tango_cmd_ctrl;
+	chip->legacy.cmd_ctrl = tango_cmd_ctrl;
 	chip->dev_ready = tango_dev_ready;
 	chip->setup_data_interface = tango_set_timings;
 	chip->options = NAND_USE_BOUNCE_BUFFER |
diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index 94e659158f16..5037359754eb 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -403,7 +403,7 @@ static int tmio_probe(struct platform_device *dev)
 	nand_chip->legacy.IO_ADDR_W = tmio->fcr;
 
 	/* Set address of hardware control function */
-	nand_chip->cmd_ctrl = tmio_nand_hwcontrol;
+	nand_chip->legacy.cmd_ctrl = tmio_nand_hwcontrol;
 	nand_chip->dev_ready = tmio_nand_dev_ready;
 	nand_chip->legacy.read_byte = tmio_nand_read_byte;
 	nand_chip->legacy.write_buf = tmio_nand_write_buf;
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index 7fb1575c6e2b..9eab56a45a5e 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -327,7 +327,7 @@ static int __init txx9ndfmc_probe(struct platform_device *dev)
 		chip->legacy.read_byte = txx9ndfmc_read_byte;
 		chip->legacy.read_buf = txx9ndfmc_read_buf;
 		chip->legacy.write_buf = txx9ndfmc_write_buf;
-		chip->cmd_ctrl = txx9ndfmc_cmd_ctrl;
+		chip->legacy.cmd_ctrl = txx9ndfmc_cmd_ctrl;
 		chip->dev_ready = txx9ndfmc_dev_ready;
 		chip->ecc.calculate = txx9ndfmc_calculate_ecc;
 		chip->ecc.correct = txx9ndfmc_correct_data;
diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c
index 87ec034bdd3e..ef351a4c507f 100644
--- a/drivers/mtd/nand/raw/xway_nand.c
+++ b/drivers/mtd/nand/raw/xway_nand.c
@@ -174,7 +174,7 @@ static int xway_nand_probe(struct platform_device *pdev)
 	mtd = nand_to_mtd(&data->chip);
 	mtd->dev.parent = &pdev->dev;
 
-	data->chip.cmd_ctrl = xway_cmd_ctrl;
+	data->chip.legacy.cmd_ctrl = xway_cmd_ctrl;
 	data->chip.dev_ready = xway_dev_ready;
 	data->chip.select_chip = xway_select_chip;
 	data->chip.legacy.write_buf = xway_write_buf;
diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c
index 4856593b626e..fbfa024f683d 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.c
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c
@@ -918,7 +918,7 @@ static int spinand_probe(struct spi_device *spi_nand)
 	chip->legacy.read_buf	= spinand_read_buf;
 	chip->legacy.write_buf	= spinand_write_buf;
 	chip->legacy.read_byte	= spinand_read_byte;
-	chip->cmdfunc	= spinand_cmdfunc;
+	chip->legacy.cmdfunc	= spinand_cmdfunc;
 	chip->waitfunc	= spinand_wait;
 	chip->options	|= NAND_CACHEPRG;
 	chip->select_chip = spinand_select_chip;
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index f961efd2eacc..5ef2004a37a2 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -199,10 +199,10 @@ enum nand_ecc_algo {
 #define NAND_USE_BOUNCE_BUFFER	0x00100000
 
 /*
- * In case your controller is implementing ->cmd_ctrl() and is relying on the
- * default ->cmdfunc() implementation, you may want to let the core handle the
- * tCCS delay which is required when a column change (RNDIN or RNDOUT) is
- * requested.
+ * In case your controller is implementing ->legacy.cmd_ctrl() and is relying
+ * on the default ->cmdfunc() implementation, you may want to let the core
+ * handle the tCCS delay which is required when a column change (RNDIN or
+ * RNDOUT) is requested.
  * If your controller already takes care of this delay, you don't need to set
  * this flag.
  */
@@ -1180,6 +1180,9 @@ int nand_op_parser_exec_op(struct nand_chip *chip,
  * @write_byte: write a single byte to the chip on the low 8 I/O lines
  * @write_buf: write data from the buffer to the chip
  * @read_buf: read data from the chip into the buffer
+ * @cmd_ctrl: hardware specific function for controlling ALE/CLE/nCE. Also used
+ *	      to write command and address
+ * @cmdfunc: hardware specific function for writing commands to the chip.
  *
  * If you look at this structure you're already wrong. These fields/hooks are
  * all deprecated.
@@ -1191,6 +1194,9 @@ struct nand_legacy {
 	void (*write_byte)(struct nand_chip *chip, u8 byte);
 	void (*write_buf)(struct nand_chip *chip, const u8 *buf, int len);
 	void (*read_buf)(struct nand_chip *chip, u8 *buf, int len);
+	void (*cmd_ctrl)(struct nand_chip *chip, int dat, unsigned int ctrl);
+	void (*cmdfunc)(struct nand_chip *chip, unsigned command, int column,
+			int page_addr);
 };
 
 /**
@@ -1204,14 +1210,10 @@ struct nand_legacy {
  * @select_chip:	[REPLACEABLE] select chip nr
  * @block_bad:		[REPLACEABLE] check if a block is bad, using OOB markers
  * @block_markbad:	[REPLACEABLE] mark a block bad
- * @cmd_ctrl:		[BOARDSPECIFIC] hardwarespecific function for controlling
- *			ALE/CLE/nCE. Also used to write command and address
  * @dev_ready:		[BOARDSPECIFIC] hardwarespecific function for accessing
  *			device ready/busy line. If set to NULL no access to
  *			ready/busy is available and the ready/busy information
  *			is read from the chip status register.
- * @cmdfunc:		[REPLACEABLE] hardwarespecific function for writing
- *			commands to the chip.
  * @waitfunc:		[REPLACEABLE] hardwarespecific function for wait on
  *			ready.
  * @exec_op:		controller specific method to execute NAND operations.
@@ -1303,10 +1305,7 @@ struct nand_chip {
 	void (*select_chip)(struct nand_chip *chip, int cs);
 	int (*block_bad)(struct nand_chip *chip, loff_t ofs);
 	int (*block_markbad)(struct nand_chip *chip, loff_t ofs);
-	void (*cmd_ctrl)(struct nand_chip *chip, int dat, unsigned int ctrl);
 	int (*dev_ready)(struct nand_chip *chip);
-	void (*cmdfunc)(struct nand_chip *chip, unsigned command, int column,
-			int page_addr);
 	int (*waitfunc)(struct nand_chip *chip);
 	int (*exec_op)(struct nand_chip *chip,
 		       const struct nand_operation *op,
-- 
cgit v1.2.3


From 8395b753d7cad2beb03d374621cc8851f1cb4e01 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 7 Sep 2018 00:38:37 +0200
Subject: mtd: rawnand: Deprecate ->dev_ready() and ->waitfunc()

Those hooks have been replaced by ->exec_op(). Move them to the
nand_legacy struct.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 Documentation/driver-api/mtdnand.rst             |  4 +--
 drivers/mtd/nand/raw/ams-delta.c                 |  4 +--
 drivers/mtd/nand/raw/atmel/nand-controller.c     |  8 +++---
 drivers/mtd/nand/raw/au1550nd.c                  |  7 +++--
 drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c |  2 +-
 drivers/mtd/nand/raw/brcmnand/brcmnand.c         |  2 +-
 drivers/mtd/nand/raw/cafe_nand.c                 |  2 +-
 drivers/mtd/nand/raw/cmx270_nand.c               |  2 +-
 drivers/mtd/nand/raw/cs553x_nand.c               |  2 +-
 drivers/mtd/nand/raw/davinci_nand.c              |  2 +-
 drivers/mtd/nand/raw/denali.c                    |  7 +++--
 drivers/mtd/nand/raw/diskonchip.c                | 10 +++----
 drivers/mtd/nand/raw/fsl_elbc_nand.c             |  2 +-
 drivers/mtd/nand/raw/fsl_ifc_nand.c              |  2 +-
 drivers/mtd/nand/raw/fsl_upm.c                   |  2 +-
 drivers/mtd/nand/raw/gpio.c                      |  2 +-
 drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c       |  2 +-
 drivers/mtd/nand/raw/jz4740_nand.c               |  2 +-
 drivers/mtd/nand/raw/jz4780_nand.c               |  2 +-
 drivers/mtd/nand/raw/lpc32xx_mlc.c               |  4 +--
 drivers/mtd/nand/raw/lpc32xx_slc.c               |  2 +-
 drivers/mtd/nand/raw/mpc5121_nfc.c               |  2 +-
 drivers/mtd/nand/raw/mtk_nand.c                  |  2 +-
 drivers/mtd/nand/raw/mxc_nand.c                  |  2 +-
 drivers/mtd/nand/raw/nand_base.c                 | 36 ++++++++++++------------
 drivers/mtd/nand/raw/nandsim.c                   |  2 +-
 drivers/mtd/nand/raw/ndfc.c                      |  2 +-
 drivers/mtd/nand/raw/nuc900_nand.c               |  8 +++---
 drivers/mtd/nand/raw/omap2.c                     |  4 +--
 drivers/mtd/nand/raw/pasemi_nand.c               |  2 +-
 drivers/mtd/nand/raw/plat_nand.c                 |  2 +-
 drivers/mtd/nand/raw/r852.c                      |  6 ++--
 drivers/mtd/nand/raw/s3c2410.c                   |  6 ++--
 drivers/mtd/nand/raw/sharpsl.c                   |  2 +-
 drivers/mtd/nand/raw/socrates_nand.c             |  2 +-
 drivers/mtd/nand/raw/sunxi_nand.c                |  4 +--
 drivers/mtd/nand/raw/tango_nand.c                |  4 +--
 drivers/mtd/nand/raw/tmio_nand.c                 |  4 +--
 drivers/mtd/nand/raw/txx9ndfmc.c                 |  2 +-
 drivers/mtd/nand/raw/xway_nand.c                 |  2 +-
 drivers/staging/mt29f_spinand/mt29f_spinand.c    |  2 +-
 include/linux/mtd/rawnand.h                      | 18 ++++++------
 42 files changed, 93 insertions(+), 93 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/mtdnand.rst b/Documentation/driver-api/mtdnand.rst
index 1d2403f1d8c5..0d3fa4d6576d 100644
--- a/Documentation/driver-api/mtdnand.rst
+++ b/Documentation/driver-api/mtdnand.rst
@@ -197,7 +197,7 @@ to read back the state of the pin. The function has no arguments and
 should return 0, if the device is busy (R/B pin is low) and 1, if the
 device is ready (R/B pin is high). If the hardware interface does not
 give access to the ready busy pin, then the function must not be defined
-and the function pointer this->dev_ready is set to NULL.
+and the function pointer this->legacy.dev_ready is set to NULL.
 
 Init function
 -------------
@@ -242,7 +242,7 @@ necessary information about the device.
         /* Set command delay time, see datasheet for correct value */
         this->chip_delay = CHIP_DEPENDEND_COMMAND_DELAY;
         /* Assign the device ready function, if available */
-        this->dev_ready = board_dev_ready;
+        this->legacy.dev_ready = board_dev_ready;
         this->eccmode = NAND_ECC_SOFT;
 
         /* Scan to find existence of the device */
diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c
index 756f6339d457..2fa6fa3c7464 100644
--- a/drivers/mtd/nand/raw/ams-delta.c
+++ b/drivers/mtd/nand/raw/ams-delta.c
@@ -215,9 +215,9 @@ static int ams_delta_init(struct platform_device *pdev)
 	this->legacy.read_buf = ams_delta_read_buf;
 	this->legacy.cmd_ctrl = ams_delta_hwcontrol;
 	if (gpio_request(AMS_DELTA_GPIO_PIN_NAND_RB, "nand_rdy") == 0) {
-		this->dev_ready = ams_delta_nand_ready;
+		this->legacy.dev_ready = ams_delta_nand_ready;
 	} else {
-		this->dev_ready = NULL;
+		this->legacy.dev_ready = NULL;
 		pr_notice("Couldn't request gpio for Delta NAND ready.\n");
 	}
 	/* 25 us command delay time */
diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index 37f617ec178e..dd022080442d 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -488,14 +488,14 @@ static void atmel_nand_select_chip(struct nand_chip *chip, int cs)
 
 	if (cs < 0 || cs >= nand->numcs) {
 		nand->activecs = NULL;
-		chip->dev_ready = NULL;
+		chip->legacy.dev_ready = NULL;
 		return;
 	}
 
 	nand->activecs = &nand->cs[cs];
 
 	if (nand->activecs->rb.type == ATMEL_NAND_GPIO_RB)
-		chip->dev_ready = atmel_nand_dev_ready;
+		chip->legacy.dev_ready = atmel_nand_dev_ready;
 }
 
 static int atmel_hsmc_nand_dev_ready(struct nand_chip *chip)
@@ -528,7 +528,7 @@ static void atmel_hsmc_nand_select_chip(struct nand_chip *chip, int cs)
 	}
 
 	if (nand->activecs->rb.type == ATMEL_NAND_NATIVE_RB)
-		chip->dev_ready = atmel_hsmc_nand_dev_ready;
+		chip->legacy.dev_ready = atmel_hsmc_nand_dev_ready;
 
 	regmap_update_bits(nc->base.smc, ATMEL_HSMC_NFC_CFG,
 			   ATMEL_HSMC_NFC_CFG_PAGESIZE_MASK |
@@ -945,7 +945,7 @@ static int atmel_hsmc_nand_pmecc_write_pg(struct nand_chip *chip,
 		dev_err(nc->base.dev, "Failed to program NAND page (err = %d)\n",
 			ret);
 
-	status = chip->waitfunc(chip);
+	status = chip->legacy.waitfunc(chip);
 	if (status & NAND_STATUS_FAIL)
 		return -EIO;
 
diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c
index 5d45f13288fc..81bba469c0e4 100644
--- a/drivers/mtd/nand/raw/au1550nd.c
+++ b/drivers/mtd/nand/raw/au1550nd.c
@@ -342,7 +342,8 @@ static void au1550_command(struct nand_chip *this, unsigned command,
 		/* Apply a short delay always to ensure that we do wait tWB. */
 		ndelay(100);
 		/* Wait for a chip to become ready... */
-		for (i = this->chip_delay; !this->dev_ready(this) && i > 0; --i)
+		for (i = this->chip_delay;
+		     !this->legacy.dev_ready(this) && i > 0; --i)
 			udelay(1);
 
 		/* Release -CE and re-enable interrupts. */
@@ -353,7 +354,7 @@ static void au1550_command(struct nand_chip *this, unsigned command,
 	/* Apply this short delay always to ensure that we do wait tWB. */
 	ndelay(100);
 
-	while(!this->dev_ready(this));
+	while(!this->legacy.dev_ready(this));
 }
 
 static int find_nand_cs(unsigned long nand_base)
@@ -428,7 +429,7 @@ static int au1550nd_probe(struct platform_device *pdev)
 	}
 	ctx->cs = cs;
 
-	this->dev_ready = au1550_device_ready;
+	this->legacy.dev_ready = au1550_device_ready;
 	this->select_chip = au1550_select_chip;
 	this->legacy.cmdfunc = au1550_command;
 
diff --git a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
index 2bd389b49b4a..925d4cd4401e 100644
--- a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
@@ -385,7 +385,7 @@ int bcm47xxnflash_ops_bcm4706_init(struct bcm47xxnflash *b47n)
 
 	b47n->nand_chip.select_chip = bcm47xxnflash_ops_bcm4706_select_chip;
 	nand_chip->legacy.cmd_ctrl = bcm47xxnflash_ops_bcm4706_cmd_ctrl;
-	nand_chip->dev_ready = bcm47xxnflash_ops_bcm4706_dev_ready;
+	nand_chip->legacy.dev_ready = bcm47xxnflash_ops_bcm4706_dev_ready;
 	b47n->nand_chip.legacy.cmdfunc = bcm47xxnflash_ops_bcm4706_cmdfunc;
 	b47n->nand_chip.legacy.read_byte = bcm47xxnflash_ops_bcm4706_read_byte;
 	b47n->nand_chip.legacy.read_buf = bcm47xxnflash_ops_bcm4706_read_buf;
diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
index 162ec11ab251..482c6f093f99 100644
--- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c
+++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c
@@ -2272,7 +2272,7 @@ static int brcmnand_init_cs(struct brcmnand_host *host, struct device_node *dn)
 
 	chip->legacy.cmd_ctrl = brcmnand_cmd_ctrl;
 	chip->legacy.cmdfunc = brcmnand_cmdfunc;
-	chip->waitfunc = brcmnand_waitfunc;
+	chip->legacy.waitfunc = brcmnand_waitfunc;
 	chip->legacy.read_byte = brcmnand_read_byte;
 	chip->legacy.read_buf = brcmnand_read_buf;
 	chip->legacy.write_buf = brcmnand_write_buf;
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index 0f734442fd3d..738af0f0a48d 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -704,7 +704,7 @@ static int cafe_nand_probe(struct pci_dev *pdev,
 	}
 
 	cafe->nand.legacy.cmdfunc = cafe_nand_cmdfunc;
-	cafe->nand.dev_ready = cafe_device_ready;
+	cafe->nand.legacy.dev_ready = cafe_device_ready;
 	cafe->nand.legacy.read_byte = cafe_read_byte;
 	cafe->nand.legacy.read_buf = cafe_read_buf;
 	cafe->nand.legacy.write_buf = cafe_write_buf;
diff --git a/drivers/mtd/nand/raw/cmx270_nand.c b/drivers/mtd/nand/raw/cmx270_nand.c
index c543f073d971..585dbd51d8b1 100644
--- a/drivers/mtd/nand/raw/cmx270_nand.c
+++ b/drivers/mtd/nand/raw/cmx270_nand.c
@@ -176,7 +176,7 @@ static int __init cmx270_init(void)
 	this->legacy.IO_ADDR_R = cmx270_nand_io;
 	this->legacy.IO_ADDR_W = cmx270_nand_io;
 	this->legacy.cmd_ctrl = cmx270_hwcontrol;
-	this->dev_ready = cmx270_device_ready;
+	this->legacy.dev_ready = cmx270_device_ready;
 
 	/* 15 us command delay time */
 	this->chip_delay = 20;
diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c
index bd75ec4e5508..61ae9e60bf0d 100644
--- a/drivers/mtd/nand/raw/cs553x_nand.c
+++ b/drivers/mtd/nand/raw/cs553x_nand.c
@@ -207,7 +207,7 @@ static int __init cs553x_init_one(int cs, int mmio, unsigned long adr)
 	}
 
 	this->legacy.cmd_ctrl = cs553x_hwcontrol;
-	this->dev_ready = cs553x_device_ready;
+	this->legacy.dev_ready = cs553x_device_ready;
 	this->legacy.read_byte = cs553x_read_byte;
 	this->legacy.read_buf = cs553x_read_buf;
 	this->legacy.write_buf = cs553x_write_buf;
diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index 5fcd8c30293a..bae568d68432 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -782,7 +782,7 @@ static int nand_davinci_probe(struct platform_device *pdev)
 
 	/* Set address of hardware control function */
 	info->chip.legacy.cmd_ctrl	= nand_davinci_hwcontrol;
-	info->chip.dev_ready	= nand_davinci_dev_ready;
+	info->chip.legacy.dev_ready	= nand_davinci_dev_ready;
 
 	/* Speed up buffer I/O */
 	info->chip.legacy.read_buf     = nand_davinci_read_buf;
diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index c11547bfb2e7..c14493ef6126 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -287,7 +287,8 @@ static void denali_cmd_ctrl(struct nand_chip *chip, int dat, unsigned int ctrl)
 		return;
 
 	/*
-	 * Some commands are followed by chip->dev_ready or chip->waitfunc.
+	 * Some commands are followed by chip->legacy.dev_ready or
+	 * chip->legacy.waitfunc.
 	 * irq_status must be cleared here to catch the R/B# interrupt later.
 	 */
 	if (ctrl & NAND_CTRL_CHANGE)
@@ -1346,8 +1347,8 @@ int denali_init(struct denali_nand_info *denali)
 	chip->legacy.read_byte = denali_read_byte;
 	chip->legacy.write_byte = denali_write_byte;
 	chip->legacy.cmd_ctrl = denali_cmd_ctrl;
-	chip->dev_ready = denali_dev_ready;
-	chip->waitfunc = denali_waitfunc;
+	chip->legacy.dev_ready = denali_dev_ready;
+	chip->legacy.waitfunc = denali_waitfunc;
 
 	if (features & FEATURES__INDEX_ADDR) {
 		denali->host_read = denali_indexed_read;
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 2604e80b5475..c3a79369fbed 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -715,7 +715,7 @@ static void doc2001plus_command(struct nand_chip *this, unsigned command,
 		return;
 
 	case NAND_CMD_RESET:
-		if (this->dev_ready)
+		if (this->legacy.dev_ready)
 			break;
 		udelay(this->chip_delay);
 		WriteDOC(NAND_CMD_STATUS, docptr, Mplus_FlashCmd);
@@ -730,7 +730,7 @@ static void doc2001plus_command(struct nand_chip *this, unsigned command,
 		 * If we don't have access to the busy pin, we apply the given
 		 * command delay
 		 */
-		if (!this->dev_ready) {
+		if (!this->legacy.dev_ready) {
 			udelay(this->chip_delay);
 			return;
 		}
@@ -740,7 +740,7 @@ static void doc2001plus_command(struct nand_chip *this, unsigned command,
 	 * any case on any machine. */
 	ndelay(100);
 	/* wait until command is processed */
-	while (!this->dev_ready(this)) ;
+	while (!this->legacy.dev_ready(this)) ;
 }
 
 static int doc200x_dev_ready(struct nand_chip *this)
@@ -1570,8 +1570,8 @@ static int __init doc_probe(unsigned long physadr)
 	nand_set_controller_data(nand, doc);
 	nand->select_chip	= doc200x_select_chip;
 	nand->legacy.cmd_ctrl		= doc200x_hwcontrol;
-	nand->dev_ready		= doc200x_dev_ready;
-	nand->waitfunc		= doc200x_wait;
+	nand->legacy.dev_ready	= doc200x_dev_ready;
+	nand->legacy.waitfunc	= doc200x_wait;
 	nand->block_bad		= doc200x_block_bad;
 	nand->ecc.hwctl		= doc200x_enable_hwecc;
 	nand->ecc.calculate	= doc200x_calculate_ecc;
diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
index fa21d00c3443..29f0832de39b 100644
--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
@@ -781,7 +781,7 @@ static int fsl_elbc_chip_init(struct fsl_elbc_mtd *priv)
 	chip->legacy.read_buf = fsl_elbc_read_buf;
 	chip->select_chip = fsl_elbc_select_chip;
 	chip->legacy.cmdfunc = fsl_elbc_cmdfunc;
-	chip->waitfunc = fsl_elbc_wait;
+	chip->legacy.waitfunc = fsl_elbc_wait;
 	chip->set_features = nand_get_set_features_notsupp;
 	chip->get_features = nand_get_set_features_notsupp;
 
diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index 20c86b6503a8..682ae383c3e9 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -866,7 +866,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 	chip->legacy.read_buf = fsl_ifc_read_buf;
 	chip->select_chip = fsl_ifc_select_chip;
 	chip->legacy.cmdfunc = fsl_ifc_cmdfunc;
-	chip->waitfunc = fsl_ifc_wait;
+	chip->legacy.waitfunc = fsl_ifc_wait;
 	chip->set_features = nand_get_set_features_notsupp;
 	chip->get_features = nand_get_set_features_notsupp;
 
diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c
index 23baef375bfb..fcb79718b6c3 100644
--- a/drivers/mtd/nand/raw/fsl_upm.c
+++ b/drivers/mtd/nand/raw/fsl_upm.c
@@ -173,7 +173,7 @@ static int fun_chip_init(struct fsl_upm_nand *fun,
 		fun->chip.select_chip = fun_select_chip;
 
 	if (fun->rnb_gpio[0] >= 0)
-		fun->chip.dev_ready = fun_chip_ready;
+		fun->chip.legacy.dev_ready = fun_chip_ready;
 
 	mtd->dev.parent = fun->dev;
 
diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c
index 1e6e81047978..c4f19067702e 100644
--- a/drivers/mtd/nand/raw/gpio.c
+++ b/drivers/mtd/nand/raw/gpio.c
@@ -271,7 +271,7 @@ static int gpio_nand_probe(struct platform_device *pdev)
 	}
 	/* Using RDY pin */
 	if (gpiomtd->rdy)
-		chip->dev_ready = gpio_nand_devready;
+		chip->legacy.dev_ready = gpio_nand_devready;
 
 	nand_set_flash_node(chip, pdev->dev.of_node);
 	chip->legacy.IO_ADDR_W	= chip->legacy.IO_ADDR_R;
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index fa37d21e5f16..dc6291902acf 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -1903,7 +1903,7 @@ static int gpmi_nand_init(struct gpmi_nand_data *this)
 	chip->select_chip	= gpmi_select_chip;
 	chip->setup_data_interface = gpmi_setup_data_interface;
 	chip->legacy.cmd_ctrl	= gpmi_cmd_ctrl;
-	chip->dev_ready		= gpmi_dev_ready;
+	chip->legacy.dev_ready	= gpmi_dev_ready;
 	chip->legacy.read_byte	= gpmi_read_byte;
 	chip->legacy.read_buf	= gpmi_read_buf;
 	chip->legacy.write_buf	= gpmi_write_buf;
diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c
index ae0c268a5cb6..7a1b4c3ff6fd 100644
--- a/drivers/mtd/nand/raw/jz4740_nand.c
+++ b/drivers/mtd/nand/raw/jz4740_nand.c
@@ -431,7 +431,7 @@ static int jz_nand_probe(struct platform_device *pdev)
 	chip->dummy_controller.ops = &jz_nand_controller_ops;
 
 	if (nand->busy_gpio)
-		chip->dev_ready = jz_nand_dev_ready;
+		chip->legacy.dev_ready = jz_nand_dev_ready;
 
 	platform_set_drvdata(pdev, nand);
 
diff --git a/drivers/mtd/nand/raw/jz4780_nand.c b/drivers/mtd/nand/raw/jz4780_nand.c
index 7faf5da6f5ea..2a960211b97d 100644
--- a/drivers/mtd/nand/raw/jz4780_nand.c
+++ b/drivers/mtd/nand/raw/jz4780_nand.c
@@ -256,7 +256,7 @@ static int jz4780_nand_init_chip(struct platform_device *pdev,
 		dev_err(dev, "failed to request busy GPIO: %d\n", ret);
 		return ret;
 	} else if (nand->busy_gpio) {
-		nand->chip.dev_ready = jz4780_nand_dev_ready;
+		nand->chip.legacy.dev_ready = jz4780_nand_dev_ready;
 	}
 
 	nand->wp_gpio = devm_gpiod_get_optional(dev, "wp", GPIOD_OUT_LOW);
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index bf3b7aa8a401..8a1dfb7ee885 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -740,7 +740,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
 		goto put_clk;
 
 	nand_chip->legacy.cmd_ctrl = lpc32xx_nand_cmd_ctrl;
-	nand_chip->dev_ready = lpc32xx_nand_device_ready;
+	nand_chip->legacy.dev_ready = lpc32xx_nand_device_ready;
 	nand_chip->chip_delay = 25; /* us */
 	nand_chip->legacy.IO_ADDR_R = MLC_DATA(host->io_base);
 	nand_chip->legacy.IO_ADDR_W = MLC_DATA(host->io_base);
@@ -760,7 +760,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
 	nand_chip->ecc.read_oob = lpc32xx_read_oob;
 	nand_chip->ecc.strength = 4;
 	nand_chip->ecc.bytes = 10;
-	nand_chip->waitfunc = lpc32xx_waitfunc;
+	nand_chip->legacy.waitfunc = lpc32xx_waitfunc;
 
 	nand_chip->options = NAND_NO_SUBPAGE_WRITE;
 	nand_chip->bbt_options = NAND_BBT_USE_FLASH | NAND_BBT_NO_OOB;
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index 1c3f437c42a1..75d62d6bed7b 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -881,7 +881,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
 	chip->legacy.IO_ADDR_R = SLC_DATA(host->io_base);
 	chip->legacy.IO_ADDR_W = SLC_DATA(host->io_base);
 	chip->legacy.cmd_ctrl = lpc32xx_nand_cmd_ctrl;
-	chip->dev_ready = lpc32xx_nand_device_ready;
+	chip->legacy.dev_ready = lpc32xx_nand_device_ready;
 	chip->chip_delay = 20; /* 20us command delay time */
 
 	/* Init NAND controller */
diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c
index 1af4c777f887..9a6dc783689e 100644
--- a/drivers/mtd/nand/raw/mpc5121_nfc.c
+++ b/drivers/mtd/nand/raw/mpc5121_nfc.c
@@ -692,7 +692,7 @@ static int mpc5121_nfc_probe(struct platform_device *op)
 	}
 
 	mtd->name = "MPC5121 NAND";
-	chip->dev_ready = mpc5121_nfc_dev_ready;
+	chip->legacy.dev_ready = mpc5121_nfc_dev_ready;
 	chip->legacy.cmdfunc = mpc5121_nfc_command;
 	chip->legacy.read_byte = mpc5121_nfc_read_byte;
 	chip->legacy.read_buf = mpc5121_nfc_read_buf;
diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c
index 0cfdca39a269..2bb0df1b7244 100644
--- a/drivers/mtd/nand/raw/mtk_nand.c
+++ b/drivers/mtd/nand/raw/mtk_nand.c
@@ -1332,7 +1332,7 @@ static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc,
 	nand_set_controller_data(nand, nfc);
 
 	nand->options |= NAND_USE_BOUNCE_BUFFER | NAND_SUBPAGE_READ;
-	nand->dev_ready = mtk_nfc_dev_ready;
+	nand->legacy.dev_ready = mtk_nfc_dev_ready;
 	nand->select_chip = mtk_nfc_select_chip;
 	nand->legacy.write_byte = mtk_nfc_write_byte;
 	nand->legacy.write_buf = mtk_nfc_write_buf;
diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index 146e95153289..ca074c955147 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -1773,7 +1773,7 @@ static int mxcnd_probe(struct platform_device *pdev)
 
 	nand_set_controller_data(this, host);
 	nand_set_flash_node(this, pdev->dev.of_node),
-	this->dev_ready = mxc_nand_dev_ready;
+	this->legacy.dev_ready = mxc_nand_dev_ready;
 	this->legacy.cmdfunc = mxc_nand_command;
 	this->legacy.read_byte = mxc_nand_read_byte;
 	this->legacy.write_buf = mxc_nand_write_buf;
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index f3d6cd52f7eb..30b55a4677f9 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -604,7 +604,7 @@ static void panic_nand_wait_ready(struct mtd_info *mtd, unsigned long timeo)
 
 	/* Wait for the device to get ready */
 	for (i = 0; i < timeo; i++) {
-		if (chip->dev_ready(chip))
+		if (chip->legacy.dev_ready(chip))
 			break;
 		touch_softlockup_watchdog();
 		mdelay(1);
@@ -628,12 +628,12 @@ void nand_wait_ready(struct nand_chip *chip)
 	/* Wait until command is processed or timeout occurs */
 	timeo = jiffies + msecs_to_jiffies(timeo);
 	do {
-		if (chip->dev_ready(chip))
+		if (chip->legacy.dev_ready(chip))
 			return;
 		cond_resched();
 	} while (time_before(jiffies, timeo));
 
-	if (!chip->dev_ready(chip))
+	if (!chip->legacy.dev_ready(chip))
 		pr_warn_ratelimited("timeout while waiting for chip to become ready\n");
 }
 EXPORT_SYMBOL_GPL(nand_wait_ready);
@@ -804,7 +804,7 @@ static void nand_command(struct nand_chip *chip, unsigned int command,
 		return;
 
 	case NAND_CMD_RESET:
-		if (chip->dev_ready)
+		if (chip->legacy.dev_ready)
 			break;
 		udelay(chip->chip_delay);
 		chip->legacy.cmd_ctrl(chip, NAND_CMD_STATUS,
@@ -831,7 +831,7 @@ static void nand_command(struct nand_chip *chip, unsigned int command,
 		 * If we don't have access to the busy pin, we apply the given
 		 * command delay
 		 */
-		if (!chip->dev_ready) {
+		if (!chip->legacy.dev_ready) {
 			udelay(chip->chip_delay);
 			return;
 		}
@@ -941,7 +941,7 @@ static void nand_command_lp(struct nand_chip *chip, unsigned int command,
 		return;
 
 	case NAND_CMD_RESET:
-		if (chip->dev_ready)
+		if (chip->legacy.dev_ready)
 			break;
 		udelay(chip->chip_delay);
 		chip->legacy.cmd_ctrl(chip, NAND_CMD_STATUS,
@@ -983,7 +983,7 @@ static void nand_command_lp(struct nand_chip *chip, unsigned int command,
 		 * If we don't have access to the busy pin, we apply the given
 		 * command delay.
 		 */
-		if (!chip->dev_ready) {
+		if (!chip->legacy.dev_ready) {
 			udelay(chip->chip_delay);
 			return;
 		}
@@ -1069,8 +1069,8 @@ static void panic_nand_wait(struct nand_chip *chip, unsigned long timeo)
 {
 	int i;
 	for (i = 0; i < timeo; i++) {
-		if (chip->dev_ready) {
-			if (chip->dev_ready(chip))
+		if (chip->legacy.dev_ready) {
+			if (chip->legacy.dev_ready(chip))
 				break;
 		} else {
 			int ret;
@@ -1117,8 +1117,8 @@ static int nand_wait(struct nand_chip *chip)
 	else {
 		timeo = jiffies + msecs_to_jiffies(timeo);
 		do {
-			if (chip->dev_ready) {
-				if (chip->dev_ready(chip))
+			if (chip->legacy.dev_ready) {
+				if (chip->legacy.dev_ready(chip))
 					break;
 			} else {
 				ret = nand_read_data_op(chip, &status,
@@ -1827,7 +1827,7 @@ int nand_prog_page_end_op(struct nand_chip *chip)
 			return ret;
 	} else {
 		chip->legacy.cmdfunc(chip, NAND_CMD_PAGEPROG, -1, -1);
-		ret = chip->waitfunc(chip);
+		ret = chip->legacy.waitfunc(chip);
 		if (ret < 0)
 			return ret;
 
@@ -1875,7 +1875,7 @@ int nand_prog_page_op(struct nand_chip *chip, unsigned int page,
 				     page);
 		chip->legacy.write_buf(chip, buf, len);
 		chip->legacy.cmdfunc(chip, NAND_CMD_PAGEPROG, -1, -1);
-		status = chip->waitfunc(chip);
+		status = chip->legacy.waitfunc(chip);
 	}
 
 	if (status & NAND_STATUS_FAIL)
@@ -2106,7 +2106,7 @@ int nand_erase_op(struct nand_chip *chip, unsigned int eraseblock)
 		chip->legacy.cmdfunc(chip, NAND_CMD_ERASE1, -1, page);
 		chip->legacy.cmdfunc(chip, NAND_CMD_ERASE2, -1, -1);
 
-		ret = chip->waitfunc(chip);
+		ret = chip->legacy.waitfunc(chip);
 		if (ret < 0)
 			return ret;
 
@@ -2157,7 +2157,7 @@ static int nand_set_features_op(struct nand_chip *chip, u8 feature,
 	for (i = 0; i < ONFI_SUBFEATURE_PARAM_LEN; ++i)
 		chip->legacy.write_byte(chip, params[i]);
 
-	ret = chip->waitfunc(chip);
+	ret = chip->legacy.waitfunc(chip);
 	if (ret < 0)
 		return ret;
 
@@ -2222,7 +2222,7 @@ static int nand_wait_rdy_op(struct nand_chip *chip, unsigned int timeout_ms,
 	}
 
 	/* Apply delay or wait for ready/busy pin */
-	if (!chip->dev_ready)
+	if (!chip->legacy.dev_ready)
 		udelay(chip->chip_delay);
 	else
 		nand_wait_ready(chip);
@@ -4927,8 +4927,8 @@ static void nand_set_defaults(struct nand_chip *chip)
 		chip->legacy.cmdfunc = nand_command;
 
 	/* check, if a user supplied wait function given */
-	if (chip->waitfunc == NULL)
-		chip->waitfunc = nand_wait;
+	if (chip->legacy.waitfunc == NULL)
+		chip->legacy.waitfunc = nand_wait;
 
 	if (!chip->select_chip)
 		chip->select_chip = nand_select_chip;
diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c
index 360c3a7c69d7..ff0c372ee288 100644
--- a/drivers/mtd/nand/raw/nandsim.c
+++ b/drivers/mtd/nand/raw/nandsim.c
@@ -2251,7 +2251,7 @@ static int __init ns_init_module(void)
 	 */
 	chip->legacy.cmd_ctrl	 = ns_hwcontrol;
 	chip->legacy.read_byte  = ns_nand_read_byte;
-	chip->dev_ready  = ns_device_ready;
+	chip->legacy.dev_ready  = ns_device_ready;
 	chip->legacy.write_buf  = ns_nand_write_buf;
 	chip->legacy.read_buf   = ns_nand_read_buf;
 	chip->ecc.mode   = NAND_ECC_SOFT;
diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index 4e58b64ac690..19b4cb2d1c6e 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -145,7 +145,7 @@ static int ndfc_chip_init(struct ndfc_controller *ndfc,
 	chip->legacy.IO_ADDR_R = ndfc->ndfcbase + NDFC_DATA;
 	chip->legacy.IO_ADDR_W = ndfc->ndfcbase + NDFC_DATA;
 	chip->legacy.cmd_ctrl = ndfc_hwcontrol;
-	chip->dev_ready = ndfc_ready;
+	chip->legacy.dev_ready = ndfc_ready;
 	chip->select_chip = ndfc_select_chip;
 	chip->chip_delay = 50;
 	chip->controller = &ndfc->ndfc_control;
diff --git a/drivers/mtd/nand/raw/nuc900_nand.c b/drivers/mtd/nand/raw/nuc900_nand.c
index 32946dc5c4b8..208b6de67510 100644
--- a/drivers/mtd/nand/raw/nuc900_nand.c
+++ b/drivers/mtd/nand/raw/nuc900_nand.c
@@ -175,7 +175,7 @@ static void nuc900_nand_command_lp(struct nand_chip *chip,
 		return;
 
 	case NAND_CMD_RESET:
-		if (chip->dev_ready)
+		if (chip->legacy.dev_ready)
 			break;
 		udelay(chip->chip_delay);
 
@@ -196,7 +196,7 @@ static void nuc900_nand_command_lp(struct nand_chip *chip,
 		write_cmd_reg(nand, NAND_CMD_READSTART);
 	default:
 
-		if (!chip->dev_ready) {
+		if (!chip->legacy.dev_ready) {
 			udelay(chip->chip_delay);
 			return;
 		}
@@ -206,7 +206,7 @@ static void nuc900_nand_command_lp(struct nand_chip *chip,
 	 * any case on any machine. */
 	ndelay(100);
 
-	while (!chip->dev_ready(chip))
+	while (!chip->legacy.dev_ready(chip))
 		;
 }
 
@@ -255,7 +255,7 @@ static int nuc900_nand_probe(struct platform_device *pdev)
 	clk_enable(nuc900_nand->clk);
 
 	chip->legacy.cmdfunc	= nuc900_nand_command_lp;
-	chip->dev_ready		= nuc900_nand_devready;
+	chip->legacy.dev_ready	= nuc900_nand_devready;
 	chip->legacy.read_byte	= nuc900_nand_read_byte;
 	chip->legacy.write_buf	= nuc900_nand_write_buf;
 	chip->legacy.read_buf	= nuc900_nand_read_buf;
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index af427a72c7ca..5e1e7ced0d9e 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -2247,10 +2247,10 @@ static int omap_nand_probe(struct platform_device *pdev)
 	 * device and read status register until you get a failure or success
 	 */
 	if (info->ready_gpiod) {
-		nand_chip->dev_ready = omap_dev_ready;
+		nand_chip->legacy.dev_ready = omap_dev_ready;
 		nand_chip->chip_delay = 0;
 	} else {
-		nand_chip->waitfunc = omap_wait;
+		nand_chip->legacy.waitfunc = omap_wait;
 		nand_chip->chip_delay = 50;
 	}
 
diff --git a/drivers/mtd/nand/raw/pasemi_nand.c b/drivers/mtd/nand/raw/pasemi_nand.c
index d99d7b63e545..e9a4e82bfb34 100644
--- a/drivers/mtd/nand/raw/pasemi_nand.c
+++ b/drivers/mtd/nand/raw/pasemi_nand.c
@@ -140,7 +140,7 @@ static int pasemi_nand_probe(struct platform_device *ofdev)
 	}
 
 	chip->legacy.cmd_ctrl = pasemi_hwcontrol;
-	chip->dev_ready = pasemi_device_ready;
+	chip->legacy.dev_ready = pasemi_device_ready;
 	chip->legacy.read_buf = pasemi_read_buf;
 	chip->legacy.write_buf = pasemi_write_buf;
 	chip->chip_delay = 0;
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index c66c7f942179..6cce01d09364 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -63,7 +63,7 @@ static int plat_nand_probe(struct platform_device *pdev)
 	data->chip.legacy.IO_ADDR_R = data->io_base;
 	data->chip.legacy.IO_ADDR_W = data->io_base;
 	data->chip.legacy.cmd_ctrl = pdata->ctrl.cmd_ctrl;
-	data->chip.dev_ready = pdata->ctrl.dev_ready;
+	data->chip.legacy.dev_ready = pdata->ctrl.dev_ready;
 	data->chip.select_chip = pdata->ctrl.select_chip;
 	data->chip.legacy.write_buf = pdata->ctrl.write_buf;
 	data->chip.legacy.read_buf = pdata->ctrl.read_buf;
diff --git a/drivers/mtd/nand/raw/r852.c b/drivers/mtd/nand/raw/r852.c
index 6e602586cb14..b08aa0a5a074 100644
--- a/drivers/mtd/nand/raw/r852.c
+++ b/drivers/mtd/nand/raw/r852.c
@@ -373,7 +373,7 @@ static int r852_wait(struct nand_chip *chip)
 		msecs_to_jiffies(400) : msecs_to_jiffies(20));
 
 	while (time_before(jiffies, timeout))
-		if (chip->dev_ready(chip))
+		if (chip->legacy.dev_ready(chip))
 			break;
 
 	nand_status_op(chip, &status);
@@ -854,8 +854,8 @@ static int  r852_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
 
 	/* commands */
 	chip->legacy.cmd_ctrl = r852_cmdctl;
-	chip->waitfunc = r852_wait;
-	chip->dev_ready = r852_ready;
+	chip->legacy.waitfunc = r852_wait;
+	chip->legacy.dev_ready = r852_ready;
 
 	/* I/O */
 	chip->legacy.read_byte = r852_read_byte;
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index f232d683f32d..87b5162857f8 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -885,7 +885,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 		info->sel_reg   = regs + S3C2410_NFCONF;
 		info->sel_bit	= S3C2410_NFCONF_nFCE;
 		chip->legacy.cmd_ctrl  = s3c2410_nand_hwcontrol;
-		chip->dev_ready = s3c2410_nand_devready;
+		chip->legacy.dev_ready = s3c2410_nand_devready;
 		break;
 
 	case TYPE_S3C2440:
@@ -893,7 +893,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 		info->sel_reg   = regs + S3C2440_NFCONT;
 		info->sel_bit	= S3C2440_NFCONT_nFCE;
 		chip->legacy.cmd_ctrl  = s3c2440_nand_hwcontrol;
-		chip->dev_ready = s3c2440_nand_devready;
+		chip->legacy.dev_ready = s3c2440_nand_devready;
 		chip->legacy.read_buf  = s3c2440_nand_read_buf;
 		chip->legacy.write_buf	= s3c2440_nand_write_buf;
 		break;
@@ -903,7 +903,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 		info->sel_reg   = regs + S3C2440_NFCONT;
 		info->sel_bit	= S3C2412_NFCONT_nFCE0;
 		chip->legacy.cmd_ctrl  = s3c2440_nand_hwcontrol;
-		chip->dev_ready = s3c2412_nand_devready;
+		chip->legacy.dev_ready = s3c2412_nand_devready;
 
 		if (readl(regs + S3C2410_NFCONF) & S3C2412_NFCONF_NANDBOOT)
 			dev_info(info->device, "System booted from NAND\n");
diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c
index a626fb7af8d1..3d14408cbbce 100644
--- a/drivers/mtd/nand/raw/sharpsl.c
+++ b/drivers/mtd/nand/raw/sharpsl.c
@@ -157,7 +157,7 @@ static int sharpsl_nand_probe(struct platform_device *pdev)
 	this->legacy.IO_ADDR_W = sharpsl->io + FLASHIO;
 	/* Set address of hardware control function */
 	this->legacy.cmd_ctrl = sharpsl_nand_hwcontrol;
-	this->dev_ready = sharpsl_nand_dev_ready;
+	this->legacy.dev_ready = sharpsl_nand_dev_ready;
 	/* 15 us command delay time */
 	this->chip_delay = 15;
 	/* set eccmode using hardware ECC */
diff --git a/drivers/mtd/nand/raw/socrates_nand.c b/drivers/mtd/nand/raw/socrates_nand.c
index 0ca81fa956b9..604ef7508f14 100644
--- a/drivers/mtd/nand/raw/socrates_nand.c
+++ b/drivers/mtd/nand/raw/socrates_nand.c
@@ -156,7 +156,7 @@ static int socrates_nand_probe(struct platform_device *ofdev)
 	nand_chip->legacy.read_byte = socrates_nand_read_byte;
 	nand_chip->legacy.write_buf = socrates_nand_write_buf;
 	nand_chip->legacy.read_buf = socrates_nand_read_buf;
-	nand_chip->dev_ready = socrates_nand_device_ready;
+	nand_chip->legacy.dev_ready = socrates_nand_device_ready;
 
 	nand_chip->ecc.mode = NAND_ECC_SOFT;	/* enable ECC */
 	nand_chip->ecc.algo = NAND_ECC_HAMMING;
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index 48bb28872298..37e23aec4bce 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -442,9 +442,9 @@ static void sunxi_nfc_select_chip(struct nand_chip *nand, int chip)
 		ctl |= NFC_CE_SEL(sel->cs) | NFC_EN |
 		       NFC_PAGE_SHIFT(nand->page_shift);
 		if (sel->rb < 0) {
-			nand->dev_ready = NULL;
+			nand->legacy.dev_ready = NULL;
 		} else {
-			nand->dev_ready = sunxi_nfc_dev_ready;
+			nand->legacy.dev_ready = sunxi_nfc_dev_ready;
 			ctl |= NFC_RB_SEL(sel->rb);
 		}
 
diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c
index f0285c0b3089..8818f893f300 100644
--- a/drivers/mtd/nand/raw/tango_nand.c
+++ b/drivers/mtd/nand/raw/tango_nand.c
@@ -314,7 +314,7 @@ static int tango_write_page(struct nand_chip *chip, const u8 *buf,
 	if (err)
 		return err;
 
-	status = chip->waitfunc(chip);
+	status = chip->legacy.waitfunc(chip);
 	if (status & NAND_STATUS_FAIL)
 		return -EIO;
 
@@ -569,7 +569,7 @@ static int chip_init(struct device *dev, struct device_node *np)
 	chip->legacy.read_buf = tango_read_buf;
 	chip->select_chip = tango_select_chip;
 	chip->legacy.cmd_ctrl = tango_cmd_ctrl;
-	chip->dev_ready = tango_dev_ready;
+	chip->legacy.dev_ready = tango_dev_ready;
 	chip->setup_data_interface = tango_set_timings;
 	chip->options = NAND_USE_BOUNCE_BUFFER |
 			NAND_NO_SUBPAGE_WRITE |
diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index 5037359754eb..7e49272ecef6 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -404,7 +404,7 @@ static int tmio_probe(struct platform_device *dev)
 
 	/* Set address of hardware control function */
 	nand_chip->legacy.cmd_ctrl = tmio_nand_hwcontrol;
-	nand_chip->dev_ready = tmio_nand_dev_ready;
+	nand_chip->legacy.dev_ready = tmio_nand_dev_ready;
 	nand_chip->legacy.read_byte = tmio_nand_read_byte;
 	nand_chip->legacy.write_buf = tmio_nand_write_buf;
 	nand_chip->legacy.read_buf = tmio_nand_read_buf;
@@ -432,7 +432,7 @@ static int tmio_probe(struct platform_device *dev)
 	}
 
 	tmio->irq = irq;
-	nand_chip->waitfunc = tmio_nand_wait;
+	nand_chip->legacy.waitfunc = tmio_nand_wait;
 
 	/* Scan to find existence of the device */
 	retval = nand_scan(nand_chip, 1);
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index 9eab56a45a5e..46cfb11c5502 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -328,7 +328,7 @@ static int __init txx9ndfmc_probe(struct platform_device *dev)
 		chip->legacy.read_buf = txx9ndfmc_read_buf;
 		chip->legacy.write_buf = txx9ndfmc_write_buf;
 		chip->legacy.cmd_ctrl = txx9ndfmc_cmd_ctrl;
-		chip->dev_ready = txx9ndfmc_dev_ready;
+		chip->legacy.dev_ready = txx9ndfmc_dev_ready;
 		chip->ecc.calculate = txx9ndfmc_calculate_ecc;
 		chip->ecc.correct = txx9ndfmc_correct_data;
 		chip->ecc.hwctl = txx9ndfmc_enable_hwecc;
diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c
index ef351a4c507f..dcce487f6517 100644
--- a/drivers/mtd/nand/raw/xway_nand.c
+++ b/drivers/mtd/nand/raw/xway_nand.c
@@ -175,7 +175,7 @@ static int xway_nand_probe(struct platform_device *pdev)
 	mtd->dev.parent = &pdev->dev;
 
 	data->chip.legacy.cmd_ctrl = xway_cmd_ctrl;
-	data->chip.dev_ready = xway_dev_ready;
+	data->chip.legacy.dev_ready = xway_dev_ready;
 	data->chip.select_chip = xway_select_chip;
 	data->chip.legacy.write_buf = xway_write_buf;
 	data->chip.legacy.read_buf = xway_read_buf;
diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c
index fbfa024f683d..fb28cc6331f2 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.c
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c
@@ -919,7 +919,7 @@ static int spinand_probe(struct spi_device *spi_nand)
 	chip->legacy.write_buf	= spinand_write_buf;
 	chip->legacy.read_byte	= spinand_read_byte;
 	chip->legacy.cmdfunc	= spinand_cmdfunc;
-	chip->waitfunc	= spinand_wait;
+	chip->legacy.waitfunc	= spinand_wait;
 	chip->options	|= NAND_CACHEPRG;
 	chip->select_chip = spinand_select_chip;
 	chip->set_features = nand_get_set_features_notsupp;
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 5ef2004a37a2..15642c028da2 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1183,6 +1183,10 @@ int nand_op_parser_exec_op(struct nand_chip *chip,
  * @cmd_ctrl: hardware specific function for controlling ALE/CLE/nCE. Also used
  *	      to write command and address
  * @cmdfunc: hardware specific function for writing commands to the chip.
+ * @dev_ready: hardware specific function for accessing device ready/busy line.
+ *	       If set to NULL no access to ready/busy is available and the
+ *	       ready/busy information is read from the chip status register.
+ * @waitfunc: hardware specific function for wait on ready.
  *
  * If you look at this structure you're already wrong. These fields/hooks are
  * all deprecated.
@@ -1197,6 +1201,8 @@ struct nand_legacy {
 	void (*cmd_ctrl)(struct nand_chip *chip, int dat, unsigned int ctrl);
 	void (*cmdfunc)(struct nand_chip *chip, unsigned command, int column,
 			int page_addr);
+	int (*dev_ready)(struct nand_chip *chip);
+	int (*waitfunc)(struct nand_chip *chip);
 };
 
 /**
@@ -1210,16 +1216,10 @@ struct nand_legacy {
  * @select_chip:	[REPLACEABLE] select chip nr
  * @block_bad:		[REPLACEABLE] check if a block is bad, using OOB markers
  * @block_markbad:	[REPLACEABLE] mark a block bad
- * @dev_ready:		[BOARDSPECIFIC] hardwarespecific function for accessing
- *			device ready/busy line. If set to NULL no access to
- *			ready/busy is available and the ready/busy information
- *			is read from the chip status register.
- * @waitfunc:		[REPLACEABLE] hardwarespecific function for wait on
- *			ready.
  * @exec_op:		controller specific method to execute NAND operations.
  *			This method replaces ->cmdfunc(),
- *			->legacy.{read,write}_{buf,byte,word}(), ->dev_ready()
- *			and ->waifunc().
+ *			->legacy.{read,write}_{buf,byte,word}(),
+ *			->legacy.dev_ready() and ->waifunc().
  * @setup_read_retry:	[FLASHSPECIFIC] flash (vendor) specific function for
  *			setting the read-retry mode. Mostly needed for MLC NAND.
  * @ecc:		[BOARDSPECIFIC] ECC control structure
@@ -1305,8 +1305,6 @@ struct nand_chip {
 	void (*select_chip)(struct nand_chip *chip, int cs);
 	int (*block_bad)(struct nand_chip *chip, loff_t ofs);
 	int (*block_markbad)(struct nand_chip *chip, loff_t ofs);
-	int (*dev_ready)(struct nand_chip *chip);
-	int (*waitfunc)(struct nand_chip *chip);
 	int (*exec_op)(struct nand_chip *chip,
 		       const struct nand_operation *op,
 		       bool check_only);
-- 
cgit v1.2.3


From cdc784c743945f445a26f4c0bd60a50a1adb39c4 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 7 Sep 2018 00:38:38 +0200
Subject: mtd: rawnand: Deprecate ->block_{bad,markbad}() hooks

Those hooks have been overloaded by some drivers for bad reasons:
either the driver was not fitting in the NAND framework and should
have been an MTD driver (docg4), or it was not properly implementing
the OOB read/write request or had a weird layout where BBM are trashed.
In any case, we should discourage people from overloading those
methods and encourage them to fix their driver instead.

Move the ->block_{bad,markbad}() hooks to the nand_legacy struct to
make it clear.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/cafe_nand.c           |  2 +-
 drivers/mtd/nand/raw/diskonchip.c          |  2 +-
 drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c |  4 ++--
 drivers/mtd/nand/raw/nand_base.c           | 37 ++++++++++++++++++++++--------
 drivers/mtd/nand/raw/nand_bbt.c            |  2 +-
 drivers/mtd/nand/raw/qcom_nandc.c          |  4 ++--
 drivers/mtd/nand/raw/sm_common.c           |  2 +-
 include/linux/mtd/rawnand.h                |  9 ++++----
 8 files changed, 40 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index 738af0f0a48d..db62b12800d3 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -719,7 +719,7 @@ static int cafe_nand_probe(struct pci_dev *pdev,
 
 	if (skipbbt) {
 		cafe->nand.options |= NAND_SKIP_BBTSCAN;
-		cafe->nand.block_bad = cafe_nand_block_bad;
+		cafe->nand.legacy.block_bad = cafe_nand_block_bad;
 	}
 
 	if (numtimings && numtimings != 3) {
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index c3a79369fbed..16fdfd06ef25 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -1572,7 +1572,7 @@ static int __init doc_probe(unsigned long physadr)
 	nand->legacy.cmd_ctrl		= doc200x_hwcontrol;
 	nand->legacy.dev_ready	= doc200x_dev_ready;
 	nand->legacy.waitfunc	= doc200x_wait;
-	nand->block_bad		= doc200x_block_bad;
+	nand->legacy.block_bad	= doc200x_block_bad;
 	nand->ecc.hwctl		= doc200x_enable_hwecc;
 	nand->ecc.calculate	= doc200x_calculate_ecc;
 	nand->ecc.correct	= doc200x_correct_data;
diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
index dc6291902acf..94c2b7525c85 100644
--- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c
@@ -1774,7 +1774,7 @@ static int mx23_boot_init(struct gpmi_nand_data  *this)
 		 */
 		if (block_mark != 0xff) {
 			dev_dbg(dev, "Transcribing mark in block %u\n", block);
-			ret = chip->block_markbad(chip, byte);
+			ret = chip->legacy.block_markbad(chip, byte);
 			if (ret)
 				dev_err(dev,
 					"Failed to mark block bad with ret %d\n",
@@ -1908,7 +1908,7 @@ static int gpmi_nand_init(struct gpmi_nand_data *this)
 	chip->legacy.read_buf	= gpmi_read_buf;
 	chip->legacy.write_buf	= gpmi_write_buf;
 	chip->badblock_pattern	= &gpmi_bbt_descr;
-	chip->block_markbad	= gpmi_block_markbad;
+	chip->legacy.block_markbad = gpmi_block_markbad;
 	chip->options		|= NAND_NO_SUBPAGE_WRITE;
 
 	/* Set up swap_block_mark, must be set before the gpmi_set_geometry() */
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 30b55a4677f9..d71a3d303903 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -475,6 +475,27 @@ static int nand_default_block_markbad(struct nand_chip *chip, loff_t ofs)
 	return ret;
 }
 
+/**
+ * nand_markbad_bbm - mark a block by updating the BBM
+ * @chip: NAND chip object
+ * @ofs: offset of the block to mark bad
+ */
+int nand_markbad_bbm(struct nand_chip *chip, loff_t ofs)
+{
+	if (chip->legacy.block_markbad)
+		return chip->legacy.block_markbad(chip, ofs);
+
+	return nand_default_block_markbad(chip, ofs);
+}
+
+static int nand_isbad_bbm(struct nand_chip *chip, loff_t ofs)
+{
+	if (chip->legacy.block_bad)
+		return chip->legacy.block_bad(chip, ofs);
+
+	return nand_block_bad(chip, ofs);
+}
+
 /**
  * nand_block_markbad_lowlevel - mark a block bad
  * @mtd: MTD device structure
@@ -482,7 +503,7 @@ static int nand_default_block_markbad(struct nand_chip *chip, loff_t ofs)
  *
  * This function performs the generic NAND bad block marking steps (i.e., bad
  * block table(s) and/or marker(s)). We only allow the hardware driver to
- * specify how to write bad block markers to OOB (chip->block_markbad).
+ * specify how to write bad block markers to OOB (chip->legacy.block_markbad).
  *
  * We try operations in the following order:
  *
@@ -510,7 +531,7 @@ static int nand_block_markbad_lowlevel(struct mtd_info *mtd, loff_t ofs)
 
 		/* Write bad block marker to OOB */
 		nand_get_device(mtd, FL_WRITING);
-		ret = chip->block_markbad(chip, ofs);
+		ret = nand_markbad_bbm(chip, ofs);
 		nand_release_device(mtd);
 	}
 
@@ -582,11 +603,11 @@ static int nand_block_checkbad(struct mtd_info *mtd, loff_t ofs, int allowbbt)
 {
 	struct nand_chip *chip = mtd_to_nand(mtd);
 
-	if (!chip->bbt)
-		return chip->block_bad(chip, ofs);
-
 	/* Return info from the table */
-	return nand_isbad_bbt(chip, ofs, allowbbt);
+	if (chip->bbt)
+		return nand_isbad_bbt(chip, ofs, allowbbt);
+
+	return nand_isbad_bbm(chip, ofs);
 }
 
 /**
@@ -4942,10 +4963,6 @@ static void nand_set_defaults(struct nand_chip *chip)
 	/* If called twice, pointers that depend on busw may need to be reset */
 	if (!chip->legacy.read_byte || chip->legacy.read_byte == nand_read_byte)
 		chip->legacy.read_byte = busw ? nand_read_byte16 : nand_read_byte;
-	if (!chip->block_bad)
-		chip->block_bad = nand_block_bad;
-	if (!chip->block_markbad)
-		chip->block_markbad = nand_default_block_markbad;
 	if (!chip->legacy.write_buf || chip->legacy.write_buf == nand_write_buf)
 		chip->legacy.write_buf = busw ? nand_write_buf16 : nand_write_buf;
 	if (!chip->legacy.write_byte || chip->legacy.write_byte == nand_write_byte)
diff --git a/drivers/mtd/nand/raw/nand_bbt.c b/drivers/mtd/nand/raw/nand_bbt.c
index 9d73e086c5de..b838ecdde8fb 100644
--- a/drivers/mtd/nand/raw/nand_bbt.c
+++ b/drivers/mtd/nand/raw/nand_bbt.c
@@ -689,7 +689,7 @@ static void mark_bbt_block_bad(struct nand_chip *this,
 	bbt_mark_entry(this, block, BBT_BLOCK_WORN);
 
 	to = (loff_t)block << this->bbt_erase_shift;
-	res = this->block_markbad(this, to);
+	res = nand_markbad_bbm(this, to);
 	if (res)
 		pr_warn("nand_bbt: error %d while marking block %d bad\n",
 			res, block);
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index bd187139416f..79342e24ed81 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -2819,8 +2819,8 @@ static int qcom_nand_host_init_and_register(struct qcom_nand_controller *nandc,
 	 * and block_markbad helpers until we permanently switch to using
 	 * MTD_OPS_RAW for all drivers (with the help of badblockbits)
 	 */
-	chip->block_bad		= qcom_nandc_block_bad;
-	chip->block_markbad	= qcom_nandc_block_markbad;
+	chip->legacy.block_bad		= qcom_nandc_block_bad;
+	chip->legacy.block_markbad	= qcom_nandc_block_markbad;
 
 	chip->controller = &nandc->controller;
 	chip->options |= NAND_NO_SUBPAGE_WRITE | NAND_USE_BOUNCE_BUFFER |
diff --git a/drivers/mtd/nand/raw/sm_common.c b/drivers/mtd/nand/raw/sm_common.c
index bf143e0db787..6f063ef57640 100644
--- a/drivers/mtd/nand/raw/sm_common.c
+++ b/drivers/mtd/nand/raw/sm_common.c
@@ -168,7 +168,7 @@ static int sm_attach_chip(struct nand_chip *chip)
 	/* Bad block marker position */
 	chip->badblockpos = 0x05;
 	chip->badblockbits = 7;
-	chip->block_markbad = sm_block_markbad;
+	chip->legacy.block_markbad = sm_block_markbad;
 
 	/* ECC layout */
 	if (mtd->writesize == SM_SECTOR_SIZE)
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 15642c028da2..aa3e931d0206 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1187,6 +1187,8 @@ int nand_op_parser_exec_op(struct nand_chip *chip,
  *	       If set to NULL no access to ready/busy is available and the
  *	       ready/busy information is read from the chip status register.
  * @waitfunc: hardware specific function for wait on ready.
+ * @block_bad: check if a block is bad, using OOB markers
+ * @block_markbad: mark a block bad
  *
  * If you look at this structure you're already wrong. These fields/hooks are
  * all deprecated.
@@ -1203,6 +1205,8 @@ struct nand_legacy {
 			int page_addr);
 	int (*dev_ready)(struct nand_chip *chip);
 	int (*waitfunc)(struct nand_chip *chip);
+	int (*block_bad)(struct nand_chip *chip, loff_t ofs);
+	int (*block_markbad)(struct nand_chip *chip, loff_t ofs);
 };
 
 /**
@@ -1214,8 +1218,6 @@ struct nand_legacy {
  *			fields/hooks, you should consider reworking the driver
  *			avoid using them.
  * @select_chip:	[REPLACEABLE] select chip nr
- * @block_bad:		[REPLACEABLE] check if a block is bad, using OOB markers
- * @block_markbad:	[REPLACEABLE] mark a block bad
  * @exec_op:		controller specific method to execute NAND operations.
  *			This method replaces ->cmdfunc(),
  *			->legacy.{read,write}_{buf,byte,word}(),
@@ -1303,8 +1305,6 @@ struct nand_chip {
 	struct nand_legacy legacy;
 
 	void (*select_chip)(struct nand_chip *chip, int cs);
-	int (*block_bad)(struct nand_chip *chip, loff_t ofs);
-	int (*block_markbad)(struct nand_chip *chip, loff_t ofs);
 	int (*exec_op)(struct nand_chip *chip,
 		       const struct nand_operation *op,
 		       bool check_only);
@@ -1556,6 +1556,7 @@ extern const struct nand_manufacturer_ops macronix_nand_manuf_ops;
 
 int nand_create_bbt(struct nand_chip *chip);
 int nand_markbad_bbt(struct nand_chip *chip, loff_t offs);
+int nand_markbad_bbm(struct nand_chip *chip, loff_t ofs);
 int nand_isreserved_bbt(struct nand_chip *chip, loff_t offs);
 int nand_isbad_bbt(struct nand_chip *chip, loff_t offs, int allowbbt);
 int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
-- 
cgit v1.2.3


From f9ebd1bb41031afc162e9acda7ad044a35bccf82 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 7 Sep 2018 00:38:39 +0200
Subject: mtd: rawnand: Deprecate ->erase()

The ->erase() hook have been overloaded by some drivers for bad reasons:
either the driver was not fitting in the NAND framework and should have
been an MTD driver (docg4), or the driver uses a specific path for the
ERASE operation (denali), instead of implementing it generically.
In any case, we should discourage people from overloading this method
and encourage them to implement ->exec_op() instead.

Move the ->erase() hook to the nand_legacy struct to make it clear.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/denali.c    | 2 +-
 drivers/mtd/nand/raw/nand_base.c | 7 +++++--
 include/linux/mtd/rawnand.h      | 4 ++--
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c
index c14493ef6126..858358027dc9 100644
--- a/drivers/mtd/nand/raw/denali.c
+++ b/drivers/mtd/nand/raw/denali.c
@@ -1275,7 +1275,7 @@ static int denali_attach_chip(struct nand_chip *chip)
 	chip->ecc.write_page_raw = denali_write_page_raw;
 	chip->ecc.read_oob = denali_read_oob;
 	chip->ecc.write_oob = denali_write_oob;
-	chip->erase = denali_erase;
+	chip->legacy.erase = denali_erase;
 
 	ret = denali_multidev_fixup(denali);
 	if (ret)
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index d71a3d303903..57c89e275a3a 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -4730,7 +4730,11 @@ int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
 		    (page + pages_per_block))
 			chip->pagebuf = -1;
 
-		status = chip->erase(chip, page & chip->pagemask);
+		if (chip->legacy.erase)
+			status = chip->legacy.erase(chip,
+						    page & chip->pagemask);
+		else
+			status = single_erase(chip, page & chip->pagemask);
 
 		/* See if block erase succeeded */
 		if (status) {
@@ -5756,7 +5760,6 @@ ident_done:
 		chip->options |= NAND_ROW_ADDR_3;
 
 	chip->badblockbits = 8;
-	chip->erase = single_erase;
 
 	/* Do not replace user supplied command function! */
 	if (mtd->writesize > 512 && chip->legacy.cmdfunc == nand_command)
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index aa3e931d0206..97c6ff7d127e 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1189,6 +1189,7 @@ int nand_op_parser_exec_op(struct nand_chip *chip,
  * @waitfunc: hardware specific function for wait on ready.
  * @block_bad: check if a block is bad, using OOB markers
  * @block_markbad: mark a block bad
+ * @erase: erase function
  *
  * If you look at this structure you're already wrong. These fields/hooks are
  * all deprecated.
@@ -1207,6 +1208,7 @@ struct nand_legacy {
 	int (*waitfunc)(struct nand_chip *chip);
 	int (*block_bad)(struct nand_chip *chip, loff_t ofs);
 	int (*block_markbad)(struct nand_chip *chip, loff_t ofs);
+	int (*erase)(struct nand_chip *chip, int page);
 };
 
 /**
@@ -1228,7 +1230,6 @@ struct nand_legacy {
  * @buf_align:		minimum buffer alignment required by a platform
  * @dummy_controller:	dummy controller implementation for drivers that can
  *			only control a single chip
- * @erase:		[REPLACEABLE] erase function
  * @chip_delay:		[BOARDSPECIFIC] chip dependent delay for transferring
  *			data from array to read regs (tR).
  * @state:		[INTERN] the current state of the NAND device
@@ -1308,7 +1309,6 @@ struct nand_chip {
 	int (*exec_op)(struct nand_chip *chip,
 		       const struct nand_operation *op,
 		       bool check_only);
-	int (*erase)(struct nand_chip *chip, int page);
 	int (*set_features)(struct nand_chip *chip, int feature_addr,
 			    uint8_t *subfeature_para);
 	int (*get_features)(struct nand_chip *chip, int feature_addr,
-- 
cgit v1.2.3


From 45240367939b071b9957b970379cf64f9a2934ce Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 7 Sep 2018 00:38:40 +0200
Subject: mtd: rawnand: Deprecate ->{set,get}_features() hooks

Those hooks should be replaced by a proper ->exec_op() implementation.
Move them to the nand_legacy struct to make it clear.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c |   4 +-
 drivers/mtd/nand/raw/cafe_nand.c                 |   4 +-
 drivers/mtd/nand/raw/fsl_elbc_nand.c             |   4 +-
 drivers/mtd/nand/raw/fsl_ifc_nand.c              |   4 +-
 drivers/mtd/nand/raw/hisi504_nand.c              |   4 +-
 drivers/mtd/nand/raw/mpc5121_nfc.c               |   4 +-
 drivers/mtd/nand/raw/mxc_nand.c                  |   4 +-
 drivers/mtd/nand/raw/nand_base.c                 | 112 +++++++++--------------
 drivers/mtd/nand/raw/qcom_nandc.c                |   4 +-
 drivers/mtd/nand/raw/sh_flctl.c                  |   4 +-
 drivers/staging/mt29f_spinand/mt29f_spinand.c    |   4 +-
 include/linux/mtd/rawnand.h                      |  12 +--
 12 files changed, 70 insertions(+), 94 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
index 925d4cd4401e..357bc75948b0 100644
--- a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
@@ -390,8 +390,8 @@ int bcm47xxnflash_ops_bcm4706_init(struct bcm47xxnflash *b47n)
 	b47n->nand_chip.legacy.read_byte = bcm47xxnflash_ops_bcm4706_read_byte;
 	b47n->nand_chip.legacy.read_buf = bcm47xxnflash_ops_bcm4706_read_buf;
 	b47n->nand_chip.legacy.write_buf = bcm47xxnflash_ops_bcm4706_write_buf;
-	b47n->nand_chip.set_features = nand_get_set_features_notsupp;
-	b47n->nand_chip.get_features = nand_get_set_features_notsupp;
+	b47n->nand_chip.legacy.set_features = nand_get_set_features_notsupp;
+	b47n->nand_chip.legacy.get_features = nand_get_set_features_notsupp;
 
 	nand_chip->chip_delay = 50;
 	b47n->nand_chip.bbt_options = NAND_BBT_USE_FLASH;
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index db62b12800d3..e3f702bef549 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -709,8 +709,8 @@ static int cafe_nand_probe(struct pci_dev *pdev,
 	cafe->nand.legacy.read_buf = cafe_read_buf;
 	cafe->nand.legacy.write_buf = cafe_write_buf;
 	cafe->nand.select_chip = cafe_select_chip;
-	cafe->nand.set_features = nand_get_set_features_notsupp;
-	cafe->nand.get_features = nand_get_set_features_notsupp;
+	cafe->nand.legacy.set_features = nand_get_set_features_notsupp;
+	cafe->nand.legacy.get_features = nand_get_set_features_notsupp;
 
 	cafe->nand.chip_delay = 0;
 
diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
index 29f0832de39b..c5f3aa908416 100644
--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
@@ -782,8 +782,8 @@ static int fsl_elbc_chip_init(struct fsl_elbc_mtd *priv)
 	chip->select_chip = fsl_elbc_select_chip;
 	chip->legacy.cmdfunc = fsl_elbc_cmdfunc;
 	chip->legacy.waitfunc = fsl_elbc_wait;
-	chip->set_features = nand_get_set_features_notsupp;
-	chip->get_features = nand_get_set_features_notsupp;
+	chip->legacy.set_features = nand_get_set_features_notsupp;
+	chip->legacy.get_features = nand_get_set_features_notsupp;
 
 	chip->bbt_td = &bbt_main_descr;
 	chip->bbt_md = &bbt_mirror_descr;
diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index 682ae383c3e9..a303d12079f0 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -867,8 +867,8 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 	chip->select_chip = fsl_ifc_select_chip;
 	chip->legacy.cmdfunc = fsl_ifc_cmdfunc;
 	chip->legacy.waitfunc = fsl_ifc_wait;
-	chip->set_features = nand_get_set_features_notsupp;
-	chip->get_features = nand_get_set_features_notsupp;
+	chip->legacy.set_features = nand_get_set_features_notsupp;
+	chip->legacy.get_features = nand_get_set_features_notsupp;
 
 	chip->bbt_td = &bbt_main_descr;
 	chip->bbt_md = &bbt_mirror_descr;
diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c
index 6e17239983db..fee7d63e8de8 100644
--- a/drivers/mtd/nand/raw/hisi504_nand.c
+++ b/drivers/mtd/nand/raw/hisi504_nand.c
@@ -788,8 +788,8 @@ static int hisi_nfc_probe(struct platform_device *pdev)
 	chip->legacy.write_buf	= hisi_nfc_write_buf;
 	chip->legacy.read_buf	= hisi_nfc_read_buf;
 	chip->chip_delay	= HINFC504_CHIP_DELAY;
-	chip->set_features	= nand_get_set_features_notsupp;
-	chip->get_features	= nand_get_set_features_notsupp;
+	chip->legacy.set_features	= nand_get_set_features_notsupp;
+	chip->legacy.get_features	= nand_get_set_features_notsupp;
 
 	hisi_nfc_host_init(host);
 
diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c
index 9a6dc783689e..86a0aabe08df 100644
--- a/drivers/mtd/nand/raw/mpc5121_nfc.c
+++ b/drivers/mtd/nand/raw/mpc5121_nfc.c
@@ -698,8 +698,8 @@ static int mpc5121_nfc_probe(struct platform_device *op)
 	chip->legacy.read_buf = mpc5121_nfc_read_buf;
 	chip->legacy.write_buf = mpc5121_nfc_write_buf;
 	chip->select_chip = mpc5121_nfc_select_chip;
-	chip->set_features	= nand_get_set_features_notsupp;
-	chip->get_features	= nand_get_set_features_notsupp;
+	chip->legacy.set_features = nand_get_set_features_notsupp;
+	chip->legacy.get_features = nand_get_set_features_notsupp;
 	chip->bbt_options = NAND_BBT_USE_FLASH;
 	chip->ecc.mode = NAND_ECC_SOFT;
 	chip->ecc.algo = NAND_ECC_HAMMING;
diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index ca074c955147..b8115100a0b8 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -1778,8 +1778,8 @@ static int mxcnd_probe(struct platform_device *pdev)
 	this->legacy.read_byte = mxc_nand_read_byte;
 	this->legacy.write_buf = mxc_nand_write_buf;
 	this->legacy.read_buf = mxc_nand_read_buf;
-	this->set_features = mxc_nand_set_features;
-	this->get_features = mxc_nand_get_features;
+	this->legacy.set_features = mxc_nand_set_features;
+	this->legacy.get_features = mxc_nand_get_features;
 
 	host->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(host->clk))
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 57c89e275a3a..d4a84a871fc7 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -1175,44 +1175,6 @@ static bool nand_supports_set_features(struct nand_chip *chip, int addr)
 		test_bit(addr, chip->parameters.set_feature_list));
 }
 
-/**
- * nand_get_features - wrapper to perform a GET_FEATURE
- * @chip: NAND chip info structure
- * @addr: feature address
- * @subfeature_param: the subfeature parameters, a four bytes array
- *
- * Returns 0 for success, a negative error otherwise. Returns -ENOTSUPP if the
- * operation cannot be handled.
- */
-int nand_get_features(struct nand_chip *chip, int addr,
-		      u8 *subfeature_param)
-{
-	if (!nand_supports_get_features(chip, addr))
-		return -ENOTSUPP;
-
-	return chip->get_features(chip, addr, subfeature_param);
-}
-EXPORT_SYMBOL_GPL(nand_get_features);
-
-/**
- * nand_set_features - wrapper to perform a SET_FEATURE
- * @chip: NAND chip info structure
- * @addr: feature address
- * @subfeature_param: the subfeature parameters, a four bytes array
- *
- * Returns 0 for success, a negative error otherwise. Returns -ENOTSUPP if the
- * operation cannot be handled.
- */
-int nand_set_features(struct nand_chip *chip, int addr,
-		      u8 *subfeature_param)
-{
-	if (!nand_supports_set_features(chip, addr))
-		return -ENOTSUPP;
-
-	return chip->set_features(chip, addr, subfeature_param);
-}
-EXPORT_SYMBOL_GPL(nand_set_features);
-
 /**
  * nand_reset_data_interface - Reset data interface and timings
  * @chip: The NAND chip
@@ -2833,6 +2795,50 @@ int nand_reset(struct nand_chip *chip, int chipnr)
 }
 EXPORT_SYMBOL_GPL(nand_reset);
 
+/**
+ * nand_get_features - wrapper to perform a GET_FEATURE
+ * @chip: NAND chip info structure
+ * @addr: feature address
+ * @subfeature_param: the subfeature parameters, a four bytes array
+ *
+ * Returns 0 for success, a negative error otherwise. Returns -ENOTSUPP if the
+ * operation cannot be handled.
+ */
+int nand_get_features(struct nand_chip *chip, int addr,
+		      u8 *subfeature_param)
+{
+	if (!nand_supports_get_features(chip, addr))
+		return -ENOTSUPP;
+
+	if (chip->legacy.get_features)
+		return chip->legacy.get_features(chip, addr, subfeature_param);
+
+	return nand_get_features_op(chip, addr, subfeature_param);
+}
+EXPORT_SYMBOL_GPL(nand_get_features);
+
+/**
+ * nand_set_features - wrapper to perform a SET_FEATURE
+ * @chip: NAND chip info structure
+ * @addr: feature address
+ * @subfeature_param: the subfeature parameters, a four bytes array
+ *
+ * Returns 0 for success, a negative error otherwise. Returns -ENOTSUPP if the
+ * operation cannot be handled.
+ */
+int nand_set_features(struct nand_chip *chip, int addr,
+		      u8 *subfeature_param)
+{
+	if (!nand_supports_set_features(chip, addr))
+		return -ENOTSUPP;
+
+	if (chip->legacy.set_features)
+		return chip->legacy.set_features(chip, addr, subfeature_param);
+
+	return nand_set_features_op(chip, addr, subfeature_param);
+}
+EXPORT_SYMBOL_GPL(nand_set_features);
+
 /**
  * nand_check_erased_buf - check if a buffer contains (almost) only 0xff data
  * @buf: buffer to test
@@ -4864,30 +4870,6 @@ static int nand_max_bad_blocks(struct mtd_info *mtd, loff_t ofs, size_t len)
 	return chip->max_bb_per_die * (part_end_die - part_start_die + 1);
 }
 
-/**
- * nand_default_set_features- [REPLACEABLE] set NAND chip features
- * @chip: nand chip info structure
- * @addr: feature address.
- * @subfeature_param: the subfeature parameters, a four bytes array.
- */
-static int nand_default_set_features(struct nand_chip *chip, int addr,
-				     uint8_t *subfeature_param)
-{
-	return nand_set_features_op(chip, addr, subfeature_param);
-}
-
-/**
- * nand_default_get_features- [REPLACEABLE] get NAND chip features
- * @chip: nand chip info structure
- * @addr: feature address.
- * @subfeature_param: the subfeature parameters, a four bytes array.
- */
-static int nand_default_get_features(struct nand_chip *chip, int addr,
-				     uint8_t *subfeature_param)
-{
-	return nand_get_features_op(chip, addr, subfeature_param);
-}
-
 /**
  * nand_get_set_features_notsupp - set/get features stub returning -ENOTSUPP
  * @chip: nand chip info structure
@@ -4958,12 +4940,6 @@ static void nand_set_defaults(struct nand_chip *chip)
 	if (!chip->select_chip)
 		chip->select_chip = nand_select_chip;
 
-	/* set for ONFI nand */
-	if (!chip->set_features)
-		chip->set_features = nand_default_set_features;
-	if (!chip->get_features)
-		chip->get_features = nand_default_get_features;
-
 	/* If called twice, pointers that depend on busw may need to be reset */
 	if (!chip->legacy.read_byte || chip->legacy.read_byte == nand_read_byte)
 		chip->legacy.read_byte = busw ? nand_read_byte16 : nand_read_byte;
diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c
index 79342e24ed81..ef75dfa62a4f 100644
--- a/drivers/mtd/nand/raw/qcom_nandc.c
+++ b/drivers/mtd/nand/raw/qcom_nandc.c
@@ -2808,8 +2808,8 @@ static int qcom_nand_host_init_and_register(struct qcom_nand_controller *nandc,
 	chip->legacy.read_byte	= qcom_nandc_read_byte;
 	chip->legacy.read_buf	= qcom_nandc_read_buf;
 	chip->legacy.write_buf	= qcom_nandc_write_buf;
-	chip->set_features	= nand_get_set_features_notsupp;
-	chip->get_features	= nand_get_set_features_notsupp;
+	chip->legacy.set_features	= nand_get_set_features_notsupp;
+	chip->legacy.get_features	= nand_get_set_features_notsupp;
 
 	/*
 	 * the bad block marker is readable only when we read the last codeword
diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c
index 71658fbd99a3..40a4159924a7 100644
--- a/drivers/mtd/nand/raw/sh_flctl.c
+++ b/drivers/mtd/nand/raw/sh_flctl.c
@@ -1185,8 +1185,8 @@ static int flctl_probe(struct platform_device *pdev)
 	nand->legacy.read_buf = flctl_read_buf;
 	nand->select_chip = flctl_select_chip;
 	nand->legacy.cmdfunc = flctl_cmdfunc;
-	nand->set_features = nand_get_set_features_notsupp;
-	nand->get_features = nand_get_set_features_notsupp;
+	nand->legacy.set_features = nand_get_set_features_notsupp;
+	nand->legacy.get_features = nand_get_set_features_notsupp;
 
 	if (pdata->flcmncr_val & SEL_16BIT)
 		nand->options |= NAND_BUSWIDTH_16;
diff --git a/drivers/staging/mt29f_spinand/mt29f_spinand.c b/drivers/staging/mt29f_spinand/mt29f_spinand.c
index fb28cc6331f2..def8a1f57d1c 100644
--- a/drivers/staging/mt29f_spinand/mt29f_spinand.c
+++ b/drivers/staging/mt29f_spinand/mt29f_spinand.c
@@ -922,8 +922,8 @@ static int spinand_probe(struct spi_device *spi_nand)
 	chip->legacy.waitfunc	= spinand_wait;
 	chip->options	|= NAND_CACHEPRG;
 	chip->select_chip = spinand_select_chip;
-	chip->set_features = nand_get_set_features_notsupp;
-	chip->get_features = nand_get_set_features_notsupp;
+	chip->legacy.set_features = nand_get_set_features_notsupp;
+	chip->legacy.get_features = nand_get_set_features_notsupp;
 
 	mtd = nand_to_mtd(chip);
 
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 97c6ff7d127e..02ac70a30c63 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1190,6 +1190,8 @@ int nand_op_parser_exec_op(struct nand_chip *chip,
  * @block_bad: check if a block is bad, using OOB markers
  * @block_markbad: mark a block bad
  * @erase: erase function
+ * @set_features: set the NAND chip features
+ * @get_features: get the NAND chip features
  *
  * If you look at this structure you're already wrong. These fields/hooks are
  * all deprecated.
@@ -1209,6 +1211,10 @@ struct nand_legacy {
 	int (*block_bad)(struct nand_chip *chip, loff_t ofs);
 	int (*block_markbad)(struct nand_chip *chip, loff_t ofs);
 	int (*erase)(struct nand_chip *chip, int page);
+	int (*set_features)(struct nand_chip *chip, int feature_addr,
+			    u8 *subfeature_para);
+	int (*get_features)(struct nand_chip *chip, int feature_addr,
+			    u8 *subfeature_para);
 };
 
 /**
@@ -1279,8 +1285,6 @@ struct nand_legacy {
  * @blocks_per_die:	[INTERN] The number of PEBs in a die
  * @data_interface:	[INTERN] NAND interface timing information
  * @read_retries:	[INTERN] the number of read retry modes supported
- * @set_features:	[REPLACEABLE] set the NAND chip features
- * @get_features:	[REPLACEABLE] get the NAND chip features
  * @setup_data_interface: [OPTIONAL] setup the data interface and timing. If
  *			  chipnr is set to %NAND_DATA_IFACE_CHECK_ONLY this
  *			  means the configuration should not be applied but
@@ -1309,10 +1313,6 @@ struct nand_chip {
 	int (*exec_op)(struct nand_chip *chip,
 		       const struct nand_operation *op,
 		       bool check_only);
-	int (*set_features)(struct nand_chip *chip, int feature_addr,
-			    uint8_t *subfeature_para);
-	int (*get_features)(struct nand_chip *chip, int feature_addr,
-			    uint8_t *subfeature_para);
 	int (*setup_read_retry)(struct nand_chip *chip, int retry_mode);
 	int (*setup_data_interface)(struct nand_chip *chip, int chipnr,
 				    const struct nand_data_interface *conf);
-- 
cgit v1.2.3


From 3cece3abebda068e55e19302a6f0fa60cf553737 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 7 Sep 2018 00:38:41 +0200
Subject: mtd: rawnand: Deprecate ->chip_delay

The wait timeouts and delays are directly extracted from the NAND
timings and ->chip_delay is only used in legacy path, so let's move it
to the nand_legacy struct to make it clear.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 Documentation/driver-api/mtdnand.rst             |  2 +-
 drivers/mtd/nand/raw/ams-delta.c                 |  2 +-
 drivers/mtd/nand/raw/atmel/nand-controller.c     |  2 +-
 drivers/mtd/nand/raw/au1550nd.c                  |  4 ++--
 drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c |  2 +-
 drivers/mtd/nand/raw/cafe_nand.c                 |  2 +-
 drivers/mtd/nand/raw/cmx270_nand.c               |  2 +-
 drivers/mtd/nand/raw/cs553x_nand.c               |  2 +-
 drivers/mtd/nand/raw/davinci_nand.c              |  2 +-
 drivers/mtd/nand/raw/diskonchip.c                |  4 ++--
 drivers/mtd/nand/raw/fsl_elbc_nand.c             |  4 ++--
 drivers/mtd/nand/raw/fsl_ifc_nand.c              |  4 ++--
 drivers/mtd/nand/raw/fsl_upm.c                   |  2 +-
 drivers/mtd/nand/raw/fsmc_nand.c                 |  1 -
 drivers/mtd/nand/raw/gpio.c                      |  2 +-
 drivers/mtd/nand/raw/hisi504_nand.c              |  2 +-
 drivers/mtd/nand/raw/jz4740_nand.c               |  2 +-
 drivers/mtd/nand/raw/jz4780_nand.c               |  2 +-
 drivers/mtd/nand/raw/lpc32xx_mlc.c               |  2 +-
 drivers/mtd/nand/raw/lpc32xx_slc.c               |  2 +-
 drivers/mtd/nand/raw/mxc_nand.c                  |  2 +-
 drivers/mtd/nand/raw/nand_base.c                 | 14 +++++++-------
 drivers/mtd/nand/raw/nandsim.c                   |  2 +-
 drivers/mtd/nand/raw/ndfc.c                      |  2 +-
 drivers/mtd/nand/raw/nuc900_nand.c               |  6 +++---
 drivers/mtd/nand/raw/omap2.c                     |  4 ++--
 drivers/mtd/nand/raw/orion_nand.c                |  2 +-
 drivers/mtd/nand/raw/oxnas_nand.c                |  2 +-
 drivers/mtd/nand/raw/pasemi_nand.c               |  2 +-
 drivers/mtd/nand/raw/plat_nand.c                 |  2 +-
 drivers/mtd/nand/raw/s3c2410.c                   |  2 +-
 drivers/mtd/nand/raw/sh_flctl.c                  |  2 +-
 drivers/mtd/nand/raw/sharpsl.c                   |  2 +-
 drivers/mtd/nand/raw/socrates_nand.c             |  2 +-
 drivers/mtd/nand/raw/sunxi_nand.c                |  2 +-
 drivers/mtd/nand/raw/tmio_nand.c                 |  2 +-
 drivers/mtd/nand/raw/txx9ndfmc.c                 |  2 +-
 drivers/mtd/nand/raw/xway_nand.c                 |  2 +-
 include/linux/mtd/rawnand.h                      |  6 +++---
 39 files changed, 53 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/mtdnand.rst b/Documentation/driver-api/mtdnand.rst
index 0d3fa4d6576d..55447659b81f 100644
--- a/Documentation/driver-api/mtdnand.rst
+++ b/Documentation/driver-api/mtdnand.rst
@@ -240,7 +240,7 @@ necessary information about the device.
         /* Reference hardware control function */
         this->hwcontrol = board_hwcontrol;
         /* Set command delay time, see datasheet for correct value */
-        this->chip_delay = CHIP_DEPENDEND_COMMAND_DELAY;
+        this->legacy.chip_delay = CHIP_DEPENDEND_COMMAND_DELAY;
         /* Assign the device ready function, if available */
         this->legacy.dev_ready = board_dev_ready;
         this->eccmode = NAND_ECC_SOFT;
diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c
index 2fa6fa3c7464..3d3786dcc5d1 100644
--- a/drivers/mtd/nand/raw/ams-delta.c
+++ b/drivers/mtd/nand/raw/ams-delta.c
@@ -221,7 +221,7 @@ static int ams_delta_init(struct platform_device *pdev)
 		pr_notice("Couldn't request gpio for Delta NAND ready.\n");
 	}
 	/* 25 us command delay time */
-	this->chip_delay = 30;
+	this->legacy.chip_delay = 30;
 	this->ecc.mode = NAND_ECC_SOFT;
 	this->ecc.algo = NAND_ECC_HAMMING;
 
diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c
index dd022080442d..ad0245c66892 100644
--- a/drivers/mtd/nand/raw/atmel/nand-controller.c
+++ b/drivers/mtd/nand/raw/atmel/nand-controller.c
@@ -1483,7 +1483,7 @@ static void atmel_nand_init(struct atmel_nand_controller *nc,
 		chip->setup_data_interface = atmel_nand_setup_data_interface;
 
 	/* Some NANDs require a longer delay than the default one (20us). */
-	chip->chip_delay = 40;
+	chip->legacy.chip_delay = 40;
 
 	/*
 	 * Use a bounce buffer when the buffer passed by the MTD user is not
diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c
index 81bba469c0e4..9731c1c487f6 100644
--- a/drivers/mtd/nand/raw/au1550nd.c
+++ b/drivers/mtd/nand/raw/au1550nd.c
@@ -342,7 +342,7 @@ static void au1550_command(struct nand_chip *this, unsigned command,
 		/* Apply a short delay always to ensure that we do wait tWB. */
 		ndelay(100);
 		/* Wait for a chip to become ready... */
-		for (i = this->chip_delay;
+		for (i = this->legacy.chip_delay;
 		     !this->legacy.dev_ready(this) && i > 0; --i)
 			udelay(1);
 
@@ -434,7 +434,7 @@ static int au1550nd_probe(struct platform_device *pdev)
 	this->legacy.cmdfunc = au1550_command;
 
 	/* 30 us command delay time */
-	this->chip_delay = 30;
+	this->legacy.chip_delay = 30;
 	this->ecc.mode = NAND_ECC_SOFT;
 	this->ecc.algo = NAND_ECC_HAMMING;
 
diff --git a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
index 357bc75948b0..9095a79ebc7d 100644
--- a/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
+++ b/drivers/mtd/nand/raw/bcm47xxnflash/ops_bcm4706.c
@@ -393,7 +393,7 @@ int bcm47xxnflash_ops_bcm4706_init(struct bcm47xxnflash *b47n)
 	b47n->nand_chip.legacy.set_features = nand_get_set_features_notsupp;
 	b47n->nand_chip.legacy.get_features = nand_get_set_features_notsupp;
 
-	nand_chip->chip_delay = 50;
+	nand_chip->legacy.chip_delay = 50;
 	b47n->nand_chip.bbt_options = NAND_BBT_USE_FLASH;
 	b47n->nand_chip.ecc.mode = NAND_ECC_NONE; /* TODO: implement ECC */
 
diff --git a/drivers/mtd/nand/raw/cafe_nand.c b/drivers/mtd/nand/raw/cafe_nand.c
index e3f702bef549..c1a745940d12 100644
--- a/drivers/mtd/nand/raw/cafe_nand.c
+++ b/drivers/mtd/nand/raw/cafe_nand.c
@@ -712,7 +712,7 @@ static int cafe_nand_probe(struct pci_dev *pdev,
 	cafe->nand.legacy.set_features = nand_get_set_features_notsupp;
 	cafe->nand.legacy.get_features = nand_get_set_features_notsupp;
 
-	cafe->nand.chip_delay = 0;
+	cafe->nand.legacy.chip_delay = 0;
 
 	/* Enable the following for a flash based bad block table */
 	cafe->nand.bbt_options = NAND_BBT_USE_FLASH;
diff --git a/drivers/mtd/nand/raw/cmx270_nand.c b/drivers/mtd/nand/raw/cmx270_nand.c
index 585dbd51d8b1..143e4acacaae 100644
--- a/drivers/mtd/nand/raw/cmx270_nand.c
+++ b/drivers/mtd/nand/raw/cmx270_nand.c
@@ -179,7 +179,7 @@ static int __init cmx270_init(void)
 	this->legacy.dev_ready = cmx270_device_ready;
 
 	/* 15 us command delay time */
-	this->chip_delay = 20;
+	this->legacy.chip_delay = 20;
 	this->ecc.mode = NAND_ECC_SOFT;
 	this->ecc.algo = NAND_ECC_HAMMING;
 
diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c
index 61ae9e60bf0d..c6f578aff5d9 100644
--- a/drivers/mtd/nand/raw/cs553x_nand.c
+++ b/drivers/mtd/nand/raw/cs553x_nand.c
@@ -212,7 +212,7 @@ static int __init cs553x_init_one(int cs, int mmio, unsigned long adr)
 	this->legacy.read_buf = cs553x_read_buf;
 	this->legacy.write_buf = cs553x_write_buf;
 
-	this->chip_delay = 0;
+	this->legacy.chip_delay = 0;
 
 	this->ecc.mode = NAND_ECC_HW;
 	this->ecc.size = 256;
diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c
index bae568d68432..80f228d23cd2 100644
--- a/drivers/mtd/nand/raw/davinci_nand.c
+++ b/drivers/mtd/nand/raw/davinci_nand.c
@@ -761,7 +761,7 @@ static int nand_davinci_probe(struct platform_device *pdev)
 
 	info->chip.legacy.IO_ADDR_R	= vaddr;
 	info->chip.legacy.IO_ADDR_W	= vaddr;
-	info->chip.chip_delay	= 0;
+	info->chip.legacy.chip_delay	= 0;
 	info->chip.select_chip	= nand_davinci_select_chip;
 
 	/* options such as NAND_BBT_USE_FLASH */
diff --git a/drivers/mtd/nand/raw/diskonchip.c b/drivers/mtd/nand/raw/diskonchip.c
index 16fdfd06ef25..3a4c373affab 100644
--- a/drivers/mtd/nand/raw/diskonchip.c
+++ b/drivers/mtd/nand/raw/diskonchip.c
@@ -717,7 +717,7 @@ static void doc2001plus_command(struct nand_chip *this, unsigned command,
 	case NAND_CMD_RESET:
 		if (this->legacy.dev_ready)
 			break;
-		udelay(this->chip_delay);
+		udelay(this->legacy.chip_delay);
 		WriteDOC(NAND_CMD_STATUS, docptr, Mplus_FlashCmd);
 		WriteDOC(0, docptr, Mplus_WritePipeTerm);
 		WriteDOC(0, docptr, Mplus_WritePipeTerm);
@@ -731,7 +731,7 @@ static void doc2001plus_command(struct nand_chip *this, unsigned command,
 		 * command delay
 		 */
 		if (!this->legacy.dev_ready) {
-			udelay(this->chip_delay);
+			udelay(this->legacy.chip_delay);
 			return;
 		}
 	}
diff --git a/drivers/mtd/nand/raw/fsl_elbc_nand.c b/drivers/mtd/nand/raw/fsl_elbc_nand.c
index c5f3aa908416..d6ed697fcfe6 100644
--- a/drivers/mtd/nand/raw/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_elbc_nand.c
@@ -658,8 +658,8 @@ static int fsl_elbc_attach_chip(struct nand_chip *chip)
 	        chip->chipsize);
 	dev_dbg(priv->dev, "fsl_elbc_init: nand->pagemask = %8x\n",
 	        chip->pagemask);
-	dev_dbg(priv->dev, "fsl_elbc_init: nand->chip_delay = %d\n",
-	        chip->chip_delay);
+	dev_dbg(priv->dev, "fsl_elbc_init: nand->legacy.chip_delay = %d\n",
+	        chip->legacy.chip_delay);
 	dev_dbg(priv->dev, "fsl_elbc_init: nand->badblockpos = %d\n",
 	        chip->badblockpos);
 	dev_dbg(priv->dev, "fsl_elbc_init: nand->chip_shift = %d\n",
diff --git a/drivers/mtd/nand/raw/fsl_ifc_nand.c b/drivers/mtd/nand/raw/fsl_ifc_nand.c
index a303d12079f0..6f4afc44381a 100644
--- a/drivers/mtd/nand/raw/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/raw/fsl_ifc_nand.c
@@ -727,8 +727,8 @@ static int fsl_ifc_attach_chip(struct nand_chip *chip)
 							chip->chipsize);
 	dev_dbg(priv->dev, "%s: nand->pagemask = %8x\n", __func__,
 							chip->pagemask);
-	dev_dbg(priv->dev, "%s: nand->chip_delay = %d\n", __func__,
-							chip->chip_delay);
+	dev_dbg(priv->dev, "%s: nand->legacy.chip_delay = %d\n", __func__,
+		chip->legacy.chip_delay);
 	dev_dbg(priv->dev, "%s: nand->badblockpos = %d\n", __func__,
 							chip->badblockpos);
 	dev_dbg(priv->dev, "%s: nand->chip_shift = %d\n", __func__,
diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c
index fcb79718b6c3..673c5a0c9345 100644
--- a/drivers/mtd/nand/raw/fsl_upm.c
+++ b/drivers/mtd/nand/raw/fsl_upm.c
@@ -163,7 +163,7 @@ static int fun_chip_init(struct fsl_upm_nand *fun,
 	fun->chip.legacy.IO_ADDR_R = fun->io_base;
 	fun->chip.legacy.IO_ADDR_W = fun->io_base;
 	fun->chip.legacy.cmd_ctrl = fun_cmd_ctrl;
-	fun->chip.chip_delay = fun->chip_delay;
+	fun->chip.legacy.chip_delay = fun->chip_delay;
 	fun->chip.legacy.read_byte = fun_read_byte;
 	fun->chip.legacy.read_buf = fun_read_buf;
 	fun->chip.legacy.write_buf = fun_write_buf;
diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c
index 5e06fce4b295..f9874fc72f30 100644
--- a/drivers/mtd/nand/raw/fsmc_nand.c
+++ b/drivers/mtd/nand/raw/fsmc_nand.c
@@ -1080,7 +1080,6 @@ static int __init fsmc_nand_probe(struct platform_device *pdev)
 	mtd->dev.parent = &pdev->dev;
 	nand->exec_op = fsmc_exec_op;
 	nand->select_chip = fsmc_select_chip;
-	nand->chip_delay = 30;
 
 	/*
 	 * Setup default ECC mode. nand_dt_init() called from nand_scan_ident()
diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c
index c4f19067702e..a6c9a824a7d4 100644
--- a/drivers/mtd/nand/raw/gpio.c
+++ b/drivers/mtd/nand/raw/gpio.c
@@ -278,7 +278,7 @@ static int gpio_nand_probe(struct platform_device *pdev)
 	chip->ecc.mode		= NAND_ECC_SOFT;
 	chip->ecc.algo		= NAND_ECC_HAMMING;
 	chip->options		= gpiomtd->plat.options;
-	chip->chip_delay	= gpiomtd->plat.chip_delay;
+	chip->legacy.chip_delay	= gpiomtd->plat.chip_delay;
 	chip->legacy.cmd_ctrl	= gpio_nand_cmd_ctrl;
 
 	mtd			= nand_to_mtd(chip);
diff --git a/drivers/mtd/nand/raw/hisi504_nand.c b/drivers/mtd/nand/raw/hisi504_nand.c
index fee7d63e8de8..f043938ee36b 100644
--- a/drivers/mtd/nand/raw/hisi504_nand.c
+++ b/drivers/mtd/nand/raw/hisi504_nand.c
@@ -787,7 +787,7 @@ static int hisi_nfc_probe(struct platform_device *pdev)
 	chip->legacy.read_byte	= hisi_nfc_read_byte;
 	chip->legacy.write_buf	= hisi_nfc_write_buf;
 	chip->legacy.read_buf	= hisi_nfc_read_buf;
-	chip->chip_delay	= HINFC504_CHIP_DELAY;
+	chip->legacy.chip_delay	= HINFC504_CHIP_DELAY;
 	chip->legacy.set_features	= nand_get_set_features_notsupp;
 	chip->legacy.get_features	= nand_get_set_features_notsupp;
 
diff --git a/drivers/mtd/nand/raw/jz4740_nand.c b/drivers/mtd/nand/raw/jz4740_nand.c
index 7a1b4c3ff6fd..fb59cfca11a7 100644
--- a/drivers/mtd/nand/raw/jz4740_nand.c
+++ b/drivers/mtd/nand/raw/jz4740_nand.c
@@ -425,7 +425,7 @@ static int jz_nand_probe(struct platform_device *pdev)
 	chip->ecc.strength	= 4;
 	chip->ecc.options	= NAND_ECC_GENERIC_ERASED_CHECK;
 
-	chip->chip_delay = 50;
+	chip->legacy.chip_delay = 50;
 	chip->legacy.cmd_ctrl = jz_nand_cmd_ctrl;
 	chip->select_chip = jz_nand_select_chip;
 	chip->dummy_controller.ops = &jz_nand_controller_ops;
diff --git a/drivers/mtd/nand/raw/jz4780_nand.c b/drivers/mtd/nand/raw/jz4780_nand.c
index 2a960211b97d..cdf22100ab77 100644
--- a/drivers/mtd/nand/raw/jz4780_nand.c
+++ b/drivers/mtd/nand/raw/jz4780_nand.c
@@ -277,7 +277,7 @@ static int jz4780_nand_init_chip(struct platform_device *pdev,
 
 	chip->legacy.IO_ADDR_R = cs->base + OFFSET_DATA;
 	chip->legacy.IO_ADDR_W = cs->base + OFFSET_DATA;
-	chip->chip_delay = RB_DELAY_US;
+	chip->legacy.chip_delay = RB_DELAY_US;
 	chip->options = NAND_NO_SUBPAGE_WRITE;
 	chip->select_chip = jz4780_nand_select_chip;
 	chip->legacy.cmd_ctrl = jz4780_nand_cmd_ctrl;
diff --git a/drivers/mtd/nand/raw/lpc32xx_mlc.c b/drivers/mtd/nand/raw/lpc32xx_mlc.c
index 8a1dfb7ee885..abbb655fe154 100644
--- a/drivers/mtd/nand/raw/lpc32xx_mlc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_mlc.c
@@ -741,7 +741,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
 
 	nand_chip->legacy.cmd_ctrl = lpc32xx_nand_cmd_ctrl;
 	nand_chip->legacy.dev_ready = lpc32xx_nand_device_ready;
-	nand_chip->chip_delay = 25; /* us */
+	nand_chip->legacy.chip_delay = 25; /* us */
 	nand_chip->legacy.IO_ADDR_R = MLC_DATA(host->io_base);
 	nand_chip->legacy.IO_ADDR_W = MLC_DATA(host->io_base);
 
diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c
index 75d62d6bed7b..f2f2cdbb9d04 100644
--- a/drivers/mtd/nand/raw/lpc32xx_slc.c
+++ b/drivers/mtd/nand/raw/lpc32xx_slc.c
@@ -882,7 +882,7 @@ static int lpc32xx_nand_probe(struct platform_device *pdev)
 	chip->legacy.IO_ADDR_W = SLC_DATA(host->io_base);
 	chip->legacy.cmd_ctrl = lpc32xx_nand_cmd_ctrl;
 	chip->legacy.dev_ready = lpc32xx_nand_device_ready;
-	chip->chip_delay = 20; /* 20us command delay time */
+	chip->legacy.chip_delay = 20; /* 20us command delay time */
 
 	/* Init NAND controller */
 	lpc32xx_nand_setup(host);
diff --git a/drivers/mtd/nand/raw/mxc_nand.c b/drivers/mtd/nand/raw/mxc_nand.c
index b8115100a0b8..88bd3f6a499c 100644
--- a/drivers/mtd/nand/raw/mxc_nand.c
+++ b/drivers/mtd/nand/raw/mxc_nand.c
@@ -1769,7 +1769,7 @@ static int mxcnd_probe(struct platform_device *pdev)
 	mtd->name = DRIVER_NAME;
 
 	/* 50 us command delay time */
-	this->chip_delay = 5;
+	this->legacy.chip_delay = 5;
 
 	nand_set_controller_data(this, host);
 	nand_set_flash_node(this, pdev->dev.of_node),
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index d4a84a871fc7..2f8bbc3bca7a 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -827,7 +827,7 @@ static void nand_command(struct nand_chip *chip, unsigned int command,
 	case NAND_CMD_RESET:
 		if (chip->legacy.dev_ready)
 			break;
-		udelay(chip->chip_delay);
+		udelay(chip->legacy.chip_delay);
 		chip->legacy.cmd_ctrl(chip, NAND_CMD_STATUS,
 				      NAND_CTRL_CLE | NAND_CTRL_CHANGE);
 		chip->legacy.cmd_ctrl(chip, NAND_CMD_NONE,
@@ -853,7 +853,7 @@ static void nand_command(struct nand_chip *chip, unsigned int command,
 		 * command delay
 		 */
 		if (!chip->legacy.dev_ready) {
-			udelay(chip->chip_delay);
+			udelay(chip->legacy.chip_delay);
 			return;
 		}
 	}
@@ -964,7 +964,7 @@ static void nand_command_lp(struct nand_chip *chip, unsigned int command,
 	case NAND_CMD_RESET:
 		if (chip->legacy.dev_ready)
 			break;
-		udelay(chip->chip_delay);
+		udelay(chip->legacy.chip_delay);
 		chip->legacy.cmd_ctrl(chip, NAND_CMD_STATUS,
 				      NAND_NCE | NAND_CLE | NAND_CTRL_CHANGE);
 		chip->legacy.cmd_ctrl(chip, NAND_CMD_NONE,
@@ -1005,7 +1005,7 @@ static void nand_command_lp(struct nand_chip *chip, unsigned int command,
 		 * command delay.
 		 */
 		if (!chip->legacy.dev_ready) {
-			udelay(chip->chip_delay);
+			udelay(chip->legacy.chip_delay);
 			return;
 		}
 	}
@@ -2206,7 +2206,7 @@ static int nand_wait_rdy_op(struct nand_chip *chip, unsigned int timeout_ms,
 
 	/* Apply delay or wait for ready/busy pin */
 	if (!chip->legacy.dev_ready)
-		udelay(chip->chip_delay);
+		udelay(chip->legacy.chip_delay);
 	else
 		nand_wait_ready(chip);
 
@@ -4926,8 +4926,8 @@ static void nand_set_defaults(struct nand_chip *chip)
 	unsigned int busw = chip->options & NAND_BUSWIDTH_16;
 
 	/* check for proper chip_delay setup, set 20us if not */
-	if (!chip->chip_delay)
-		chip->chip_delay = 20;
+	if (!chip->legacy.chip_delay)
+		chip->legacy.chip_delay = 20;
 
 	/* check, if a user supplied command function given */
 	if (!chip->legacy.cmdfunc && !chip->exec_op)
diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c
index ff0c372ee288..c452819f6123 100644
--- a/drivers/mtd/nand/raw/nandsim.c
+++ b/drivers/mtd/nand/raw/nandsim.c
@@ -656,7 +656,7 @@ static int __init init_nandsim(struct mtd_info *mtd)
 	}
 
 	/* Force mtd to not do delays */
-	chip->chip_delay = 0;
+	chip->legacy.chip_delay = 0;
 
 	/* Initialize the NAND flash parameters */
 	ns->busw = chip->options & NAND_BUSWIDTH_16 ? 16 : 8;
diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c
index 19b4cb2d1c6e..d49a7a17146c 100644
--- a/drivers/mtd/nand/raw/ndfc.c
+++ b/drivers/mtd/nand/raw/ndfc.c
@@ -147,7 +147,7 @@ static int ndfc_chip_init(struct ndfc_controller *ndfc,
 	chip->legacy.cmd_ctrl = ndfc_hwcontrol;
 	chip->legacy.dev_ready = ndfc_ready;
 	chip->select_chip = ndfc_select_chip;
-	chip->chip_delay = 50;
+	chip->legacy.chip_delay = 50;
 	chip->controller = &ndfc->ndfc_control;
 	chip->legacy.read_buf = ndfc_read_buf;
 	chip->legacy.write_buf = ndfc_write_buf;
diff --git a/drivers/mtd/nand/raw/nuc900_nand.c b/drivers/mtd/nand/raw/nuc900_nand.c
index 208b6de67510..38b1994e7ed3 100644
--- a/drivers/mtd/nand/raw/nuc900_nand.c
+++ b/drivers/mtd/nand/raw/nuc900_nand.c
@@ -177,7 +177,7 @@ static void nuc900_nand_command_lp(struct nand_chip *chip,
 	case NAND_CMD_RESET:
 		if (chip->legacy.dev_ready)
 			break;
-		udelay(chip->chip_delay);
+		udelay(chip->legacy.chip_delay);
 
 		write_cmd_reg(nand, NAND_CMD_STATUS);
 		write_cmd_reg(nand, command);
@@ -197,7 +197,7 @@ static void nuc900_nand_command_lp(struct nand_chip *chip,
 	default:
 
 		if (!chip->legacy.dev_ready) {
-			udelay(chip->chip_delay);
+			udelay(chip->legacy.chip_delay);
 			return;
 		}
 	}
@@ -259,7 +259,7 @@ static int nuc900_nand_probe(struct platform_device *pdev)
 	chip->legacy.read_byte	= nuc900_nand_read_byte;
 	chip->legacy.write_buf	= nuc900_nand_write_buf;
 	chip->legacy.read_buf	= nuc900_nand_read_buf;
-	chip->chip_delay	= 50;
+	chip->legacy.chip_delay	= 50;
 	chip->options		= 0;
 	chip->ecc.mode		= NAND_ECC_SOFT;
 	chip->ecc.algo		= NAND_ECC_HAMMING;
diff --git a/drivers/mtd/nand/raw/omap2.c b/drivers/mtd/nand/raw/omap2.c
index 5e1e7ced0d9e..886d05c391ef 100644
--- a/drivers/mtd/nand/raw/omap2.c
+++ b/drivers/mtd/nand/raw/omap2.c
@@ -2248,10 +2248,10 @@ static int omap_nand_probe(struct platform_device *pdev)
 	 */
 	if (info->ready_gpiod) {
 		nand_chip->legacy.dev_ready = omap_dev_ready;
-		nand_chip->chip_delay = 0;
+		nand_chip->legacy.chip_delay = 0;
 	} else {
 		nand_chip->legacy.waitfunc = omap_wait;
-		nand_chip->chip_delay = 50;
+		nand_chip->legacy.chip_delay = 50;
 	}
 
 	if (info->flash_bbt)
diff --git a/drivers/mtd/nand/raw/orion_nand.c b/drivers/mtd/nand/raw/orion_nand.c
index bf288c3c930b..d27b39a7223c 100644
--- a/drivers/mtd/nand/raw/orion_nand.c
+++ b/drivers/mtd/nand/raw/orion_nand.c
@@ -143,7 +143,7 @@ static int __init orion_nand_probe(struct platform_device *pdev)
 	nc->ecc.algo = NAND_ECC_HAMMING;
 
 	if (board->chip_delay)
-		nc->chip_delay = board->chip_delay;
+		nc->legacy.chip_delay = board->chip_delay;
 
 	WARN(board->width > 16,
 		"%d bit bus width out of range",
diff --git a/drivers/mtd/nand/raw/oxnas_nand.c b/drivers/mtd/nand/raw/oxnas_nand.c
index fb0ebb296f6c..0e52dc29141c 100644
--- a/drivers/mtd/nand/raw/oxnas_nand.c
+++ b/drivers/mtd/nand/raw/oxnas_nand.c
@@ -136,7 +136,7 @@ static int oxnas_nand_probe(struct platform_device *pdev)
 		chip->legacy.read_buf = oxnas_nand_read_buf;
 		chip->legacy.read_byte = oxnas_nand_read_byte;
 		chip->legacy.write_buf = oxnas_nand_write_buf;
-		chip->chip_delay = 30;
+		chip->legacy.chip_delay = 30;
 
 		/* Scan to find existence of the device */
 		err = nand_scan(chip, 1);
diff --git a/drivers/mtd/nand/raw/pasemi_nand.c b/drivers/mtd/nand/raw/pasemi_nand.c
index e9a4e82bfb34..643cd22af009 100644
--- a/drivers/mtd/nand/raw/pasemi_nand.c
+++ b/drivers/mtd/nand/raw/pasemi_nand.c
@@ -143,7 +143,7 @@ static int pasemi_nand_probe(struct platform_device *ofdev)
 	chip->legacy.dev_ready = pasemi_device_ready;
 	chip->legacy.read_buf = pasemi_read_buf;
 	chip->legacy.write_buf = pasemi_write_buf;
-	chip->chip_delay = 0;
+	chip->legacy.chip_delay = 0;
 	chip->ecc.mode = NAND_ECC_SOFT;
 	chip->ecc.algo = NAND_ECC_HAMMING;
 
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index 6cce01d09364..156c9150fec4 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -67,7 +67,7 @@ static int plat_nand_probe(struct platform_device *pdev)
 	data->chip.select_chip = pdata->ctrl.select_chip;
 	data->chip.legacy.write_buf = pdata->ctrl.write_buf;
 	data->chip.legacy.read_buf = pdata->ctrl.read_buf;
-	data->chip.chip_delay = pdata->chip.chip_delay;
+	data->chip.legacy.chip_delay = pdata->chip.chip_delay;
 	data->chip.options |= pdata->chip.options;
 	data->chip.bbt_options |= pdata->chip.bbt_options;
 
diff --git a/drivers/mtd/nand/raw/s3c2410.c b/drivers/mtd/nand/raw/s3c2410.c
index 87b5162857f8..d2e42e9d0e8c 100644
--- a/drivers/mtd/nand/raw/s3c2410.c
+++ b/drivers/mtd/nand/raw/s3c2410.c
@@ -867,7 +867,7 @@ static void s3c2410_nand_init_chip(struct s3c2410_nand_info *info,
 	chip->legacy.write_buf    = s3c2410_nand_write_buf;
 	chip->legacy.read_buf     = s3c2410_nand_read_buf;
 	chip->select_chip  = s3c2410_nand_select_chip;
-	chip->chip_delay   = 50;
+	chip->legacy.chip_delay   = 50;
 	nand_set_controller_data(chip, nmtd);
 	chip->options	   = set->options;
 	chip->controller   = &info->controller;
diff --git a/drivers/mtd/nand/raw/sh_flctl.c b/drivers/mtd/nand/raw/sh_flctl.c
index 40a4159924a7..c0c0798f268f 100644
--- a/drivers/mtd/nand/raw/sh_flctl.c
+++ b/drivers/mtd/nand/raw/sh_flctl.c
@@ -1178,7 +1178,7 @@ static int flctl_probe(struct platform_device *pdev)
 
 	/* Set address of hardware control function */
 	/* 20 us command delay time */
-	nand->chip_delay = 20;
+	nand->legacy.chip_delay = 20;
 
 	nand->legacy.read_byte = flctl_read_byte;
 	nand->legacy.write_buf = flctl_write_buf;
diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c
index 3d14408cbbce..c82f26c8b58c 100644
--- a/drivers/mtd/nand/raw/sharpsl.c
+++ b/drivers/mtd/nand/raw/sharpsl.c
@@ -159,7 +159,7 @@ static int sharpsl_nand_probe(struct platform_device *pdev)
 	this->legacy.cmd_ctrl = sharpsl_nand_hwcontrol;
 	this->legacy.dev_ready = sharpsl_nand_dev_ready;
 	/* 15 us command delay time */
-	this->chip_delay = 15;
+	this->legacy.chip_delay = 15;
 	/* set eccmode using hardware ECC */
 	this->ecc.mode = NAND_ECC_HW;
 	this->ecc.size = 256;
diff --git a/drivers/mtd/nand/raw/socrates_nand.c b/drivers/mtd/nand/raw/socrates_nand.c
index 604ef7508f14..8be9a50c7880 100644
--- a/drivers/mtd/nand/raw/socrates_nand.c
+++ b/drivers/mtd/nand/raw/socrates_nand.c
@@ -162,7 +162,7 @@ static int socrates_nand_probe(struct platform_device *ofdev)
 	nand_chip->ecc.algo = NAND_ECC_HAMMING;
 
 	/* TODO: I have no idea what real delay is. */
-	nand_chip->chip_delay = 20;		/* 20us command delay time */
+	nand_chip->legacy.chip_delay = 20;	/* 20us command delay time */
 
 	dev_set_drvdata(&ofdev->dev, host);
 
diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c
index 37e23aec4bce..51b1a548064b 100644
--- a/drivers/mtd/nand/raw/sunxi_nand.c
+++ b/drivers/mtd/nand/raw/sunxi_nand.c
@@ -1912,7 +1912,7 @@ static int sunxi_nand_chip_init(struct device *dev, struct sunxi_nfc *nfc,
 
 	nand = &chip->nand;
 	/* Default tR value specified in the ONFI spec (chapter 4.15.1) */
-	nand->chip_delay = 200;
+	nand->legacy.chip_delay = 200;
 	nand->controller = &nfc->controller;
 	nand->controller->ops = &sunxi_nand_controller_ops;
 
diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index 7e49272ecef6..3297621241d2 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -422,7 +422,7 @@ static int tmio_probe(struct platform_device *dev)
 		nand_chip->badblock_pattern = data->badblock_pattern;
 
 	/* 15 us command delay time */
-	nand_chip->chip_delay = 15;
+	nand_chip->legacy.chip_delay = 15;
 
 	retval = devm_request_irq(&dev->dev, irq, &tmio_irq, 0,
 				  dev_name(&dev->dev), tmio);
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index 46cfb11c5502..3a99c8e3f944 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -334,7 +334,7 @@ static int __init txx9ndfmc_probe(struct platform_device *dev)
 		chip->ecc.hwctl = txx9ndfmc_enable_hwecc;
 		chip->ecc.mode = NAND_ECC_HW;
 		chip->ecc.strength = 1;
-		chip->chip_delay = 100;
+		chip->legacy.chip_delay = 100;
 		chip->controller = &drvdata->controller;
 
 		nand_set_controller_data(chip, txx9_priv);
diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c
index dcce487f6517..a234a5cb4868 100644
--- a/drivers/mtd/nand/raw/xway_nand.c
+++ b/drivers/mtd/nand/raw/xway_nand.c
@@ -180,7 +180,7 @@ static int xway_nand_probe(struct platform_device *pdev)
 	data->chip.legacy.write_buf = xway_write_buf;
 	data->chip.legacy.read_buf = xway_read_buf;
 	data->chip.legacy.read_byte = xway_read_byte;
-	data->chip.chip_delay = 30;
+	data->chip.legacy.chip_delay = 30;
 
 	data->chip.ecc.mode = NAND_ECC_SOFT;
 	data->chip.ecc.algo = NAND_ECC_HAMMING;
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 02ac70a30c63..992d78d29674 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1192,6 +1192,8 @@ int nand_op_parser_exec_op(struct nand_chip *chip,
  * @erase: erase function
  * @set_features: set the NAND chip features
  * @get_features: get the NAND chip features
+ * @chip_delay: chip dependent delay for transferring data from array to read
+ *		regs (tR).
  *
  * If you look at this structure you're already wrong. These fields/hooks are
  * all deprecated.
@@ -1215,6 +1217,7 @@ struct nand_legacy {
 			    u8 *subfeature_para);
 	int (*get_features)(struct nand_chip *chip, int feature_addr,
 			    u8 *subfeature_para);
+	int chip_delay;
 };
 
 /**
@@ -1236,8 +1239,6 @@ struct nand_legacy {
  * @buf_align:		minimum buffer alignment required by a platform
  * @dummy_controller:	dummy controller implementation for drivers that can
  *			only control a single chip
- * @chip_delay:		[BOARDSPECIFIC] chip dependent delay for transferring
- *			data from array to read regs (tR).
  * @state:		[INTERN] the current state of the NAND device
  * @oob_poi:		"poison value buffer," used for laying out OOB data
  *			before writing
@@ -1317,7 +1318,6 @@ struct nand_chip {
 	int (*setup_data_interface)(struct nand_chip *chip, int chipnr,
 				    const struct nand_data_interface *conf);
 
-	int chip_delay;
 	unsigned int options;
 	unsigned int bbt_options;
 
-- 
cgit v1.2.3


From 0b4e61c1c26e0f34c9a5f6c35edee16cc26a72fd Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 7 Sep 2018 00:38:42 +0200
Subject: mtd: rawnand: Move function prototypes after struct declarations

Move nand_scan[_with_ids]() and nand_wait_ready() at the end of the
file where all function prototype lies. This will also allow us to get
rid of the nand_flash_dev forward declaration.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/rawnand.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 992d78d29674..15183b73fed2 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -27,18 +27,6 @@
 struct nand_chip;
 struct nand_flash_dev;
 
-/* Scan and identify a NAND device */
-int nand_scan_with_ids(struct nand_chip *chip, unsigned int max_chips,
-		       struct nand_flash_dev *ids);
-
-static inline int nand_scan(struct nand_chip *chip, unsigned int max_chips)
-{
-	return nand_scan_with_ids(chip, max_chips, NULL);
-}
-
-/* Internal helper for board drivers which need to override command function */
-void nand_wait_ready(struct nand_chip *chip);
-
 /* The maximum number of NAND chips in an array */
 #define NAND_MAX_CHIPS		8
 
@@ -1739,6 +1727,18 @@ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len,
 int nand_write_data_op(struct nand_chip *chip, const void *buf,
 		       unsigned int len, bool force_8bit);
 
+/* Scan and identify a NAND device */
+int nand_scan_with_ids(struct nand_chip *chip, unsigned int max_chips,
+		       struct nand_flash_dev *ids);
+
+static inline int nand_scan(struct nand_chip *chip, unsigned int max_chips)
+{
+	return nand_scan_with_ids(chip, max_chips, NULL);
+}
+
+/* Internal helper for board drivers which need to override command function */
+void nand_wait_ready(struct nand_chip *chip);
+
 /*
  * Free resources held by the NAND device, must be called on error after a
  * sucessful nand_scan().
-- 
cgit v1.2.3


From 394938eadff2b875c099529452ffb7f7303f1547 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 7 Sep 2018 00:38:43 +0200
Subject: mtd: rawnand: Get rid of nand_flash_dev forward declation

nand_scan[with_ids]() have been moved at the end of the file. We can
now get rid of of the nand_flash_dev forward declaration.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/rawnand.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 15183b73fed2..768415ed1159 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -25,7 +25,6 @@
 #include <linux/types.h>
 
 struct nand_chip;
-struct nand_flash_dev;
 
 /* The maximum number of NAND chips in an array */
 #define NAND_MAX_CHIPS		8
-- 
cgit v1.2.3


From d16397d57a90d8c35bef3d8104460bf18e111fa7 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 7 Sep 2018 00:38:44 +0200
Subject: mtd: rawnand: Get rid of the duplicate nand_chip forward declaration

There's already a forward declaration of nand_chip at the beginning of
the file. Get rid of this one.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/rawnand.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 768415ed1159..608279104aae 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -210,9 +210,6 @@ enum nand_ecc_algo {
 #define NAND_CI_CELLTYPE_MSK	0x0C
 #define NAND_CI_CELLTYPE_SHIFT	2
 
-/* Keep gcc happy */
-struct nand_chip;
-
 /* ONFI version bits */
 #define ONFI_VERSION_1_0		BIT(1)
 #define ONFI_VERSION_2_0		BIT(2)
-- 
cgit v1.2.3


From 4114f97c41cd3992724f450f595417e4432737a0 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 7 Sep 2018 00:38:45 +0200
Subject: mtd: rawnand: Get rid of a few unused definitions

Those definitions are not used, let's remove them.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_timings.c | 14 --------------
 include/linux/mtd/rawnand.h         |  8 --------
 2 files changed, 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/nand_timings.c b/drivers/mtd/nand/raw/nand_timings.c
index ebc7b5f76f77..cb4f0007b65c 100644
--- a/drivers/mtd/nand/raw/nand_timings.c
+++ b/drivers/mtd/nand/raw/nand_timings.c
@@ -270,20 +270,6 @@ static const struct nand_data_interface onfi_sdr_timings[] = {
 	},
 };
 
-/**
- * onfi_async_timing_mode_to_sdr_timings - [NAND Interface] Retrieve NAND
- * timings according to the given ONFI timing mode
- * @mode: ONFI timing mode
- */
-const struct nand_sdr_timings *onfi_async_timing_mode_to_sdr_timings(int mode)
-{
-	if (mode < 0 || mode >= ARRAY_SIZE(onfi_sdr_timings))
-		return ERR_PTR(-EINVAL);
-
-	return &onfi_sdr_timings[mode].timings.sdr;
-}
-EXPORT_SYMBOL(onfi_async_timing_mode_to_sdr_timings);
-
 /**
  * onfi_fill_data_interface - [NAND Interface] Initialize a data interface from
  * given ONFI mode
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 608279104aae..8aa8b57ca4b1 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -119,10 +119,6 @@ enum nand_ecc_algo {
 #define NAND_ECC_GENERIC_ERASED_CHECK	BIT(0)
 #define NAND_ECC_MAXIMIZE		BIT(1)
 
-/* Bit mask for flags passed to do_nand_read_ecc */
-#define NAND_GET_DEVICE		0x80
-
-
 /*
  * Option constants for bizarre disfunctionality and real
  * features.
@@ -163,9 +159,7 @@ enum nand_ecc_algo {
 #define NAND_SAMSUNG_LP_OPTIONS NAND_CACHEPRG
 
 /* Macros to identify the above */
-#define NAND_HAS_CACHEPROG(chip) ((chip->options & NAND_CACHEPRG))
 #define NAND_HAS_SUBPAGE_READ(chip) ((chip->options & NAND_SUBPAGE_READ))
-#define NAND_HAS_SUBPAGE_WRITE(chip) !((chip)->options & NAND_NO_SUBPAGE_WRITE)
 
 /* Non chip related options */
 /* This option skips the bbt scan during initialization. */
@@ -1649,8 +1643,6 @@ static inline int nand_opcode_8bits(unsigned int command)
 	return 0;
 }
 
-/* get timing characteristics from ONFI timing mode. */
-const struct nand_sdr_timings *onfi_async_timing_mode_to_sdr_timings(int mode);
 
 int nand_check_erased_ecc_chunk(void *data, int datalen,
 				void *ecc, int ecclen,
-- 
cgit v1.2.3


From c7921bb32ab616462cefb9c2f3dd81d85d32b948 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 7 Sep 2018 00:38:46 +0200
Subject: mtd: rawnand: Move platform_nand_xxx definitions out of rawnand.h

platform_nand_xxx definitions are just used by the plat_nand driver.
Let's move those definitions out of the core/driver-agnostic rawnand.h
header.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 arch/arm/mach-ep93xx/snappercl15.c      |  3 +-
 arch/arm/mach-ep93xx/ts72xx.c           |  3 +-
 arch/arm/mach-imx/mach-qong.c           |  2 +-
 arch/arm/mach-ixp4xx/ixdp425-setup.c    |  1 +
 arch/arm/mach-omap1/board-fsample.c     |  3 +-
 arch/arm/mach-omap1/board-h2.c          |  3 +-
 arch/arm/mach-omap1/board-h3.c          |  2 +-
 arch/arm/mach-omap1/board-perseus2.c    |  3 +-
 arch/arm/mach-orion5x/ts78xx-setup.c    |  3 +-
 arch/arm/mach-pxa/balloon3.c            |  3 +-
 arch/arm/mach-pxa/em-x270.c             |  3 +-
 arch/arm/mach-pxa/palmtx.c              |  3 +-
 arch/mips/alchemy/devboards/db1200.c    |  3 +-
 arch/mips/alchemy/devboards/db1300.c    |  3 +-
 arch/mips/alchemy/devboards/db1550.c    |  3 +-
 arch/mips/netlogic/xlr/platform-flash.c |  3 +-
 arch/mips/pnx833x/common/platform.c     |  3 +-
 arch/mips/rb532/devices.c               |  3 +-
 arch/sh/boards/mach-migor/setup.c       |  2 +-
 drivers/mtd/nand/raw/plat_nand.c        |  3 +-
 include/linux/mtd/platnand.h            | 74 +++++++++++++++++++++++++++++++++
 include/linux/mtd/rawnand.h             | 60 --------------------------
 22 files changed, 94 insertions(+), 95 deletions(-)
 create mode 100644 include/linux/mtd/platnand.h

(limited to 'include/linux')

diff --git a/arch/arm/mach-ep93xx/snappercl15.c b/arch/arm/mach-ep93xx/snappercl15.c
index 1dad83a0bc5b..cf0cb58b3454 100644
--- a/arch/arm/mach-ep93xx/snappercl15.c
+++ b/arch/arm/mach-ep93xx/snappercl15.c
@@ -23,8 +23,7 @@
 #include <linux/i2c.h>
 #include <linux/fb.h>
 
-#include <linux/mtd/partitions.h>
-#include <linux/mtd/rawnand.h>
+#include <linux/mtd/platnand.h>
 
 #include <mach/hardware.h>
 #include <linux/platform_data/video-ep93xx.h>
diff --git a/arch/arm/mach-ep93xx/ts72xx.c b/arch/arm/mach-ep93xx/ts72xx.c
index 188bf02595c5..c6a533699b00 100644
--- a/arch/arm/mach-ep93xx/ts72xx.c
+++ b/arch/arm/mach-ep93xx/ts72xx.c
@@ -16,8 +16,7 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
 #include <linux/spi/spi.h>
 #include <linux/spi/flash.h>
 #include <linux/spi/mmc_spi.h>
diff --git a/arch/arm/mach-imx/mach-qong.c b/arch/arm/mach-imx/mach-qong.c
index 48972944bb95..5c5df8ca38dd 100644
--- a/arch/arm/mach-imx/mach-qong.c
+++ b/arch/arm/mach-imx/mach-qong.c
@@ -18,7 +18,7 @@
 #include <linux/memory.h>
 #include <linux/platform_device.h>
 #include <linux/mtd/physmap.h>
-#include <linux/mtd/rawnand.h>
+#include <linux/mtd/platnand.h>
 #include <linux/gpio.h>
 
 #include <asm/mach-types.h>
diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c
index 797e7edc7124..57d7df79d838 100644
--- a/arch/arm/mach-ixp4xx/ixdp425-setup.c
+++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c
@@ -20,6 +20,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/rawnand.h>
 #include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
 #include <linux/delay.h>
 #include <linux/gpio.h>
 #include <asm/types.h>
diff --git a/arch/arm/mach-omap1/board-fsample.c b/arch/arm/mach-omap1/board-fsample.c
index e9f512a0602e..4a0a66815ca0 100644
--- a/arch/arm/mach-omap1/board-fsample.c
+++ b/arch/arm/mach-omap1/board-fsample.c
@@ -16,8 +16,7 @@
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
 #include <linux/mtd/physmap.h>
 #include <linux/input.h>
 #include <linux/smc91x.h>
diff --git a/arch/arm/mach-omap1/board-h2.c b/arch/arm/mach-omap1/board-h2.c
index d5dd2acd6f78..9d9a6ca15df0 100644
--- a/arch/arm/mach-omap1/board-h2.c
+++ b/arch/arm/mach-omap1/board-h2.c
@@ -24,8 +24,7 @@
 #include <linux/delay.h>
 #include <linux/i2c.h>
 #include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
 #include <linux/mtd/physmap.h>
 #include <linux/input.h>
 #include <linux/mfd/tps65010.h>
diff --git a/arch/arm/mach-omap1/board-h3.c b/arch/arm/mach-omap1/board-h3.c
index a75856fe4259..cd6e02c5c01a 100644
--- a/arch/arm/mach-omap1/board-h3.c
+++ b/arch/arm/mach-omap1/board-h3.c
@@ -23,7 +23,7 @@
 #include <linux/workqueue.h>
 #include <linux/i2c.h>
 #include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
+#include <linux/mtd/platnand.h>
 #include <linux/mtd/partitions.h>
 #include <linux/mtd/physmap.h>
 #include <linux/input.h>
diff --git a/arch/arm/mach-omap1/board-perseus2.c b/arch/arm/mach-omap1/board-perseus2.c
index c61c7c7520ca..06a584fef5b8 100644
--- a/arch/arm/mach-omap1/board-perseus2.c
+++ b/arch/arm/mach-omap1/board-perseus2.c
@@ -16,8 +16,7 @@
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
 #include <linux/mtd/physmap.h>
 #include <linux/input.h>
 #include <linux/smc91x.h>
diff --git a/arch/arm/mach-orion5x/ts78xx-setup.c b/arch/arm/mach-orion5x/ts78xx-setup.c
index aac2c6eb35e2..fda9b75c3a33 100644
--- a/arch/arm/mach-orion5x/ts78xx-setup.c
+++ b/arch/arm/mach-orion5x/ts78xx-setup.c
@@ -16,8 +16,7 @@
 #include <linux/platform_device.h>
 #include <linux/mv643xx_eth.h>
 #include <linux/ata_platform.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
 #include <linux/timeriomem-rng.h>
 #include <asm/mach-types.h>
 #include <asm/mach/arch.h>
diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
index 256e60c38a6d..c52c081eb6d9 100644
--- a/arch/arm/mach-pxa/balloon3.c
+++ b/arch/arm/mach-pxa/balloon3.c
@@ -25,11 +25,10 @@
 #include <linux/ioport.h>
 #include <linux/ucb1400.h>
 #include <linux/mtd/mtd.h>
-#include <linux/mtd/partitions.h>
 #include <linux/types.h>
 #include <linux/platform_data/pcf857x.h>
 #include <linux/platform_data/i2c-pxa.h>
-#include <linux/mtd/rawnand.h>
+#include <linux/mtd/platnand.h>
 #include <linux/mtd/physmap.h>
 #include <linux/regulator/max1586.h>
 
diff --git a/arch/arm/mach-pxa/em-x270.c b/arch/arm/mach-pxa/em-x270.c
index 3acb945a2628..c30d20e1fb7a 100644
--- a/arch/arm/mach-pxa/em-x270.c
+++ b/arch/arm/mach-pxa/em-x270.c
@@ -15,8 +15,7 @@
 
 #include <linux/dm9000.h>
 #include <linux/platform_data/rtc-v3020.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
 #include <linux/mtd/physmap.h>
 #include <linux/input.h>
 #include <linux/gpio_keys.h>
diff --git a/arch/arm/mach-pxa/palmtx.c b/arch/arm/mach-pxa/palmtx.c
index 36ea32c1bbcc..1d06a8e91d8f 100644
--- a/arch/arm/mach-pxa/palmtx.c
+++ b/arch/arm/mach-pxa/palmtx.c
@@ -28,8 +28,7 @@
 #include <linux/wm97xx.h>
 #include <linux/power_supply.h>
 #include <linux/usb/gpio_vbus.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/physmap.h>
 
diff --git a/arch/mips/alchemy/devboards/db1200.c b/arch/mips/alchemy/devboards/db1200.c
index 97dc74f7f41a..4bf02f96ab7f 100644
--- a/arch/mips/alchemy/devboards/db1200.c
+++ b/arch/mips/alchemy/devboards/db1200.c
@@ -29,8 +29,7 @@
 #include <linux/leds.h>
 #include <linux/mmc/host.h>
 #include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
 #include <linux/platform_device.h>
 #include <linux/serial_8250.h>
 #include <linux/spi/spi.h>
diff --git a/arch/mips/alchemy/devboards/db1300.c b/arch/mips/alchemy/devboards/db1300.c
index b813dc1c1682..ad7dd8e89598 100644
--- a/arch/mips/alchemy/devboards/db1300.c
+++ b/arch/mips/alchemy/devboards/db1300.c
@@ -19,8 +19,7 @@
 #include <linux/mmc/host.h>
 #include <linux/module.h>
 #include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
 #include <linux/platform_device.h>
 #include <linux/smsc911x.h>
 #include <linux/wm97xx.h>
diff --git a/arch/mips/alchemy/devboards/db1550.c b/arch/mips/alchemy/devboards/db1550.c
index 65f6b7184fbe..7700ad0b93b4 100644
--- a/arch/mips/alchemy/devboards/db1550.c
+++ b/arch/mips/alchemy/devboards/db1550.c
@@ -13,8 +13,7 @@
 #include <linux/io.h>
 #include <linux/interrupt.h>
 #include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/spi/spi.h>
diff --git a/arch/mips/netlogic/xlr/platform-flash.c b/arch/mips/netlogic/xlr/platform-flash.c
index 4f76b85b44c9..cf9162284b07 100644
--- a/arch/mips/netlogic/xlr/platform-flash.c
+++ b/arch/mips/netlogic/xlr/platform-flash.c
@@ -19,8 +19,7 @@
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/physmap.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
 
 #include <asm/netlogic/haldefs.h>
 #include <asm/netlogic/xlr/iomap.h>
diff --git a/arch/mips/pnx833x/common/platform.c b/arch/mips/pnx833x/common/platform.c
index 33d0f070b33d..dafbf027fad0 100644
--- a/arch/mips/pnx833x/common/platform.c
+++ b/arch/mips/pnx833x/common/platform.c
@@ -30,8 +30,7 @@
 #include <linux/resource.h>
 #include <linux/serial.h>
 #include <linux/serial_pnx8xxx.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
 
 #include <irq.h>
 #include <irq-mapping.h>
diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c
index 02a9e042fb44..2b23ad640f39 100644
--- a/arch/mips/rb532/devices.c
+++ b/arch/mips/rb532/devices.c
@@ -20,9 +20,8 @@
 #include <linux/ctype.h>
 #include <linux/string.h>
 #include <linux/platform_device.h>
-#include <linux/mtd/rawnand.h>
+#include <linux/mtd/platnand.h>
 #include <linux/mtd/mtd.h>
-#include <linux/mtd/partitions.h>
 #include <linux/gpio.h>
 #include <linux/gpio_keys.h>
 #include <linux/input.h>
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index ebcc4d5a67ce..f4ad33c6d2aa 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -14,7 +14,7 @@
 #include <linux/mmc/host.h>
 #include <linux/mtd/physmap.h>
 #include <linux/mfd/tmio.h>
-#include <linux/mtd/rawnand.h>
+#include <linux/mtd/platnand.h>
 #include <linux/i2c.h>
 #include <linux/regulator/fixed.h>
 #include <linux/regulator/machine.h>
diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c
index 156c9150fec4..86c536ddaf24 100644
--- a/drivers/mtd/nand/raw/plat_nand.c
+++ b/drivers/mtd/nand/raw/plat_nand.c
@@ -15,8 +15,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
-#include <linux/mtd/partitions.h>
+#include <linux/mtd/platnand.h>
 
 struct plat_nand_data {
 	struct nand_chip	chip;
diff --git a/include/linux/mtd/platnand.h b/include/linux/mtd/platnand.h
new file mode 100644
index 000000000000..bc11eb6b593b
--- /dev/null
+++ b/include/linux/mtd/platnand.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Copyright © 2000-2010 David Woodhouse <dwmw2@infradead.org>
+ *			  Steven J. Hill <sjhill@realitydiluted.com>
+ *			  Thomas Gleixner <tglx@linutronix.de>
+ *
+ * Contains all platform NAND related definitions.
+ */
+
+#ifndef __LINUX_MTD_PLATNAND_H
+#define __LINUX_MTD_PLATNAND_H
+
+#include <linux/mtd/partitions.h>
+#include <linux/mtd/rawnand.h>
+#include <linux/platform_device.h>
+
+/**
+ * struct platform_nand_chip - chip level device structure
+ * @nr_chips: max. number of chips to scan for
+ * @chip_offset: chip number offset
+ * @nr_partitions: number of partitions pointed to by partitions (or zero)
+ * @partitions: mtd partition list
+ * @chip_delay: R/B delay value in us
+ * @options: Option flags, e.g. 16bit buswidth
+ * @bbt_options: BBT option flags, e.g. NAND_BBT_USE_FLASH
+ * @part_probe_types: NULL-terminated array of probe types
+ */
+struct platform_nand_chip {
+	int nr_chips;
+	int chip_offset;
+	int nr_partitions;
+	struct mtd_partition *partitions;
+	int chip_delay;
+	unsigned int options;
+	unsigned int bbt_options;
+	const char **part_probe_types;
+};
+
+/**
+ * struct platform_nand_ctrl - controller level device structure
+ * @probe: platform specific function to probe/setup hardware
+ * @remove: platform specific function to remove/teardown hardware
+ * @dev_ready: platform specific function to read ready/busy pin
+ * @select_chip: platform specific chip select function
+ * @cmd_ctrl: platform specific function for controlling
+ *	      ALE/CLE/nCE. Also used to write command and address
+ * @write_buf: platform specific function for write buffer
+ * @read_buf: platform specific function for read buffer
+ * @priv: private data to transport driver specific settings
+ *
+ * All fields are optional and depend on the hardware driver requirements
+ */
+struct platform_nand_ctrl {
+	int (*probe)(struct platform_device *pdev);
+	void (*remove)(struct platform_device *pdev);
+	int (*dev_ready)(struct nand_chip *chip);
+	void (*select_chip)(struct nand_chip *chip, int cs);
+	void (*cmd_ctrl)(struct nand_chip *chip, int dat, unsigned int ctrl);
+	void (*write_buf)(struct nand_chip *chip, const uint8_t *buf, int len);
+	void (*read_buf)(struct nand_chip *chip, uint8_t *buf, int len);
+	void *priv;
+};
+
+/**
+ * struct platform_nand_data - container structure for platform-specific data
+ * @chip: chip level chip structure
+ * @ctrl: controller level device structure
+ */
+struct platform_nand_data {
+	struct platform_nand_chip chip;
+	struct platform_nand_ctrl ctrl;
+};
+
+#endif /* __LINUX_MTD_PLATNAND_H */
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 8aa8b57ca4b1..04e11a314e9c 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1540,66 +1540,6 @@ int nand_isbad_bbt(struct nand_chip *chip, loff_t offs, int allowbbt);
 int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
 		    int allowbbt);
 
-/**
- * struct platform_nand_chip - chip level device structure
- * @nr_chips:		max. number of chips to scan for
- * @chip_offset:	chip number offset
- * @nr_partitions:	number of partitions pointed to by partitions (or zero)
- * @partitions:		mtd partition list
- * @chip_delay:		R/B delay value in us
- * @options:		Option flags, e.g. 16bit buswidth
- * @bbt_options:	BBT option flags, e.g. NAND_BBT_USE_FLASH
- * @part_probe_types:	NULL-terminated array of probe types
- */
-struct platform_nand_chip {
-	int nr_chips;
-	int chip_offset;
-	int nr_partitions;
-	struct mtd_partition *partitions;
-	int chip_delay;
-	unsigned int options;
-	unsigned int bbt_options;
-	const char **part_probe_types;
-};
-
-/* Keep gcc happy */
-struct platform_device;
-
-/**
- * struct platform_nand_ctrl - controller level device structure
- * @probe:		platform specific function to probe/setup hardware
- * @remove:		platform specific function to remove/teardown hardware
- * @dev_ready:		platform specific function to read ready/busy pin
- * @select_chip:	platform specific chip select function
- * @cmd_ctrl:		platform specific function for controlling
- *			ALE/CLE/nCE. Also used to write command and address
- * @write_buf:		platform specific function for write buffer
- * @read_buf:		platform specific function for read buffer
- * @priv:		private data to transport driver specific settings
- *
- * All fields are optional and depend on the hardware driver requirements
- */
-struct platform_nand_ctrl {
-	int (*probe)(struct platform_device *pdev);
-	void (*remove)(struct platform_device *pdev);
-	int (*dev_ready)(struct nand_chip *chip);
-	void (*select_chip)(struct nand_chip *chip, int cs);
-	void (*cmd_ctrl)(struct nand_chip *chip, int dat, unsigned int ctrl);
-	void (*write_buf)(struct nand_chip *chip, const uint8_t *buf, int len);
-	void (*read_buf)(struct nand_chip *chip, uint8_t *buf, int len);
-	void *priv;
-};
-
-/**
- * struct platform_nand_data - container structure for platform-specific data
- * @chip:		chip level chip structure
- * @ctrl:		controller level device structure
- */
-struct platform_nand_data {
-	struct platform_nand_chip chip;
-	struct platform_nand_ctrl ctrl;
-};
-
 /* return the supported asynchronous timing mode. */
 static inline int onfi_get_async_timing_mode(struct nand_chip *chip)
 {
-- 
cgit v1.2.3


From 462f35d3e5e805657b48c76d3e7dd1c37e0ac62c Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 7 Sep 2018 00:38:47 +0200
Subject: mtd: rawnand: Inline onfi_get_async_timing_mode()

onfi_get_async_timing_mode() is only used in one place inside
nand_base.c. Let's inline the code and kill the helper.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/nand_base.c | 5 +++--
 include/linux/mtd/rawnand.h      | 9 ---------
 2 files changed, 3 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 2f8bbc3bca7a..136ccdc61a06 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -1310,8 +1310,9 @@ static int nand_init_data_interface(struct nand_chip *chip)
 	 * if the NAND does not support ONFI, fallback to the default ONFI
 	 * timing mode.
 	 */
-	modes = onfi_get_async_timing_mode(chip);
-	if (modes == ONFI_TIMING_MODE_UNKNOWN) {
+	if (chip->parameters.onfi) {
+		modes = chip->parameters.onfi->async_timing_mode;
+	} else {
 		if (!chip->onfi_timing_mode_default)
 			return 0;
 
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 04e11a314e9c..7f0e3dc222ed 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -1540,15 +1540,6 @@ int nand_isbad_bbt(struct nand_chip *chip, loff_t offs, int allowbbt);
 int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
 		    int allowbbt);
 
-/* return the supported asynchronous timing mode. */
-static inline int onfi_get_async_timing_mode(struct nand_chip *chip)
-{
-	if (!chip->parameters.onfi)
-		return ONFI_TIMING_MODE_UNKNOWN;
-
-	return chip->parameters.onfi->async_timing_mode;
-}
-
 int onfi_fill_data_interface(struct nand_chip *chip,
 			     enum nand_data_interface_type type,
 			     int timing_mode);
-- 
cgit v1.2.3


From 348d56a8c6067f3b2f1799d6f050f3be4bf5904b Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 7 Sep 2018 00:38:48 +0200
Subject: mtd: rawnand: Keep all internal stuff private

A lot of things defined in rawnand.h should not be exposed to NAND
controller drivers and should only be shared by core files.

Create the drivers/mtd/nand/raw/internals.h header to store such
definitions, and move all private defs to this header.

Also remove EXPORT_SYMBOLS() on functions that are not supposed to be
exposed.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/internals.h     | 98 ++++++++++++++++++++++++++++++++++++
 drivers/mtd/nand/raw/nand_amd.c      |  2 +-
 drivers/mtd/nand/raw/nand_base.c     | 21 ++++----
 drivers/mtd/nand/raw/nand_bbt.c      |  3 +-
 drivers/mtd/nand/raw/nand_hynix.c    |  3 +-
 drivers/mtd/nand/raw/nand_ids.c      |  4 +-
 drivers/mtd/nand/raw/nand_macronix.c |  2 +-
 drivers/mtd/nand/raw/nand_micron.c   |  3 +-
 drivers/mtd/nand/raw/nand_samsung.c  |  2 +-
 drivers/mtd/nand/raw/nand_timings.c  |  4 +-
 drivers/mtd/nand/raw/nand_toshiba.c  |  2 +-
 include/linux/mtd/rawnand.h          | 96 -----------------------------------
 12 files changed, 123 insertions(+), 117 deletions(-)
 create mode 100644 drivers/mtd/nand/raw/internals.h

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/internals.h b/drivers/mtd/nand/raw/internals.h
new file mode 100644
index 000000000000..1d2edddb6127
--- /dev/null
+++ b/drivers/mtd/nand/raw/internals.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018 - Bootlin
+ *
+ * Author: Boris Brezillon <boris.brezillon@bootlin.com>
+ *
+ * Header containing internal definitions to be used only by core files.
+ * NAND controller drivers should not include this file.
+ */
+
+#ifndef __LINUX_RAWNAND_INTERNALS
+#define __LINUX_RAWNAND_INTERNALS
+
+#include <linux/mtd/rawnand.h>
+
+/*
+ * NAND Flash Manufacturer ID Codes
+ */
+#define NAND_MFR_TOSHIBA	0x98
+#define NAND_MFR_ESMT		0xc8
+#define NAND_MFR_SAMSUNG	0xec
+#define NAND_MFR_FUJITSU	0x04
+#define NAND_MFR_NATIONAL	0x8f
+#define NAND_MFR_RENESAS	0x07
+#define NAND_MFR_STMICRO	0x20
+#define NAND_MFR_HYNIX		0xad
+#define NAND_MFR_MICRON		0x2c
+#define NAND_MFR_AMD		0x01
+#define NAND_MFR_MACRONIX	0xc2
+#define NAND_MFR_EON		0x92
+#define NAND_MFR_SANDISK	0x45
+#define NAND_MFR_INTEL		0x89
+#define NAND_MFR_ATO		0x9b
+#define NAND_MFR_WINBOND	0xef
+
+/**
+ * struct nand_manufacturer_ops - NAND Manufacturer operations
+ * @detect: detect the NAND memory organization and capabilities
+ * @init: initialize all vendor specific fields (like the ->read_retry()
+ *	  implementation) if any.
+ * @cleanup: the ->init() function may have allocated resources, ->cleanup()
+ *	     is here to let vendor specific code release those resources.
+ * @fixup_onfi_param_page: apply vendor specific fixups to the ONFI parameter
+ *			   page. This is called after the checksum is verified.
+ */
+struct nand_manufacturer_ops {
+	void (*detect)(struct nand_chip *chip);
+	int (*init)(struct nand_chip *chip);
+	void (*cleanup)(struct nand_chip *chip);
+	void (*fixup_onfi_param_page)(struct nand_chip *chip,
+				      struct nand_onfi_params *p);
+};
+
+/**
+ * struct nand_manufacturer - NAND Flash Manufacturer structure
+ * @name: Manufacturer name
+ * @id: manufacturer ID code of device.
+ * @ops: manufacturer operations
+ */
+struct nand_manufacturer {
+	int id;
+	char *name;
+	const struct nand_manufacturer_ops *ops;
+};
+
+
+extern struct nand_flash_dev nand_flash_ids[];
+
+extern const struct nand_manufacturer_ops toshiba_nand_manuf_ops;
+extern const struct nand_manufacturer_ops samsung_nand_manuf_ops;
+extern const struct nand_manufacturer_ops hynix_nand_manuf_ops;
+extern const struct nand_manufacturer_ops micron_nand_manuf_ops;
+extern const struct nand_manufacturer_ops amd_nand_manuf_ops;
+extern const struct nand_manufacturer_ops macronix_nand_manuf_ops;
+
+/* Core functions */
+const struct nand_manufacturer *nand_get_manufacturer(u8 id);
+int nand_markbad_bbm(struct nand_chip *chip, loff_t ofs);
+int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
+		    int allowbbt);
+int onfi_fill_data_interface(struct nand_chip *chip,
+			     enum nand_data_interface_type type,
+			     int timing_mode);
+int nand_get_features(struct nand_chip *chip, int addr, u8 *subfeature_param);
+int nand_set_features(struct nand_chip *chip, int addr, u8 *subfeature_param);
+int nand_read_page_raw_notsupp(struct nand_chip *chip, u8 *buf,
+			       int oob_required, int page);
+int nand_write_page_raw_notsupp(struct nand_chip *chip, const u8 *buf,
+				int oob_required, int page);
+int nand_exit_status_op(struct nand_chip *chip);
+void nand_decode_ext_id(struct nand_chip *chip);
+
+/* BBT functions */
+int nand_markbad_bbt(struct nand_chip *chip, loff_t offs);
+int nand_isreserved_bbt(struct nand_chip *chip, loff_t offs);
+int nand_isbad_bbt(struct nand_chip *chip, loff_t offs, int allowbbt);
+
+#endif /* __LINUX_RAWNAND_INTERNALS */
diff --git a/drivers/mtd/nand/raw/nand_amd.c b/drivers/mtd/nand/raw/nand_amd.c
index 22f060f38123..890c5b43e03c 100644
--- a/drivers/mtd/nand/raw/nand_amd.c
+++ b/drivers/mtd/nand/raw/nand_amd.c
@@ -15,7 +15,7 @@
  * GNU General Public License for more details.
  */
 
-#include <linux/mtd/rawnand.h>
+#include "internals.h"
 
 static void amd_nand_decode_id(struct nand_chip *chip)
 {
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 136ccdc61a06..d48c588d1abe 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -39,7 +39,6 @@
 #include <linux/nmi.h>
 #include <linux/types.h>
 #include <linux/mtd/mtd.h>
-#include <linux/mtd/rawnand.h>
 #include <linux/mtd/nand_ecc.h>
 #include <linux/mtd/nand_bch.h>
 #include <linux/interrupt.h>
@@ -48,6 +47,8 @@
 #include <linux/mtd/partitions.h>
 #include <linux/of.h>
 
+#include "internals.h"
+
 static int nand_get_device(struct mtd_info *mtd, int new_state);
 
 static int nand_do_write_oob(struct mtd_info *mtd, loff_t to,
@@ -1319,7 +1320,6 @@ static int nand_init_data_interface(struct nand_chip *chip)
 		modes = GENMASK(chip->onfi_timing_mode_default, 0);
 	}
 
-
 	for (mode = fls(modes) - 1; mode >= 0; mode--) {
 		ret = onfi_fill_data_interface(chip, NAND_SDR_IFACE, mode);
 		if (ret)
@@ -2043,7 +2043,6 @@ int nand_exit_status_op(struct nand_chip *chip)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(nand_exit_status_op);
 
 /**
  * nand_erase_op - Do an erase operation
@@ -2816,7 +2815,6 @@ int nand_get_features(struct nand_chip *chip, int addr,
 
 	return nand_get_features_op(chip, addr, subfeature_param);
 }
-EXPORT_SYMBOL_GPL(nand_get_features);
 
 /**
  * nand_set_features - wrapper to perform a SET_FEATURE
@@ -2838,7 +2836,6 @@ int nand_set_features(struct nand_chip *chip, int addr,
 
 	return nand_set_features_op(chip, addr, subfeature_param);
 }
-EXPORT_SYMBOL_GPL(nand_set_features);
 
 /**
  * nand_check_erased_buf - check if a buffer contains (almost) only 0xff data
@@ -2985,7 +2982,6 @@ int nand_read_page_raw_notsupp(struct nand_chip *chip, u8 *buf,
 {
 	return -ENOTSUPP;
 }
-EXPORT_SYMBOL(nand_read_page_raw_notsupp);
 
 /**
  * nand_read_page_raw - [INTERN] read raw page data without ecc
@@ -3729,7 +3725,7 @@ EXPORT_SYMBOL(nand_read_oob_std);
  * @chip: nand chip info structure
  * @page: page number to read
  */
-int nand_read_oob_syndrome(struct nand_chip *chip, int page)
+static int nand_read_oob_syndrome(struct nand_chip *chip, int page)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int length = mtd->oobsize;
@@ -3776,7 +3772,6 @@ int nand_read_oob_syndrome(struct nand_chip *chip, int page)
 
 	return 0;
 }
-EXPORT_SYMBOL(nand_read_oob_syndrome);
 
 /**
  * nand_write_oob_std - [REPLACEABLE] the most common OOB data write function
@@ -3798,7 +3793,7 @@ EXPORT_SYMBOL(nand_write_oob_std);
  * @chip: nand chip info structure
  * @page: page number to write
  */
-int nand_write_oob_syndrome(struct nand_chip *chip, int page)
+static int nand_write_oob_syndrome(struct nand_chip *chip, int page)
 {
 	struct mtd_info *mtd = nand_to_mtd(chip);
 	int chunk = chip->ecc.bytes + chip->ecc.prepad + chip->ecc.postpad;
@@ -3864,7 +3859,6 @@ int nand_write_oob_syndrome(struct nand_chip *chip, int page)
 
 	return nand_prog_page_end_op(chip);
 }
-EXPORT_SYMBOL(nand_write_oob_syndrome);
 
 /**
  * nand_do_read_oob - [INTERN] NAND read out-of-band
@@ -3989,7 +3983,6 @@ int nand_write_page_raw_notsupp(struct nand_chip *chip, const u8 *buf,
 {
 	return -ENOTSUPP;
 }
-EXPORT_SYMBOL(nand_write_page_raw_notsupp);
 
 /**
  * nand_write_page_raw - [INTERN] raw page write function
@@ -5579,6 +5572,12 @@ static void nand_manufacturer_cleanup(struct nand_chip *chip)
 		chip->manufacturer.desc->ops->cleanup(chip);
 }
 
+static const char *
+nand_manufacturer_name(const struct nand_manufacturer *manufacturer)
+{
+	return manufacturer ? manufacturer->name : "Unknown";
+}
+
 /*
  * Get the flash and manufacturer id and lookup if the type is supported.
  */
diff --git a/drivers/mtd/nand/raw/nand_bbt.c b/drivers/mtd/nand/raw/nand_bbt.c
index b838ecdde8fb..98a826838b60 100644
--- a/drivers/mtd/nand/raw/nand_bbt.c
+++ b/drivers/mtd/nand/raw/nand_bbt.c
@@ -61,13 +61,14 @@
 #include <linux/types.h>
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/bbm.h>
-#include <linux/mtd/rawnand.h>
 #include <linux/bitops.h>
 #include <linux/delay.h>
 #include <linux/vmalloc.h>
 #include <linux/export.h>
 #include <linux/string.h>
 
+#include "internals.h"
+
 #define BBT_BLOCK_GOOD		0x00
 #define BBT_BLOCK_WORN		0x01
 #define BBT_BLOCK_RESERVED	0x02
diff --git a/drivers/mtd/nand/raw/nand_hynix.c b/drivers/mtd/nand/raw/nand_hynix.c
index 7eec0d96909a..ac1b5c103968 100644
--- a/drivers/mtd/nand/raw/nand_hynix.c
+++ b/drivers/mtd/nand/raw/nand_hynix.c
@@ -15,10 +15,11 @@
  * GNU General Public License for more details.
  */
 
-#include <linux/mtd/rawnand.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
 
+#include "internals.h"
+
 #define NAND_HYNIX_CMD_SET_PARAMS	0x36
 #define NAND_HYNIX_CMD_APPLY_PARAMS	0x16
 
diff --git a/drivers/mtd/nand/raw/nand_ids.c b/drivers/mtd/nand/raw/nand_ids.c
index 5423c3bb388e..12d39ccd1cff 100644
--- a/drivers/mtd/nand/raw/nand_ids.c
+++ b/drivers/mtd/nand/raw/nand_ids.c
@@ -6,9 +6,11 @@
  * published by the Free Software Foundation.
  *
  */
-#include <linux/mtd/rawnand.h>
+
 #include <linux/sizes.h>
 
+#include "internals.h"
+
 #define LP_OPTIONS 0
 #define LP_OPTIONS16 (LP_OPTIONS | NAND_BUSWIDTH_16)
 
diff --git a/drivers/mtd/nand/raw/nand_macronix.c b/drivers/mtd/nand/raw/nand_macronix.c
index 49c546c97c6f..358dcc957bb2 100644
--- a/drivers/mtd/nand/raw/nand_macronix.c
+++ b/drivers/mtd/nand/raw/nand_macronix.c
@@ -15,7 +15,7 @@
  * GNU General Public License for more details.
  */
 
-#include <linux/mtd/rawnand.h>
+#include "internals.h"
 
 /*
  * Macronix AC series does not support using SET/GET_FEATURES to change
diff --git a/drivers/mtd/nand/raw/nand_micron.c b/drivers/mtd/nand/raw/nand_micron.c
index 1a5505ccbe54..b85e1c13b79e 100644
--- a/drivers/mtd/nand/raw/nand_micron.c
+++ b/drivers/mtd/nand/raw/nand_micron.c
@@ -15,9 +15,10 @@
  * GNU General Public License for more details.
  */
 
-#include <linux/mtd/rawnand.h>
 #include <linux/slab.h>
 
+#include "internals.h"
+
 /*
  * Special Micron status bit 3 indicates that the block has been
  * corrected by on-die ECC and should be rewritten.
diff --git a/drivers/mtd/nand/raw/nand_samsung.c b/drivers/mtd/nand/raw/nand_samsung.c
index ef022f62f74c..e46d4c492ad8 100644
--- a/drivers/mtd/nand/raw/nand_samsung.c
+++ b/drivers/mtd/nand/raw/nand_samsung.c
@@ -15,7 +15,7 @@
  * GNU General Public License for more details.
  */
 
-#include <linux/mtd/rawnand.h>
+#include "internals.h"
 
 static void samsung_nand_decode_id(struct nand_chip *chip)
 {
diff --git a/drivers/mtd/nand/raw/nand_timings.c b/drivers/mtd/nand/raw/nand_timings.c
index cb4f0007b65c..bea3062d71d6 100644
--- a/drivers/mtd/nand/raw/nand_timings.c
+++ b/drivers/mtd/nand/raw/nand_timings.c
@@ -11,7 +11,8 @@
 #include <linux/kernel.h>
 #include <linux/err.h>
 #include <linux/export.h>
-#include <linux/mtd/rawnand.h>
+
+#include "internals.h"
 
 #define ONFI_DYN_TIMING_MAX U16_MAX
 
@@ -325,4 +326,3 @@ int onfi_fill_data_interface(struct nand_chip *chip,
 
 	return 0;
 }
-EXPORT_SYMBOL(onfi_fill_data_interface);
diff --git a/drivers/mtd/nand/raw/nand_toshiba.c b/drivers/mtd/nand/raw/nand_toshiba.c
index 952fe9e62ab4..941ddc615190 100644
--- a/drivers/mtd/nand/raw/nand_toshiba.c
+++ b/drivers/mtd/nand/raw/nand_toshiba.c
@@ -15,7 +15,7 @@
  * GNU General Public License for more details.
  */
 
-#include <linux/mtd/rawnand.h>
+#include "internals.h"
 
 /* Bit for detecting BENAND */
 #define TOSHIBA_NAND_ID4_IS_BENAND		BIT(7)
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 7f0e3dc222ed..fbe7686cfc59 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -775,24 +775,6 @@ nand_get_sdr_timings(const struct nand_data_interface *conf)
 	return &conf->timings.sdr;
 }
 
-/**
- * struct nand_manufacturer_ops - NAND Manufacturer operations
- * @detect: detect the NAND memory organization and capabilities
- * @init: initialize all vendor specific fields (like the ->read_retry()
- *	  implementation) if any.
- * @cleanup: the ->init() function may have allocated resources, ->cleanup()
- *	     is here to let vendor specific code release those resources.
- * @fixup_onfi_param_page: apply vendor specific fixups to the ONFI parameter
- *			   page. This is called after the checksum is verified.
- */
-struct nand_manufacturer_ops {
-	void (*detect)(struct nand_chip *chip);
-	int (*init)(struct nand_chip *chip);
-	void (*cleanup)(struct nand_chip *chip);
-	void (*fixup_onfi_param_page)(struct nand_chip *chip,
-				      struct nand_onfi_params *p);
-};
-
 /**
  * struct nand_op_cmd_instr - Definition of a command instruction
  * @opcode: the command to issue in one cycle
@@ -1403,27 +1385,6 @@ static inline void *nand_get_manufacturer_data(struct nand_chip *chip)
 	return chip->manufacturer.priv;
 }
 
-/*
- * NAND Flash Manufacturer ID Codes
- */
-#define NAND_MFR_TOSHIBA	0x98
-#define NAND_MFR_ESMT		0xc8
-#define NAND_MFR_SAMSUNG	0xec
-#define NAND_MFR_FUJITSU	0x04
-#define NAND_MFR_NATIONAL	0x8f
-#define NAND_MFR_RENESAS	0x07
-#define NAND_MFR_STMICRO	0x20
-#define NAND_MFR_HYNIX		0xad
-#define NAND_MFR_MICRON		0x2c
-#define NAND_MFR_AMD		0x01
-#define NAND_MFR_MACRONIX	0xc2
-#define NAND_MFR_EON		0x92
-#define NAND_MFR_SANDISK	0x45
-#define NAND_MFR_INTEL		0x89
-#define NAND_MFR_ATO		0x9b
-#define NAND_MFR_WINBOND	0xef
-
-
 /*
  * A helper for defining older NAND chips where the second ID byte fully
  * defined the chip, including the geometry (chip size, eraseblock size, page
@@ -1503,46 +1464,7 @@ struct nand_flash_dev {
 	int onfi_timing_mode_default;
 };
 
-/**
- * struct nand_manufacturer - NAND Flash Manufacturer structure
- * @name:	Manufacturer name
- * @id:		manufacturer ID code of device.
- * @ops:	manufacturer operations
-*/
-struct nand_manufacturer {
-	int id;
-	char *name;
-	const struct nand_manufacturer_ops *ops;
-};
-
-const struct nand_manufacturer *nand_get_manufacturer(u8 id);
-
-static inline const char *
-nand_manufacturer_name(const struct nand_manufacturer *manufacturer)
-{
-	return manufacturer ? manufacturer->name : "Unknown";
-}
-
-extern struct nand_flash_dev nand_flash_ids[];
-
-extern const struct nand_manufacturer_ops toshiba_nand_manuf_ops;
-extern const struct nand_manufacturer_ops samsung_nand_manuf_ops;
-extern const struct nand_manufacturer_ops hynix_nand_manuf_ops;
-extern const struct nand_manufacturer_ops micron_nand_manuf_ops;
-extern const struct nand_manufacturer_ops amd_nand_manuf_ops;
-extern const struct nand_manufacturer_ops macronix_nand_manuf_ops;
-
 int nand_create_bbt(struct nand_chip *chip);
-int nand_markbad_bbt(struct nand_chip *chip, loff_t offs);
-int nand_markbad_bbm(struct nand_chip *chip, loff_t ofs);
-int nand_isreserved_bbt(struct nand_chip *chip, loff_t offs);
-int nand_isbad_bbt(struct nand_chip *chip, loff_t offs, int allowbbt);
-int nand_erase_nand(struct nand_chip *chip, struct erase_info *instr,
-		    int allowbbt);
-
-int onfi_fill_data_interface(struct nand_chip *chip,
-			     enum nand_data_interface_type type,
-			     int timing_mode);
 
 /*
  * Check if it is a SLC nand.
@@ -1574,7 +1496,6 @@ static inline int nand_opcode_8bits(unsigned int command)
 	return 0;
 }
 
-
 int nand_check_erased_ecc_chunk(void *data, int datalen,
 				void *ecc, int ecclen,
 				void *extraoob, int extraooblen,
@@ -1586,18 +1507,9 @@ int nand_ecc_choose_conf(struct nand_chip *chip,
 /* Default write_oob implementation */
 int nand_write_oob_std(struct nand_chip *chip, int page);
 
-/* Default write_oob syndrome implementation */
-int nand_write_oob_syndrome(struct nand_chip *chip, int page);
-
 /* Default read_oob implementation */
 int nand_read_oob_std(struct nand_chip *chip, int page);
 
-/* Default read_oob syndrome implementation */
-int nand_read_oob_syndrome(struct nand_chip *chip, int page);
-
-/* Wrapper to use in order for controllers/vendors to GET/SET FEATURES */
-int nand_get_features(struct nand_chip *chip, int addr, u8 *subfeature_param);
-int nand_set_features(struct nand_chip *chip, int addr, u8 *subfeature_param);
 /* Stub used by drivers that do not support GET/SET FEATURES operations */
 int nand_get_set_features_notsupp(struct nand_chip *chip, int addr,
 				  u8 *subfeature_param);
@@ -1605,14 +1517,10 @@ int nand_get_set_features_notsupp(struct nand_chip *chip, int addr,
 /* Default read_page_raw implementation */
 int nand_read_page_raw(struct nand_chip *chip, uint8_t *buf, int oob_required,
 		       int page);
-int nand_read_page_raw_notsupp(struct nand_chip *chip, u8 *buf,
-			       int oob_required, int page);
 
 /* Default write_page_raw implementation */
 int nand_write_page_raw(struct nand_chip *chip, const uint8_t *buf,
 			int oob_required, int page);
-int nand_write_page_raw_notsupp(struct nand_chip *chip, const u8 *buf,
-				int oob_required, int page);
 
 /* Reset and initialize a NAND device */
 int nand_reset(struct nand_chip *chip, int chipnr);
@@ -1622,7 +1530,6 @@ int nand_reset_op(struct nand_chip *chip);
 int nand_readid_op(struct nand_chip *chip, u8 addr, void *buf,
 		   unsigned int len);
 int nand_status_op(struct nand_chip *chip, u8 *status);
-int nand_exit_status_op(struct nand_chip *chip);
 int nand_erase_op(struct nand_chip *chip, unsigned int eraseblock);
 int nand_read_page_op(struct nand_chip *chip, unsigned int page,
 		      unsigned int offset_in_page, void *buf, unsigned int len);
@@ -1666,9 +1573,6 @@ void nand_cleanup(struct nand_chip *chip);
 /* Unregister the MTD device and calls nand_cleanup() */
 void nand_release(struct nand_chip *chip);
 
-/* Default extended ID decoding function */
-void nand_decode_ext_id(struct nand_chip *chip);
-
 /*
  * External helper for controller drivers that have to implement the WAITRDY
  * instruction and have no physical pin to check it.
-- 
cgit v1.2.3


From 1c325cc5077a88510afc08b1d2c75bcf18681f21 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 7 Sep 2018 00:38:50 +0200
Subject: mtd: rawnand: Move ONFI code to nand_onfi.c

This moves ONFI related code to nand_onfi.c and ONFI related
struct/macros to include/linux/mtd/onfi.h.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/Makefile    |   1 +
 drivers/mtd/nand/raw/internals.h |   7 +
 drivers/mtd/nand/raw/nand_base.c | 296 +------------------------------------
 drivers/mtd/nand/raw/nand_onfi.c | 305 +++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/onfi.h         | 178 +++++++++++++++++++++++
 include/linux/mtd/rawnand.h      | 164 +--------------------
 6 files changed, 496 insertions(+), 455 deletions(-)
 create mode 100644 drivers/mtd/nand/raw/nand_onfi.c
 create mode 100644 include/linux/mtd/onfi.h

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/Makefile b/drivers/mtd/nand/raw/Makefile
index 78f67de2e60b..cc9f50f3ad1b 100644
--- a/drivers/mtd/nand/raw/Makefile
+++ b/drivers/mtd/nand/raw/Makefile
@@ -58,6 +58,7 @@ obj-$(CONFIG_MTD_NAND_MTK)		+= mtk_ecc.o mtk_nand.o
 obj-$(CONFIG_MTD_NAND_TEGRA)		+= tegra_nand.o
 
 nand-objs := nand_base.o nand_legacy.o nand_bbt.o nand_timings.o nand_ids.o
+nand-objs += nand_onfi.o
 nand-objs += nand_amd.o
 nand-objs += nand_hynix.o
 nand-objs += nand_macronix.o
diff --git a/drivers/mtd/nand/raw/internals.h b/drivers/mtd/nand/raw/internals.h
index 289a4b8f7974..1ce720a8d756 100644
--- a/drivers/mtd/nand/raw/internals.h
+++ b/drivers/mtd/nand/raw/internals.h
@@ -88,8 +88,11 @@ int nand_read_page_raw_notsupp(struct nand_chip *chip, u8 *buf,
 int nand_write_page_raw_notsupp(struct nand_chip *chip, const u8 *buf,
 				int oob_required, int page);
 int nand_exit_status_op(struct nand_chip *chip);
+int nand_read_param_page_op(struct nand_chip *chip, u8 page, void *buf,
+			    unsigned int len);
 void nand_decode_ext_id(struct nand_chip *chip);
 void panic_nand_wait(struct nand_chip *chip, unsigned long timeo);
+void sanitize_string(uint8_t *s, size_t len);
 
 /* BBT functions */
 int nand_markbad_bbt(struct nand_chip *chip, loff_t offs);
@@ -101,4 +104,8 @@ void nand_legacy_set_defaults(struct nand_chip *chip);
 void nand_legacy_adjust_cmdfunc(struct nand_chip *chip);
 int nand_legacy_check_hooks(struct nand_chip *chip);
 
+/* ONFI functions */
+u16 onfi_crc16(u16 crc, u8 const *p, size_t len);
+int nand_onfi_detect(struct nand_chip *chip);
+
 #endif /* __LINUX_RAWNAND_INTERNALS */
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 4ef00cefd5da..812e8bd6ad82 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -986,8 +986,8 @@ EXPORT_SYMBOL_GPL(nand_read_page_op);
  *
  * Returns 0 on success, a negative error code otherwise.
  */
-static int nand_read_param_page_op(struct nand_chip *chip, u8 page, void *buf,
-				   unsigned int len)
+int nand_read_param_page_op(struct nand_chip *chip, u8 page, void *buf,
+			    unsigned int len)
 {
 	unsigned int i;
 	u8 *p = buf;
@@ -4370,7 +4370,7 @@ static void nand_set_defaults(struct nand_chip *chip)
 }
 
 /* Sanitize ONFI strings so we can safely print them */
-static void sanitize_string(uint8_t *s, size_t len)
+void sanitize_string(uint8_t *s, size_t len)
 {
 	ssize_t i;
 
@@ -4387,294 +4387,6 @@ static void sanitize_string(uint8_t *s, size_t len)
 	strim(s);
 }
 
-static u16 onfi_crc16(u16 crc, u8 const *p, size_t len)
-{
-	int i;
-	while (len--) {
-		crc ^= *p++ << 8;
-		for (i = 0; i < 8; i++)
-			crc = (crc << 1) ^ ((crc & 0x8000) ? 0x8005 : 0);
-	}
-
-	return crc;
-}
-
-/* Parse the Extended Parameter Page. */
-static int nand_flash_detect_ext_param_page(struct nand_chip *chip,
-					    struct nand_onfi_params *p)
-{
-	struct onfi_ext_param_page *ep;
-	struct onfi_ext_section *s;
-	struct onfi_ext_ecc_info *ecc;
-	uint8_t *cursor;
-	int ret;
-	int len;
-	int i;
-
-	len = le16_to_cpu(p->ext_param_page_length) * 16;
-	ep = kmalloc(len, GFP_KERNEL);
-	if (!ep)
-		return -ENOMEM;
-
-	/* Send our own NAND_CMD_PARAM. */
-	ret = nand_read_param_page_op(chip, 0, NULL, 0);
-	if (ret)
-		goto ext_out;
-
-	/* Use the Change Read Column command to skip the ONFI param pages. */
-	ret = nand_change_read_column_op(chip,
-					 sizeof(*p) * p->num_of_param_pages,
-					 ep, len, true);
-	if (ret)
-		goto ext_out;
-
-	ret = -EINVAL;
-	if ((onfi_crc16(ONFI_CRC_BASE, ((uint8_t *)ep) + 2, len - 2)
-		!= le16_to_cpu(ep->crc))) {
-		pr_debug("fail in the CRC.\n");
-		goto ext_out;
-	}
-
-	/*
-	 * Check the signature.
-	 * Do not strictly follow the ONFI spec, maybe changed in future.
-	 */
-	if (strncmp(ep->sig, "EPPS", 4)) {
-		pr_debug("The signature is invalid.\n");
-		goto ext_out;
-	}
-
-	/* find the ECC section. */
-	cursor = (uint8_t *)(ep + 1);
-	for (i = 0; i < ONFI_EXT_SECTION_MAX; i++) {
-		s = ep->sections + i;
-		if (s->type == ONFI_SECTION_TYPE_2)
-			break;
-		cursor += s->length * 16;
-	}
-	if (i == ONFI_EXT_SECTION_MAX) {
-		pr_debug("We can not find the ECC section.\n");
-		goto ext_out;
-	}
-
-	/* get the info we want. */
-	ecc = (struct onfi_ext_ecc_info *)cursor;
-
-	if (!ecc->codeword_size) {
-		pr_debug("Invalid codeword size\n");
-		goto ext_out;
-	}
-
-	chip->ecc_strength_ds = ecc->ecc_bits;
-	chip->ecc_step_ds = 1 << ecc->codeword_size;
-	ret = 0;
-
-ext_out:
-	kfree(ep);
-	return ret;
-}
-
-/*
- * Recover data with bit-wise majority
- */
-static void nand_bit_wise_majority(const void **srcbufs,
-				   unsigned int nsrcbufs,
-				   void *dstbuf,
-				   unsigned int bufsize)
-{
-	int i, j, k;
-
-	for (i = 0; i < bufsize; i++) {
-		u8 val = 0;
-
-		for (j = 0; j < 8; j++) {
-			unsigned int cnt = 0;
-
-			for (k = 0; k < nsrcbufs; k++) {
-				const u8 *srcbuf = srcbufs[k];
-
-				if (srcbuf[i] & BIT(j))
-					cnt++;
-			}
-
-			if (cnt > nsrcbufs / 2)
-				val |= BIT(j);
-		}
-
-		((u8 *)dstbuf)[i] = val;
-	}
-}
-
-/*
- * Check if the NAND chip is ONFI compliant, returns 1 if it is, 0 otherwise.
- */
-static int nand_flash_detect_onfi(struct nand_chip *chip)
-{
-	struct mtd_info *mtd = nand_to_mtd(chip);
-	struct nand_onfi_params *p;
-	struct onfi_params *onfi;
-	int onfi_version = 0;
-	char id[4];
-	int i, ret, val;
-
-	/* Try ONFI for unknown chip or LP */
-	ret = nand_readid_op(chip, 0x20, id, sizeof(id));
-	if (ret || strncmp(id, "ONFI", 4))
-		return 0;
-
-	/* ONFI chip: allocate a buffer to hold its parameter page */
-	p = kzalloc((sizeof(*p) * 3), GFP_KERNEL);
-	if (!p)
-		return -ENOMEM;
-
-	ret = nand_read_param_page_op(chip, 0, NULL, 0);
-	if (ret) {
-		ret = 0;
-		goto free_onfi_param_page;
-	}
-
-	for (i = 0; i < 3; i++) {
-		ret = nand_read_data_op(chip, &p[i], sizeof(*p), true);
-		if (ret) {
-			ret = 0;
-			goto free_onfi_param_page;
-		}
-
-		if (onfi_crc16(ONFI_CRC_BASE, (u8 *)&p[i], 254) ==
-				le16_to_cpu(p->crc)) {
-			if (i)
-				memcpy(p, &p[i], sizeof(*p));
-			break;
-		}
-	}
-
-	if (i == 3) {
-		const void *srcbufs[3] = {p, p + 1, p + 2};
-
-		pr_warn("Could not find a valid ONFI parameter page, trying bit-wise majority to recover it\n");
-		nand_bit_wise_majority(srcbufs, ARRAY_SIZE(srcbufs), p,
-				       sizeof(*p));
-
-		if (onfi_crc16(ONFI_CRC_BASE, (u8 *)p, 254) !=
-				le16_to_cpu(p->crc)) {
-			pr_err("ONFI parameter recovery failed, aborting\n");
-			goto free_onfi_param_page;
-		}
-	}
-
-	if (chip->manufacturer.desc && chip->manufacturer.desc->ops &&
-	    chip->manufacturer.desc->ops->fixup_onfi_param_page)
-		chip->manufacturer.desc->ops->fixup_onfi_param_page(chip, p);
-
-	/* Check version */
-	val = le16_to_cpu(p->revision);
-	if (val & ONFI_VERSION_2_3)
-		onfi_version = 23;
-	else if (val & ONFI_VERSION_2_2)
-		onfi_version = 22;
-	else if (val & ONFI_VERSION_2_1)
-		onfi_version = 21;
-	else if (val & ONFI_VERSION_2_0)
-		onfi_version = 20;
-	else if (val & ONFI_VERSION_1_0)
-		onfi_version = 10;
-
-	if (!onfi_version) {
-		pr_info("unsupported ONFI version: %d\n", val);
-		goto free_onfi_param_page;
-	}
-
-	sanitize_string(p->manufacturer, sizeof(p->manufacturer));
-	sanitize_string(p->model, sizeof(p->model));
-	chip->parameters.model = kstrdup(p->model, GFP_KERNEL);
-	if (!chip->parameters.model) {
-		ret = -ENOMEM;
-		goto free_onfi_param_page;
-	}
-
-	mtd->writesize = le32_to_cpu(p->byte_per_page);
-
-	/*
-	 * pages_per_block and blocks_per_lun may not be a power-of-2 size
-	 * (don't ask me who thought of this...). MTD assumes that these
-	 * dimensions will be power-of-2, so just truncate the remaining area.
-	 */
-	mtd->erasesize = 1 << (fls(le32_to_cpu(p->pages_per_block)) - 1);
-	mtd->erasesize *= mtd->writesize;
-
-	mtd->oobsize = le16_to_cpu(p->spare_bytes_per_page);
-
-	/* See erasesize comment */
-	chip->chipsize = 1 << (fls(le32_to_cpu(p->blocks_per_lun)) - 1);
-	chip->chipsize *= (uint64_t)mtd->erasesize * p->lun_count;
-	chip->bits_per_cell = p->bits_per_cell;
-
-	chip->max_bb_per_die = le16_to_cpu(p->bb_per_lun);
-	chip->blocks_per_die = le32_to_cpu(p->blocks_per_lun);
-
-	if (le16_to_cpu(p->features) & ONFI_FEATURE_16_BIT_BUS)
-		chip->options |= NAND_BUSWIDTH_16;
-
-	if (p->ecc_bits != 0xff) {
-		chip->ecc_strength_ds = p->ecc_bits;
-		chip->ecc_step_ds = 512;
-	} else if (onfi_version >= 21 &&
-		(le16_to_cpu(p->features) & ONFI_FEATURE_EXT_PARAM_PAGE)) {
-
-		/*
-		 * The nand_flash_detect_ext_param_page() uses the
-		 * Change Read Column command which maybe not supported
-		 * by the chip->legacy.cmdfunc. So try to update the
-		 * chip->legacy.cmdfunc now. We do not replace user supplied
-		 * command function.
-		 */
-		nand_legacy_adjust_cmdfunc(chip);
-
-		/* The Extended Parameter Page is supported since ONFI 2.1. */
-		if (nand_flash_detect_ext_param_page(chip, p))
-			pr_warn("Failed to detect ONFI extended param page\n");
-	} else {
-		pr_warn("Could not retrieve ONFI ECC requirements\n");
-	}
-
-	/* Save some parameters from the parameter page for future use */
-	if (le16_to_cpu(p->opt_cmd) & ONFI_OPT_CMD_SET_GET_FEATURES) {
-		chip->parameters.supports_set_get_features = true;
-		bitmap_set(chip->parameters.get_feature_list,
-			   ONFI_FEATURE_ADDR_TIMING_MODE, 1);
-		bitmap_set(chip->parameters.set_feature_list,
-			   ONFI_FEATURE_ADDR_TIMING_MODE, 1);
-	}
-
-	onfi = kzalloc(sizeof(*onfi), GFP_KERNEL);
-	if (!onfi) {
-		ret = -ENOMEM;
-		goto free_model;
-	}
-
-	onfi->version = onfi_version;
-	onfi->tPROG = le16_to_cpu(p->t_prog);
-	onfi->tBERS = le16_to_cpu(p->t_bers);
-	onfi->tR = le16_to_cpu(p->t_r);
-	onfi->tCCS = le16_to_cpu(p->t_ccs);
-	onfi->async_timing_mode = le16_to_cpu(p->async_timing_mode);
-	onfi->vendor_revision = le16_to_cpu(p->vendor_revision);
-	memcpy(onfi->vendor, p->vendor, sizeof(p->vendor));
-	chip->parameters.onfi = onfi;
-
-	/* Identification done, free the full ONFI parameter page and exit */
-	kfree(p);
-
-	return 1;
-
-free_model:
-	kfree(chip->parameters.model);
-free_onfi_param_page:
-	kfree(p);
-
-	return ret;
-}
-
 /*
  * Check if the NAND chip is JEDEC compliant, returns 1 if it is, 0 otherwise.
  */
@@ -5076,7 +4788,7 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
 
 	if (!type->name || !type->pagesize) {
 		/* Check if the chip is ONFI compliant */
-		ret = nand_flash_detect_onfi(chip);
+		ret = nand_onfi_detect(chip);
 		if (ret < 0)
 			return ret;
 		else if (ret)
diff --git a/drivers/mtd/nand/raw/nand_onfi.c b/drivers/mtd/nand/raw/nand_onfi.c
new file mode 100644
index 000000000000..d8184cf591ad
--- /dev/null
+++ b/drivers/mtd/nand/raw/nand_onfi.c
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2000 Steven J. Hill (sjhill@realitydiluted.com)
+ *		  2002-2006 Thomas Gleixner (tglx@linutronix.de)
+ *
+ *  Credits:
+ *	David Woodhouse for adding multichip support
+ *
+ *	Aleph One Ltd. and Toby Churchill Ltd. for supporting the
+ *	rework for 2K page size chips
+ *
+ * This file contains all ONFI helpers.
+ */
+
+#include <linux/slab.h>
+
+#include "internals.h"
+
+u16 onfi_crc16(u16 crc, u8 const *p, size_t len)
+{
+	int i;
+	while (len--) {
+		crc ^= *p++ << 8;
+		for (i = 0; i < 8; i++)
+			crc = (crc << 1) ^ ((crc & 0x8000) ? 0x8005 : 0);
+	}
+
+	return crc;
+}
+
+/* Parse the Extended Parameter Page. */
+static int nand_flash_detect_ext_param_page(struct nand_chip *chip,
+					    struct nand_onfi_params *p)
+{
+	struct onfi_ext_param_page *ep;
+	struct onfi_ext_section *s;
+	struct onfi_ext_ecc_info *ecc;
+	uint8_t *cursor;
+	int ret;
+	int len;
+	int i;
+
+	len = le16_to_cpu(p->ext_param_page_length) * 16;
+	ep = kmalloc(len, GFP_KERNEL);
+	if (!ep)
+		return -ENOMEM;
+
+	/* Send our own NAND_CMD_PARAM. */
+	ret = nand_read_param_page_op(chip, 0, NULL, 0);
+	if (ret)
+		goto ext_out;
+
+	/* Use the Change Read Column command to skip the ONFI param pages. */
+	ret = nand_change_read_column_op(chip,
+					 sizeof(*p) * p->num_of_param_pages,
+					 ep, len, true);
+	if (ret)
+		goto ext_out;
+
+	ret = -EINVAL;
+	if ((onfi_crc16(ONFI_CRC_BASE, ((uint8_t *)ep) + 2, len - 2)
+		!= le16_to_cpu(ep->crc))) {
+		pr_debug("fail in the CRC.\n");
+		goto ext_out;
+	}
+
+	/*
+	 * Check the signature.
+	 * Do not strictly follow the ONFI spec, maybe changed in future.
+	 */
+	if (strncmp(ep->sig, "EPPS", 4)) {
+		pr_debug("The signature is invalid.\n");
+		goto ext_out;
+	}
+
+	/* find the ECC section. */
+	cursor = (uint8_t *)(ep + 1);
+	for (i = 0; i < ONFI_EXT_SECTION_MAX; i++) {
+		s = ep->sections + i;
+		if (s->type == ONFI_SECTION_TYPE_2)
+			break;
+		cursor += s->length * 16;
+	}
+	if (i == ONFI_EXT_SECTION_MAX) {
+		pr_debug("We can not find the ECC section.\n");
+		goto ext_out;
+	}
+
+	/* get the info we want. */
+	ecc = (struct onfi_ext_ecc_info *)cursor;
+
+	if (!ecc->codeword_size) {
+		pr_debug("Invalid codeword size\n");
+		goto ext_out;
+	}
+
+	chip->ecc_strength_ds = ecc->ecc_bits;
+	chip->ecc_step_ds = 1 << ecc->codeword_size;
+	ret = 0;
+
+ext_out:
+	kfree(ep);
+	return ret;
+}
+
+/*
+ * Recover data with bit-wise majority
+ */
+static void nand_bit_wise_majority(const void **srcbufs,
+				   unsigned int nsrcbufs,
+				   void *dstbuf,
+				   unsigned int bufsize)
+{
+	int i, j, k;
+
+	for (i = 0; i < bufsize; i++) {
+		u8 val = 0;
+
+		for (j = 0; j < 8; j++) {
+			unsigned int cnt = 0;
+
+			for (k = 0; k < nsrcbufs; k++) {
+				const u8 *srcbuf = srcbufs[k];
+
+				if (srcbuf[i] & BIT(j))
+					cnt++;
+			}
+
+			if (cnt > nsrcbufs / 2)
+				val |= BIT(j);
+		}
+
+		((u8 *)dstbuf)[i] = val;
+	}
+}
+
+/*
+ * Check if the NAND chip is ONFI compliant, returns 1 if it is, 0 otherwise.
+ */
+int nand_onfi_detect(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct nand_onfi_params *p;
+	struct onfi_params *onfi;
+	int onfi_version = 0;
+	char id[4];
+	int i, ret, val;
+
+	/* Try ONFI for unknown chip or LP */
+	ret = nand_readid_op(chip, 0x20, id, sizeof(id));
+	if (ret || strncmp(id, "ONFI", 4))
+		return 0;
+
+	/* ONFI chip: allocate a buffer to hold its parameter page */
+	p = kzalloc((sizeof(*p) * 3), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	ret = nand_read_param_page_op(chip, 0, NULL, 0);
+	if (ret) {
+		ret = 0;
+		goto free_onfi_param_page;
+	}
+
+	for (i = 0; i < 3; i++) {
+		ret = nand_read_data_op(chip, &p[i], sizeof(*p), true);
+		if (ret) {
+			ret = 0;
+			goto free_onfi_param_page;
+		}
+
+		if (onfi_crc16(ONFI_CRC_BASE, (u8 *)&p[i], 254) ==
+				le16_to_cpu(p->crc)) {
+			if (i)
+				memcpy(p, &p[i], sizeof(*p));
+			break;
+		}
+	}
+
+	if (i == 3) {
+		const void *srcbufs[3] = {p, p + 1, p + 2};
+
+		pr_warn("Could not find a valid ONFI parameter page, trying bit-wise majority to recover it\n");
+		nand_bit_wise_majority(srcbufs, ARRAY_SIZE(srcbufs), p,
+				       sizeof(*p));
+
+		if (onfi_crc16(ONFI_CRC_BASE, (u8 *)p, 254) !=
+				le16_to_cpu(p->crc)) {
+			pr_err("ONFI parameter recovery failed, aborting\n");
+			goto free_onfi_param_page;
+		}
+	}
+
+	if (chip->manufacturer.desc && chip->manufacturer.desc->ops &&
+	    chip->manufacturer.desc->ops->fixup_onfi_param_page)
+		chip->manufacturer.desc->ops->fixup_onfi_param_page(chip, p);
+
+	/* Check version */
+	val = le16_to_cpu(p->revision);
+	if (val & ONFI_VERSION_2_3)
+		onfi_version = 23;
+	else if (val & ONFI_VERSION_2_2)
+		onfi_version = 22;
+	else if (val & ONFI_VERSION_2_1)
+		onfi_version = 21;
+	else if (val & ONFI_VERSION_2_0)
+		onfi_version = 20;
+	else if (val & ONFI_VERSION_1_0)
+		onfi_version = 10;
+
+	if (!onfi_version) {
+		pr_info("unsupported ONFI version: %d\n", val);
+		goto free_onfi_param_page;
+	}
+
+	sanitize_string(p->manufacturer, sizeof(p->manufacturer));
+	sanitize_string(p->model, sizeof(p->model));
+	chip->parameters.model = kstrdup(p->model, GFP_KERNEL);
+	if (!chip->parameters.model) {
+		ret = -ENOMEM;
+		goto free_onfi_param_page;
+	}
+
+	mtd->writesize = le32_to_cpu(p->byte_per_page);
+
+	/*
+	 * pages_per_block and blocks_per_lun may not be a power-of-2 size
+	 * (don't ask me who thought of this...). MTD assumes that these
+	 * dimensions will be power-of-2, so just truncate the remaining area.
+	 */
+	mtd->erasesize = 1 << (fls(le32_to_cpu(p->pages_per_block)) - 1);
+	mtd->erasesize *= mtd->writesize;
+
+	mtd->oobsize = le16_to_cpu(p->spare_bytes_per_page);
+
+	/* See erasesize comment */
+	chip->chipsize = 1 << (fls(le32_to_cpu(p->blocks_per_lun)) - 1);
+	chip->chipsize *= (uint64_t)mtd->erasesize * p->lun_count;
+	chip->bits_per_cell = p->bits_per_cell;
+
+	chip->max_bb_per_die = le16_to_cpu(p->bb_per_lun);
+	chip->blocks_per_die = le32_to_cpu(p->blocks_per_lun);
+
+	if (le16_to_cpu(p->features) & ONFI_FEATURE_16_BIT_BUS)
+		chip->options |= NAND_BUSWIDTH_16;
+
+	if (p->ecc_bits != 0xff) {
+		chip->ecc_strength_ds = p->ecc_bits;
+		chip->ecc_step_ds = 512;
+	} else if (onfi_version >= 21 &&
+		(le16_to_cpu(p->features) & ONFI_FEATURE_EXT_PARAM_PAGE)) {
+
+		/*
+		 * The nand_flash_detect_ext_param_page() uses the
+		 * Change Read Column command which maybe not supported
+		 * by the chip->legacy.cmdfunc. So try to update the
+		 * chip->legacy.cmdfunc now. We do not replace user supplied
+		 * command function.
+		 */
+		nand_legacy_adjust_cmdfunc(chip);
+
+		/* The Extended Parameter Page is supported since ONFI 2.1. */
+		if (nand_flash_detect_ext_param_page(chip, p))
+			pr_warn("Failed to detect ONFI extended param page\n");
+	} else {
+		pr_warn("Could not retrieve ONFI ECC requirements\n");
+	}
+
+	/* Save some parameters from the parameter page for future use */
+	if (le16_to_cpu(p->opt_cmd) & ONFI_OPT_CMD_SET_GET_FEATURES) {
+		chip->parameters.supports_set_get_features = true;
+		bitmap_set(chip->parameters.get_feature_list,
+			   ONFI_FEATURE_ADDR_TIMING_MODE, 1);
+		bitmap_set(chip->parameters.set_feature_list,
+			   ONFI_FEATURE_ADDR_TIMING_MODE, 1);
+	}
+
+	onfi = kzalloc(sizeof(*onfi), GFP_KERNEL);
+	if (!onfi) {
+		ret = -ENOMEM;
+		goto free_model;
+	}
+
+	onfi->version = onfi_version;
+	onfi->tPROG = le16_to_cpu(p->t_prog);
+	onfi->tBERS = le16_to_cpu(p->t_bers);
+	onfi->tR = le16_to_cpu(p->t_r);
+	onfi->tCCS = le16_to_cpu(p->t_ccs);
+	onfi->async_timing_mode = le16_to_cpu(p->async_timing_mode);
+	onfi->vendor_revision = le16_to_cpu(p->vendor_revision);
+	memcpy(onfi->vendor, p->vendor, sizeof(p->vendor));
+	chip->parameters.onfi = onfi;
+
+	/* Identification done, free the full ONFI parameter page and exit */
+	kfree(p);
+
+	return 1;
+
+free_model:
+	kfree(chip->parameters.model);
+free_onfi_param_page:
+	kfree(p);
+
+	return ret;
+}
diff --git a/include/linux/mtd/onfi.h b/include/linux/mtd/onfi.h
new file mode 100644
index 000000000000..339ac798568e
--- /dev/null
+++ b/include/linux/mtd/onfi.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright © 2000-2010 David Woodhouse <dwmw2@infradead.org>
+ *			 Steven J. Hill <sjhill@realitydiluted.com>
+ *			 Thomas Gleixner <tglx@linutronix.de>
+ *
+ * Contains all ONFI related definitions
+ */
+
+#ifndef __LINUX_MTD_ONFI_H
+#define __LINUX_MTD_ONFI_H
+
+#include <linux/types.h>
+
+/* ONFI version bits */
+#define ONFI_VERSION_1_0		BIT(1)
+#define ONFI_VERSION_2_0		BIT(2)
+#define ONFI_VERSION_2_1		BIT(3)
+#define ONFI_VERSION_2_2		BIT(4)
+#define ONFI_VERSION_2_3		BIT(5)
+#define ONFI_VERSION_3_0		BIT(6)
+#define ONFI_VERSION_3_1		BIT(7)
+#define ONFI_VERSION_3_2		BIT(8)
+#define ONFI_VERSION_4_0		BIT(9)
+
+/* ONFI features */
+#define ONFI_FEATURE_16_BIT_BUS		(1 << 0)
+#define ONFI_FEATURE_EXT_PARAM_PAGE	(1 << 7)
+
+/* ONFI timing mode, used in both asynchronous and synchronous mode */
+#define ONFI_TIMING_MODE_0		(1 << 0)
+#define ONFI_TIMING_MODE_1		(1 << 1)
+#define ONFI_TIMING_MODE_2		(1 << 2)
+#define ONFI_TIMING_MODE_3		(1 << 3)
+#define ONFI_TIMING_MODE_4		(1 << 4)
+#define ONFI_TIMING_MODE_5		(1 << 5)
+#define ONFI_TIMING_MODE_UNKNOWN	(1 << 6)
+
+/* ONFI feature number/address */
+#define ONFI_FEATURE_NUMBER		256
+#define ONFI_FEATURE_ADDR_TIMING_MODE	0x1
+
+/* Vendor-specific feature address (Micron) */
+#define ONFI_FEATURE_ADDR_READ_RETRY	0x89
+#define ONFI_FEATURE_ON_DIE_ECC		0x90
+#define   ONFI_FEATURE_ON_DIE_ECC_EN	BIT(3)
+
+/* ONFI subfeature parameters length */
+#define ONFI_SUBFEATURE_PARAM_LEN	4
+
+/* ONFI optional commands SET/GET FEATURES supported? */
+#define ONFI_OPT_CMD_SET_GET_FEATURES	(1 << 2)
+
+struct nand_onfi_params {
+	/* rev info and features block */
+	/* 'O' 'N' 'F' 'I'  */
+	u8 sig[4];
+	__le16 revision;
+	__le16 features;
+	__le16 opt_cmd;
+	u8 reserved0[2];
+	__le16 ext_param_page_length; /* since ONFI 2.1 */
+	u8 num_of_param_pages;        /* since ONFI 2.1 */
+	u8 reserved1[17];
+
+	/* manufacturer information block */
+	char manufacturer[12];
+	char model[20];
+	u8 jedec_id;
+	__le16 date_code;
+	u8 reserved2[13];
+
+	/* memory organization block */
+	__le32 byte_per_page;
+	__le16 spare_bytes_per_page;
+	__le32 data_bytes_per_ppage;
+	__le16 spare_bytes_per_ppage;
+	__le32 pages_per_block;
+	__le32 blocks_per_lun;
+	u8 lun_count;
+	u8 addr_cycles;
+	u8 bits_per_cell;
+	__le16 bb_per_lun;
+	__le16 block_endurance;
+	u8 guaranteed_good_blocks;
+	__le16 guaranteed_block_endurance;
+	u8 programs_per_page;
+	u8 ppage_attr;
+	u8 ecc_bits;
+	u8 interleaved_bits;
+	u8 interleaved_ops;
+	u8 reserved3[13];
+
+	/* electrical parameter block */
+	u8 io_pin_capacitance_max;
+	__le16 async_timing_mode;
+	__le16 program_cache_timing_mode;
+	__le16 t_prog;
+	__le16 t_bers;
+	__le16 t_r;
+	__le16 t_ccs;
+	__le16 src_sync_timing_mode;
+	u8 src_ssync_features;
+	__le16 clk_pin_capacitance_typ;
+	__le16 io_pin_capacitance_typ;
+	__le16 input_pin_capacitance_typ;
+	u8 input_pin_capacitance_max;
+	u8 driver_strength_support;
+	__le16 t_int_r;
+	__le16 t_adl;
+	u8 reserved4[8];
+
+	/* vendor */
+	__le16 vendor_revision;
+	u8 vendor[88];
+
+	__le16 crc;
+} __packed;
+
+#define ONFI_CRC_BASE	0x4F4E
+
+/* Extended ECC information Block Definition (since ONFI 2.1) */
+struct onfi_ext_ecc_info {
+	u8 ecc_bits;
+	u8 codeword_size;
+	__le16 bb_per_lun;
+	__le16 block_endurance;
+	u8 reserved[2];
+} __packed;
+
+#define ONFI_SECTION_TYPE_0	0	/* Unused section. */
+#define ONFI_SECTION_TYPE_1	1	/* for additional sections. */
+#define ONFI_SECTION_TYPE_2	2	/* for ECC information. */
+struct onfi_ext_section {
+	u8 type;
+	u8 length;
+} __packed;
+
+#define ONFI_EXT_SECTION_MAX 8
+
+/* Extended Parameter Page Definition (since ONFI 2.1) */
+struct onfi_ext_param_page {
+	__le16 crc;
+	u8 sig[4];             /* 'E' 'P' 'P' 'S' */
+	u8 reserved0[10];
+	struct onfi_ext_section sections[ONFI_EXT_SECTION_MAX];
+
+	/*
+	 * The actual size of the Extended Parameter Page is in
+	 * @ext_param_page_length of nand_onfi_params{}.
+	 * The following are the variable length sections.
+	 * So we do not add any fields below. Please see the ONFI spec.
+	 */
+} __packed;
+
+/**
+ * struct onfi_params - ONFI specific parameters that will be reused
+ * @version: ONFI version (BCD encoded), 0 if ONFI is not supported
+ * @tPROG: Page program time
+ * @tBERS: Block erase time
+ * @tR: Page read time
+ * @tCCS: Change column setup time
+ * @async_timing_mode: Supported asynchronous timing mode
+ * @vendor_revision: Vendor specific revision number
+ * @vendor: Vendor specific data
+ */
+struct onfi_params {
+	int version;
+	u16 tPROG;
+	u16 tBERS;
+	u16 tR;
+	u16 tCCS;
+	u16 async_timing_mode;
+	u16 vendor_revision;
+	u8 vendor[88];
+};
+
+#endif /* __LINUX_MTD_ONFI_H */
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index fbe7686cfc59..f4fc0cce7c55 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -21,6 +21,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/flashchip.h>
 #include <linux/mtd/bbm.h>
+#include <linux/mtd/onfi.h>
 #include <linux/of.h>
 #include <linux/types.h>
 
@@ -204,147 +205,6 @@ enum nand_ecc_algo {
 #define NAND_CI_CELLTYPE_MSK	0x0C
 #define NAND_CI_CELLTYPE_SHIFT	2
 
-/* ONFI version bits */
-#define ONFI_VERSION_1_0		BIT(1)
-#define ONFI_VERSION_2_0		BIT(2)
-#define ONFI_VERSION_2_1		BIT(3)
-#define ONFI_VERSION_2_2		BIT(4)
-#define ONFI_VERSION_2_3		BIT(5)
-#define ONFI_VERSION_3_0		BIT(6)
-#define ONFI_VERSION_3_1		BIT(7)
-#define ONFI_VERSION_3_2		BIT(8)
-#define ONFI_VERSION_4_0		BIT(9)
-
-/* ONFI features */
-#define ONFI_FEATURE_16_BIT_BUS		(1 << 0)
-#define ONFI_FEATURE_EXT_PARAM_PAGE	(1 << 7)
-
-/* ONFI timing mode, used in both asynchronous and synchronous mode */
-#define ONFI_TIMING_MODE_0		(1 << 0)
-#define ONFI_TIMING_MODE_1		(1 << 1)
-#define ONFI_TIMING_MODE_2		(1 << 2)
-#define ONFI_TIMING_MODE_3		(1 << 3)
-#define ONFI_TIMING_MODE_4		(1 << 4)
-#define ONFI_TIMING_MODE_5		(1 << 5)
-#define ONFI_TIMING_MODE_UNKNOWN	(1 << 6)
-
-/* ONFI feature number/address */
-#define ONFI_FEATURE_NUMBER		256
-#define ONFI_FEATURE_ADDR_TIMING_MODE	0x1
-
-/* Vendor-specific feature address (Micron) */
-#define ONFI_FEATURE_ADDR_READ_RETRY	0x89
-#define ONFI_FEATURE_ON_DIE_ECC		0x90
-#define   ONFI_FEATURE_ON_DIE_ECC_EN	BIT(3)
-
-/* ONFI subfeature parameters length */
-#define ONFI_SUBFEATURE_PARAM_LEN	4
-
-/* ONFI optional commands SET/GET FEATURES supported? */
-#define ONFI_OPT_CMD_SET_GET_FEATURES	(1 << 2)
-
-struct nand_onfi_params {
-	/* rev info and features block */
-	/* 'O' 'N' 'F' 'I'  */
-	u8 sig[4];
-	__le16 revision;
-	__le16 features;
-	__le16 opt_cmd;
-	u8 reserved0[2];
-	__le16 ext_param_page_length; /* since ONFI 2.1 */
-	u8 num_of_param_pages;        /* since ONFI 2.1 */
-	u8 reserved1[17];
-
-	/* manufacturer information block */
-	char manufacturer[12];
-	char model[20];
-	u8 jedec_id;
-	__le16 date_code;
-	u8 reserved2[13];
-
-	/* memory organization block */
-	__le32 byte_per_page;
-	__le16 spare_bytes_per_page;
-	__le32 data_bytes_per_ppage;
-	__le16 spare_bytes_per_ppage;
-	__le32 pages_per_block;
-	__le32 blocks_per_lun;
-	u8 lun_count;
-	u8 addr_cycles;
-	u8 bits_per_cell;
-	__le16 bb_per_lun;
-	__le16 block_endurance;
-	u8 guaranteed_good_blocks;
-	__le16 guaranteed_block_endurance;
-	u8 programs_per_page;
-	u8 ppage_attr;
-	u8 ecc_bits;
-	u8 interleaved_bits;
-	u8 interleaved_ops;
-	u8 reserved3[13];
-
-	/* electrical parameter block */
-	u8 io_pin_capacitance_max;
-	__le16 async_timing_mode;
-	__le16 program_cache_timing_mode;
-	__le16 t_prog;
-	__le16 t_bers;
-	__le16 t_r;
-	__le16 t_ccs;
-	__le16 src_sync_timing_mode;
-	u8 src_ssync_features;
-	__le16 clk_pin_capacitance_typ;
-	__le16 io_pin_capacitance_typ;
-	__le16 input_pin_capacitance_typ;
-	u8 input_pin_capacitance_max;
-	u8 driver_strength_support;
-	__le16 t_int_r;
-	__le16 t_adl;
-	u8 reserved4[8];
-
-	/* vendor */
-	__le16 vendor_revision;
-	u8 vendor[88];
-
-	__le16 crc;
-} __packed;
-
-#define ONFI_CRC_BASE	0x4F4E
-
-/* Extended ECC information Block Definition (since ONFI 2.1) */
-struct onfi_ext_ecc_info {
-	u8 ecc_bits;
-	u8 codeword_size;
-	__le16 bb_per_lun;
-	__le16 block_endurance;
-	u8 reserved[2];
-} __packed;
-
-#define ONFI_SECTION_TYPE_0	0	/* Unused section. */
-#define ONFI_SECTION_TYPE_1	1	/* for additional sections. */
-#define ONFI_SECTION_TYPE_2	2	/* for ECC information. */
-struct onfi_ext_section {
-	u8 type;
-	u8 length;
-} __packed;
-
-#define ONFI_EXT_SECTION_MAX 8
-
-/* Extended Parameter Page Definition (since ONFI 2.1) */
-struct onfi_ext_param_page {
-	__le16 crc;
-	u8 sig[4];             /* 'E' 'P' 'P' 'S' */
-	u8 reserved0[10];
-	struct onfi_ext_section sections[ONFI_EXT_SECTION_MAX];
-
-	/*
-	 * The actual size of the Extended Parameter Page is in
-	 * @ext_param_page_length of nand_onfi_params{}.
-	 * The following are the variable length sections.
-	 * So we do not add any fields below. Please see the ONFI spec.
-	 */
-} __packed;
-
 struct jedec_ecc_info {
 	u8 ecc_bits;
 	u8 codeword_size;
@@ -423,28 +283,6 @@ struct nand_jedec_params {
 	__le16 crc;
 } __packed;
 
-/**
- * struct onfi_params - ONFI specific parameters that will be reused
- * @version: ONFI version (BCD encoded), 0 if ONFI is not supported
- * @tPROG: Page program time
- * @tBERS: Block erase time
- * @tR: Page read time
- * @tCCS: Change column setup time
- * @async_timing_mode: Supported asynchronous timing mode
- * @vendor_revision: Vendor specific revision number
- * @vendor: Vendor specific data
- */
-struct onfi_params {
-	int version;
-	u16 tPROG;
-	u16 tBERS;
-	u16 tR;
-	u16 tCCS;
-	u16 async_timing_mode;
-	u16 vendor_revision;
-	u8 vendor[88];
-};
-
 /**
  * struct nand_parameters - NAND generic parameters from the parameter page
  * @model: Model name
-- 
cgit v1.2.3


From 8ae3fbf81b9cfdd1fec0451181213742b73fdf1a Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Fri, 7 Sep 2018 00:38:51 +0200
Subject: mtd: rawnand: Move JEDEC code to nand_jedec.c

This moves JEDEC related code to nand_jedec.c and JEDEC related
struct/macros to include/linux/mtd/jedec.h.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/Makefile     |   1 +
 drivers/mtd/nand/raw/internals.h  |   3 +
 drivers/mtd/nand/raw/nand_base.c  |  98 +--------------------------------
 drivers/mtd/nand/raw/nand_jedec.c | 113 ++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/jedec.h         |  91 ++++++++++++++++++++++++++++++
 include/linux/mtd/rawnand.h       |  79 +-------------------------
 6 files changed, 210 insertions(+), 175 deletions(-)
 create mode 100644 drivers/mtd/nand/raw/nand_jedec.c
 create mode 100644 include/linux/mtd/jedec.h

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/Makefile b/drivers/mtd/nand/raw/Makefile
index cc9f50f3ad1b..be2c17863ee5 100644
--- a/drivers/mtd/nand/raw/Makefile
+++ b/drivers/mtd/nand/raw/Makefile
@@ -59,6 +59,7 @@ obj-$(CONFIG_MTD_NAND_TEGRA)		+= tegra_nand.o
 
 nand-objs := nand_base.o nand_legacy.o nand_bbt.o nand_timings.o nand_ids.o
 nand-objs += nand_onfi.o
+nand-objs += nand_jedec.o
 nand-objs += nand_amd.o
 nand-objs += nand_hynix.o
 nand-objs += nand_macronix.o
diff --git a/drivers/mtd/nand/raw/internals.h b/drivers/mtd/nand/raw/internals.h
index 1ce720a8d756..88b5da620e7d 100644
--- a/drivers/mtd/nand/raw/internals.h
+++ b/drivers/mtd/nand/raw/internals.h
@@ -108,4 +108,7 @@ int nand_legacy_check_hooks(struct nand_chip *chip);
 u16 onfi_crc16(u16 crc, u8 const *p, size_t len);
 int nand_onfi_detect(struct nand_chip *chip);
 
+/* JEDEC functions */
+int nand_jedec_detect(struct nand_chip *chip);
+
 #endif /* __LINUX_RAWNAND_INTERNALS */
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index 812e8bd6ad82..dc3955da0426 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -4387,102 +4387,6 @@ void sanitize_string(uint8_t *s, size_t len)
 	strim(s);
 }
 
-/*
- * Check if the NAND chip is JEDEC compliant, returns 1 if it is, 0 otherwise.
- */
-static int nand_flash_detect_jedec(struct nand_chip *chip)
-{
-	struct mtd_info *mtd = nand_to_mtd(chip);
-	struct nand_jedec_params *p;
-	struct jedec_ecc_info *ecc;
-	int jedec_version = 0;
-	char id[5];
-	int i, val, ret;
-
-	/* Try JEDEC for unknown chip or LP */
-	ret = nand_readid_op(chip, 0x40, id, sizeof(id));
-	if (ret || strncmp(id, "JEDEC", sizeof(id)))
-		return 0;
-
-	/* JEDEC chip: allocate a buffer to hold its parameter page */
-	p = kzalloc(sizeof(*p), GFP_KERNEL);
-	if (!p)
-		return -ENOMEM;
-
-	ret = nand_read_param_page_op(chip, 0x40, NULL, 0);
-	if (ret) {
-		ret = 0;
-		goto free_jedec_param_page;
-	}
-
-	for (i = 0; i < 3; i++) {
-		ret = nand_read_data_op(chip, p, sizeof(*p), true);
-		if (ret) {
-			ret = 0;
-			goto free_jedec_param_page;
-		}
-
-		if (onfi_crc16(ONFI_CRC_BASE, (uint8_t *)p, 510) ==
-				le16_to_cpu(p->crc))
-			break;
-	}
-
-	if (i == 3) {
-		pr_err("Could not find valid JEDEC parameter page; aborting\n");
-		goto free_jedec_param_page;
-	}
-
-	/* Check version */
-	val = le16_to_cpu(p->revision);
-	if (val & (1 << 2))
-		jedec_version = 10;
-	else if (val & (1 << 1))
-		jedec_version = 1; /* vendor specific version */
-
-	if (!jedec_version) {
-		pr_info("unsupported JEDEC version: %d\n", val);
-		goto free_jedec_param_page;
-	}
-
-	sanitize_string(p->manufacturer, sizeof(p->manufacturer));
-	sanitize_string(p->model, sizeof(p->model));
-	chip->parameters.model = kstrdup(p->model, GFP_KERNEL);
-	if (!chip->parameters.model) {
-		ret = -ENOMEM;
-		goto free_jedec_param_page;
-	}
-
-	mtd->writesize = le32_to_cpu(p->byte_per_page);
-
-	/* Please reference to the comment for nand_flash_detect_onfi. */
-	mtd->erasesize = 1 << (fls(le32_to_cpu(p->pages_per_block)) - 1);
-	mtd->erasesize *= mtd->writesize;
-
-	mtd->oobsize = le16_to_cpu(p->spare_bytes_per_page);
-
-	/* Please reference to the comment for nand_flash_detect_onfi. */
-	chip->chipsize = 1 << (fls(le32_to_cpu(p->blocks_per_lun)) - 1);
-	chip->chipsize *= (uint64_t)mtd->erasesize * p->lun_count;
-	chip->bits_per_cell = p->bits_per_cell;
-
-	if (le16_to_cpu(p->features) & JEDEC_FEATURE_16_BIT_BUS)
-		chip->options |= NAND_BUSWIDTH_16;
-
-	/* ECC info */
-	ecc = &p->ecc_info[0];
-
-	if (ecc->codeword_size >= 9) {
-		chip->ecc_strength_ds = ecc->ecc_bits;
-		chip->ecc_step_ds = 1 << ecc->codeword_size;
-	} else {
-		pr_warn("Invalid codeword size\n");
-	}
-
-free_jedec_param_page:
-	kfree(p);
-	return ret;
-}
-
 /*
  * nand_id_has_period - Check if an ID string has a given wraparound period
  * @id_data: the ID string
@@ -4795,7 +4699,7 @@ static int nand_detect(struct nand_chip *chip, struct nand_flash_dev *type)
 			goto ident_done;
 
 		/* Check if the chip is JEDEC compliant */
-		ret = nand_flash_detect_jedec(chip);
+		ret = nand_jedec_detect(chip);
 		if (ret < 0)
 			return ret;
 		else if (ret)
diff --git a/drivers/mtd/nand/raw/nand_jedec.c b/drivers/mtd/nand/raw/nand_jedec.c
new file mode 100644
index 000000000000..5c26492c841d
--- /dev/null
+++ b/drivers/mtd/nand/raw/nand_jedec.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2000 Steven J. Hill (sjhill@realitydiluted.com)
+ *		  2002-2006 Thomas Gleixner (tglx@linutronix.de)
+ *
+ *  Credits:
+ *	David Woodhouse for adding multichip support
+ *
+ *	Aleph One Ltd. and Toby Churchill Ltd. for supporting the
+ *	rework for 2K page size chips
+ *
+ * This file contains all ONFI helpers.
+ */
+
+#include <linux/slab.h>
+
+#include "internals.h"
+
+/*
+ * Check if the NAND chip is JEDEC compliant, returns 1 if it is, 0 otherwise.
+ */
+int nand_jedec_detect(struct nand_chip *chip)
+{
+	struct mtd_info *mtd = nand_to_mtd(chip);
+	struct nand_jedec_params *p;
+	struct jedec_ecc_info *ecc;
+	int jedec_version = 0;
+	char id[5];
+	int i, val, ret;
+
+	/* Try JEDEC for unknown chip or LP */
+	ret = nand_readid_op(chip, 0x40, id, sizeof(id));
+	if (ret || strncmp(id, "JEDEC", sizeof(id)))
+		return 0;
+
+	/* JEDEC chip: allocate a buffer to hold its parameter page */
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p)
+		return -ENOMEM;
+
+	ret = nand_read_param_page_op(chip, 0x40, NULL, 0);
+	if (ret) {
+		ret = 0;
+		goto free_jedec_param_page;
+	}
+
+	for (i = 0; i < 3; i++) {
+		ret = nand_read_data_op(chip, p, sizeof(*p), true);
+		if (ret) {
+			ret = 0;
+			goto free_jedec_param_page;
+		}
+
+		if (onfi_crc16(ONFI_CRC_BASE, (uint8_t *)p, 510) ==
+				le16_to_cpu(p->crc))
+			break;
+	}
+
+	if (i == 3) {
+		pr_err("Could not find valid JEDEC parameter page; aborting\n");
+		goto free_jedec_param_page;
+	}
+
+	/* Check version */
+	val = le16_to_cpu(p->revision);
+	if (val & (1 << 2))
+		jedec_version = 10;
+	else if (val & (1 << 1))
+		jedec_version = 1; /* vendor specific version */
+
+	if (!jedec_version) {
+		pr_info("unsupported JEDEC version: %d\n", val);
+		goto free_jedec_param_page;
+	}
+
+	sanitize_string(p->manufacturer, sizeof(p->manufacturer));
+	sanitize_string(p->model, sizeof(p->model));
+	chip->parameters.model = kstrdup(p->model, GFP_KERNEL);
+	if (!chip->parameters.model) {
+		ret = -ENOMEM;
+		goto free_jedec_param_page;
+	}
+
+	mtd->writesize = le32_to_cpu(p->byte_per_page);
+
+	/* Please reference to the comment for nand_flash_detect_onfi. */
+	mtd->erasesize = 1 << (fls(le32_to_cpu(p->pages_per_block)) - 1);
+	mtd->erasesize *= mtd->writesize;
+
+	mtd->oobsize = le16_to_cpu(p->spare_bytes_per_page);
+
+	/* Please reference to the comment for nand_flash_detect_onfi. */
+	chip->chipsize = 1 << (fls(le32_to_cpu(p->blocks_per_lun)) - 1);
+	chip->chipsize *= (uint64_t)mtd->erasesize * p->lun_count;
+	chip->bits_per_cell = p->bits_per_cell;
+
+	if (le16_to_cpu(p->features) & JEDEC_FEATURE_16_BIT_BUS)
+		chip->options |= NAND_BUSWIDTH_16;
+
+	/* ECC info */
+	ecc = &p->ecc_info[0];
+
+	if (ecc->codeword_size >= 9) {
+		chip->ecc_strength_ds = ecc->ecc_bits;
+		chip->ecc_step_ds = 1 << ecc->codeword_size;
+	} else {
+		pr_warn("Invalid codeword size\n");
+	}
+
+free_jedec_param_page:
+	kfree(p);
+	return ret;
+}
diff --git a/include/linux/mtd/jedec.h b/include/linux/mtd/jedec.h
new file mode 100644
index 000000000000..0b6b59f7cfbd
--- /dev/null
+++ b/include/linux/mtd/jedec.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright © 2000-2010 David Woodhouse <dwmw2@infradead.org>
+ *			 Steven J. Hill <sjhill@realitydiluted.com>
+ *			 Thomas Gleixner <tglx@linutronix.de>
+ *
+ * Contains all JEDEC related definitions
+ */
+
+#ifndef __LINUX_MTD_JEDEC_H
+#define __LINUX_MTD_JEDEC_H
+
+struct jedec_ecc_info {
+	u8 ecc_bits;
+	u8 codeword_size;
+	__le16 bb_per_lun;
+	__le16 block_endurance;
+	u8 reserved[2];
+} __packed;
+
+/* JEDEC features */
+#define JEDEC_FEATURE_16_BIT_BUS	(1 << 0)
+
+struct nand_jedec_params {
+	/* rev info and features block */
+	/* 'J' 'E' 'S' 'D'  */
+	u8 sig[4];
+	__le16 revision;
+	__le16 features;
+	u8 opt_cmd[3];
+	__le16 sec_cmd;
+	u8 num_of_param_pages;
+	u8 reserved0[18];
+
+	/* manufacturer information block */
+	char manufacturer[12];
+	char model[20];
+	u8 jedec_id[6];
+	u8 reserved1[10];
+
+	/* memory organization block */
+	__le32 byte_per_page;
+	__le16 spare_bytes_per_page;
+	u8 reserved2[6];
+	__le32 pages_per_block;
+	__le32 blocks_per_lun;
+	u8 lun_count;
+	u8 addr_cycles;
+	u8 bits_per_cell;
+	u8 programs_per_page;
+	u8 multi_plane_addr;
+	u8 multi_plane_op_attr;
+	u8 reserved3[38];
+
+	/* electrical parameter block */
+	__le16 async_sdr_speed_grade;
+	__le16 toggle_ddr_speed_grade;
+	__le16 sync_ddr_speed_grade;
+	u8 async_sdr_features;
+	u8 toggle_ddr_features;
+	u8 sync_ddr_features;
+	__le16 t_prog;
+	__le16 t_bers;
+	__le16 t_r;
+	__le16 t_r_multi_plane;
+	__le16 t_ccs;
+	__le16 io_pin_capacitance_typ;
+	__le16 input_pin_capacitance_typ;
+	__le16 clk_pin_capacitance_typ;
+	u8 driver_strength_support;
+	__le16 t_adl;
+	u8 reserved4[36];
+
+	/* ECC and endurance block */
+	u8 guaranteed_good_blocks;
+	__le16 guaranteed_block_endurance;
+	struct jedec_ecc_info ecc_info[4];
+	u8 reserved5[29];
+
+	/* reserved */
+	u8 reserved6[148];
+
+	/* vendor */
+	__le16 vendor_rev_num;
+	u8 reserved7[88];
+
+	/* CRC for Parameter Page */
+	__le16 crc;
+} __packed;
+
+#endif /* __LINUX_MTD_JEDEC_H */
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index f4fc0cce7c55..68d09e01fa56 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -21,6 +21,7 @@
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/flashchip.h>
 #include <linux/mtd/bbm.h>
+#include <linux/mtd/jedec.h>
 #include <linux/mtd/onfi.h>
 #include <linux/of.h>
 #include <linux/types.h>
@@ -205,84 +206,6 @@ enum nand_ecc_algo {
 #define NAND_CI_CELLTYPE_MSK	0x0C
 #define NAND_CI_CELLTYPE_SHIFT	2
 
-struct jedec_ecc_info {
-	u8 ecc_bits;
-	u8 codeword_size;
-	__le16 bb_per_lun;
-	__le16 block_endurance;
-	u8 reserved[2];
-} __packed;
-
-/* JEDEC features */
-#define JEDEC_FEATURE_16_BIT_BUS	(1 << 0)
-
-struct nand_jedec_params {
-	/* rev info and features block */
-	/* 'J' 'E' 'S' 'D'  */
-	u8 sig[4];
-	__le16 revision;
-	__le16 features;
-	u8 opt_cmd[3];
-	__le16 sec_cmd;
-	u8 num_of_param_pages;
-	u8 reserved0[18];
-
-	/* manufacturer information block */
-	char manufacturer[12];
-	char model[20];
-	u8 jedec_id[6];
-	u8 reserved1[10];
-
-	/* memory organization block */
-	__le32 byte_per_page;
-	__le16 spare_bytes_per_page;
-	u8 reserved2[6];
-	__le32 pages_per_block;
-	__le32 blocks_per_lun;
-	u8 lun_count;
-	u8 addr_cycles;
-	u8 bits_per_cell;
-	u8 programs_per_page;
-	u8 multi_plane_addr;
-	u8 multi_plane_op_attr;
-	u8 reserved3[38];
-
-	/* electrical parameter block */
-	__le16 async_sdr_speed_grade;
-	__le16 toggle_ddr_speed_grade;
-	__le16 sync_ddr_speed_grade;
-	u8 async_sdr_features;
-	u8 toggle_ddr_features;
-	u8 sync_ddr_features;
-	__le16 t_prog;
-	__le16 t_bers;
-	__le16 t_r;
-	__le16 t_r_multi_plane;
-	__le16 t_ccs;
-	__le16 io_pin_capacitance_typ;
-	__le16 input_pin_capacitance_typ;
-	__le16 clk_pin_capacitance_typ;
-	u8 driver_strength_support;
-	__le16 t_adl;
-	u8 reserved4[36];
-
-	/* ECC and endurance block */
-	u8 guaranteed_good_blocks;
-	__le16 guaranteed_block_endurance;
-	struct jedec_ecc_info ecc_info[4];
-	u8 reserved5[29];
-
-	/* reserved */
-	u8 reserved6[148];
-
-	/* vendor */
-	__le16 vendor_rev_num;
-	u8 reserved7[88];
-
-	/* CRC for Parameter Page */
-	__le16 crc;
-} __packed;
-
 /**
  * struct nand_parameters - NAND generic parameters from the parameter page
  * @model: Model name
-- 
cgit v1.2.3


From 309600c14e36d0e78c22fe3df58317965b90f4d1 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Tue, 4 Sep 2018 16:23:28 +0200
Subject: mtd: rawnand: Allow selection of ECC byte ordering at runtime

Currently, the selection of ECC byte ordering for software hamming is
done at compilation time, which doesn't make sense when ECC byte
calculation is done in hardware and byte ordering is forced by the
hardware engine.
In this case, only the correction is done in software and we want to
force the byte-ordering no matter the value of CONFIG_MTD_NAND_ECC_SMC.

This is typically the case for the FSMC (Smart Media ordering), TMIO and
TXX9NDFMC (regular byte ordering) blocks.

For all other use cases (pure software implementation, SM FTL and
nandecctest), we keep selecting the byte ordering based on the
CONFIG_MTD_NAND_ECC_SMC value. It might not be ideal for SM FTL (I'd
expect Smart Media ordering to be employed by the Smart Media FTL), but
this option doesn't seem to be enabled in the existing _defconfig, so
I can't tell setting sm_order to true is the right choice.

Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/mtd/nand/raw/fsmc_nand.c    |  1 +
 drivers/mtd/nand/raw/nand_base.c    |  4 ++
 drivers/mtd/nand/raw/nand_ecc.c     | 89 ++++++++++++++++---------------------
 drivers/mtd/nand/raw/tmio_nand.c    |  5 ++-
 drivers/mtd/nand/raw/txx9ndfmc.c    |  3 +-
 drivers/mtd/sm_ftl.c                | 20 ++++++---
 drivers/mtd/tests/mtd_nandecctest.c | 21 ++++++---
 include/linux/mtd/nand_ecc.h        |  4 +-
 include/linux/mtd/rawnand.h         |  6 +++
 9 files changed, 84 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c
index f9874fc72f30..70ac8d875218 100644
--- a/drivers/mtd/nand/raw/fsmc_nand.c
+++ b/drivers/mtd/nand/raw/fsmc_nand.c
@@ -949,6 +949,7 @@ static int fsmc_nand_attach_chip(struct nand_chip *nand)
 		nand->ecc.correct = nand_correct_data;
 		nand->ecc.bytes = 3;
 		nand->ecc.strength = 1;
+		nand->ecc.options |= NAND_ECC_SOFT_HAMMING_SM_ORDER;
 		break;
 
 	case NAND_ECC_SOFT:
diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c
index dc3955da0426..05bd0779fe9b 100644
--- a/drivers/mtd/nand/raw/nand_base.c
+++ b/drivers/mtd/nand/raw/nand_base.c
@@ -5045,6 +5045,10 @@ static int nand_set_ecc_soft_ops(struct mtd_info *mtd)
 			ecc->size = 256;
 		ecc->bytes = 3;
 		ecc->strength = 1;
+
+		if (IS_ENABLED(CONFIG_MTD_NAND_ECC_SMC))
+			ecc->options |= NAND_ECC_SOFT_HAMMING_SM_ORDER;
+
 		return 0;
 	case NAND_ECC_BCH:
 		if (!mtd_nand_has_bch()) {
diff --git a/drivers/mtd/nand/raw/nand_ecc.c b/drivers/mtd/nand/raw/nand_ecc.c
index 93df8e73f577..4f4347533058 100644
--- a/drivers/mtd/nand/raw/nand_ecc.c
+++ b/drivers/mtd/nand/raw/nand_ecc.c
@@ -132,9 +132,10 @@ static const char addressbits[256] = {
  * @buf:	input buffer with raw data
  * @eccsize:	data bytes per ECC step (256 or 512)
  * @code:	output buffer with ECC
+ * @sm_order:	Smart Media byte ordering
  */
 void __nand_calculate_ecc(const unsigned char *buf, unsigned int eccsize,
-		       unsigned char *code)
+			  unsigned char *code, bool sm_order)
 {
 	int i;
 	const uint32_t *bp = (uint32_t *)buf;
@@ -330,45 +331,26 @@ void __nand_calculate_ecc(const unsigned char *buf, unsigned int eccsize,
 	 * possible, but benchmarks showed that on the system this is developed
 	 * the code below is the fastest
 	 */
-#ifdef CONFIG_MTD_NAND_ECC_SMC
-	code[0] =
-	    (invparity[rp7] << 7) |
-	    (invparity[rp6] << 6) |
-	    (invparity[rp5] << 5) |
-	    (invparity[rp4] << 4) |
-	    (invparity[rp3] << 3) |
-	    (invparity[rp2] << 2) |
-	    (invparity[rp1] << 1) |
-	    (invparity[rp0]);
-	code[1] =
-	    (invparity[rp15] << 7) |
-	    (invparity[rp14] << 6) |
-	    (invparity[rp13] << 5) |
-	    (invparity[rp12] << 4) |
-	    (invparity[rp11] << 3) |
-	    (invparity[rp10] << 2) |
-	    (invparity[rp9] << 1)  |
-	    (invparity[rp8]);
-#else
-	code[1] =
-	    (invparity[rp7] << 7) |
-	    (invparity[rp6] << 6) |
-	    (invparity[rp5] << 5) |
-	    (invparity[rp4] << 4) |
-	    (invparity[rp3] << 3) |
-	    (invparity[rp2] << 2) |
-	    (invparity[rp1] << 1) |
-	    (invparity[rp0]);
-	code[0] =
-	    (invparity[rp15] << 7) |
-	    (invparity[rp14] << 6) |
-	    (invparity[rp13] << 5) |
-	    (invparity[rp12] << 4) |
-	    (invparity[rp11] << 3) |
-	    (invparity[rp10] << 2) |
-	    (invparity[rp9] << 1)  |
-	    (invparity[rp8]);
-#endif
+	if (sm_order) {
+		code[0] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
+			  (invparity[rp5] << 5) | (invparity[rp4] << 4) |
+			  (invparity[rp3] << 3) | (invparity[rp2] << 2) |
+			  (invparity[rp1] << 1) | (invparity[rp0]);
+		code[1] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
+			  (invparity[rp13] << 5) | (invparity[rp12] << 4) |
+			  (invparity[rp11] << 3) | (invparity[rp10] << 2) |
+			  (invparity[rp9] << 1) | (invparity[rp8]);
+	} else {
+		code[1] = (invparity[rp7] << 7) | (invparity[rp6] << 6) |
+			  (invparity[rp5] << 5) | (invparity[rp4] << 4) |
+			  (invparity[rp3] << 3) | (invparity[rp2] << 2) |
+			  (invparity[rp1] << 1) | (invparity[rp0]);
+		code[0] = (invparity[rp15] << 7) | (invparity[rp14] << 6) |
+			  (invparity[rp13] << 5) | (invparity[rp12] << 4) |
+			  (invparity[rp11] << 3) | (invparity[rp10] << 2) |
+			  (invparity[rp9] << 1) | (invparity[rp8]);
+	}
+
 	if (eccsize_mult == 1)
 		code[2] =
 		    (invparity[par & 0xf0] << 7) |
@@ -401,7 +383,9 @@ EXPORT_SYMBOL(__nand_calculate_ecc);
 int nand_calculate_ecc(struct nand_chip *chip, const unsigned char *buf,
 		       unsigned char *code)
 {
-	__nand_calculate_ecc(buf, chip->ecc.size, code);
+	bool sm_order = chip->ecc.options & NAND_ECC_SOFT_HAMMING_SM_ORDER;
+
+	__nand_calculate_ecc(buf, chip->ecc.size, code, sm_order);
 
 	return 0;
 }
@@ -413,12 +397,13 @@ EXPORT_SYMBOL(nand_calculate_ecc);
  * @read_ecc:	ECC from the chip
  * @calc_ecc:	the ECC calculated from raw data
  * @eccsize:	data bytes per ECC step (256 or 512)
+ * @sm_order:	Smart Media byte order
  *
  * Detect and correct a 1 bit error for eccsize byte block
  */
 int __nand_correct_data(unsigned char *buf,
 			unsigned char *read_ecc, unsigned char *calc_ecc,
-			unsigned int eccsize)
+			unsigned int eccsize, bool sm_order)
 {
 	unsigned char b0, b1, b2, bit_addr;
 	unsigned int byte_addr;
@@ -430,13 +415,14 @@ int __nand_correct_data(unsigned char *buf,
 	 * we might need the xor result  more than once,
 	 * so keep them in a local var
 	*/
-#ifdef CONFIG_MTD_NAND_ECC_SMC
-	b0 = read_ecc[0] ^ calc_ecc[0];
-	b1 = read_ecc[1] ^ calc_ecc[1];
-#else
-	b0 = read_ecc[1] ^ calc_ecc[1];
-	b1 = read_ecc[0] ^ calc_ecc[0];
-#endif
+	if (sm_order) {
+		b0 = read_ecc[0] ^ calc_ecc[0];
+		b1 = read_ecc[1] ^ calc_ecc[1];
+	} else {
+		b0 = read_ecc[1] ^ calc_ecc[1];
+		b1 = read_ecc[0] ^ calc_ecc[0];
+	}
+
 	b2 = read_ecc[2] ^ calc_ecc[2];
 
 	/* check if there are any bitfaults */
@@ -500,7 +486,10 @@ EXPORT_SYMBOL(__nand_correct_data);
 int nand_correct_data(struct nand_chip *chip, unsigned char *buf,
 		      unsigned char *read_ecc, unsigned char *calc_ecc)
 {
-	return __nand_correct_data(buf, read_ecc, calc_ecc, chip->ecc.size);
+	bool sm_order = chip->ecc.options & NAND_ECC_SOFT_HAMMING_SM_ORDER;
+
+	return __nand_correct_data(buf, read_ecc, calc_ecc, chip->ecc.size,
+				   sm_order);
 }
 EXPORT_SYMBOL(nand_correct_data);
 
diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c
index 3297621241d2..f3b59e649b7d 100644
--- a/drivers/mtd/nand/raw/tmio_nand.c
+++ b/drivers/mtd/nand/raw/tmio_nand.c
@@ -295,10 +295,11 @@ static int tmio_nand_correct_data(struct nand_chip *chip, unsigned char *buf,
 	int r0, r1;
 
 	/* assume ecc.size = 512 and ecc.bytes = 6 */
-	r0 = __nand_correct_data(buf, read_ecc, calc_ecc, 256);
+	r0 = __nand_correct_data(buf, read_ecc, calc_ecc, 256, false);
 	if (r0 < 0)
 		return r0;
-	r1 = __nand_correct_data(buf + 256, read_ecc + 3, calc_ecc + 3, 256);
+	r1 = __nand_correct_data(buf + 256, read_ecc + 3, calc_ecc + 3, 256,
+				 false);
 	if (r1 < 0)
 		return r1;
 	return r0 + r1;
diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c
index 3a99c8e3f944..ddf0420c0997 100644
--- a/drivers/mtd/nand/raw/txx9ndfmc.c
+++ b/drivers/mtd/nand/raw/txx9ndfmc.c
@@ -198,7 +198,8 @@ static int txx9ndfmc_correct_data(struct nand_chip *chip, unsigned char *buf,
 	int stat;
 
 	for (eccsize = chip->ecc.size; eccsize > 0; eccsize -= 256) {
-		stat = __nand_correct_data(buf, read_ecc, calc_ecc, 256);
+		stat = __nand_correct_data(buf, read_ecc, calc_ecc, 256,
+					   false);
 		if (stat < 0)
 			return stat;
 		corrected += stat;
diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c
index f3bd86e13603..89227b1d036a 100644
--- a/drivers/mtd/sm_ftl.c
+++ b/drivers/mtd/sm_ftl.c
@@ -221,14 +221,18 @@ static int sm_correct_sector(uint8_t *buffer, struct sm_oob *oob)
 {
 	uint8_t ecc[3];
 
-	__nand_calculate_ecc(buffer, SM_SMALL_PAGE, ecc);
-	if (__nand_correct_data(buffer, ecc, oob->ecc1, SM_SMALL_PAGE) < 0)
+	__nand_calculate_ecc(buffer, SM_SMALL_PAGE, ecc,
+			     IS_ENABLED(CONFIG_MTD_NAND_ECC_SMC));
+	if (__nand_correct_data(buffer, ecc, oob->ecc1, SM_SMALL_PAGE,
+				IS_ENABLED(CONFIG_MTD_NAND_ECC_SMC)) < 0)
 		return -EIO;
 
 	buffer += SM_SMALL_PAGE;
 
-	__nand_calculate_ecc(buffer, SM_SMALL_PAGE, ecc);
-	if (__nand_correct_data(buffer, ecc, oob->ecc2, SM_SMALL_PAGE) < 0)
+	__nand_calculate_ecc(buffer, SM_SMALL_PAGE, ecc,
+			     IS_ENABLED(CONFIG_MTD_NAND_ECC_SMC));
+	if (__nand_correct_data(buffer, ecc, oob->ecc2, SM_SMALL_PAGE,
+				IS_ENABLED(CONFIG_MTD_NAND_ECC_SMC)) < 0)
 		return -EIO;
 	return 0;
 }
@@ -393,11 +397,13 @@ restart:
 		}
 
 		if (ftl->smallpagenand) {
-			__nand_calculate_ecc(buf + boffset,
-						SM_SMALL_PAGE, oob.ecc1);
+			__nand_calculate_ecc(buf + boffset, SM_SMALL_PAGE,
+					oob.ecc1,
+					IS_ENABLED(CONFIG_MTD_NAND_ECC_SMC));
 
 			__nand_calculate_ecc(buf + boffset + SM_SMALL_PAGE,
-						SM_SMALL_PAGE, oob.ecc2);
+					SM_SMALL_PAGE, oob.ecc2,
+					IS_ENABLED(CONFIG_MTD_NAND_ECC_SMC));
 		}
 		if (!sm_write_sector(ftl, zone, block, boffset,
 							buf + boffset, &oob))
diff --git a/drivers/mtd/tests/mtd_nandecctest.c b/drivers/mtd/tests/mtd_nandecctest.c
index 88b6c81cebbe..c71523e94580 100644
--- a/drivers/mtd/tests/mtd_nandecctest.c
+++ b/drivers/mtd/tests/mtd_nandecctest.c
@@ -121,8 +121,10 @@ static int no_bit_error_verify(void *error_data, void *error_ecc,
 	unsigned char calc_ecc[3];
 	int ret;
 
-	__nand_calculate_ecc(error_data, size, calc_ecc);
-	ret = __nand_correct_data(error_data, error_ecc, calc_ecc, size);
+	__nand_calculate_ecc(error_data, size, calc_ecc,
+			     IS_ENABLED(CONFIG_MTD_NAND_ECC_SMC));
+	ret = __nand_correct_data(error_data, error_ecc, calc_ecc, size,
+				  IS_ENABLED(CONFIG_MTD_NAND_ECC_SMC));
 	if (ret == 0 && !memcmp(correct_data, error_data, size))
 		return 0;
 
@@ -149,8 +151,10 @@ static int single_bit_error_correct(void *error_data, void *error_ecc,
 	unsigned char calc_ecc[3];
 	int ret;
 
-	__nand_calculate_ecc(error_data, size, calc_ecc);
-	ret = __nand_correct_data(error_data, error_ecc, calc_ecc, size);
+	__nand_calculate_ecc(error_data, size, calc_ecc,
+			     IS_ENABLED(CONFIG_MTD_NAND_ECC_SMC));
+	ret = __nand_correct_data(error_data, error_ecc, calc_ecc, size,
+				  IS_ENABLED(CONFIG_MTD_NAND_ECC_SMC));
 	if (ret == 1 && !memcmp(correct_data, error_data, size))
 		return 0;
 
@@ -184,8 +188,10 @@ static int double_bit_error_detect(void *error_data, void *error_ecc,
 	unsigned char calc_ecc[3];
 	int ret;
 
-	__nand_calculate_ecc(error_data, size, calc_ecc);
-	ret = __nand_correct_data(error_data, error_ecc, calc_ecc, size);
+	__nand_calculate_ecc(error_data, size, calc_ecc,
+			     IS_ENABLED(CONFIG_MTD_NAND_ECC_SMC));
+	ret = __nand_correct_data(error_data, error_ecc, calc_ecc, size,
+				  IS_ENABLED(CONFIG_MTD_NAND_ECC_SMC));
 
 	return (ret == -EBADMSG) ? 0 : -EINVAL;
 }
@@ -259,7 +265,8 @@ static int nand_ecc_test_run(const size_t size)
 	}
 
 	prandom_bytes(correct_data, size);
-	__nand_calculate_ecc(correct_data, size, correct_ecc);
+	__nand_calculate_ecc(correct_data, size, correct_ecc,
+			     IS_ENABLED(CONFIG_MTD_NAND_ECC_SMC));
 
 	for (i = 0; i < ARRAY_SIZE(nand_ecc_test); i++) {
 		nand_ecc_test[i].prepare(error_data, error_ecc,
diff --git a/include/linux/mtd/nand_ecc.h b/include/linux/mtd/nand_ecc.h
index b81fecd5e719..0b3bb156c344 100644
--- a/include/linux/mtd/nand_ecc.h
+++ b/include/linux/mtd/nand_ecc.h
@@ -19,7 +19,7 @@ struct nand_chip;
  * Calculate 3 byte ECC code for eccsize byte block
  */
 void __nand_calculate_ecc(const u_char *dat, unsigned int eccsize,
-				u_char *ecc_code);
+			  u_char *ecc_code, bool sm_order);
 
 /*
  * Calculate 3 byte ECC code for 256/512 byte block
@@ -31,7 +31,7 @@ int nand_calculate_ecc(struct nand_chip *chip, const u_char *dat,
  * Detect and correct a 1 bit error for eccsize byte block
  */
 int __nand_correct_data(u_char *dat, u_char *read_ecc, u_char *calc_ecc,
-			unsigned int eccsize);
+			unsigned int eccsize, bool sm_order);
 
 /*
  * Detect and correct a 1 bit error for 256/512 byte block
diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h
index 68d09e01fa56..e10b126e148f 100644
--- a/include/linux/mtd/rawnand.h
+++ b/include/linux/mtd/rawnand.h
@@ -121,6 +121,12 @@ enum nand_ecc_algo {
 #define NAND_ECC_GENERIC_ERASED_CHECK	BIT(0)
 #define NAND_ECC_MAXIMIZE		BIT(1)
 
+/*
+ * When using software implementation of Hamming, we can specify which byte
+ * ordering should be used.
+ */
+#define NAND_ECC_SOFT_HAMMING_SM_ORDER	BIT(2)
+
 /*
  * Option constants for bizarre disfunctionality and real
  * features.
-- 
cgit v1.2.3


From 2130b789b3ef6a518b9c9c6f245642620e2b0c0c Mon Sep 17 00:00:00 2001
From: Julien Thierry <julien.thierry@arm.com>
Date: Tue, 28 Aug 2018 16:51:18 +0100
Subject: irqchip/gic: Unify GIC priority definitions

LPIs use the same priority value as other GIC interrupts.

Make the GIC default priority definition visible to ITS implementation
and use this same definition for LPI priorities.

Tested-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Julien Thierry <julien.thierry@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 drivers/irqchip/irq-gic-v3-its.c       | 2 +-
 include/linux/irqchip/arm-gic-common.h | 6 ++++++
 include/linux/irqchip/arm-gic.h        | 5 -----
 3 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 4251d2d8e6e7..db20e992a40f 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -68,7 +68,7 @@ static u32 lpi_id_bits;
 #define LPI_PROPBASE_SZ		ALIGN(BIT(LPI_NRBITS), SZ_64K)
 #define LPI_PENDBASE_SZ		ALIGN(BIT(LPI_NRBITS) / 8, SZ_64K)
 
-#define LPI_PROP_DEFAULT_PRIO	0xa0
+#define LPI_PROP_DEFAULT_PRIO	GICD_INT_DEF_PRI
 
 /*
  * Collection structure - just an ID, and a redistributor address to
diff --git a/include/linux/irqchip/arm-gic-common.h b/include/linux/irqchip/arm-gic-common.h
index 0a83b4379f34..9a1a479a2bf4 100644
--- a/include/linux/irqchip/arm-gic-common.h
+++ b/include/linux/irqchip/arm-gic-common.h
@@ -13,6 +13,12 @@
 #include <linux/types.h>
 #include <linux/ioport.h>
 
+#define GICD_INT_DEF_PRI		0xa0
+#define GICD_INT_DEF_PRI_X4		((GICD_INT_DEF_PRI << 24) |\
+					(GICD_INT_DEF_PRI << 16) |\
+					(GICD_INT_DEF_PRI << 8) |\
+					GICD_INT_DEF_PRI)
+
 enum gic_type {
 	GIC_V2,
 	GIC_V3,
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 6c4aaf04046c..626179077bb0 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -65,11 +65,6 @@
 #define GICD_INT_EN_CLR_X32		0xffffffff
 #define GICD_INT_EN_SET_SGI		0x0000ffff
 #define GICD_INT_EN_CLR_PPI		0xffff0000
-#define GICD_INT_DEF_PRI		0xa0
-#define GICD_INT_DEF_PRI_X4		((GICD_INT_DEF_PRI << 24) |\
-					(GICD_INT_DEF_PRI << 16) |\
-					(GICD_INT_DEF_PRI << 8) |\
-					GICD_INT_DEF_PRI)
 
 #define GICD_IIDR_IMPLEMENTER_SHIFT	0
 #define GICD_IIDR_IMPLEMENTER_MASK	(0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
-- 
cgit v1.2.3


From 47b00dcf141172c4c1c583701ec91923672cec39 Mon Sep 17 00:00:00 2001
From: Tero Kristo <t-kristo@ti.com>
Date: Fri, 10 Aug 2018 11:29:09 +0300
Subject: clk: ti: clkctrl: support multiple clkctrl nodes under a cm node

Currently, only one clkctrl node can be added under a specific CM node
due to limitation with the implementation. Modify the code to pick-up
clockdomain name from the clkctrl node instead of CM node if provided.
Also, add a new flag to the TI clock driver so that both modes can
be supported simultaneously.

Signed-off-by: Tero Kristo <t-kristo@ti.com>
Tested-by: Tony Lindgren <tony@atomide.com>
---
 drivers/clk/ti/clk.c     |  7 ++++--
 drivers/clk/ti/clkctrl.c | 61 +++++++++++++++++++++++++++++++++++-------------
 drivers/clk/ti/clock.h   |  2 ++
 include/linux/clk/ti.h   |  1 +
 4 files changed, 53 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c
index 27e0979b3158..8b89be18e39e 100644
--- a/drivers/clk/ti/clk.c
+++ b/drivers/clk/ti/clk.c
@@ -34,7 +34,7 @@
 struct ti_clk_ll_ops *ti_clk_ll_ops;
 static struct device_node *clocks_node_ptr[CLK_MAX_MEMMAPS];
 
-static struct ti_clk_features ti_clk_features;
+struct ti_clk_features ti_clk_features;
 
 struct clk_iomap {
 	struct regmap *regmap;
@@ -140,6 +140,9 @@ void __init ti_dt_clocks_register(struct ti_dt_clk oclks[])
 	int ret;
 	static bool clkctrl_nodes_missing;
 	static bool has_clkctrl_data;
+	static bool compat_mode;
+
+	compat_mode = ti_clk_get_features()->flags & TI_CLK_CLKCTRL_COMPAT;
 
 	for (c = oclks; c->node_name != NULL; c++) {
 		strcpy(buf, c->node_name);
@@ -164,7 +167,7 @@ void __init ti_dt_clocks_register(struct ti_dt_clk oclks[])
 			continue;
 
 		node = of_find_node_by_name(NULL, buf);
-		if (num_args) {
+		if (num_args && compat_mode) {
 			parent = node;
 			node = of_get_child_by_name(parent, "clk");
 			of_node_put(parent);
diff --git a/drivers/clk/ti/clkctrl.c b/drivers/clk/ti/clkctrl.c
index 421b05392220..9bff57f0345d 100644
--- a/drivers/clk/ti/clkctrl.c
+++ b/drivers/clk/ti/clkctrl.c
@@ -259,8 +259,13 @@ _ti_clkctrl_clk_register(struct omap_clkctrl_provider *provider,
 	struct omap_clkctrl_clk *clkctrl_clk;
 	int ret = 0;
 
-	init.name = kasprintf(GFP_KERNEL, "%s:%s:%04x:%d", node->parent->name,
-			      node->name, offset, bit);
+	if (ti_clk_get_features()->flags & TI_CLK_CLKCTRL_COMPAT)
+		init.name = kasprintf(GFP_KERNEL, "%s:%s:%04x:%d",
+				      node->parent->name, node->name, offset,
+				      bit);
+	else
+		init.name = kasprintf(GFP_KERNEL, "%s:%04x:%d", node->name,
+				      offset, bit);
 	clkctrl_clk = kzalloc(sizeof(*clkctrl_clk), GFP_KERNEL);
 	if (!init.name || !clkctrl_clk) {
 		ret = -ENOMEM;
@@ -441,6 +446,10 @@ static void __init _ti_omap4_clkctrl_setup(struct device_node *node)
 	u32 addr;
 	int ret;
 
+	if (!(ti_clk_get_features()->flags & TI_CLK_CLKCTRL_COMPAT) &&
+	    !strcmp(node->name, "clk"))
+		ti_clk_features.flags |= TI_CLK_CLKCTRL_COMPAT;
+
 	addrp = of_get_address(node, 0, NULL, NULL);
 	addr = (u32)of_translate_address(node, addrp);
 
@@ -492,19 +501,35 @@ static void __init _ti_omap4_clkctrl_setup(struct device_node *node)
 
 	provider->base = of_iomap(node, 0);
 
-	provider->clkdm_name = kmalloc(strlen(node->parent->name) + 3,
-				       GFP_KERNEL);
-	if (!provider->clkdm_name) {
-		kfree(provider);
-		return;
+	if (ti_clk_get_features()->flags & TI_CLK_CLKCTRL_COMPAT) {
+		provider->clkdm_name = kmalloc(strlen(node->parent->name) + 3,
+					       GFP_KERNEL);
+		if (!provider->clkdm_name) {
+			kfree(provider);
+			return;
+		}
+
+		/*
+		 * Create default clkdm name, replace _cm from end of parent
+		 * node name with _clkdm
+		 */
+		strcpy(provider->clkdm_name, node->parent->name);
+		provider->clkdm_name[strlen(provider->clkdm_name) - 2] = 0;
+	} else {
+		provider->clkdm_name = kmalloc(strlen(node->name), GFP_KERNEL);
+		if (!provider->clkdm_name) {
+			kfree(provider);
+			return;
+		}
+
+		/*
+		 * Create default clkdm name, replace _clkctrl from end of
+		 * node name with _clkdm
+		 */
+		strcpy(provider->clkdm_name, node->name);
+		provider->clkdm_name[strlen(provider->clkdm_name) - 7] = 0;
 	}
 
-	/*
-	 * Create default clkdm name, replace _cm from end of parent node
-	 * name with _clkdm
-	 */
-	strcpy(provider->clkdm_name, node->parent->name);
-	provider->clkdm_name[strlen(provider->clkdm_name) - 2] = 0;
 	strcat(provider->clkdm_name, "clkdm");
 
 	INIT_LIST_HEAD(&provider->clocks);
@@ -539,9 +564,13 @@ static void __init _ti_omap4_clkctrl_setup(struct device_node *node)
 		init.flags = 0;
 		if (reg_data->flags & CLKF_SET_RATE_PARENT)
 			init.flags |= CLK_SET_RATE_PARENT;
-		init.name = kasprintf(GFP_KERNEL, "%s:%s:%04x:%d",
-				      node->parent->name, node->name,
-				      reg_data->offset, 0);
+		if (ti_clk_get_features()->flags & TI_CLK_CLKCTRL_COMPAT)
+			init.name = kasprintf(GFP_KERNEL, "%s:%s:%04x:%d",
+					      node->parent->name, node->name,
+					      reg_data->offset, 0);
+		else
+			init.name = kasprintf(GFP_KERNEL, "%s:%04x:%d",
+					      node->name, reg_data->offset, 0);
 		clkctrl_clk = kzalloc(sizeof(*clkctrl_clk), GFP_KERNEL);
 		if (!init.name || !clkctrl_clk)
 			goto cleanup;
diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h
index b58278077226..ce4aad6c4c7c 100644
--- a/drivers/clk/ti/clock.h
+++ b/drivers/clk/ti/clock.h
@@ -233,6 +233,8 @@ extern const struct clk_ops ti_clk_divider_ops;
 extern const struct clk_ops ti_clk_mux_ops;
 extern const struct clk_ops omap_gate_clk_ops;
 
+extern struct ti_clk_features ti_clk_features;
+
 void omap2_init_clk_clkdm(struct clk_hw *hw);
 int omap2_clkops_enable_clkdm(struct clk_hw *hw);
 void omap2_clkops_disable_clkdm(struct clk_hw *hw);
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index a8faa38b1ed6..3301bd025a2c 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -290,6 +290,7 @@ struct ti_clk_features {
 #define TI_CLK_DPLL4_DENY_REPROGRAM		BIT(1)
 #define TI_CLK_DISABLE_CLKDM_CONTROL		BIT(2)
 #define TI_CLK_ERRATA_I810			BIT(3)
+#define TI_CLK_CLKCTRL_COMPAT			BIT(4)
 
 void ti_clk_setup_features(struct ti_clk_features *features);
 const struct ti_clk_features *ti_clk_get_features(void);
-- 
cgit v1.2.3


From 8b95d1ce3300c411728954473316bd04d0ba9883 Mon Sep 17 00:00:00 2001
From: Russ Dill <Russ.Dill@ti.com>
Date: Tue, 4 Sep 2018 12:19:35 +0530
Subject: clk: Add functions to save/restore clock context en-masse

Deep enough power saving mode can result into losing context of the clock
registers also, and they need to be restored once coming back from the power
saving mode. Hence add functions to save/restore clock context.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Russ Dill <Russ.Dill@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 drivers/clk/clk.c            | 74 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/clk-provider.h |  7 +++++
 include/linux/clk.h          | 25 +++++++++++++++
 3 files changed, 106 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index d31055ae6ec6..8a0254a1c303 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -923,6 +923,80 @@ static int clk_core_enable_lock(struct clk_core *core)
 	return ret;
 }
 
+static int _clk_save_context(struct clk_core *clk)
+{
+	struct clk_core *child;
+	int ret = 0;
+
+	hlist_for_each_entry(child, &clk->children, child_node) {
+		ret = _clk_save_context(child);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (clk->ops && clk->ops->save_context)
+		ret = clk->ops->save_context(clk->hw);
+
+	return ret;
+}
+
+static void _clk_restore_context(struct clk_core *clk)
+{
+	struct clk_core *child;
+
+	if (clk->ops && clk->ops->restore_context)
+		clk->ops->restore_context(clk->hw);
+
+	hlist_for_each_entry(child, &clk->children, child_node)
+		_clk_restore_context(child);
+}
+
+/**
+ * clk_save_context - save clock context for poweroff
+ *
+ * Saves the context of the clock register for powerstates in which the
+ * contents of the registers will be lost. Occurs deep within the suspend
+ * code.  Returns 0 on success.
+ */
+int clk_save_context(void)
+{
+	struct clk_core *clk;
+	int ret;
+
+	hlist_for_each_entry(clk, &clk_root_list, child_node) {
+		ret = _clk_save_context(clk);
+		if (ret < 0)
+			return ret;
+	}
+
+	hlist_for_each_entry(clk, &clk_orphan_list, child_node) {
+		ret = _clk_save_context(clk);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(clk_save_context);
+
+/**
+ * clk_restore_context - restore clock context after poweroff
+ *
+ * Restore the saved clock context upon resume.
+ *
+ */
+void clk_restore_context(void)
+{
+	struct clk_core *clk;
+
+	hlist_for_each_entry(clk, &clk_root_list, child_node)
+		_clk_restore_context(clk);
+
+	hlist_for_each_entry(clk, &clk_orphan_list, child_node)
+		_clk_restore_context(clk);
+}
+EXPORT_SYMBOL_GPL(clk_restore_context);
+
 /**
  * clk_enable - ungate a clock
  * @clk: the clk being ungated
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 08b1aa70a38d..df7379da6269 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -119,6 +119,11 @@ struct clk_duty {
  *		Called with enable_lock held.  This function must not
  *		sleep.
  *
+ * @save_context: Save the context of the clock in prepration for poweroff.
+ *
+ * @restore_context: Restore the context of the clock after a restoration
+ *		of power.
+ *
  * @recalc_rate	Recalculate the rate of this clock, by querying hardware. The
  *		parent rate is an input parameter.  It is up to the caller to
  *		ensure that the prepare_mutex is held across this call.
@@ -223,6 +228,8 @@ struct clk_ops {
 	void		(*disable)(struct clk_hw *hw);
 	int		(*is_enabled)(struct clk_hw *hw);
 	void		(*disable_unused)(struct clk_hw *hw);
+	int		(*save_context)(struct clk_hw *hw);
+	void		(*restore_context)(struct clk_hw *hw);
 	unsigned long	(*recalc_rate)(struct clk_hw *hw,
 					unsigned long parent_rate);
 	long		(*round_rate)(struct clk_hw *hw, unsigned long rate,
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 4f750c481b82..7da754d79f9d 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -629,6 +629,23 @@ struct clk *clk_get_parent(struct clk *clk);
  */
 struct clk *clk_get_sys(const char *dev_id, const char *con_id);
 
+/**
+ * clk_save_context - save clock context for poweroff
+ *
+ * Saves the context of the clock register for powerstates in which the
+ * contents of the registers will be lost. Occurs deep within the suspend
+ * code so locking is not necessary.
+ */
+int clk_save_context(void);
+
+/**
+ * clk_restore_context - restore clock context after poweroff
+ *
+ * This occurs with all clocks enabled. Occurs deep within the resume code
+ * so locking is not necessary.
+ */
+void clk_restore_context(void);
+
 #else /* !CONFIG_HAVE_CLK */
 
 static inline struct clk *clk_get(struct device *dev, const char *id)
@@ -728,6 +745,14 @@ static inline struct clk *clk_get_sys(const char *dev_id, const char *con_id)
 {
 	return NULL;
 }
+
+static inline int clk_save_context(void)
+{
+	return 0;
+}
+
+static inline void clk_restore_context(void) {}
+
 #endif
 
 /* clk_prepare_enable helps cases using clk_enable in non-atomic context. */
-- 
cgit v1.2.3


From 435365485f40cf12747d1daa2253a4f4b46b8148 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Tue, 4 Sep 2018 12:19:36 +0530
Subject: clk: clk: Add clk_gate_restore_context function

The clock gate restore context function enables or disables
the gate clocks based on the enable_count. This is done in cases
where the clock context is lost and based on the enable_count
the clock either needs to be enabled/disabled.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 drivers/clk/clk.c            | 19 +++++++++++++++++++
 include/linux/clk-provider.h |  2 ++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 8a0254a1c303..dd775771a7cc 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -923,6 +923,25 @@ static int clk_core_enable_lock(struct clk_core *core)
 	return ret;
 }
 
+/**
+ * clk_gate_restore_context - restore context for poweroff
+ * @hw: the clk_hw pointer of clock whose state is to be restored
+ *
+ * The clock gate restore context function enables or disables
+ * the gate clocks based on the enable_count. This is done in cases
+ * where the clock context is lost and based on the enable_count
+ * the clock either needs to be enabled/disabled. This
+ * helps restore the state of gate clocks.
+ */
+void clk_gate_restore_context(struct clk_hw *hw)
+{
+	if (hw->clk->core->enable_count)
+		hw->clk->core->ops->enable(hw);
+	else
+		hw->clk->core->ops->disable(hw);
+}
+EXPORT_SYMBOL_GPL(clk_gate_restore_context);
+
 static int _clk_save_context(struct clk_core *clk)
 {
 	struct clk_core *child;
diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index df7379da6269..60c51871b04b 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -1018,5 +1018,7 @@ static inline void clk_writel(u32 val, u32 __iomem *reg)
 
 #endif	/* platform dependent I/O accessors */
 
+void clk_gate_restore_context(struct clk_hw *hw);
+
 #endif /* CONFIG_COMMON_CLK */
 #endif /* CLK_PROVIDER_H */
-- 
cgit v1.2.3


From d6e7bbc148f9fbec8a0117b0d0f420c9710e6d81 Mon Sep 17 00:00:00 2001
From: Russ Dill <Russ.Dill@ti.com>
Date: Tue, 4 Sep 2018 12:19:37 +0530
Subject: clk: ti: Add functions to save/restore clk context

SoCs like AM43XX lose clock registers context during RTC-only
suspend. Hence add functions to save/restore the clock registers
context.

Signed-off-by: Keerthy <j-keerthy@ti.com>
Signed-off-by: Russ Dill <Russ.Dill@ti.com>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
---
 drivers/clk/ti/clock.h    |   2 +
 drivers/clk/ti/divider.c  |  36 ++++++++++++++
 drivers/clk/ti/dpll.c     |   6 +++
 drivers/clk/ti/dpll3xxx.c | 124 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/clk/ti/gate.c     |   3 ++
 drivers/clk/ti/mux.c      |  29 +++++++++++
 include/linux/clk/ti.h    |   6 +++
 7 files changed, 206 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/clk/ti/clock.h b/drivers/clk/ti/clock.h
index 5a781067a0e7..9f312a219510 100644
--- a/drivers/clk/ti/clock.h
+++ b/drivers/clk/ti/clock.h
@@ -24,6 +24,7 @@ struct clk_omap_divider {
 	u8			flags;
 	s8			latch;
 	const struct clk_div_table	*table;
+	u32		context;
 };
 
 #define to_clk_omap_divider(_hw) container_of(_hw, struct clk_omap_divider, hw)
@@ -36,6 +37,7 @@ struct clk_omap_mux {
 	u8			shift;
 	s8			latch;
 	u8			flags;
+	u8			saved_parent;
 };
 
 #define to_clk_omap_mux(_hw) container_of(_hw, struct clk_omap_mux, hw)
diff --git a/drivers/clk/ti/divider.c b/drivers/clk/ti/divider.c
index ccfb4d9a152a..373f620f49cb 100644
--- a/drivers/clk/ti/divider.c
+++ b/drivers/clk/ti/divider.c
@@ -268,10 +268,46 @@ static int ti_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
 	return 0;
 }
 
+/**
+ * clk_divider_save_context - Save the divider value
+ * @hw: pointer  struct clk_hw
+ *
+ * Save the divider value
+ */
+static int clk_divider_save_context(struct clk_hw *hw)
+{
+	struct clk_omap_divider *divider = to_clk_omap_divider(hw);
+	u32 val;
+
+	val = ti_clk_ll_ops->clk_readl(&divider->reg) >> divider->shift;
+	divider->context = val & div_mask(divider);
+
+	return 0;
+}
+
+/**
+ * clk_divider_restore_context - restore the saved the divider value
+ * @hw: pointer  struct clk_hw
+ *
+ * Restore the saved the divider value
+ */
+static void clk_divider_restore_context(struct clk_hw *hw)
+{
+	struct clk_omap_divider *divider = to_clk_omap_divider(hw);
+	u32 val;
+
+	val = ti_clk_ll_ops->clk_readl(&divider->reg);
+	val &= ~(div_mask(divider) << divider->shift);
+	val |= divider->context << divider->shift;
+	ti_clk_ll_ops->clk_writel(val, &divider->reg);
+}
+
 const struct clk_ops ti_clk_divider_ops = {
 	.recalc_rate = ti_clk_divider_recalc_rate,
 	.round_rate = ti_clk_divider_round_rate,
 	.set_rate = ti_clk_divider_set_rate,
+	.save_context = clk_divider_save_context,
+	.restore_context = clk_divider_restore_context,
 };
 
 static struct clk *_register_divider(struct device *dev, const char *name,
diff --git a/drivers/clk/ti/dpll.c b/drivers/clk/ti/dpll.c
index dc86d07d0921..25d86d5ebb36 100644
--- a/drivers/clk/ti/dpll.c
+++ b/drivers/clk/ti/dpll.c
@@ -39,6 +39,8 @@ static const struct clk_ops dpll_m4xen_ck_ops = {
 	.set_rate_and_parent	= &omap3_noncore_dpll_set_rate_and_parent,
 	.determine_rate	= &omap4_dpll_regm4xen_determine_rate,
 	.get_parent	= &omap2_init_dpll_parent,
+	.save_context	= &omap3_core_dpll_save_context,
+	.restore_context = &omap3_core_dpll_restore_context,
 };
 #else
 static const struct clk_ops dpll_m4xen_ck_ops = {};
@@ -62,6 +64,8 @@ static const struct clk_ops dpll_ck_ops = {
 	.set_rate_and_parent	= &omap3_noncore_dpll_set_rate_and_parent,
 	.determine_rate	= &omap3_noncore_dpll_determine_rate,
 	.get_parent	= &omap2_init_dpll_parent,
+	.save_context	= &omap3_noncore_dpll_save_context,
+	.restore_context = &omap3_noncore_dpll_restore_context,
 };
 
 static const struct clk_ops dpll_no_gate_ck_ops = {
@@ -72,6 +76,8 @@ static const struct clk_ops dpll_no_gate_ck_ops = {
 	.set_parent	= &omap3_noncore_dpll_set_parent,
 	.set_rate_and_parent	= &omap3_noncore_dpll_set_rate_and_parent,
 	.determine_rate	= &omap3_noncore_dpll_determine_rate,
+	.save_context	= &omap3_noncore_dpll_save_context,
+	.restore_context = &omap3_noncore_dpll_restore_context
 };
 #else
 static const struct clk_ops dpll_core_ck_ops = {};
diff --git a/drivers/clk/ti/dpll3xxx.c b/drivers/clk/ti/dpll3xxx.c
index 4534de2ef455..44b6b6403753 100644
--- a/drivers/clk/ti/dpll3xxx.c
+++ b/drivers/clk/ti/dpll3xxx.c
@@ -782,6 +782,130 @@ unsigned long omap3_clkoutx2_recalc(struct clk_hw *hw,
 	return rate;
 }
 
+/**
+ * omap3_core_dpll_save_context - Save the m and n values of the divider
+ * @hw: pointer  struct clk_hw
+ *
+ * Before the dpll registers are lost save the last rounded rate m and n
+ * and the enable mask.
+ */
+int omap3_core_dpll_save_context(struct clk_hw *hw)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+	struct dpll_data *dd;
+	u32 v;
+
+	dd = clk->dpll_data;
+
+	v = ti_clk_ll_ops->clk_readl(&dd->control_reg);
+	clk->context = (v & dd->enable_mask) >> __ffs(dd->enable_mask);
+
+	if (clk->context == DPLL_LOCKED) {
+		v = ti_clk_ll_ops->clk_readl(&dd->mult_div1_reg);
+		dd->last_rounded_m = (v & dd->mult_mask) >>
+						__ffs(dd->mult_mask);
+		dd->last_rounded_n = ((v & dd->div1_mask) >>
+						__ffs(dd->div1_mask)) + 1;
+	}
+
+	return 0;
+}
+
+/**
+ * omap3_core_dpll_restore_context - restore the m and n values of the divider
+ * @hw: pointer  struct clk_hw
+ *
+ * Restore the last rounded rate m and n
+ * and the enable mask.
+ */
+void omap3_core_dpll_restore_context(struct clk_hw *hw)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+	const struct dpll_data *dd;
+	u32 v;
+
+	dd = clk->dpll_data;
+
+	if (clk->context == DPLL_LOCKED) {
+		_omap3_dpll_write_clken(clk, 0x4);
+		_omap3_wait_dpll_status(clk, 0);
+
+		v = ti_clk_ll_ops->clk_readl(&dd->mult_div1_reg);
+		v &= ~(dd->mult_mask | dd->div1_mask);
+		v |= dd->last_rounded_m << __ffs(dd->mult_mask);
+		v |= (dd->last_rounded_n - 1) << __ffs(dd->div1_mask);
+		ti_clk_ll_ops->clk_writel(v, &dd->mult_div1_reg);
+
+		_omap3_dpll_write_clken(clk, DPLL_LOCKED);
+		_omap3_wait_dpll_status(clk, 1);
+	} else {
+		_omap3_dpll_write_clken(clk, clk->context);
+	}
+}
+
+/**
+ * omap3_non_core_dpll_save_context - Save the m and n values of the divider
+ * @hw: pointer  struct clk_hw
+ *
+ * Before the dpll registers are lost save the last rounded rate m and n
+ * and the enable mask.
+ */
+int omap3_noncore_dpll_save_context(struct clk_hw *hw)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+	struct dpll_data *dd;
+	u32 v;
+
+	dd = clk->dpll_data;
+
+	v = ti_clk_ll_ops->clk_readl(&dd->control_reg);
+	clk->context = (v & dd->enable_mask) >> __ffs(dd->enable_mask);
+
+	if (clk->context == DPLL_LOCKED) {
+		v = ti_clk_ll_ops->clk_readl(&dd->mult_div1_reg);
+		dd->last_rounded_m = (v & dd->mult_mask) >>
+						__ffs(dd->mult_mask);
+		dd->last_rounded_n = ((v & dd->div1_mask) >>
+						__ffs(dd->div1_mask)) + 1;
+	}
+
+	return 0;
+}
+
+/**
+ * omap3_core_dpll_restore_context - restore the m and n values of the divider
+ * @hw: pointer  struct clk_hw
+ *
+ * Restore the last rounded rate m and n
+ * and the enable mask.
+ */
+void omap3_noncore_dpll_restore_context(struct clk_hw *hw)
+{
+	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
+	const struct dpll_data *dd;
+	u32 ctrl, mult_div1;
+
+	dd = clk->dpll_data;
+
+	ctrl = ti_clk_ll_ops->clk_readl(&dd->control_reg);
+	mult_div1 = ti_clk_ll_ops->clk_readl(&dd->mult_div1_reg);
+
+	if (clk->context == ((ctrl & dd->enable_mask) >>
+			     __ffs(dd->enable_mask)) &&
+	    dd->last_rounded_m == ((mult_div1 & dd->mult_mask) >>
+				   __ffs(dd->mult_mask)) &&
+	    dd->last_rounded_n == ((mult_div1 & dd->div1_mask) >>
+				   __ffs(dd->div1_mask)) + 1) {
+		/* nothing to be done */
+		return;
+	}
+
+	if (clk->context == DPLL_LOCKED)
+		omap3_noncore_dpll_program(clk, 0);
+	else
+		_omap3_dpll_write_clken(clk, clk->context);
+}
+
 /* OMAP3/4 non-CORE DPLL clkops */
 const struct clk_hw_omap_ops clkhwops_omap3_dpll = {
 	.allow_idle	= omap3_dpll_allow_idle,
diff --git a/drivers/clk/ti/gate.c b/drivers/clk/ti/gate.c
index 935b2de5fb88..cf9e9f5fc007 100644
--- a/drivers/clk/ti/gate.c
+++ b/drivers/clk/ti/gate.c
@@ -33,6 +33,7 @@ static const struct clk_ops omap_gate_clkdm_clk_ops = {
 	.init		= &omap2_init_clk_clkdm,
 	.enable		= &omap2_clkops_enable_clkdm,
 	.disable	= &omap2_clkops_disable_clkdm,
+	.restore_context = clk_gate_restore_context,
 };
 
 const struct clk_ops omap_gate_clk_ops = {
@@ -40,6 +41,7 @@ const struct clk_ops omap_gate_clk_ops = {
 	.enable		= &omap2_dflt_clk_enable,
 	.disable	= &omap2_dflt_clk_disable,
 	.is_enabled	= &omap2_dflt_clk_is_enabled,
+	.restore_context = clk_gate_restore_context,
 };
 
 static const struct clk_ops omap_gate_clk_hsdiv_restore_ops = {
@@ -47,6 +49,7 @@ static const struct clk_ops omap_gate_clk_hsdiv_restore_ops = {
 	.enable		= &omap36xx_gate_clk_enable_with_hsdiv_restore,
 	.disable	= &omap2_dflt_clk_disable,
 	.is_enabled	= &omap2_dflt_clk_is_enabled,
+	.restore_context = clk_gate_restore_context,
 };
 
 /**
diff --git a/drivers/clk/ti/mux.c b/drivers/clk/ti/mux.c
index 69a4308a5a98..5749b2b4fa61 100644
--- a/drivers/clk/ti/mux.c
+++ b/drivers/clk/ti/mux.c
@@ -91,10 +91,39 @@ static int ti_clk_mux_set_parent(struct clk_hw *hw, u8 index)
 	return 0;
 }
 
+/**
+ * clk_mux_save_context - Save the parent selcted in the mux
+ * @hw: pointer  struct clk_hw
+ *
+ * Save the parent mux value.
+ */
+static int clk_mux_save_context(struct clk_hw *hw)
+{
+	struct clk_omap_mux *mux = to_clk_omap_mux(hw);
+
+	mux->saved_parent = ti_clk_mux_get_parent(hw);
+	return 0;
+}
+
+/**
+ * clk_mux_restore_context - Restore the parent in the mux
+ * @hw: pointer  struct clk_hw
+ *
+ * Restore the saved parent mux value.
+ */
+static void clk_mux_restore_context(struct clk_hw *hw)
+{
+	struct clk_omap_mux *mux = to_clk_omap_mux(hw);
+
+	ti_clk_mux_set_parent(hw, mux->saved_parent);
+}
+
 const struct clk_ops ti_clk_mux_ops = {
 	.get_parent = ti_clk_mux_get_parent,
 	.set_parent = ti_clk_mux_set_parent,
 	.determine_rate = __clk_mux_determine_rate,
+	.save_context = clk_mux_save_context,
+	.restore_context = clk_mux_restore_context,
 };
 
 static struct clk *_register_mux(struct device *dev, const char *name,
diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 3301bd025a2c..eacc5df57b99 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h
@@ -159,6 +159,7 @@ struct clk_hw_omap {
 	const char		*clkdm_name;
 	struct clockdomain	*clkdm;
 	const struct clk_hw_omap_ops	*ops;
+	u32			context;
 };
 
 /*
@@ -294,6 +295,11 @@ struct ti_clk_features {
 
 void ti_clk_setup_features(struct ti_clk_features *features);
 const struct ti_clk_features *ti_clk_get_features(void);
+int omap3_noncore_dpll_save_context(struct clk_hw *hw);
+void omap3_noncore_dpll_restore_context(struct clk_hw *hw);
+
+int omap3_core_dpll_save_context(struct clk_hw *hw);
+void omap3_core_dpll_restore_context(struct clk_hw *hw);
 
 extern const struct clk_hw_omap_ops clkhwops_omap2xxx_dpll;
 
-- 
cgit v1.2.3


From 9414229c9c53d3604032aa80f3d2e9ba5770cd4a Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Mon, 24 Sep 2018 06:08:07 +0200
Subject: clocksource: Remove obsolete CLOCKSOURCE_OF_DECLARE

The macro CLOCKSOURCE_OF_DECLARE was renamed more TIMER_OF_DECLARE, and we
kept an alias CLOCKSOURCE_OF_DECLARE in order to smooth the transition for
drivers.

This change was done 1.5 year ago, we can reasonably remove this backward
compatible macro as it is no longer used anywhere.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 include/linux/clocksource.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 308918928767..4c58c1e64080 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -257,9 +257,6 @@ extern int clocksource_i8253_init(void);
 #define TIMER_OF_DECLARE(name, compat, fn) \
 	OF_DECLARE_1_RET(timer, name, compat, fn)
 
-#define CLOCKSOURCE_OF_DECLARE(name, compat, fn) \
-	TIMER_OF_DECLARE(name, compat, fn)
-
 #ifdef CONFIG_TIMER_PROBE
 extern void timer_probe(void);
 #else
-- 
cgit v1.2.3


From 16fc087b9cb22c9a97307cc24a5413d0df68fe11 Mon Sep 17 00:00:00 2001
From: Yashaswini Raghuram Prathivadi Bhayankaram
 <yashaswini.raghuram.prathivadi.bhayankaram@intel.com>
Date: Fri, 10 Aug 2018 00:17:44 -0700
Subject: virtchnl: Added support to exchange additional speed values

Introduced a new virtchnl capability flag and a struct to support exchange
of additional supported speeds.

Signed-off-by: Yashaswini Raghuram Prathivadi Bhayankaram <yashaswini.raghuram.prathivadi.bhayankaram@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/avf/virtchnl.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index b41f7bc958ef..2c9756bd9c4c 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -252,6 +252,8 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
 #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM	0X00400000
 #define VIRTCHNL_VF_OFFLOAD_ADQ			0X00800000
 
+/* Define below the capability flags that are not offloads */
+#define VIRTCHNL_VF_CAP_ADV_LINK_SPEED		0x00000080
 #define VF_BASE_MODE_OFFLOADS (VIRTCHNL_VF_OFFLOAD_L2 | \
 			       VIRTCHNL_VF_OFFLOAD_VLAN | \
 			       VIRTCHNL_VF_OFFLOAD_RSS_PF)
@@ -596,10 +598,23 @@ enum virtchnl_event_codes {
 struct virtchnl_pf_event {
 	enum virtchnl_event_codes event;
 	union {
+		/* If the PF driver does not support the new speed reporting
+		 * capabilities then use link_event else use link_event_adv to
+		 * get the speed and link information. The ability to understand
+		 * new speeds is indicated by setting the capability flag
+		 * VIRTCHNL_VF_CAP_ADV_LINK_SPEED in vf_cap_flags parameter
+		 * in virtchnl_vf_resource struct and can be used to determine
+		 * which link event struct to use below.
+		 */
 		struct {
 			enum virtchnl_link_speed link_speed;
 			bool link_status;
 		} link_event;
+		struct {
+			/* link_speed provided in Mbps */
+			u32 link_speed;
+			u8 link_status;
+		} link_event_adv;
 	} event_data;
 
 	int severity;
-- 
cgit v1.2.3


From 4cd2e0e70af6897ca2247fa1ffb1553ca16b4903 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 18 Apr 2018 17:30:19 -0500
Subject: signal: Introduce copy_siginfo_from_user and use it's return value

In preparation for using a smaller version of siginfo in the kernel
introduce copy_siginfo_from_user and use it when siginfo is copied from
userspace.

Make the pattern for using copy_siginfo_from_user and
copy_siginfo_from_user32 to capture the return value and return that
value on error.

This is a necessary prerequisite for using a smaller siginfo
in the kernel than the kernel exports to userspace.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/signal.h |  1 +
 kernel/ptrace.c        | 12 +++++-------
 kernel/signal.c        | 25 ++++++++++++++++---------
 3 files changed, 22 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 3d4cd5db30a9..de94c159bfb0 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -22,6 +22,7 @@ static inline void clear_siginfo(struct siginfo *info)
 }
 
 int copy_siginfo_to_user(struct siginfo __user *to, const struct siginfo *from);
+int copy_siginfo_from_user(struct siginfo *to, const struct siginfo __user *from);
 
 enum siginfo_layout {
 	SIL_KILL,
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 45f77a1b9c97..a807ff5cc1a9 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -919,9 +919,8 @@ int ptrace_request(struct task_struct *child, long request,
 		break;
 
 	case PTRACE_SETSIGINFO:
-		if (copy_from_user(&siginfo, datavp, sizeof siginfo))
-			ret = -EFAULT;
-		else
+		ret = copy_siginfo_from_user(&siginfo, datavp);
+		if (!ret)
 			ret = ptrace_setsiginfo(child, &siginfo);
 		break;
 
@@ -1215,10 +1214,9 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 		break;
 
 	case PTRACE_SETSIGINFO:
-		if (copy_siginfo_from_user32(
-			    &siginfo, (struct compat_siginfo __user *) datap))
-			ret = -EFAULT;
-		else
+		ret = copy_siginfo_from_user32(
+			&siginfo, (struct compat_siginfo __user *) datap);
+		if (!ret)
 			ret = ptrace_setsiginfo(child, &siginfo);
 		break;
 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
diff --git a/kernel/signal.c b/kernel/signal.c
index debb485a76db..c0e289e62d77 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2896,6 +2896,13 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
 	return 0;
 }
 
+int copy_siginfo_from_user(siginfo_t *to, const siginfo_t __user *from)
+{
+	if (copy_from_user(to, from, sizeof(struct siginfo)))
+		return -EFAULT;
+	return 0;
+}
+
 #ifdef CONFIG_COMPAT
 int copy_siginfo_to_user32(struct compat_siginfo __user *to,
 			   const struct siginfo *from)
@@ -3323,8 +3330,9 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
 		siginfo_t __user *, uinfo)
 {
 	siginfo_t info;
-	if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
-		return -EFAULT;
+	int ret = copy_siginfo_from_user(&info, uinfo);
+	if (unlikely(ret))
+		return ret;
 	return do_rt_sigqueueinfo(pid, sig, &info);
 }
 
@@ -3365,10 +3373,9 @@ SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig,
 		siginfo_t __user *, uinfo)
 {
 	siginfo_t info;
-
-	if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
-		return -EFAULT;
-
+	int ret = copy_siginfo_from_user(&info, uinfo);
+	if (unlikely(ret))
+		return ret;
 	return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
 }
 
@@ -3380,9 +3387,9 @@ COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo,
 			struct compat_siginfo __user *, uinfo)
 {
 	siginfo_t info;
-
-	if (copy_siginfo_from_user32(&info, uinfo))
-		return -EFAULT;
+	int ret = copy_siginfo_from_user32(&info, uinfo);
+	if (unlikely(ret))
+		return ret;
 	return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
 }
 #endif
-- 
cgit v1.2.3


From ae7795bc6187a15ec51cf258abae656a625f9980 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 25 Sep 2018 11:27:20 +0200
Subject: signal: Distinguish between kernel_siginfo and siginfo

Linus recently observed that if we did not worry about the padding
member in struct siginfo it is only about 48 bytes, and 48 bytes is
much nicer than 128 bytes for allocating on the stack and copying
around in the kernel.

The obvious thing of only adding the padding when userspace is
including siginfo.h won't work as there are sigframe definitions in
the kernel that embed struct siginfo.

So split siginfo in two; kernel_siginfo and siginfo.  Keeping the
traditional name for the userspace definition.  While the version that
is used internally to the kernel and ultimately will not be padded to
128 bytes is called kernel_siginfo.

The definition of struct kernel_siginfo I have put in include/signal_types.h

A set of buildtime checks has been added to verify the two structures have
the same field offsets.

To make it easy to verify the change kernel_siginfo retains the same
size as siginfo.  The reduction in size comes in a following change.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 arch/x86/include/asm/compat.h |   2 +-
 drivers/usb/core/devio.c      |   4 +-
 fs/binfmt_elf.c               |   6 +-
 fs/coredump.c                 |   2 +-
 fs/fcntl.c                    |   2 +-
 fs/signalfd.c                 |   6 +-
 include/linux/binfmts.h       |   2 +-
 include/linux/compat.h        |   4 +-
 include/linux/coredump.h      |   4 +-
 include/linux/lsm_hooks.h     |   4 +-
 include/linux/posix-timers.h  |   2 +-
 include/linux/ptrace.h        |   2 +-
 include/linux/sched.h         |   2 +-
 include/linux/sched/signal.h  |  18 ++---
 include/linux/security.h      |   6 +-
 include/linux/signal.h        |  15 +++--
 include/linux/signal_types.h  |  11 ++-
 include/trace/events/signal.h |   4 +-
 ipc/mqueue.c                  |   2 +-
 kernel/ptrace.c               |  10 +--
 kernel/seccomp.c              |   6 +-
 kernel/signal.c               | 151 +++++++++++++++++++++++++++---------------
 kernel/time/posix-timers.c    |   2 +-
 security/apparmor/lsm.c       |   2 +-
 security/security.c           |   2 +-
 security/selinux/hooks.c      |   2 +-
 security/smack/smack_lsm.c    |   2 +-
 27 files changed, 165 insertions(+), 110 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index fb97cf7c4137..a0f46bdd9f24 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -240,6 +240,6 @@ static inline bool in_compat_syscall(void)
 
 struct compat_siginfo;
 int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
-		const siginfo_t *from, bool x32_ABI);
+		const kernel_siginfo_t *from, bool x32_ABI);
 
 #endif /* _ASM_X86_COMPAT_H */
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 6ce77b33da61..c260ea8808b0 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -582,7 +582,7 @@ static void async_completed(struct urb *urb)
 {
 	struct async *as = urb->context;
 	struct usb_dev_state *ps = as->ps;
-	struct siginfo sinfo;
+	struct kernel_siginfo sinfo;
 	struct pid *pid = NULL;
 	const struct cred *cred = NULL;
 	unsigned long flags;
@@ -2599,7 +2599,7 @@ const struct file_operations usbdev_file_operations = {
 static void usbdev_remove(struct usb_device *udev)
 {
 	struct usb_dev_state *ps;
-	struct siginfo sinfo;
+	struct kernel_siginfo sinfo;
 
 	while (!list_empty(&udev->filelist)) {
 		ps = list_entry(udev->filelist.next, struct usb_dev_state, list);
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index efae2fb0930a..54207327f98f 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1580,7 +1580,7 @@ static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
 }
 
 static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
-		const siginfo_t *siginfo)
+		const kernel_siginfo_t *siginfo)
 {
 	mm_segment_t old_fs = get_fs();
 	set_fs(KERNEL_DS);
@@ -1782,7 +1782,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
 
 static int fill_note_info(struct elfhdr *elf, int phdrs,
 			  struct elf_note_info *info,
-			  const siginfo_t *siginfo, struct pt_regs *regs)
+			  const kernel_siginfo_t *siginfo, struct pt_regs *regs)
 {
 	struct task_struct *dump_task = current;
 	const struct user_regset_view *view = task_user_regset_view(dump_task);
@@ -2031,7 +2031,7 @@ static int elf_note_info_init(struct elf_note_info *info)
 
 static int fill_note_info(struct elfhdr *elf, int phdrs,
 			  struct elf_note_info *info,
-			  const siginfo_t *siginfo, struct pt_regs *regs)
+			  const kernel_siginfo_t *siginfo, struct pt_regs *regs)
 {
 	struct list_head *t;
 	struct core_thread *ct;
diff --git a/fs/coredump.c b/fs/coredump.c
index 1e2c87acac9b..e42e17e55bfd 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -536,7 +536,7 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
 	return err;
 }
 
-void do_coredump(const siginfo_t *siginfo)
+void do_coredump(const kernel_siginfo_t *siginfo)
 {
 	struct core_state core_state;
 	struct core_name cn;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 4137d96534a6..083185174c6d 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -735,7 +735,7 @@ static void send_sigio_to_task(struct task_struct *p,
 		return;
 
 	switch (signum) {
-		siginfo_t si;
+		kernel_siginfo_t si;
 		default:
 			/* Queue a rt signal with the appropriate fd as its
 			   value.  We use SI_SIGIO as the source, not 
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 4fcd1498acf5..757afc7c5895 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -79,7 +79,7 @@ static __poll_t signalfd_poll(struct file *file, poll_table *wait)
  * Copied from copy_siginfo_to_user() in kernel/signal.c
  */
 static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
-			     siginfo_t const *kinfo)
+			     kernel_siginfo_t const *kinfo)
 {
 	struct signalfd_siginfo new;
 
@@ -163,7 +163,7 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
 	return sizeof(*uinfo);
 }
 
-static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info,
+static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info,
 				int nonblock)
 {
 	ssize_t ret;
@@ -215,7 +215,7 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
 	struct signalfd_siginfo __user *siginfo;
 	int nonblock = file->f_flags & O_NONBLOCK;
 	ssize_t ret, total = 0;
-	siginfo_t info;
+	kernel_siginfo_t info;
 
 	count /= sizeof(struct signalfd_siginfo);
 	if (!count)
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index c05f24fac4f6..e9f5fe69df31 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -78,7 +78,7 @@ struct linux_binprm {
 
 /* Function parameter for binfmt->coredump */
 struct coredump_params {
-	const siginfo_t *siginfo;
+	const kernel_siginfo_t *siginfo;
 	struct pt_regs *regs;
 	struct file *file;
 	unsigned long limit;
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 1a3c4f37e908..4565d65b1776 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -452,8 +452,8 @@ long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask,
 		       unsigned long bitmap_size);
 long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask,
 		       unsigned long bitmap_size);
-int copy_siginfo_from_user32(siginfo_t *to, const struct compat_siginfo __user *from);
-int copy_siginfo_to_user32(struct compat_siginfo __user *to, const siginfo_t *from);
+int copy_siginfo_from_user32(kernel_siginfo_t *to, const struct compat_siginfo __user *from);
+int copy_siginfo_to_user32(struct compat_siginfo __user *to, const kernel_siginfo_t *from);
 int get_compat_sigevent(struct sigevent *event,
 		const struct compat_sigevent __user *u_event);
 
diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index 207aed96a5b7..abf4b4e65dbb 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -17,9 +17,9 @@ extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
 extern int dump_align(struct coredump_params *cprm, int align);
 extern void dump_truncate(struct coredump_params *cprm);
 #ifdef CONFIG_COREDUMP
-extern void do_coredump(const siginfo_t *siginfo);
+extern void do_coredump(const kernel_siginfo_t *siginfo);
 #else
-static inline void do_coredump(const siginfo_t *siginfo) {}
+static inline void do_coredump(const kernel_siginfo_t *siginfo) {}
 #endif
 
 #endif /* _LINUX_COREDUMP_H */
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index 97a020c616ad..bb40f6d34163 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -672,7 +672,7 @@
  *	Return 0 if permission is granted.
  * @task_kill:
  *	Check permission before sending signal @sig to @p.  @info can be NULL,
- *	the constant 1, or a pointer to a siginfo structure.  If @info is 1 or
+ *	the constant 1, or a pointer to a kernel_siginfo structure.  If @info is 1 or
  *	SI_FROMKERNEL(info) is true, then the signal should be viewed as coming
  *	from the kernel and should typically be permitted.
  *	SIGIO signals are handled separately by the send_sigiotask hook in
@@ -1606,7 +1606,7 @@ union security_list_options {
 	int (*task_setscheduler)(struct task_struct *p);
 	int (*task_getscheduler)(struct task_struct *p);
 	int (*task_movememory)(struct task_struct *p);
-	int (*task_kill)(struct task_struct *p, struct siginfo *info,
+	int (*task_kill)(struct task_struct *p, struct kernel_siginfo *info,
 				int sig, const struct cred *cred);
 	int (*task_prctl)(int option, unsigned long arg2, unsigned long arg3,
 				unsigned long arg4, unsigned long arg5);
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index ee7e987ea1b4..e96581ca7c9d 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -126,5 +126,5 @@ void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
 
 void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);
 
-void posixtimer_rearm(struct siginfo *info);
+void posixtimer_rearm(struct kernel_siginfo *info);
 #endif
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 1de2235511c8..d19a795100da 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -341,7 +341,7 @@ extern void user_single_step_report(struct pt_regs *regs);
 #else
 static inline void user_single_step_report(struct pt_regs *regs)
 {
-	siginfo_t info;
+	kernel_siginfo_t info;
 	clear_siginfo(&info);
 	info.si_signo = SIGTRAP;
 	info.si_errno = 0;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 977cb57d7bc9..2ba88082e1ef 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -960,7 +960,7 @@ struct task_struct {
 
 	/* Ptrace state: */
 	unsigned long			ptrace_message;
-	siginfo_t			*last_siginfo;
+	kernel_siginfo_t		*last_siginfo;
 
 	struct task_io_accounting	ioac;
 #ifdef CONFIG_TASK_XACCT
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 9e07f3521549..13789d10a50e 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -270,12 +270,12 @@ static inline int signal_group_exit(const struct signal_struct *sig)
 extern void flush_signals(struct task_struct *);
 extern void ignore_signals(struct task_struct *);
 extern void flush_signal_handlers(struct task_struct *, int force_default);
-extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
+extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, kernel_siginfo_t *info);
 
 static inline int kernel_dequeue_signal(void)
 {
 	struct task_struct *tsk = current;
-	siginfo_t __info;
+	kernel_siginfo_t __info;
 	int ret;
 
 	spin_lock_irq(&tsk->sighand->siglock);
@@ -322,12 +322,12 @@ int force_sig_pkuerr(void __user *addr, u32 pkey);
 
 int force_sig_ptrace_errno_trap(int errno, void __user *addr);
 
-extern int send_sig_info(int, struct siginfo *, struct task_struct *);
+extern int send_sig_info(int, struct kernel_siginfo *, struct task_struct *);
 extern void force_sigsegv(int sig, struct task_struct *p);
-extern int force_sig_info(int, struct siginfo *, struct task_struct *);
-extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
-extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
-extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *,
+extern int force_sig_info(int, struct kernel_siginfo *, struct task_struct *);
+extern int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp);
+extern int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid);
+extern int kill_pid_info_as_cred(int, struct kernel_siginfo *, struct pid *,
 				const struct cred *);
 extern int kill_pgrp(struct pid *pid, int sig, int priv);
 extern int kill_pid(struct pid *pid, int sig, int priv);
@@ -475,8 +475,8 @@ static inline int kill_cad_pid(int sig, int priv)
 }
 
 /* These can be the second arg to send_sig_info/send_group_sig_info.  */
-#define SEND_SIG_NOINFO ((struct siginfo *) 0)
-#define SEND_SIG_PRIV	((struct siginfo *) 1)
+#define SEND_SIG_NOINFO ((struct kernel_siginfo *) 0)
+#define SEND_SIG_PRIV	((struct kernel_siginfo *) 1)
 
 /*
  * True if we are on the alternate signal stack.
diff --git a/include/linux/security.h b/include/linux/security.h
index 75f4156c84d7..d170a5b031f3 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -35,7 +35,7 @@
 struct linux_binprm;
 struct cred;
 struct rlimit;
-struct siginfo;
+struct kernel_siginfo;
 struct sembuf;
 struct kern_ipc_perm;
 struct audit_context;
@@ -361,7 +361,7 @@ int security_task_setrlimit(struct task_struct *p, unsigned int resource,
 int security_task_setscheduler(struct task_struct *p);
 int security_task_getscheduler(struct task_struct *p);
 int security_task_movememory(struct task_struct *p);
-int security_task_kill(struct task_struct *p, struct siginfo *info,
+int security_task_kill(struct task_struct *p, struct kernel_siginfo *info,
 			int sig, const struct cred *cred);
 int security_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 			unsigned long arg4, unsigned long arg5);
@@ -1020,7 +1020,7 @@ static inline int security_task_movememory(struct task_struct *p)
 }
 
 static inline int security_task_kill(struct task_struct *p,
-				     struct siginfo *info, int sig,
+				     struct kernel_siginfo *info, int sig,
 				     const struct cred *cred)
 {
 	return 0;
diff --git a/include/linux/signal.h b/include/linux/signal.h
index de94c159bfb0..70031b10b918 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -11,18 +11,19 @@ struct task_struct;
 /* for sysctl */
 extern int print_fatal_signals;
 
-static inline void copy_siginfo(struct siginfo *to, const struct siginfo *from)
+static inline void copy_siginfo(kernel_siginfo_t *to,
+				const kernel_siginfo_t *from)
 {
 	memcpy(to, from, sizeof(*to));
 }
 
-static inline void clear_siginfo(struct siginfo *info)
+static inline void clear_siginfo(kernel_siginfo_t *info)
 {
 	memset(info, 0, sizeof(*info));
 }
 
-int copy_siginfo_to_user(struct siginfo __user *to, const struct siginfo *from);
-int copy_siginfo_from_user(struct siginfo *to, const struct siginfo __user *from);
+int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from);
+int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from);
 
 enum siginfo_layout {
 	SIL_KILL,
@@ -258,11 +259,11 @@ struct pt_regs;
 enum pid_type;
 
 extern int next_signal(struct sigpending *pending, sigset_t *mask);
-extern int do_send_sig_info(int sig, struct siginfo *info,
+extern int do_send_sig_info(int sig, struct kernel_siginfo *info,
 				struct task_struct *p, enum pid_type type);
-extern int group_send_sig_info(int sig, struct siginfo *info,
+extern int group_send_sig_info(int sig, struct kernel_siginfo *info,
 			       struct task_struct *p, enum pid_type type);
-extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
+extern int __group_send_sig_info(int, struct kernel_siginfo *, struct task_struct *);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
 extern void set_current_blocked(sigset_t *);
 extern void __set_current_blocked(const sigset_t *);
diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h
index 222ae696000b..2a40a9c5e4ad 100644
--- a/include/linux/signal_types.h
+++ b/include/linux/signal_types.h
@@ -9,6 +9,13 @@
 #include <linux/list.h>
 #include <uapi/linux/signal.h>
 
+typedef struct kernel_siginfo {
+	union {
+		__SIGINFO;
+		int _si_pad[SI_MAX_SIZE/sizeof(int)];
+	};
+} kernel_siginfo_t;
+
 /*
  * Real Time signals may be queued.
  */
@@ -16,7 +23,7 @@
 struct sigqueue {
 	struct list_head list;
 	int flags;
-	siginfo_t info;
+	kernel_siginfo_t info;
 	struct user_struct *user;
 };
 
@@ -60,7 +67,7 @@ struct old_sigaction {
 
 struct ksignal {
 	struct k_sigaction ka;
-	siginfo_t info;
+	kernel_siginfo_t info;
 	int sig;
 };
 
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
index 3deeed50ffd0..1db7e4b07c01 100644
--- a/include/trace/events/signal.h
+++ b/include/trace/events/signal.h
@@ -49,7 +49,7 @@ enum {
  */
 TRACE_EVENT(signal_generate,
 
-	TP_PROTO(int sig, struct siginfo *info, struct task_struct *task,
+	TP_PROTO(int sig, struct kernel_siginfo *info, struct task_struct *task,
 			int group, int result),
 
 	TP_ARGS(sig, info, task, group, result),
@@ -95,7 +95,7 @@ TRACE_EVENT(signal_generate,
  */
 TRACE_EVENT(signal_deliver,
 
-	TP_PROTO(int sig, struct siginfo *info, struct k_sigaction *ka),
+	TP_PROTO(int sig, struct kernel_siginfo *info, struct k_sigaction *ka),
 
 	TP_ARGS(sig, info, ka),
 
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index c0d58f390c3b..cc41de3b8deb 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -655,7 +655,7 @@ static void __do_notify(struct mqueue_inode_info *info)
 	 * synchronously. */
 	if (info->notify_owner &&
 	    info->attr.mq_curmsgs == 1) {
-		struct siginfo sig_i;
+		struct kernel_siginfo sig_i;
 		switch (info->notify.sigev_notify) {
 		case SIGEV_NONE:
 			break;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index a807ff5cc1a9..c2cee9db5204 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -651,7 +651,7 @@ static int ptrace_setoptions(struct task_struct *child, unsigned long data)
 	return 0;
 }
 
-static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info)
+static int ptrace_getsiginfo(struct task_struct *child, kernel_siginfo_t *info)
 {
 	unsigned long flags;
 	int error = -ESRCH;
@@ -667,7 +667,7 @@ static int ptrace_getsiginfo(struct task_struct *child, siginfo_t *info)
 	return error;
 }
 
-static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info)
+static int ptrace_setsiginfo(struct task_struct *child, const kernel_siginfo_t *info)
 {
 	unsigned long flags;
 	int error = -ESRCH;
@@ -709,7 +709,7 @@ static int ptrace_peek_siginfo(struct task_struct *child,
 		pending = &child->pending;
 
 	for (i = 0; i < arg.nr; ) {
-		siginfo_t info;
+		kernel_siginfo_t info;
 		s32 off = arg.off + i;
 
 		spin_lock_irq(&child->sighand->siglock);
@@ -885,7 +885,7 @@ int ptrace_request(struct task_struct *child, long request,
 {
 	bool seized = child->ptrace & PT_SEIZED;
 	int ret = -EIO;
-	siginfo_t siginfo, *si;
+	kernel_siginfo_t siginfo, *si;
 	void __user *datavp = (void __user *) data;
 	unsigned long __user *datalp = datavp;
 	unsigned long flags;
@@ -1180,7 +1180,7 @@ int compat_ptrace_request(struct task_struct *child, compat_long_t request,
 {
 	compat_ulong_t __user *datap = compat_ptr(data);
 	compat_ulong_t word;
-	siginfo_t siginfo;
+	kernel_siginfo_t siginfo;
 	int ret;
 
 	switch (request) {
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index fd023ac24e10..4d7809cdd27d 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -522,7 +522,7 @@ void put_seccomp_filter(struct task_struct *tsk)
 	__put_seccomp_filter(tsk->seccomp.filter);
 }
 
-static void seccomp_init_siginfo(siginfo_t *info, int syscall, int reason)
+static void seccomp_init_siginfo(kernel_siginfo_t *info, int syscall, int reason)
 {
 	clear_siginfo(info);
 	info->si_signo = SIGSYS;
@@ -542,7 +542,7 @@ static void seccomp_init_siginfo(siginfo_t *info, int syscall, int reason)
  */
 static void seccomp_send_sigsys(int syscall, int reason)
 {
-	struct siginfo info;
+	struct kernel_siginfo info;
 	seccomp_init_siginfo(&info, syscall, reason);
 	force_sig_info(SIGSYS, &info, current);
 }
@@ -747,7 +747,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 		/* Dump core only if this is the last remaining thread. */
 		if (action == SECCOMP_RET_KILL_PROCESS ||
 		    get_nr_threads(current) == 1) {
-			siginfo_t info;
+			kernel_siginfo_t info;
 
 			/* Show the original registers in the dump. */
 			syscall_rollback(current, task_pt_regs(current));
diff --git a/kernel/signal.c b/kernel/signal.c
index c0e289e62d77..161cad4e448c 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -549,7 +549,7 @@ bool unhandled_signal(struct task_struct *tsk, int sig)
 	return !tsk->ptrace;
 }
 
-static void collect_signal(int sig, struct sigpending *list, siginfo_t *info,
+static void collect_signal(int sig, struct sigpending *list, kernel_siginfo_t *info,
 			   bool *resched_timer)
 {
 	struct sigqueue *q, *first = NULL;
@@ -595,7 +595,7 @@ still_pending:
 }
 
 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
-			siginfo_t *info, bool *resched_timer)
+			kernel_siginfo_t *info, bool *resched_timer)
 {
 	int sig = next_signal(pending, mask);
 
@@ -610,7 +610,7 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
  *
  * All callers have to hold the siglock.
  */
-int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
+int dequeue_signal(struct task_struct *tsk, sigset_t *mask, kernel_siginfo_t *info)
 {
 	bool resched_timer = false;
 	int signr;
@@ -737,12 +737,12 @@ static void flush_sigqueue_mask(sigset_t *mask, struct sigpending *s)
 	}
 }
 
-static inline int is_si_special(const struct siginfo *info)
+static inline int is_si_special(const struct kernel_siginfo *info)
 {
 	return info <= SEND_SIG_PRIV;
 }
 
-static inline bool si_fromuser(const struct siginfo *info)
+static inline bool si_fromuser(const struct kernel_siginfo *info)
 {
 	return info == SEND_SIG_NOINFO ||
 		(!is_si_special(info) && SI_FROMUSER(info));
@@ -767,7 +767,7 @@ static bool kill_ok_by_cred(struct task_struct *t)
  * Bad permissions for sending the signal
  * - the caller must hold the RCU read lock
  */
-static int check_kill_permission(int sig, struct siginfo *info,
+static int check_kill_permission(int sig, struct kernel_siginfo *info,
 				 struct task_struct *t)
 {
 	struct pid *sid;
@@ -1010,7 +1010,7 @@ static inline bool legacy_queue(struct sigpending *signals, int sig)
 }
 
 #ifdef CONFIG_USER_NS
-static inline void userns_fixup_signal_uid(struct siginfo *info, struct task_struct *t)
+static inline void userns_fixup_signal_uid(struct kernel_siginfo *info, struct task_struct *t)
 {
 	if (current_user_ns() == task_cred_xxx(t, user_ns))
 		return;
@@ -1024,13 +1024,13 @@ static inline void userns_fixup_signal_uid(struct siginfo *info, struct task_str
 	rcu_read_unlock();
 }
 #else
-static inline void userns_fixup_signal_uid(struct siginfo *info, struct task_struct *t)
+static inline void userns_fixup_signal_uid(struct kernel_siginfo *info, struct task_struct *t)
 {
 	return;
 }
 #endif
 
-static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
+static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t,
 			enum pid_type type, int from_ancestor_ns)
 {
 	struct sigpending *pending;
@@ -1150,7 +1150,7 @@ ret:
 	return ret;
 }
 
-static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
+static int send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t,
 			enum pid_type type)
 {
 	int from_ancestor_ns = 0;
@@ -1197,12 +1197,12 @@ static int __init setup_print_fatal_signals(char *str)
 __setup("print-fatal-signals=", setup_print_fatal_signals);
 
 int
-__group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
+__group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p)
 {
 	return send_signal(sig, info, p, PIDTYPE_TGID);
 }
 
-int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
+int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p,
 			enum pid_type type)
 {
 	unsigned long flags;
@@ -1228,7 +1228,7 @@ int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
  * that is why we also clear SIGNAL_UNKILLABLE.
  */
 int
-force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
+force_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *t)
 {
 	unsigned long int flags;
 	int ret, blocked, ignored;
@@ -1316,8 +1316,8 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk,
 /*
  * send signal info to all the members of a group
  */
-int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
-			enum pid_type type)
+int group_send_sig_info(int sig, struct kernel_siginfo *info,
+			struct task_struct *p, enum pid_type type)
 {
 	int ret;
 
@@ -1336,7 +1336,7 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
  * control characters do (^C, ^Z etc)
  * - the caller must hold at least a readlock on tasklist_lock
  */
-int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
+int __kill_pgrp_info(int sig, struct kernel_siginfo *info, struct pid *pgrp)
 {
 	struct task_struct *p = NULL;
 	int retval, success;
@@ -1351,7 +1351,7 @@ int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp)
 	return success ? 0 : retval;
 }
 
-int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
+int kill_pid_info(int sig, struct kernel_siginfo *info, struct pid *pid)
 {
 	int error = -ESRCH;
 	struct task_struct *p;
@@ -1373,7 +1373,7 @@ int kill_pid_info(int sig, struct siginfo *info, struct pid *pid)
 	}
 }
 
-static int kill_proc_info(int sig, struct siginfo *info, pid_t pid)
+static int kill_proc_info(int sig, struct kernel_siginfo *info, pid_t pid)
 {
 	int error;
 	rcu_read_lock();
@@ -1394,7 +1394,7 @@ static inline bool kill_as_cred_perm(const struct cred *cred,
 }
 
 /* like kill_pid_info(), but doesn't use uid/euid of "current" */
-int kill_pid_info_as_cred(int sig, struct siginfo *info, struct pid *pid,
+int kill_pid_info_as_cred(int sig, struct kernel_siginfo *info, struct pid *pid,
 			 const struct cred *cred)
 {
 	int ret = -EINVAL;
@@ -1438,7 +1438,7 @@ EXPORT_SYMBOL_GPL(kill_pid_info_as_cred);
  * is probably wrong.  Should make it like BSD or SYSV.
  */
 
-static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
+static int kill_something_info(int sig, struct kernel_siginfo *info, pid_t pid)
 {
 	int ret;
 
@@ -1482,7 +1482,7 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
  * These are for backward compatibility with the rest of the kernel source.
  */
 
-int send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
+int send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p)
 {
 	/*
 	 * Make sure legacy kernel users don't send in bad values
@@ -1533,7 +1533,7 @@ int force_sig_fault(int sig, int code, void __user *addr
 	___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)
 	, struct task_struct *t)
 {
-	struct siginfo info;
+	struct kernel_siginfo info;
 
 	clear_siginfo(&info);
 	info.si_signo = sig;
@@ -1556,7 +1556,7 @@ int send_sig_fault(int sig, int code, void __user *addr
 	___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)
 	, struct task_struct *t)
 {
-	struct siginfo info;
+	struct kernel_siginfo info;
 
 	clear_siginfo(&info);
 	info.si_signo = sig;
@@ -1576,7 +1576,7 @@ int send_sig_fault(int sig, int code, void __user *addr
 
 int force_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *t)
 {
-	struct siginfo info;
+	struct kernel_siginfo info;
 
 	WARN_ON((code != BUS_MCEERR_AO) && (code != BUS_MCEERR_AR));
 	clear_siginfo(&info);
@@ -1590,7 +1590,7 @@ int force_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct
 
 int send_sig_mceerr(int code, void __user *addr, short lsb, struct task_struct *t)
 {
-	struct siginfo info;
+	struct kernel_siginfo info;
 
 	WARN_ON((code != BUS_MCEERR_AO) && (code != BUS_MCEERR_AR));
 	clear_siginfo(&info);
@@ -1605,7 +1605,7 @@ EXPORT_SYMBOL(send_sig_mceerr);
 
 int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper)
 {
-	struct siginfo info;
+	struct kernel_siginfo info;
 
 	clear_siginfo(&info);
 	info.si_signo = SIGSEGV;
@@ -1620,7 +1620,7 @@ int force_sig_bnderr(void __user *addr, void __user *lower, void __user *upper)
 #ifdef SEGV_PKUERR
 int force_sig_pkuerr(void __user *addr, u32 pkey)
 {
-	struct siginfo info;
+	struct kernel_siginfo info;
 
 	clear_siginfo(&info);
 	info.si_signo = SIGSEGV;
@@ -1637,7 +1637,7 @@ int force_sig_pkuerr(void __user *addr, u32 pkey)
  */
 int force_sig_ptrace_errno_trap(int errno, void __user *addr)
 {
-	struct siginfo info;
+	struct kernel_siginfo info;
 
 	clear_siginfo(&info);
 	info.si_signo = SIGTRAP;
@@ -1766,7 +1766,7 @@ ret:
  */
 bool do_notify_parent(struct task_struct *tsk, int sig)
 {
-	struct siginfo info;
+	struct kernel_siginfo info;
 	unsigned long flags;
 	struct sighand_struct *psig;
 	bool autoreap = false;
@@ -1871,7 +1871,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 static void do_notify_parent_cldstop(struct task_struct *tsk,
 				     bool for_ptracer, int why)
 {
-	struct siginfo info;
+	struct kernel_siginfo info;
 	unsigned long flags;
 	struct task_struct *parent;
 	struct sighand_struct *sighand;
@@ -1971,7 +1971,7 @@ static bool sigkill_pending(struct task_struct *tsk)
  * If we actually decide not to stop at all because the tracer
  * is gone, we keep current->exit_code unless clear_code.
  */
-static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
+static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t *info)
 	__releases(&current->sighand->siglock)
 	__acquires(&current->sighand->siglock)
 {
@@ -2108,7 +2108,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info)
 
 static void ptrace_do_notify(int signr, int exit_code, int why)
 {
-	siginfo_t info;
+	kernel_siginfo_t info;
 
 	clear_siginfo(&info);
 	info.si_signo = signr;
@@ -2289,7 +2289,7 @@ static void do_jobctl_trap(void)
 	}
 }
 
-static int ptrace_signal(int signr, siginfo_t *info)
+static int ptrace_signal(int signr, kernel_siginfo_t *info)
 {
 	/*
 	 * We do not check sig_kernel_stop(signr) but set this marker
@@ -2889,14 +2889,14 @@ enum siginfo_layout siginfo_layout(int sig, int si_code)
 	return layout;
 }
 
-int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
+int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from)
 {
-	if (copy_to_user(to, from , sizeof(struct siginfo)))
+	if (copy_to_user(to, from , sizeof(struct kernel_siginfo)))
 		return -EFAULT;
 	return 0;
 }
 
-int copy_siginfo_from_user(siginfo_t *to, const siginfo_t __user *from)
+int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from)
 {
 	if (copy_from_user(to, from, sizeof(struct siginfo)))
 		return -EFAULT;
@@ -2905,13 +2905,13 @@ int copy_siginfo_from_user(siginfo_t *to, const siginfo_t __user *from)
 
 #ifdef CONFIG_COMPAT
 int copy_siginfo_to_user32(struct compat_siginfo __user *to,
-			   const struct siginfo *from)
+			   const struct kernel_siginfo *from)
 #if defined(CONFIG_X86_X32_ABI) || defined(CONFIG_IA32_EMULATION)
 {
 	return __copy_siginfo_to_user32(to, from, in_x32_syscall());
 }
 int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
-			     const struct siginfo *from, bool x32_ABI)
+			     const struct kernel_siginfo *from, bool x32_ABI)
 #endif
 {
 	struct compat_siginfo new;
@@ -2995,7 +2995,7 @@ int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
 	return 0;
 }
 
-int copy_siginfo_from_user32(struct siginfo *to,
+int copy_siginfo_from_user32(struct kernel_siginfo *to,
 			     const struct compat_siginfo __user *ufrom)
 {
 	struct compat_siginfo from;
@@ -3085,7 +3085,7 @@ int copy_siginfo_from_user32(struct siginfo *to,
  *  @info: if non-null, the signal's siginfo is returned here
  *  @ts: upper bound on process time suspension
  */
-static int do_sigtimedwait(const sigset_t *which, siginfo_t *info,
+static int do_sigtimedwait(const sigset_t *which, kernel_siginfo_t *info,
 		    const struct timespec *ts)
 {
 	ktime_t *to = NULL, timeout = KTIME_MAX;
@@ -3149,7 +3149,7 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
 {
 	sigset_t these;
 	struct timespec ts;
-	siginfo_t info;
+	kernel_siginfo_t info;
 	int ret;
 
 	/* XXX: Don't preclude handling different sized sigset_t's.  */
@@ -3181,7 +3181,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese,
 {
 	sigset_t s;
 	struct timespec t;
-	siginfo_t info;
+	kernel_siginfo_t info;
 	long ret;
 
 	if (sigsetsize != sizeof(sigset_t))
@@ -3213,7 +3213,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigtimedwait, compat_sigset_t __user *, uthese,
  */
 SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
 {
-	struct siginfo info;
+	struct kernel_siginfo info;
 
 	clear_siginfo(&info);
 	info.si_signo = sig;
@@ -3226,7 +3226,7 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
 }
 
 static int
-do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
+do_send_specific(pid_t tgid, pid_t pid, int sig, struct kernel_siginfo *info)
 {
 	struct task_struct *p;
 	int error = -ESRCH;
@@ -3257,7 +3257,7 @@ do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
 
 static int do_tkill(pid_t tgid, pid_t pid, int sig)
 {
-	struct siginfo info;
+	struct kernel_siginfo info;
 
 	clear_siginfo(&info);
 	info.si_signo = sig;
@@ -3304,7 +3304,7 @@ SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig)
 	return do_tkill(0, pid, sig);
 }
 
-static int do_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t *info)
+static int do_rt_sigqueueinfo(pid_t pid, int sig, kernel_siginfo_t *info)
 {
 	/* Not even root can pretend to send signals from the kernel.
 	 * Nor can they impersonate a kill()/tgkill(), which adds source info.
@@ -3329,7 +3329,7 @@ static int do_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t *info)
 SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
 		siginfo_t __user *, uinfo)
 {
-	siginfo_t info;
+	kernel_siginfo_t info;
 	int ret = copy_siginfo_from_user(&info, uinfo);
 	if (unlikely(ret))
 		return ret;
@@ -3342,7 +3342,7 @@ COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo,
 			int, sig,
 			struct compat_siginfo __user *, uinfo)
 {
-	siginfo_t info;
+	kernel_siginfo_t info;
 	int ret = copy_siginfo_from_user32(&info, uinfo);
 	if (unlikely(ret))
 		return ret;
@@ -3350,7 +3350,7 @@ COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo,
 }
 #endif
 
-static int do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
+static int do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, kernel_siginfo_t *info)
 {
 	/* This is only valid for single tasks */
 	if (pid <= 0 || tgid <= 0)
@@ -3372,7 +3372,7 @@ static int do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
 SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig,
 		siginfo_t __user *, uinfo)
 {
-	siginfo_t info;
+	kernel_siginfo_t info;
 	int ret = copy_siginfo_from_user(&info, uinfo);
 	if (unlikely(ret))
 		return ret;
@@ -3386,7 +3386,7 @@ COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo,
 			int, sig,
 			struct compat_siginfo __user *, uinfo)
 {
-	siginfo_t info;
+	kernel_siginfo_t info;
 	int ret = copy_siginfo_from_user32(&info, uinfo);
 	if (unlikely(ret))
 		return ret;
@@ -3968,10 +3968,57 @@ __weak const char *arch_vma_name(struct vm_area_struct *vma)
 	return NULL;
 }
 
-void __init signals_init(void)
+static inline void siginfo_buildtime_checks(void)
 {
 	BUILD_BUG_ON(sizeof(struct siginfo) != SI_MAX_SIZE);
 
+	/* Verify the offsets in the two siginfos match */
+#define CHECK_OFFSET(field) \
+	BUILD_BUG_ON(offsetof(siginfo_t, field) != offsetof(kernel_siginfo_t, field))
+
+	/* kill */
+	CHECK_OFFSET(si_pid);
+	CHECK_OFFSET(si_uid);
+
+	/* timer */
+	CHECK_OFFSET(si_tid);
+	CHECK_OFFSET(si_overrun);
+	CHECK_OFFSET(si_value);
+
+	/* rt */
+	CHECK_OFFSET(si_pid);
+	CHECK_OFFSET(si_uid);
+	CHECK_OFFSET(si_value);
+
+	/* sigchld */
+	CHECK_OFFSET(si_pid);
+	CHECK_OFFSET(si_uid);
+	CHECK_OFFSET(si_status);
+	CHECK_OFFSET(si_utime);
+	CHECK_OFFSET(si_stime);
+
+	/* sigfault */
+	CHECK_OFFSET(si_addr);
+	CHECK_OFFSET(si_addr_lsb);
+	CHECK_OFFSET(si_lower);
+	CHECK_OFFSET(si_upper);
+	CHECK_OFFSET(si_pkey);
+
+	/* sigpoll */
+	CHECK_OFFSET(si_band);
+	CHECK_OFFSET(si_fd);
+
+	/* sigsys */
+	CHECK_OFFSET(si_call_addr);
+	CHECK_OFFSET(si_syscall);
+	CHECK_OFFSET(si_arch);
+#undef CHECK_OFFSET
+}
+
+void __init signals_init(void)
+{
+	siginfo_buildtime_checks();
+
 	sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC);
 }
 
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 4b9127e95430..eabb4c22728d 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -308,7 +308,7 @@ static void common_hrtimer_rearm(struct k_itimer *timr)
  * To protect against the timer going away while the interrupt is queued,
  * we require that the it_requeue_pending flag be set.
  */
-void posixtimer_rearm(struct siginfo *info)
+void posixtimer_rearm(struct kernel_siginfo *info)
 {
 	struct k_itimer *timr;
 	unsigned long flags;
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 8b8b70620bbe..cbcb8ba51142 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -732,7 +732,7 @@ static int apparmor_task_setrlimit(struct task_struct *task,
 	return error;
 }
 
-static int apparmor_task_kill(struct task_struct *target, struct siginfo *info,
+static int apparmor_task_kill(struct task_struct *target, struct kernel_siginfo *info,
 			      int sig, const struct cred *cred)
 {
 	struct aa_label *cl, *tl;
diff --git a/security/security.c b/security/security.c
index 736e78da1ab9..0d504fceda8b 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1147,7 +1147,7 @@ int security_task_movememory(struct task_struct *p)
 	return call_int_hook(task_movememory, 0, p);
 }
 
-int security_task_kill(struct task_struct *p, struct siginfo *info,
+int security_task_kill(struct task_struct *p, struct kernel_siginfo *info,
 			int sig, const struct cred *cred)
 {
 	return call_int_hook(task_kill, 0, p, info, sig, cred);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index ad9a9b8e9979..1b500b4c78a7 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4186,7 +4186,7 @@ static int selinux_task_movememory(struct task_struct *p)
 			    PROCESS__SETSCHED, NULL);
 }
 
-static int selinux_task_kill(struct task_struct *p, struct siginfo *info,
+static int selinux_task_kill(struct task_struct *p, struct kernel_siginfo *info,
 				int sig, const struct cred *cred)
 {
 	u32 secid;
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 340fc30ad85d..025de76af1db 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -2251,7 +2251,7 @@ static int smack_task_movememory(struct task_struct *p)
  * Return 0 if write access is permitted
  *
  */
-static int smack_task_kill(struct task_struct *p, struct siginfo *info,
+static int smack_task_kill(struct task_struct *p, struct kernel_siginfo *info,
 			   int sig, const struct cred *cred)
 {
 	struct smk_audit_info ad;
-- 
cgit v1.2.3


From 4ce5f9c9e7546915c559ffae594e6d73f918db00 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 25 Sep 2018 12:59:31 +0200
Subject: signal: Use a smaller struct siginfo in the kernel

We reserve 128 bytes for struct siginfo but only use about 48 bytes on
64bit and 32 bytes on 32bit.  Someday we might use more but it is unlikely
to be anytime soon.

Userspace seems content with just enough bytes of siginfo to implement
sigqueue.  Or in the case of checkpoint/restart reinjecting signals
the kernel has sent.

Reducing the stack footprint and the work to copy siginfo around from
2 cachelines to 1 cachelines seems worth doing even if I don't have
benchmarks to show a performance difference.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/signal.h       |  2 ++
 include/linux/signal_types.h |  5 +--
 kernel/signal.c              | 82 ++++++++++++++++++++++++++++++++++----------
 3 files changed, 67 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 70031b10b918..706a499d1eb1 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -22,6 +22,8 @@ static inline void clear_siginfo(kernel_siginfo_t *info)
 	memset(info, 0, sizeof(*info));
 }
 
+#define SI_EXPANSION_SIZE (sizeof(struct siginfo) - sizeof(struct kernel_siginfo))
+
 int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from);
 int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from);
 
diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h
index 2a40a9c5e4ad..f8a90ae9c6ec 100644
--- a/include/linux/signal_types.h
+++ b/include/linux/signal_types.h
@@ -10,10 +10,7 @@
 #include <uapi/linux/signal.h>
 
 typedef struct kernel_siginfo {
-	union {
-		__SIGINFO;
-		int _si_pad[SI_MAX_SIZE/sizeof(int)];
-	};
+	__SIGINFO;
 } kernel_siginfo_t;
 
 /*
diff --git a/kernel/signal.c b/kernel/signal.c
index 161cad4e448c..1c2dd117fee0 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2844,27 +2844,48 @@ COMPAT_SYSCALL_DEFINE2(rt_sigpending, compat_sigset_t __user *, uset,
 }
 #endif
 
+static const struct {
+	unsigned char limit, layout;
+} sig_sicodes[] = {
+	[SIGILL]  = { NSIGILL,  SIL_FAULT },
+	[SIGFPE]  = { NSIGFPE,  SIL_FAULT },
+	[SIGSEGV] = { NSIGSEGV, SIL_FAULT },
+	[SIGBUS]  = { NSIGBUS,  SIL_FAULT },
+	[SIGTRAP] = { NSIGTRAP, SIL_FAULT },
+#if defined(SIGEMT)
+	[SIGEMT]  = { NSIGEMT,  SIL_FAULT },
+#endif
+	[SIGCHLD] = { NSIGCHLD, SIL_CHLD },
+	[SIGPOLL] = { NSIGPOLL, SIL_POLL },
+	[SIGSYS]  = { NSIGSYS,  SIL_SYS },
+};
+
+static bool known_siginfo_layout(int sig, int si_code)
+{
+	if (si_code == SI_KERNEL)
+		return true;
+	else if ((si_code > SI_USER)) {
+		if (sig_specific_sicodes(sig)) {
+			if (si_code <= sig_sicodes[sig].limit)
+				return true;
+		}
+		else if (si_code <= NSIGPOLL)
+			return true;
+	}
+	else if (si_code >= SI_DETHREAD)
+		return true;
+	else if (si_code == SI_ASYNCNL)
+		return true;
+	return false;
+}
+
 enum siginfo_layout siginfo_layout(int sig, int si_code)
 {
 	enum siginfo_layout layout = SIL_KILL;
 	if ((si_code > SI_USER) && (si_code < SI_KERNEL)) {
-		static const struct {
-			unsigned char limit, layout;
-		} filter[] = {
-			[SIGILL]  = { NSIGILL,  SIL_FAULT },
-			[SIGFPE]  = { NSIGFPE,  SIL_FAULT },
-			[SIGSEGV] = { NSIGSEGV, SIL_FAULT },
-			[SIGBUS]  = { NSIGBUS,  SIL_FAULT },
-			[SIGTRAP] = { NSIGTRAP, SIL_FAULT },
-#if defined(SIGEMT)
-			[SIGEMT]  = { NSIGEMT,  SIL_FAULT },
-#endif
-			[SIGCHLD] = { NSIGCHLD, SIL_CHLD },
-			[SIGPOLL] = { NSIGPOLL, SIL_POLL },
-			[SIGSYS]  = { NSIGSYS,  SIL_SYS },
-		};
-		if ((sig < ARRAY_SIZE(filter)) && (si_code <= filter[sig].limit)) {
-			layout = filter[sig].layout;
+		if ((sig < ARRAY_SIZE(sig_sicodes)) &&
+		    (si_code <= sig_sicodes[sig].limit)) {
+			layout = sig_sicodes[sig].layout;
 			/* Handle the exceptions */
 			if ((sig == SIGBUS) &&
 			    (si_code >= BUS_MCEERR_AR) && (si_code <= BUS_MCEERR_AO))
@@ -2889,17 +2910,42 @@ enum siginfo_layout siginfo_layout(int sig, int si_code)
 	return layout;
 }
 
+static inline char __user *si_expansion(const siginfo_t __user *info)
+{
+	return ((char __user *)info) + sizeof(struct kernel_siginfo);
+}
+
 int copy_siginfo_to_user(siginfo_t __user *to, const kernel_siginfo_t *from)
 {
+	char __user *expansion = si_expansion(to);
 	if (copy_to_user(to, from , sizeof(struct kernel_siginfo)))
 		return -EFAULT;
+	if (clear_user(expansion, SI_EXPANSION_SIZE))
+		return -EFAULT;
 	return 0;
 }
 
 int copy_siginfo_from_user(kernel_siginfo_t *to, const siginfo_t __user *from)
 {
-	if (copy_from_user(to, from, sizeof(struct siginfo)))
+	if (copy_from_user(to, from, sizeof(struct kernel_siginfo)))
 		return -EFAULT;
+	if (unlikely(!known_siginfo_layout(to->si_signo, to->si_code))) {
+		char __user *expansion = si_expansion(from);
+		char buf[SI_EXPANSION_SIZE];
+		int i;
+		/*
+		 * An unknown si_code might need more than
+		 * sizeof(struct kernel_siginfo) bytes.  Verify all of the
+		 * extra bytes are 0.  This guarantees copy_siginfo_to_user
+		 * will return this data to userspace exactly.
+		 */
+		if (copy_from_user(&buf, expansion, SI_EXPANSION_SIZE))
+			return -EFAULT;
+		for (i = 0; i < SI_EXPANSION_SIZE; i++) {
+			if (buf[i] != 0)
+				return -E2BIG;
+		}
+	}
 	return 0;
 }
 
-- 
cgit v1.2.3


From 608a0ab2f54ab0e301ad76a41aad979ea0d02670 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trondmy@gmail.com>
Date: Mon, 1 Oct 2018 10:41:44 -0400
Subject: SUNRPC: Add lockless lookup of the server's auth domain

Avoid taking the global auth_domain_lock in most lookups of the auth domain
by adding an RCU protected lookup.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svcauth.h    |  1 +
 net/sunrpc/auth_gss/svcauth_gss.c |  9 ++++++++-
 net/sunrpc/svcauth.c              | 22 +++++++++++++++++++---
 net/sunrpc/svcauth_unix.c         | 10 ++++++++--
 4 files changed, 36 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 04e404a07882..3e53a6e2ada7 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -82,6 +82,7 @@ struct auth_domain {
 	struct hlist_node	hash;
 	char			*name;
 	struct auth_ops		*flavour;
+	struct rcu_head		rcu_head;
 };
 
 /*
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 860f2a1bbb67..87c71fb0f0ea 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1764,14 +1764,21 @@ out_err:
 }
 
 static void
-svcauth_gss_domain_release(struct auth_domain *dom)
+svcauth_gss_domain_release_rcu(struct rcu_head *head)
 {
+	struct auth_domain *dom = container_of(head, struct auth_domain, rcu_head);
 	struct gss_domain *gd = container_of(dom, struct gss_domain, h);
 
 	kfree(dom->name);
 	kfree(gd);
 }
 
+static void
+svcauth_gss_domain_release(struct auth_domain *dom)
+{
+	call_rcu(&dom->rcu_head, svcauth_gss_domain_release_rcu);
+}
+
 static struct auth_ops svcauthops_gss = {
 	.name		= "rpcsec_gss",
 	.owner		= THIS_MODULE,
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index f83443856cd1..775b8c94265b 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -143,10 +143,11 @@ static struct hlist_head	auth_domain_table[DN_HASHMAX];
 static DEFINE_SPINLOCK(auth_domain_lock);
 
 static void auth_domain_release(struct kref *kref)
+	__releases(&auth_domain_lock)
 {
 	struct auth_domain *dom = container_of(kref, struct auth_domain, ref);
 
-	hlist_del(&dom->hash);
+	hlist_del_rcu(&dom->hash);
 	dom->flavour->domain_release(dom);
 	spin_unlock(&auth_domain_lock);
 }
@@ -175,7 +176,7 @@ auth_domain_lookup(char *name, struct auth_domain *new)
 		}
 	}
 	if (new)
-		hlist_add_head(&new->hash, head);
+		hlist_add_head_rcu(&new->hash, head);
 	spin_unlock(&auth_domain_lock);
 	return new;
 }
@@ -183,6 +184,21 @@ EXPORT_SYMBOL_GPL(auth_domain_lookup);
 
 struct auth_domain *auth_domain_find(char *name)
 {
-	return auth_domain_lookup(name, NULL);
+	struct auth_domain *hp;
+	struct hlist_head *head;
+
+	head = &auth_domain_table[hash_str(name, DN_HASHBITS)];
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(hp, head, hash) {
+		if (strcmp(hp->name, name)==0) {
+			if (!kref_get_unless_zero(&hp->ref))
+				hp = NULL;
+			rcu_read_unlock();
+			return hp;
+		}
+	}
+	rcu_read_unlock();
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(auth_domain_find);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index af7f28fb8102..84cf39021a03 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -37,20 +37,26 @@ struct unix_domain {
 extern struct auth_ops svcauth_null;
 extern struct auth_ops svcauth_unix;
 
-static void svcauth_unix_domain_release(struct auth_domain *dom)
+static void svcauth_unix_domain_release_rcu(struct rcu_head *head)
 {
+	struct auth_domain *dom = container_of(head, struct auth_domain, rcu_head);
 	struct unix_domain *ud = container_of(dom, struct unix_domain, h);
 
 	kfree(dom->name);
 	kfree(ud);
 }
 
+static void svcauth_unix_domain_release(struct auth_domain *dom)
+{
+	call_rcu(&dom->rcu_head, svcauth_unix_domain_release_rcu);
+}
+
 struct auth_domain *unix_domain_find(char *name)
 {
 	struct auth_domain *rv;
 	struct unix_domain *new = NULL;
 
-	rv = auth_domain_lookup(name, NULL);
+	rv = auth_domain_find(name);
 	while(1) {
 		if (rv) {
 			if (new && rv != &new->h)
-- 
cgit v1.2.3


From fcd29ad17c6ff885dfae58f557e9323941e63ba2 Mon Sep 17 00:00:00 2001
From: Feras Daoud <ferasda@mellanox.com>
Date: Thu, 9 Aug 2018 09:55:21 +0300
Subject: net/mlx5: Add Fast teardown support

Today mlx5 devices support two teardown modes:
1- Regular teardown
2- Force teardown

This change introduces the enhanced version of the "Force teardown" that
allows SW to perform teardown in a faster way without the need to reclaim
all the pages.

Fast teardown provides the following advantages:
1- Fix a FW race condition that could cause command timeout
2- Avoid moving to polling mode
3- Close the vport to prevent PCI ACK to be sent without been scatter
to memory

Signed-off-by: Feras Daoud <ferasda@mellanox.com>
Reviewed-by: Majd Dibbiny <majd@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fw.c       | 50 +++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/health.c   | 25 ++++++-----
 drivers/net/ethernet/mellanox/mlx5/core/main.c     | 29 +++++++++----
 .../net/ethernet/mellanox/mlx5/core/mlx5_core.h    | 12 ++++++
 include/linux/mlx5/device.h                        |  4 ++
 include/linux/mlx5/mlx5_ifc.h                      |  6 ++-
 6 files changed, 103 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 41ad24f0de2c..1ab6f7e3bec6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -250,7 +250,7 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev)
 	if (ret)
 		return ret;
 
-	force_state = MLX5_GET(teardown_hca_out, out, force_state);
+	force_state = MLX5_GET(teardown_hca_out, out, state);
 	if (force_state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) {
 		mlx5_core_warn(dev, "teardown with force mode failed, doing normal teardown\n");
 		return -EIO;
@@ -259,6 +259,54 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev)
 	return 0;
 }
 
+#define MLX5_FAST_TEARDOWN_WAIT_MS   3000
+int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev)
+{
+	unsigned long end, delay_ms = MLX5_FAST_TEARDOWN_WAIT_MS;
+	u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0};
+	int state;
+	int ret;
+
+	if (!MLX5_CAP_GEN(dev, fast_teardown)) {
+		mlx5_core_dbg(dev, "fast teardown is not supported in the firmware\n");
+		return -EOPNOTSUPP;
+	}
+
+	MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
+	MLX5_SET(teardown_hca_in, in, profile,
+		 MLX5_TEARDOWN_HCA_IN_PROFILE_PREPARE_FAST_TEARDOWN);
+
+	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (ret)
+		return ret;
+
+	state = MLX5_GET(teardown_hca_out, out, state);
+	if (state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) {
+		mlx5_core_warn(dev, "teardown with fast mode failed\n");
+		return -EIO;
+	}
+
+	mlx5_set_nic_state(dev, MLX5_NIC_IFC_DISABLED);
+
+	/* Loop until device state turns to disable */
+	end = jiffies + msecs_to_jiffies(delay_ms);
+	do {
+		if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
+			break;
+
+		cond_resched();
+	} while (!time_after(jiffies, end));
+
+	if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
+		dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
+			mlx5_get_nic_state(dev), delay_ms);
+		return -EIO;
+	}
+
+	return 0;
+}
+
 enum mlxsw_reg_mcc_instruction {
 	MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE = 0x01,
 	MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE = 0x02,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 9f39aeca863f..43118de8ee99 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -58,23 +58,26 @@ enum {
 	MLX5_HEALTH_SYNDR_HIGH_TEMP		= 0x10
 };
 
-enum {
-	MLX5_NIC_IFC_FULL		= 0,
-	MLX5_NIC_IFC_DISABLED		= 1,
-	MLX5_NIC_IFC_NO_DRAM_NIC	= 2,
-	MLX5_NIC_IFC_INVALID		= 3
-};
-
 enum {
 	MLX5_DROP_NEW_HEALTH_WORK,
 	MLX5_DROP_NEW_RECOVERY_WORK,
 };
 
-static u8 get_nic_state(struct mlx5_core_dev *dev)
+u8 mlx5_get_nic_state(struct mlx5_core_dev *dev)
 {
 	return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3;
 }
 
+void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
+{
+	u32 cur_cmdq_addr_l_sz;
+
+	cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz);
+	iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) |
+		    state << MLX5_NIC_IFC_OFFSET,
+		    &dev->iseg->cmdq_addr_l_sz);
+}
+
 static void trigger_cmd_completions(struct mlx5_core_dev *dev)
 {
 	unsigned long flags;
@@ -103,7 +106,7 @@ static int in_fatal(struct mlx5_core_dev *dev)
 	struct mlx5_core_health *health = &dev->priv.health;
 	struct health_buffer __iomem *h = health->health;
 
-	if (get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
+	if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
 		return 1;
 
 	if (ioread32be(&h->fw_ver) == 0xffffffff)
@@ -133,7 +136,7 @@ unlock:
 
 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
 {
-	u8 nic_interface = get_nic_state(dev);
+	u8 nic_interface = mlx5_get_nic_state(dev);
 
 	switch (nic_interface) {
 	case MLX5_NIC_IFC_FULL:
@@ -168,7 +171,7 @@ static void health_recover(struct work_struct *work)
 	priv = container_of(health, struct mlx5_priv, health);
 	dev = container_of(priv, struct mlx5_core_dev, priv);
 
-	nic_state = get_nic_state(dev);
+	nic_state = mlx5_get_nic_state(dev);
 	if (nic_state == MLX5_NIC_IFC_INVALID) {
 		dev_err(&dev->pdev->dev, "health recovery flow aborted since the nic state is invalid\n");
 		return;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index b5e9f664fc66..28132c7dc05f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1594,12 +1594,17 @@ static const struct pci_error_handlers mlx5_err_handler = {
 
 static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
 {
-	int ret;
+	bool fast_teardown = false, force_teardown = false;
+	int ret = 1;
+
+	fast_teardown = MLX5_CAP_GEN(dev, fast_teardown);
+	force_teardown = MLX5_CAP_GEN(dev, force_teardown);
+
+	mlx5_core_dbg(dev, "force teardown firmware support=%d\n", force_teardown);
+	mlx5_core_dbg(dev, "fast teardown firmware support=%d\n", fast_teardown);
 
-	if (!MLX5_CAP_GEN(dev, force_teardown)) {
-		mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n");
+	if (!fast_teardown && !force_teardown)
 		return -EOPNOTSUPP;
-	}
 
 	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
 		mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
@@ -1612,13 +1617,19 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
 	mlx5_drain_health_wq(dev);
 	mlx5_stop_health_poll(dev, false);
 
+	ret = mlx5_cmd_fast_teardown_hca(dev);
+	if (!ret)
+		goto succeed;
+
 	ret = mlx5_cmd_force_teardown_hca(dev);
-	if (ret) {
-		mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret);
-		mlx5_start_health_poll(dev);
-		return ret;
-	}
+	if (!ret)
+		goto succeed;
+
+	mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret);
+	mlx5_start_health_poll(dev);
+	return ret;
 
+succeed:
 	mlx5_enter_error_state(dev, true);
 
 	/* Some platforms requiring freeing the IRQ's in the shutdown
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index b4134fa0bba3..cc298527baf1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -95,6 +95,8 @@ int mlx5_query_board_id(struct mlx5_core_dev *dev);
 int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id);
 int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
 int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
+int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev);
+
 void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
 		     unsigned long param);
 void mlx5_core_page_fault(struct mlx5_core_dev *dev,
@@ -214,4 +216,14 @@ int mlx5_lag_allow(struct mlx5_core_dev *dev);
 int mlx5_lag_forbid(struct mlx5_core_dev *dev);
 
 void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
+
+enum {
+	MLX5_NIC_IFC_FULL		= 0,
+	MLX5_NIC_IFC_DISABLED		= 1,
+	MLX5_NIC_IFC_NO_DRAM_NIC	= 2,
+	MLX5_NIC_IFC_INVALID		= 3
+};
+
+u8 mlx5_get_nic_state(struct mlx5_core_dev *dev);
+void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state);
 #endif /* __MLX5_CORE_H__ */
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 11fa4e66afc5..e9b502d5bcc1 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -504,6 +504,10 @@ struct health_buffer {
 	__be16		ext_synd;
 };
 
+enum mlx5_cmd_addr_l_sz_offset {
+	MLX5_NIC_IFC_OFFSET = 8,
+};
+
 struct mlx5_init_seg {
 	__be32			fw_rev;
 	__be32			cmdif_rev_fw_sub;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f043d65b9bac..6e8a882052b1 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -896,7 +896,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         log_max_mkey[0x6];
 	u8         reserved_at_f0[0x8];
 	u8         dump_fill_mkey[0x1];
-	u8         reserved_at_f9[0x3];
+	u8         reserved_at_f9[0x2];
+	u8         fast_teardown[0x1];
 	u8         log_max_eq[0x4];
 
 	u8         max_indirection[0x8];
@@ -3352,12 +3353,13 @@ struct mlx5_ifc_teardown_hca_out_bits {
 
 	u8         reserved_at_40[0x3f];
 
-	u8         force_state[0x1];
+	u8         state[0x1];
 };
 
 enum {
 	MLX5_TEARDOWN_HCA_IN_PROFILE_GRACEFUL_CLOSE  = 0x0,
 	MLX5_TEARDOWN_HCA_IN_PROFILE_FORCE_CLOSE     = 0x1,
+	MLX5_TEARDOWN_HCA_IN_PROFILE_PREPARE_FAST_TEARDOWN = 0x2,
 };
 
 struct mlx5_ifc_teardown_hca_in_bits {
-- 
cgit v1.2.3


From 5f26bdceb9c0a5e6c696aa2899d077cd3ae93413 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 2 Oct 2018 23:42:02 +0200
Subject: cpuidle: menu: Fix wakeup statistics updates for polling state

If the CPU exits the "polling" state due to the time limit in the
loop in poll_idle(), this is not a real wakeup and it just means
that the "polling" state selection was not adequate.  The governor
mispredicted short idle duration, but had a more suitable state been
selected, the CPU might have spent more time in it.  In fact, there
is no reason to expect that there would have been a wakeup event
earlier than the next timer in that case.

Handling such cases as regular wakeups in menu_update() may cause the
menu governor to make suboptimal decisions going forward, but ignoring
them altogether would not be correct either, because every time
menu_select() is invoked, it makes a separate new attempt to predict
the idle duration taking distinct time to the closest timer event as
input and the outcomes of all those attempts should be recorded.

For this reason, make menu_update() always assume that if the
"polling" state was exited due to the time limit, the next proper
wakeup event for the CPU would be the next timer event (not
including the tick).

Fixes: a37b969a61c1 "cpuidle: poll_state: Add time limit to poll_idle()"
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/cpuidle/governors/menu.c | 10 ++++++++++
 drivers/cpuidle/poll_state.c     |  6 +++++-
 include/linux/cpuidle.h          |  1 +
 3 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index b6684fd89085..8b3f9c7bf221 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -511,6 +511,16 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		 * duration predictor do a better job next time.
 		 */
 		measured_us = 9 * MAX_INTERESTING / 10;
+	} else if ((drv->states[last_idx].flags & CPUIDLE_FLAG_POLLING) &&
+		   dev->poll_time_limit) {
+		/*
+		 * The CPU exited the "polling" state due to a time limit, so
+		 * the idle duration prediction leading to the selection of that
+		 * state was inaccurate.  If a better prediction had been made,
+		 * the CPU might have been woken up from idle by the next timer.
+		 * Assume that to be the case.
+		 */
+		measured_us = data->next_timer_us;
 	} else {
 		/* measured value */
 		measured_us = dev->last_residency;
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index 3f86d23c592e..36ff5a1d9422 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -17,6 +17,8 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
 {
 	u64 time_start = local_clock();
 
+	dev->poll_time_limit = false;
+
 	local_irq_enable();
 	if (!current_set_polling_and_test()) {
 		unsigned int loop_count = 0;
@@ -27,8 +29,10 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
 				continue;
 
 			loop_count = 0;
-			if (local_clock() - time_start > POLL_IDLE_TIME_LIMIT)
+			if (local_clock() - time_start > POLL_IDLE_TIME_LIMIT) {
+				dev->poll_time_limit = true;
 				break;
+			}
 		}
 	}
 	current_clr_polling();
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index d262f620890e..faed7a8977e8 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -81,6 +81,7 @@ struct cpuidle_device {
 	unsigned int		registered:1;
 	unsigned int		enabled:1;
 	unsigned int		use_deepest_state:1;
+	unsigned int		poll_time_limit:1;
 	unsigned int		cpu;
 
 	int			last_residency;
-- 
cgit v1.2.3


From c06c4d8090513f2974dfdbed2ac98634357ac475 Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@vmware.com>
Date: Wed, 3 Oct 2018 14:30:53 -0700
Subject: x86/objtool: Use asm macros to work around GCC inlining bugs

As described in:

  77b0bf55bc67: ("kbuild/Makefile: Prepare for using macros in inline assembly code to work around asm() related GCC inlining bugs")

GCC's inlining heuristics are broken with common asm() patterns used in
kernel code, resulting in the effective disabling of inlining.

In the case of objtool the resulting borkage can be significant, since all the
annotations of objtool are discarded during linkage and never inlined,
yet GCC bogusly considers most functions affected by objtool annotations
as 'too large'.

The workaround is to set an assembly macro and call it from the inline
assembly block. As a result GCC considers the inline assembly block as
a single instruction. (Which it isn't, but that's the best we can get.)

This increases the kernel size slightly:

      text     data     bss      dec     hex filename
  18140829 10224724 2957312 31322865 1ddf2f1 ./vmlinux before
  18140970 10225412 2957312 31323694 1ddf62e ./vmlinux after (+829)

The number of static text symbols (i.e. non-inlined functions) is reduced:

  Before:  40321
  After:   40302 (-19)

[ mingo: Rewrote the changelog. ]

Tested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Nadav Amit <namit@vmware.com>
Reviewed-by: Josh Poimboeuf <jpoimboe@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Christopher Li <sparse@chrisli.org>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-sparse@vger.kernel.org
Link: http://lkml.kernel.org/r/20181003213100.189959-4-namit@vmware.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/macros.S |  2 ++
 include/linux/compiler.h | 56 +++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 45 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/macros.S b/arch/x86/kernel/macros.S
index cfc1c7d1a6eb..cee28c3246dc 100644
--- a/arch/x86/kernel/macros.S
+++ b/arch/x86/kernel/macros.S
@@ -5,3 +5,5 @@
  * commonly used. The macros are precompiled into assmebly file which is later
  * assembled together with each compiled file.
  */
+
+#include <linux/compiler.h>
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 681d866efb1e..1921545c6351 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -99,22 +99,13 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
  * unique, to convince GCC not to merge duplicate inline asm statements.
  */
 #define annotate_reachable() ({						\
-	asm volatile("%c0:\n\t"						\
-		     ".pushsection .discard.reachable\n\t"		\
-		     ".long %c0b - .\n\t"				\
-		     ".popsection\n\t" : : "i" (__COUNTER__));		\
+	asm volatile("ANNOTATE_REACHABLE counter=%c0"			\
+		     : : "i" (__COUNTER__));				\
 })
 #define annotate_unreachable() ({					\
-	asm volatile("%c0:\n\t"						\
-		     ".pushsection .discard.unreachable\n\t"		\
-		     ".long %c0b - .\n\t"				\
-		     ".popsection\n\t" : : "i" (__COUNTER__));		\
+	asm volatile("ANNOTATE_UNREACHABLE counter=%c0"			\
+		     : : "i" (__COUNTER__));				\
 })
-#define ASM_UNREACHABLE							\
-	"999:\n\t"							\
-	".pushsection .discard.unreachable\n\t"				\
-	".long 999b - .\n\t"						\
-	".popsection\n\t"
 #else
 #define annotate_reachable()
 #define annotate_unreachable()
@@ -299,6 +290,45 @@ static inline void *offset_to_ptr(const int *off)
 	return (void *)((unsigned long)off + *off);
 }
 
+#else /* __ASSEMBLY__ */
+
+#ifdef __KERNEL__
+#ifndef LINKER_SCRIPT
+
+#ifdef CONFIG_STACK_VALIDATION
+.macro ANNOTATE_UNREACHABLE counter:req
+\counter:
+	.pushsection .discard.unreachable
+	.long \counter\()b -.
+	.popsection
+.endm
+
+.macro ANNOTATE_REACHABLE counter:req
+\counter:
+	.pushsection .discard.reachable
+	.long \counter\()b -.
+	.popsection
+.endm
+
+.macro ASM_UNREACHABLE
+999:
+	.pushsection .discard.unreachable
+	.long 999b - .
+	.popsection
+.endm
+#else /* CONFIG_STACK_VALIDATION */
+.macro ANNOTATE_UNREACHABLE counter:req
+.endm
+
+.macro ANNOTATE_REACHABLE counter:req
+.endm
+
+.macro ASM_UNREACHABLE
+.endm
+#endif /* CONFIG_STACK_VALIDATION */
+
+#endif /* LINKER_SCRIPT */
+#endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 
 #ifndef __optimize
-- 
cgit v1.2.3


From 34845c939082093354a6ffbb1ebff599e30b9b22 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 26 Sep 2018 15:20:03 +0200
Subject: reset: Grammar s/more then once/more than once/

Fix grammar in reset_control_get_exclusive() documentation comment.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/linux/reset.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/reset.h b/include/linux/reset.h
index 09732c36f351..29af6d6b2f4b 100644
--- a/include/linux/reset.h
+++ b/include/linux/reset.h
@@ -116,7 +116,7 @@ static inline int device_reset_optional(struct device *dev)
  * @id: reset line name
  *
  * Returns a struct reset_control or IS_ERR() condition containing errno.
- * If this function is called more then once for the same reset_control it will
+ * If this function is called more than once for the same reset_control it will
  * return -EBUSY.
  *
  * See reset_control_get_shared for details on shared references to
-- 
cgit v1.2.3


From b723a7911d028fb55f67a63c4c4d895f72e059d4 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 4 Oct 2018 00:25:32 +0300
Subject: fanotify: fix collision of internal and uapi mark flags

The new mark flag FAN_MARK_FILESYSTEMS collides with existing internal
flag FAN_MARK_ONDIR. Change internal flag value to avoid the collision.

Fixes: d54f4fba889b ("fanotify: add API to attach/detach super block mark")
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fanotify.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 9c5ea3bdfaa0..e70fccc3757e 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -5,7 +5,7 @@
 #include <uapi/linux/fanotify.h>
 
 /* not valid from userspace, only kernel internal */
-#define FAN_MARK_ONDIR		0x00000100
+#define FAN_MARK_ONDIR		0x80000000
 
 #define FAN_GROUP_FLAG(group, flag) \
 	((group)->fanotify_data.flags & (flag))
-- 
cgit v1.2.3


From 007d1e8395eaa59b0e7ad9eb2b53a40859446a88 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 4 Oct 2018 00:25:33 +0300
Subject: fsnotify: generalize handling of extra event flags

FS_EVENT_ON_CHILD gets a special treatment in fsnotify() because it is
not a flag specifying an event type, but rather an extra flags that may
be reported along with another event and control the handling of the
event by the backend.

FS_ISDIR is also an "extra flag" and not an "event type" and therefore
desrves the same treatment. With inotify/dnotify backends it was never
possible to set FS_ISDIR in mark masks, so it did not matter.
With fanotify backend, mark adding code jumps through hoops to avoid
setting the FS_ISDIR in the commulative object mask.

Separate the constant ALL_FSNOTIFY_EVENTS to ALL_FSNOTIFY_FLAGS and
ALL_FSNOTIFY_EVENTS, so the latter can be used to test for specific
event types.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fsnotify.c             | 7 +++----
 include/linux/fsnotify_backend.h | 9 +++++++--
 2 files changed, 10 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 422fbc6dffde..7391a02bf723 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -196,7 +196,7 @@ static int send_to_group(struct inode *to_tell,
 			 struct fsnotify_iter_info *iter_info)
 {
 	struct fsnotify_group *group = NULL;
-	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
+	__u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
 	__u32 marks_mask = 0;
 	__u32 marks_ignored_mask = 0;
 	struct fsnotify_mark *mark;
@@ -329,8 +329,7 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 	struct mount *mnt = NULL;
 	__u32 mnt_or_sb_mask = 0;
 	int ret = 0;
-	/* global tests shouldn't care about events on child only the specific event */
-	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
+	__u32 test_mask = (mask & ALL_FSNOTIFY_EVENTS);
 
 	if (data_is == FSNOTIFY_EVENT_PATH) {
 		mnt = real_mount(((const struct path *)data)->mnt);
@@ -396,7 +395,7 @@ static __init int fsnotify_init(void)
 {
 	int ret;
 
-	BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23);
+	BUG_ON(hweight32(ALL_FSNOTIFY_BITS) != 23);
 
 	ret = init_srcu_struct(&fsnotify_mark_srcu);
 	if (ret)
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 8e91341cbd8a..135b973e44d1 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -68,15 +68,20 @@
 
 #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM)
 
+/* Events that can be reported to backends */
 #define ALL_FSNOTIFY_EVENTS (FS_ACCESS | FS_MODIFY | FS_ATTRIB | \
 			     FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN | \
 			     FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE | \
 			     FS_DELETE | FS_DELETE_SELF | FS_MOVE_SELF | \
 			     FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \
-			     FS_OPEN_PERM | FS_ACCESS_PERM | FS_EXCL_UNLINK | \
-			     FS_ISDIR | FS_IN_ONESHOT | FS_DN_RENAME | \
+			     FS_OPEN_PERM | FS_ACCESS_PERM | FS_DN_RENAME)
+
+/* Extra flags that may be reported with event or control handling of events */
+#define ALL_FSNOTIFY_FLAGS  (FS_EXCL_UNLINK | FS_ISDIR | FS_IN_ONESHOT | \
 			     FS_DN_MULTISHOT | FS_EVENT_ON_CHILD)
 
+#define ALL_FSNOTIFY_BITS   (ALL_FSNOTIFY_EVENTS | ALL_FSNOTIFY_FLAGS)
+
 struct fsnotify_group;
 struct fsnotify_event;
 struct fsnotify_mark;
-- 
cgit v1.2.3


From a72fd224e37bf6a0630bce302deebbccbc236ba2 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 4 Oct 2018 00:25:34 +0300
Subject: fanotify: simplify handling of FAN_ONDIR

fanotify mark add/remove code jumps through hoops to avoid setting the
FS_ISDIR in the commulative object mask.

That was just papering over a bug in fsnotify() handling of the FS_ISDIR
extra flag. This bug is now fixed, so all the hoops can be removed along
with the unneeded internal flag FAN_MARK_ONDIR.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify_user.c | 32 +++++---------------------------
 include/linux/fanotify.h           |  3 ---
 2 files changed, 5 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 15719d4aa4b5..34b511407035 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -506,18 +506,10 @@ static __u32 fanotify_mark_remove_from_mask(struct fsnotify_mark *fsn_mark,
 
 	spin_lock(&fsn_mark->lock);
 	if (!(flags & FAN_MARK_IGNORED_MASK)) {
-		__u32 tmask = fsn_mark->mask & ~mask;
-
-		if (flags & FAN_MARK_ONDIR)
-			tmask &= ~FAN_ONDIR;
-
 		oldmask = fsn_mark->mask;
-		fsn_mark->mask = tmask;
+		fsn_mark->mask &= ~mask;
 	} else {
-		__u32 tmask = fsn_mark->ignored_mask & ~mask;
-		if (flags & FAN_MARK_ONDIR)
-			tmask &= ~FAN_ONDIR;
-		fsn_mark->ignored_mask = tmask;
+		fsn_mark->ignored_mask &= ~mask;
 	}
 	*destroy = !(fsn_mark->mask | fsn_mark->ignored_mask);
 	spin_unlock(&fsn_mark->lock);
@@ -586,19 +578,10 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
 
 	spin_lock(&fsn_mark->lock);
 	if (!(flags & FAN_MARK_IGNORED_MASK)) {
-		__u32 tmask = fsn_mark->mask | mask;
-
-		if (flags & FAN_MARK_ONDIR)
-			tmask |= FAN_ONDIR;
-
 		oldmask = fsn_mark->mask;
-		fsn_mark->mask = tmask;
+		fsn_mark->mask |= mask;
 	} else {
-		__u32 tmask = fsn_mark->ignored_mask | mask;
-		if (flags & FAN_MARK_ONDIR)
-			tmask |= FAN_ONDIR;
-
-		fsn_mark->ignored_mask = tmask;
+		fsn_mark->ignored_mask |= mask;
 		if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
 			fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
 	}
@@ -820,7 +803,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	struct fsnotify_group *group;
 	struct fd f;
 	struct path path;
-	u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD;
+	u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD | FAN_ONDIR;
 	unsigned int mark_type = flags & FAN_MARK_TYPE_MASK;
 	int ret;
 
@@ -857,11 +840,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 		return -EINVAL;
 	}
 
-	if (mask & FAN_ONDIR) {
-		flags |= FAN_MARK_ONDIR;
-		mask &= ~FAN_ONDIR;
-	}
-
 	if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
 		valid_mask |= FAN_ALL_PERM_EVENTS;
 
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index e70fccc3757e..a8c3fc54276d 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -4,9 +4,6 @@
 
 #include <uapi/linux/fanotify.h>
 
-/* not valid from userspace, only kernel internal */
-#define FAN_MARK_ONDIR		0x80000000
-
 #define FAN_GROUP_FLAG(group, flag) \
 	((group)->fanotify_data.flags & (flag))
 
-- 
cgit v1.2.3


From 23c9deeb3285d34fd243abb3d6b9f07db60c3cf4 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 4 Oct 2018 00:25:35 +0300
Subject: fanotify: deprecate uapi FAN_ALL_* constants

We do not want to add new bits to the FAN_ALL_* uapi constants
because they have been exposed to userspace.  If there are programs
out there using these constants, those programs could break if
re-compiled with modified FAN_ALL_* constants and run on an old kernel.

We deprecate the uapi constants FAN_ALL_* and define new FANOTIFY_*
constants for internal use to replace them. New feature bits will be
added only to the new constants.

Cc: <linux-api@vger.kernel.org>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.c      |  6 ++---
 fs/notify/fanotify/fanotify.h      |  2 +-
 fs/notify/fanotify/fanotify_user.c | 22 +++++++++---------
 include/linux/fanotify.h           | 47 ++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/fanotify.h      | 18 +++++++--------
 5 files changed, 71 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 94b52157bf8d..03498eb995be 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -131,8 +131,8 @@ static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info,
 	    !(marks_mask & FS_ISDIR & ~marks_ignored_mask))
 		return false;
 
-	if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask &
-				 ~marks_ignored_mask)
+	if (event_mask & FANOTIFY_OUTGOING_EVENTS &
+	    marks_mask & ~marks_ignored_mask)
 		return true;
 
 	return false;
@@ -236,7 +236,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
 	ret = fsnotify_add_event(group, fsn_event, fanotify_merge);
 	if (ret) {
 		/* Permission events shouldn't be merged */
-		BUG_ON(ret == 1 && mask & FAN_ALL_PERM_EVENTS);
+		BUG_ON(ret == 1 && mask & FANOTIFY_PERM_EVENTS);
 		/* Our event wasn't used in the end. Free it. */
 		fsnotify_destroy_event(group, fsn_event);
 
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 8609ba06f474..88a8290a61cb 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -44,7 +44,7 @@ FANOTIFY_PE(struct fsnotify_event *fse)
 static inline bool fanotify_is_perm_event(u32 mask)
 {
 	return IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS) &&
-		mask & FAN_ALL_PERM_EVENTS;
+		mask & FANOTIFY_PERM_EVENTS;
 }
 
 static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse)
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 34b511407035..530e5e486105 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -131,7 +131,7 @@ static int fill_event_metadata(struct fsnotify_group *group,
 	metadata->metadata_len = FAN_EVENT_METADATA_LEN;
 	metadata->vers = FANOTIFY_METADATA_VERSION;
 	metadata->reserved = 0;
-	metadata->mask = fsn_event->mask & FAN_ALL_OUTGOING_EVENTS;
+	metadata->mask = fsn_event->mask & FANOTIFY_OUTGOING_EVENTS;
 	metadata->pid = pid_vnr(event->tgid);
 	if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW))
 		metadata->fd = FAN_NOFD;
@@ -395,7 +395,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 	 */
 	while (!fsnotify_notify_queue_is_empty(group)) {
 		fsn_event = fsnotify_remove_first_event(group);
-		if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS)) {
+		if (!(fsn_event->mask & FANOTIFY_PERM_EVENTS)) {
 			spin_unlock(&group->notification_lock);
 			fsnotify_destroy_event(group, fsn_event);
 			spin_lock(&group->notification_lock);
@@ -691,9 +691,9 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 		return -EPERM;
 
 #ifdef CONFIG_AUDITSYSCALL
-	if (flags & ~(FAN_ALL_INIT_FLAGS | FAN_ENABLE_AUDIT))
+	if (flags & ~(FANOTIFY_INIT_FLAGS | FAN_ENABLE_AUDIT))
 #else
-	if (flags & ~FAN_ALL_INIT_FLAGS)
+	if (flags & ~FANOTIFY_INIT_FLAGS)
 #endif
 		return -EINVAL;
 
@@ -745,7 +745,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	group->fanotify_data.f_flags = event_f_flags;
 	init_waitqueue_head(&group->fanotify_data.access_waitq);
 	INIT_LIST_HEAD(&group->fanotify_data.access_list);
-	switch (flags & FAN_ALL_CLASS_BITS) {
+	switch (flags & FANOTIFY_CLASS_BITS) {
 	case FAN_CLASS_NOTIF:
 		group->priority = FS_PRIO_0;
 		break;
@@ -803,8 +803,8 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	struct fsnotify_group *group;
 	struct fd f;
 	struct path path;
-	u32 valid_mask = FAN_ALL_EVENTS | FAN_EVENT_ON_CHILD | FAN_ONDIR;
-	unsigned int mark_type = flags & FAN_MARK_TYPE_MASK;
+	u32 valid_mask = FANOTIFY_EVENTS | FAN_EVENT_ON_CHILD | FAN_ONDIR;
+	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
 	int ret;
 
 	pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
@@ -814,7 +814,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	if (mask & ((__u64)0xffffffff << 32))
 		return -EINVAL;
 
-	if (flags & ~FAN_ALL_MARK_FLAGS)
+	if (flags & ~FANOTIFY_MARK_FLAGS)
 		return -EINVAL;
 
 	switch (mark_type) {
@@ -833,7 +833,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 			return -EINVAL;
 		break;
 	case FAN_MARK_FLUSH:
-		if (flags & ~(FAN_MARK_TYPE_MASK | FAN_MARK_FLUSH))
+		if (flags & ~(FANOTIFY_MARK_TYPE_BITS | FAN_MARK_FLUSH))
 			return -EINVAL;
 		break;
 	default:
@@ -841,7 +841,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	}
 
 	if (IS_ENABLED(CONFIG_FANOTIFY_ACCESS_PERMISSIONS))
-		valid_mask |= FAN_ALL_PERM_EVENTS;
+		valid_mask |= FANOTIFY_PERM_EVENTS;
 
 	if (mask & ~valid_mask)
 		return -EINVAL;
@@ -861,7 +861,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	 * allowed to set permissions events.
 	 */
 	ret = -EINVAL;
-	if (mask & FAN_ALL_PERM_EVENTS &&
+	if (mask & FANOTIFY_PERM_EVENTS &&
 	    group->priority == FS_PRIO_0)
 		goto fput_and_out;
 
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index a8c3fc54276d..4519b0988afe 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -7,4 +7,51 @@
 #define FAN_GROUP_FLAG(group, flag) \
 	((group)->fanotify_data.flags & (flag))
 
+/*
+ * Flags allowed to be passed from/to userspace.
+ *
+ * We intentionally do not add new bits to the old FAN_ALL_* constants, because
+ * they are uapi exposed constants. If there are programs out there using
+ * these constant, the programs may break if re-compiled with new uapi headers
+ * and then run on an old kernel.
+ */
+#define FANOTIFY_CLASS_BITS	(FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \
+				 FAN_CLASS_PRE_CONTENT)
+
+#define FANOTIFY_INIT_FLAGS	(FANOTIFY_CLASS_BITS | \
+				 FAN_CLOEXEC | FAN_NONBLOCK | \
+				 FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS)
+
+#define FANOTIFY_MARK_TYPE_BITS	(FAN_MARK_INODE | FAN_MARK_MOUNT | \
+				 FAN_MARK_FILESYSTEM)
+
+#define FANOTIFY_MARK_FLAGS	(FANOTIFY_MARK_TYPE_BITS | \
+				 FAN_MARK_ADD | \
+				 FAN_MARK_REMOVE | \
+				 FAN_MARK_DONT_FOLLOW | \
+				 FAN_MARK_ONLYDIR | \
+				 FAN_MARK_IGNORED_MASK | \
+				 FAN_MARK_IGNORED_SURV_MODIFY | \
+				 FAN_MARK_FLUSH)
+
+/* Events that user can request to be notified on */
+#define FANOTIFY_EVENTS		(FAN_ACCESS | FAN_MODIFY | \
+				 FAN_CLOSE | FAN_OPEN)
+
+/* Events that require a permission response from user */
+#define FANOTIFY_PERM_EVENTS	(FAN_OPEN_PERM | FAN_ACCESS_PERM)
+
+/* Events that may be reported to user */
+#define FANOTIFY_OUTGOING_EVENTS	(FANOTIFY_EVENTS | \
+					 FANOTIFY_PERM_EVENTS | \
+					 FAN_Q_OVERFLOW)
+
+/* Do not use these old uapi constants internally */
+#undef FAN_ALL_CLASS_BITS
+#undef FAN_ALL_INIT_FLAGS
+#undef FAN_ALL_MARK_FLAGS
+#undef FAN_ALL_EVENTS
+#undef FAN_ALL_PERM_EVENTS
+#undef FAN_ALL_OUTGOING_EVENTS
+
 #endif /* _LINUX_FANOTIFY_H */
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index ad81234d1919..d0c05de670ef 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -31,6 +31,8 @@
 #define FAN_CLASS_NOTIF		0x00000000
 #define FAN_CLASS_CONTENT	0x00000004
 #define FAN_CLASS_PRE_CONTENT	0x00000008
+
+/* Deprecated - do not use this in programs and do not add new flags here! */
 #define FAN_ALL_CLASS_BITS	(FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \
 				 FAN_CLASS_PRE_CONTENT)
 
@@ -38,6 +40,7 @@
 #define FAN_UNLIMITED_MARKS	0x00000020
 #define FAN_ENABLE_AUDIT	0x00000040
 
+/* Deprecated - do not use this in programs and do not add new flags here! */
 #define FAN_ALL_INIT_FLAGS	(FAN_CLOEXEC | FAN_NONBLOCK | \
 				 FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\
 				 FAN_UNLIMITED_MARKS)
@@ -57,23 +60,18 @@
 #define FAN_MARK_INODE		0x00000000
 #define FAN_MARK_MOUNT		0x00000010
 #define FAN_MARK_FILESYSTEM	0x00000100
-#define FAN_MARK_TYPE_MASK	(FAN_MARK_INODE | FAN_MARK_MOUNT | \
-				 FAN_MARK_FILESYSTEM)
 
+/* Deprecated - do not use this in programs and do not add new flags here! */
 #define FAN_ALL_MARK_FLAGS	(FAN_MARK_ADD |\
 				 FAN_MARK_REMOVE |\
 				 FAN_MARK_DONT_FOLLOW |\
 				 FAN_MARK_ONLYDIR |\
+				 FAN_MARK_MOUNT |\
 				 FAN_MARK_IGNORED_MASK |\
 				 FAN_MARK_IGNORED_SURV_MODIFY |\
-				 FAN_MARK_FLUSH|\
-				 FAN_MARK_TYPE_MASK)
+				 FAN_MARK_FLUSH)
 
-/*
- * All of the events - we build the list by hand so that we can add flags in
- * the future and not break backward compatibility.  Apps will get only the
- * events that they originally wanted.  Be sure to add new events here!
- */
+/* Deprecated - do not use this in programs and do not add new flags here! */
 #define FAN_ALL_EVENTS (FAN_ACCESS |\
 			FAN_MODIFY |\
 			FAN_CLOSE |\
@@ -82,9 +80,11 @@
 /*
  * All events which require a permission response from userspace
  */
+/* Deprecated - do not use this in programs and do not add new flags here! */
 #define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\
 			     FAN_ACCESS_PERM)
 
+/* Deprecated - do not use this in programs and do not add new flags here! */
 #define FAN_ALL_OUTGOING_EVENTS	(FAN_ALL_EVENTS |\
 				 FAN_ALL_PERM_EVENTS |\
 				 FAN_Q_OVERFLOW)
-- 
cgit v1.2.3


From bdd5a46fe30653cb4d26c7c787a22159bf79eed9 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 4 Oct 2018 00:25:37 +0300
Subject: fanotify: add BUILD_BUG_ON() to count the bits of fanotify constants

Also define the FANOTIFY_EVENT_FLAGS consisting of the extra flags
FAN_ONDIR and FAN_ON_CHILD.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.c      | 2 ++
 fs/notify/fanotify/fanotify_user.c | 5 ++++-
 include/linux/fanotify.h           | 6 ++++++
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 03498eb995be..361e3a0a445c 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -205,6 +205,8 @@ static int fanotify_handle_event(struct fsnotify_group *group,
 	BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
 	BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
 
+	BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 10);
+
 	if (!fanotify_should_send_event(iter_info, mask, data, data_type))
 		return 0;
 
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 530e5e486105..14594e491d2b 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -803,7 +803,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	struct fsnotify_group *group;
 	struct fd f;
 	struct path path;
-	u32 valid_mask = FANOTIFY_EVENTS | FAN_EVENT_ON_CHILD | FAN_ONDIR;
+	u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
 	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
 	int ret;
 
@@ -944,6 +944,9 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark,
  */
 static int __init fanotify_user_setup(void)
 {
+	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 6);
+	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
+
 	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
 					 SLAB_PANIC|SLAB_ACCOUNT);
 	fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC);
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 4519b0988afe..caf55c67fc6c 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -41,11 +41,17 @@
 /* Events that require a permission response from user */
 #define FANOTIFY_PERM_EVENTS	(FAN_OPEN_PERM | FAN_ACCESS_PERM)
 
+/* Extra flags that may be reported with event or control handling of events */
+#define FANOTIFY_EVENT_FLAGS	(FAN_EVENT_ON_CHILD | FAN_ONDIR)
+
 /* Events that may be reported to user */
 #define FANOTIFY_OUTGOING_EVENTS	(FANOTIFY_EVENTS | \
 					 FANOTIFY_PERM_EVENTS | \
 					 FAN_Q_OVERFLOW)
 
+#define ALL_FANOTIFY_EVENT_BITS		(FANOTIFY_OUTGOING_EVENTS | \
+					 FANOTIFY_EVENT_FLAGS)
+
 /* Do not use these old uapi constants internally */
 #undef FAN_ALL_CLASS_BITS
 #undef FAN_ALL_INIT_FLAGS
-- 
cgit v1.2.3


From bbb4c4323a4d9cb5ca04db904aa3050a7586839a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 4 Oct 2018 14:27:55 +0100
Subject: dns: Allow the dns resolver to retrieve a server set

Allow the DNS resolver to retrieve a set of servers and their associated
addresses, ports, preference and weight ratings.

In terms of communication with userspace, "srv=1" is added to the callout
string (the '1' indicating the maximum data version supported by the
kernel) to ask the userspace side for this.

If the userspace side doesn't recognise it, it will ignore the option and
return the usual text address list.

If the userspace side does recognise it, it will return some binary data
that begins with a zero byte that would cause the string parsers to give an
error.  The second byte contains the version of the data in the blob (this
may be between 1 and the version specified in the callout data).  The
remainder of the payload is version-specific.

In version 1, the payload looks like (note that this is packed):

	u8	Non-string marker (ie. 0)
	u8	Content (0 => Server list)
	u8	Version (ie. 1)
	u8	Source (eg. DNS_RECORD_FROM_DNS_SRV)
	u8	Status (eg. DNS_LOOKUP_GOOD)
	u8	Number of servers
	foreach-server {
		u16	Name length (LE)
		u16	Priority (as per SRV record) (LE)
		u16	Weight (as per SRV record) (LE)
		u16	Port (LE)
		u8	Source (eg. DNS_RECORD_FROM_NSS)
		u8	Status (eg. DNS_LOOKUP_GOT_NOT_FOUND)
		u8	Protocol (eg. DNS_SERVER_PROTOCOL_UDP)
		u8	Number of addresses
		char[]	Name (not NUL-terminated)
		foreach-address {
			u8		Family (AF_INET{,6})
			union {
				u8[4]	ipv4_addr
				u8[16]	ipv6_addr
			}
		}
	}

This can then be used to fetch a whole cell's VL-server configuration for
AFS, for example.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dns_resolver.h      |   4 +-
 include/uapi/linux/dns_resolver.h | 116 ++++++++++++++++++++++++++++++++++++++
 net/dns_resolver/dns_key.c        |  67 +++++++++++++++++++++-
 net/dns_resolver/dns_query.c      |   5 +-
 4 files changed, 182 insertions(+), 10 deletions(-)
 create mode 100644 include/uapi/linux/dns_resolver.h

(limited to 'include/linux')

diff --git a/include/linux/dns_resolver.h b/include/linux/dns_resolver.h
index 6ac3cad9aef1..34a744a1bafc 100644
--- a/include/linux/dns_resolver.h
+++ b/include/linux/dns_resolver.h
@@ -24,11 +24,9 @@
 #ifndef _LINUX_DNS_RESOLVER_H
 #define _LINUX_DNS_RESOLVER_H
 
-#ifdef __KERNEL__
+#include <uapi/linux/dns_resolver.h>
 
 extern int dns_query(const char *type, const char *name, size_t namelen,
 		     const char *options, char **_result, time64_t *_expiry);
 
-#endif /* KERNEL */
-
 #endif /* _LINUX_DNS_RESOLVER_H */
diff --git a/include/uapi/linux/dns_resolver.h b/include/uapi/linux/dns_resolver.h
new file mode 100644
index 000000000000..129745f9c794
--- /dev/null
+++ b/include/uapi/linux/dns_resolver.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/* DNS resolver interface definitions.
+ *
+ * Copyright (C) 2018 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_DNS_RESOLVER_H
+#define _UAPI_LINUX_DNS_RESOLVER_H
+
+#include <linux/types.h>
+
+/*
+ * Type of payload.
+ */
+enum dns_payload_content_type {
+	DNS_PAYLOAD_IS_SERVER_LIST	= 0, /* List of servers, requested by srv=1 */
+};
+
+/*
+ * Type of address that might be found in an address record.
+ */
+enum dns_payload_address_type {
+	DNS_ADDRESS_IS_IPV4		= 0, /* 4-byte AF_INET address */
+	DNS_ADDRESS_IS_IPV6		= 1, /* 16-byte AF_INET6 address */
+};
+
+/*
+ * Type of protocol used to access a server.
+ */
+enum dns_payload_protocol_type {
+	DNS_SERVER_PROTOCOL_UNSPECIFIED	= 0,
+	DNS_SERVER_PROTOCOL_UDP		= 1, /* Use UDP to talk to the server */
+	DNS_SERVER_PROTOCOL_TCP		= 2, /* Use TCP to talk to the server */
+};
+
+/*
+ * Source of record included in DNS resolver payload.
+ */
+enum dns_record_source {
+	DNS_RECORD_UNAVAILABLE		= 0, /* No source available (empty record) */
+	DNS_RECORD_FROM_CONFIG		= 1, /* From local configuration data */
+	DNS_RECORD_FROM_DNS_A		= 2, /* From DNS A or AAAA record */
+	DNS_RECORD_FROM_DNS_AFSDB	= 3, /* From DNS AFSDB record */
+	DNS_RECORD_FROM_DNS_SRV		= 4, /* From DNS SRV record */
+	DNS_RECORD_FROM_NSS		= 5, /* From NSS */
+	NR__dns_record_source
+};
+
+/*
+ * Status of record included in DNS resolver payload.
+ */
+enum dns_lookup_status {
+	DNS_LOOKUP_NOT_DONE		= 0, /* No lookup has been made */
+	DNS_LOOKUP_GOOD			= 1, /* Good records obtained */
+	DNS_LOOKUP_GOOD_WITH_BAD	= 2, /* Good records, some decoding errors */
+	DNS_LOOKUP_BAD			= 3, /* Couldn't decode results */
+	DNS_LOOKUP_GOT_NOT_FOUND	= 4, /* Got a "Not Found" result */
+	DNS_LOOKUP_GOT_LOCAL_FAILURE	= 5, /* Local failure during lookup */
+	DNS_LOOKUP_GOT_TEMP_FAILURE	= 6, /* Temporary failure during lookup */
+	DNS_LOOKUP_GOT_NS_FAILURE	= 7, /* Name server failure */
+	NR__dns_lookup_status
+};
+
+/*
+ * Header at the beginning of binary format payload.
+ */
+struct dns_payload_header {
+	__u8		zero;		/* Zero byte: marks this as not being text */
+	__u8		content;	/* enum dns_payload_content_type */
+	__u8		version;	/* Encoding version */
+} __packed;
+
+/*
+ * Header at the beginning of a V1 server list.  This is followed directly by
+ * the server records.  Each server records begins with a struct of type
+ * dns_server_list_v1_server.
+ */
+struct dns_server_list_v1_header {
+	struct dns_payload_header hdr;
+	__u8		source;		/* enum dns_record_source */
+	__u8		status;		/* enum dns_lookup_status */
+	__u8		nr_servers;	/* Number of server records following this */
+} __packed;
+
+/*
+ * Header at the beginning of each V1 server record.  This is followed by the
+ * characters of the name with no NUL-terminator, followed by the address
+ * records for that server.  Each address record begins with a struct of type
+ * struct dns_server_list_v1_address.
+ */
+struct dns_server_list_v1_server {
+	__u16		name_len;	/* Length of name (LE) */
+	__u16		priority;	/* Priority (as SRV record) (LE) */
+	__u16		weight;		/* Weight (as SRV record) (LE) */
+	__u16		port;		/* UDP/TCP port number (LE) */
+	__u8		source;		/* enum dns_record_source */
+	__u8		status;		/* enum dns_lookup_status */
+	__u8		protocol;	/* enum dns_payload_protocol_type */
+	__u8		nr_addrs;
+} __packed;
+
+/*
+ * Header at the beginning of each V1 address record.  This is followed by the
+ * bytes of the address, 4 for IPV4 and 16 for IPV6.
+ */
+struct dns_server_list_v1_address {
+	__u8		address_type;	/* enum dns_payload_address_type */
+} __packed;
+
+#endif /* _UAPI_LINUX_DNS_RESOLVER_H */
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 7f4534828f6c..a65d553e730d 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -29,6 +29,7 @@
 #include <linux/keyctl.h>
 #include <linux/err.h>
 #include <linux/seq_file.h>
+#include <linux/dns_resolver.h>
 #include <keys/dns_resolver-type.h>
 #include <keys/user-type.h>
 #include "internal.h"
@@ -48,27 +49,86 @@ const struct cred *dns_resolver_cache;
 /*
  * Preparse instantiation data for a dns_resolver key.
  *
- * The data must be a NUL-terminated string, with the NUL char accounted in
- * datalen.
+ * For normal hostname lookups, the data must be a NUL-terminated string, with
+ * the NUL char accounted in datalen.
  *
  * If the data contains a '#' characters, then we take the clause after each
  * one to be an option of the form 'key=value'.  The actual data of interest is
  * the string leading up to the first '#'.  For instance:
  *
  *        "ip1,ip2,...#foo=bar"
+ *
+ * For server list requests, the data must begin with a NUL char and be
+ * followed by a byte indicating the version of the data format.  Version 1
+ * looks something like (note this is packed):
+ *
+ *	u8      Non-string marker (ie. 0)
+ *	u8	Content (DNS_PAYLOAD_IS_*)
+ *	u8	Version (e.g. 1)
+ *	u8	Source of server list
+ *	u8	Lookup status of server list
+ *	u8	Number of servers
+ *	foreach-server {
+ *		__le16	Name length
+ *		__le16	Priority (as per SRV record, low first)
+ *		__le16	Weight (as per SRV record, higher first)
+ *		__le16	Port
+ *		u8	Source of address list
+ *		u8	Lookup status of address list
+ *		u8	Protocol (DNS_SERVER_PROTOCOL_*)
+ *		u8	Number of addresses
+ *		char[]	Name (not NUL-terminated)
+ *		foreach-address {
+ *			u8		Family (DNS_ADDRESS_IS_*)
+ *			union {
+ *				u8[4]	ipv4_addr
+ *				u8[16]	ipv6_addr
+ *			}
+ *		}
+ *	}
+ *
  */
 static int
 dns_resolver_preparse(struct key_preparsed_payload *prep)
 {
+	const struct dns_payload_header *bin;
 	struct user_key_payload *upayload;
 	unsigned long derrno;
 	int ret;
 	int datalen = prep->datalen, result_len = 0;
 	const char *data = prep->data, *end, *opt;
 
+	if (datalen <= 1 || !data)
+		return -EINVAL;
+
+	if (data[0] == 0) {
+		/* It may be a server list. */
+		if (datalen <= sizeof(*bin))
+			return -EINVAL;
+
+		bin = (const struct dns_payload_header *)data;
+		kenter("[%u,%u],%u", bin->content, bin->version, datalen);
+		if (bin->content != DNS_PAYLOAD_IS_SERVER_LIST) {
+			pr_warn_ratelimited(
+				"dns_resolver: Unsupported content type (%u)\n",
+				bin->content);
+			return -EINVAL;
+		}
+
+		if (bin->version != 1) {
+			pr_warn_ratelimited(
+				"dns_resolver: Unsupported server list version (%u)\n",
+				bin->version);
+			return -EINVAL;
+		}
+
+		result_len = datalen;
+		goto store_result;
+	}
+
 	kenter("'%*.*s',%u", datalen, datalen, data, datalen);
 
-	if (datalen <= 1 || !data || data[datalen - 1] != '\0')
+	if (!data || data[datalen - 1] != '\0')
 		return -EINVAL;
 	datalen--;
 
@@ -144,6 +204,7 @@ dns_resolver_preparse(struct key_preparsed_payload *prep)
 		return 0;
 	}
 
+store_result:
 	kdebug("store result");
 	prep->quotalen = result_len;
 
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index 49da67034f29..76338c38738a 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -148,12 +148,9 @@ int dns_query(const char *type, const char *name, size_t namelen,
 
 	if (_result) {
 		ret = -ENOMEM;
-		*_result = kmalloc(len + 1, GFP_KERNEL);
+		*_result = kmemdup_nul(upayload->data, len, GFP_KERNEL);
 		if (!*_result)
 			goto put;
-
-		memcpy(*_result, upayload->data, len);
-		(*_result)[len] = '\0';
 	}
 
 	if (_expiry)
-- 
cgit v1.2.3


From 817e9bc8cc04e5c70592525b96812c46c1aa1c46 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 4 Oct 2018 09:57:23 -0700
Subject: Revert "serial:serial_core: Allow use of CTS for PPS line discipline"

This reverts commit c550f01c810f2197c98e6e3103f81797f5e063be.

Turns out the samsung tty driver is mucking around in the "unused" port
fields and this patch breaks that code :(

So we need to fix that driver up before this can be accepted.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Steve Sakoman <steve@sakoman.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-tty |  9 -----
 drivers/tty/serial/serial_core.c    | 70 +------------------------------------
 include/linux/serial_core.h         |  3 +-
 3 files changed, 2 insertions(+), 80 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-tty b/Documentation/ABI/testing/sysfs-tty
index 441105a75d1f..9eb3c2b6b040 100644
--- a/Documentation/ABI/testing/sysfs-tty
+++ b/Documentation/ABI/testing/sysfs-tty
@@ -154,12 +154,3 @@ Description:
 		 device specification. For example, when user sets 7bytes on
 		 16550A, which has 1/4/8/14 bytes trigger, the RX trigger is
 		 automatically changed to 4 bytes.
-
-What:		/sys/class/tty/ttyS0/pps_4wire
-Date:		September 2018
-Contact:	Steve Sakoman <steve@sakoman.com>
-Description:
-		 Shows/sets "4 wire" mode for the PPS input to the serial driver.
-		 For fully implemented serial ports PPS is normally provided
-		 on the DCD line. For partial "4 wire" implementations CTS is
-		 used instead of DCD.
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 0a4e6eeb5ff3..70402cdb4d8c 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -2724,57 +2724,6 @@ static ssize_t uart_get_attr_iomem_reg_shift(struct device *dev,
 	return snprintf(buf, PAGE_SIZE, "%d\n", tmp.iomem_reg_shift);
 }
 
-static ssize_t pps_4wire_show(struct device *dev,
-	struct device_attribute *attr, char *buf)
-{
-	struct tty_port *port = dev_get_drvdata(dev);
-	struct uart_state *state = container_of(port, struct uart_state, port);
-	struct uart_port *uport;
-	int mode = 0;
-
-	mutex_lock(&port->mutex);
-	uport = uart_port_check(state);
-	if (!uport)
-		goto out;
-
-	mode = uport->pps_4wire;
-
-out:
-	mutex_unlock(&port->mutex);
-	return sprintf(buf, mode ? "yes\n" : "no\n");
-}
-
-static ssize_t pps_4wire_store(struct device *dev,
-	struct device_attribute *attr, const char *buf, size_t count)
-{
-	struct tty_port *port = dev_get_drvdata(dev);
-	struct uart_state *state = container_of(port, struct uart_state, port);
-	struct uart_port *uport;
-	bool mode;
-	int ret;
-
-	if (!count)
-		return -EINVAL;
-
-	ret = kstrtobool(buf, &mode);
-	if (ret < 0)
-		return ret;
-
-	mutex_lock(&port->mutex);
-	uport = uart_port_check(state);
-	if (!uport)
-		goto out;
-
-	spin_lock_irq(&uport->lock);
-	uport->pps_4wire = mode;
-	spin_unlock_irq(&uport->lock);
-
-out:
-	mutex_unlock(&port->mutex);
-	return count;
-}
-static DEVICE_ATTR_RW(pps_4wire);
-
 static DEVICE_ATTR(type, S_IRUSR | S_IRGRP, uart_get_attr_type, NULL);
 static DEVICE_ATTR(line, S_IRUSR | S_IRGRP, uart_get_attr_line, NULL);
 static DEVICE_ATTR(port, S_IRUSR | S_IRGRP, uart_get_attr_port, NULL);
@@ -2803,7 +2752,6 @@ static struct attribute *tty_dev_attrs[] = {
 	&dev_attr_io_type.attr,
 	&dev_attr_iomem_base.attr,
 	&dev_attr_iomem_reg_shift.attr,
-	&dev_attr_pps_4wire.attr,
 	NULL,
 	};
 
@@ -2860,9 +2808,6 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport)
 		goto out;
 	}
 
-	/* assert that pps handling is done via DCD as default */
-	uport->pps_4wire = 0;
-
 	/*
 	 * If this port is a console, then the spinlock is already
 	 * initialised.
@@ -3038,7 +2983,7 @@ void uart_handle_dcd_change(struct uart_port *uport, unsigned int status)
 
 	lockdep_assert_held_once(&uport->lock);
 
-	if (tty && !uport->pps_4wire) {
+	if (tty) {
 		ld = tty_ldisc_ref(tty);
 		if (ld) {
 			if (ld->ops->dcd_change)
@@ -3067,21 +3012,8 @@ EXPORT_SYMBOL_GPL(uart_handle_dcd_change);
  */
 void uart_handle_cts_change(struct uart_port *uport, unsigned int status)
 {
-	struct tty_port *port = &uport->state->port;
-	struct tty_struct *tty = port->tty;
-	struct tty_ldisc *ld;
-
 	lockdep_assert_held_once(&uport->lock);
 
-	if (tty && uport->pps_4wire) {
-		ld = tty_ldisc_ref(tty);
-		if (ld) {
-			if (ld->ops->dcd_change)
-				ld->ops->dcd_change(tty, status);
-			tty_ldisc_deref(ld);
-		}
-	}
-
 	uport->icount.cts++;
 
 	if (uart_softcts_mode(uport)) {
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 4e2ba4894dcc..047fa67d039b 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -257,8 +257,7 @@ struct uart_port {
 	struct device		*dev;			/* parent device */
 	unsigned char		hub6;			/* this should be in the 8250 driver */
 	unsigned char		suspended;
-	unsigned char		pps_4wire;		/* CTS instead of DCD */
-	unsigned char		unused;
+	unsigned char		unused[2];
 	const char		*name;			/* port name */
 	struct attribute_group	*attr_group;		/* port specific attributes */
 	const struct attribute_group **tty_groups;	/* all attributes (serial core use only) */
-- 
cgit v1.2.3


From 5d5a0ab1a7918fce5ca5c0fb1871a3e2000f85de Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 1 Oct 2018 10:54:51 -0500
Subject: of: Fix property name in of_node_get_device_type

Commit 0413bedabc88 ("of: Add device_type access helper functions")
added a new helper not yet used in preparation for some treewide clean
up of accesses to 'device_type' properties. Unfortunately, there's an
error and 'type' was used for the property name. Fix this.

Fixes: 0413bedabc88 ("of: Add device_type access helper functions")
Cc: Frank Rowand <frowand.list@gmail.com>
Signed-off-by: Rob Herring <robh@kernel.org>
---
 include/linux/of.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/of.h b/include/linux/of.h
index f581531d797e..04cef4490efc 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -991,7 +991,7 @@ static inline struct device_node *of_find_matching_node(
 
 static inline const char *of_node_get_device_type(const struct device_node *np)
 {
-	return of_get_property(np, "type", NULL);
+	return of_get_property(np, "device_type", NULL);
 }
 
 static inline bool of_node_is_type(const struct device_node *np, const char *type)
-- 
cgit v1.2.3


From 479adb89a97b0a33e5a9d702119872cc82ca21aa Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 4 Oct 2018 13:28:08 -0700
Subject: cgroup: Fix dom_cgrp propagation when enabling threaded mode

A cgroup which is already a threaded domain may be converted into a
threaded cgroup if the prerequisite conditions are met.  When this
happens, all threaded descendant should also have their ->dom_cgrp
updated to the new threaded domain cgroup.  Unfortunately, this
propagation was missing leading to the following failure.

  # cd /sys/fs/cgroup/unified
  # cat cgroup.subtree_control    # show that no controllers are enabled

  # mkdir -p mycgrp/a/b/c
  # echo threaded > mycgrp/a/b/cgroup.type

  At this point, the hierarchy looks as follows:

      mycgrp [d]
	  a [dt]
	      b [t]
		  c [inv]

  Now let's make node "a" threaded (and thus "mycgrp" s made "domain threaded"):

  # echo threaded > mycgrp/a/cgroup.type

  By this point, we now have a hierarchy that looks as follows:

      mycgrp [dt]
	  a [t]
	      b [t]
		  c [inv]

  But, when we try to convert the node "c" from "domain invalid" to
  "threaded", we get ENOTSUP on the write():

  # echo threaded > mycgrp/a/b/c/cgroup.type
  sh: echo: write error: Operation not supported

This patch fixes the problem by

* Moving the opencoded ->dom_cgrp save and restoration in
  cgroup_enable_threaded() into cgroup_{save|restore}_control() so
  that mulitple cgroups can be handled.

* Updating all threaded descendants' ->dom_cgrp to point to the new
  dom_cgrp when enabling threaded mode.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-and-tested-by: "Michael Kerrisk (man-pages)" <mtk.manpages@gmail.com>
Reported-by: Amin Jamali <ajamali@pivotal.io>
Reported-by: Joao De Almeida Pereira <jpereira@pivotal.io>
Link: https://lore.kernel.org/r/CAKgNAkhHYCMn74TCNiMJ=ccLd7DcmXSbvw3CbZ1YREeG7iJM5g@mail.gmail.com
Fixes: 454000adaa2a ("cgroup: introduce cgroup->dom_cgrp and threaded css_set handling")
Cc: stable@vger.kernel.org # v4.14+
---
 include/linux/cgroup-defs.h |  1 +
 kernel/cgroup/cgroup.c      | 25 ++++++++++++++++---------
 2 files changed, 17 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index ff20b677fb9f..22254c1fe1c5 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -412,6 +412,7 @@ struct cgroup {
 	 * specific task are charged to the dom_cgrp.
 	 */
 	struct cgroup *dom_cgrp;
+	struct cgroup *old_dom_cgrp;		/* used while enabling threaded */
 
 	/* per-cpu recursive resource statistics */
 	struct cgroup_rstat_cpu __percpu *rstat_cpu;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index aae10baf1902..4a3dae2a8283 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -2836,11 +2836,12 @@ restart:
 }
 
 /**
- * cgroup_save_control - save control masks of a subtree
+ * cgroup_save_control - save control masks and dom_cgrp of a subtree
  * @cgrp: root of the target subtree
  *
- * Save ->subtree_control and ->subtree_ss_mask to the respective old_
- * prefixed fields for @cgrp's subtree including @cgrp itself.
+ * Save ->subtree_control, ->subtree_ss_mask and ->dom_cgrp to the
+ * respective old_ prefixed fields for @cgrp's subtree including @cgrp
+ * itself.
  */
 static void cgroup_save_control(struct cgroup *cgrp)
 {
@@ -2850,6 +2851,7 @@ static void cgroup_save_control(struct cgroup *cgrp)
 	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
 		dsct->old_subtree_control = dsct->subtree_control;
 		dsct->old_subtree_ss_mask = dsct->subtree_ss_mask;
+		dsct->old_dom_cgrp = dsct->dom_cgrp;
 	}
 }
 
@@ -2875,11 +2877,12 @@ static void cgroup_propagate_control(struct cgroup *cgrp)
 }
 
 /**
- * cgroup_restore_control - restore control masks of a subtree
+ * cgroup_restore_control - restore control masks and dom_cgrp of a subtree
  * @cgrp: root of the target subtree
  *
- * Restore ->subtree_control and ->subtree_ss_mask from the respective old_
- * prefixed fields for @cgrp's subtree including @cgrp itself.
+ * Restore ->subtree_control, ->subtree_ss_mask and ->dom_cgrp from the
+ * respective old_ prefixed fields for @cgrp's subtree including @cgrp
+ * itself.
  */
 static void cgroup_restore_control(struct cgroup *cgrp)
 {
@@ -2889,6 +2892,7 @@ static void cgroup_restore_control(struct cgroup *cgrp)
 	cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
 		dsct->subtree_control = dsct->old_subtree_control;
 		dsct->subtree_ss_mask = dsct->old_subtree_ss_mask;
+		dsct->dom_cgrp = dsct->old_dom_cgrp;
 	}
 }
 
@@ -3196,6 +3200,8 @@ static int cgroup_enable_threaded(struct cgroup *cgrp)
 {
 	struct cgroup *parent = cgroup_parent(cgrp);
 	struct cgroup *dom_cgrp = parent->dom_cgrp;
+	struct cgroup *dsct;
+	struct cgroup_subsys_state *d_css;
 	int ret;
 
 	lockdep_assert_held(&cgroup_mutex);
@@ -3225,12 +3231,13 @@ static int cgroup_enable_threaded(struct cgroup *cgrp)
 	 */
 	cgroup_save_control(cgrp);
 
-	cgrp->dom_cgrp = dom_cgrp;
+	cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)
+		if (dsct == cgrp || cgroup_is_threaded(dsct))
+			dsct->dom_cgrp = dom_cgrp;
+
 	ret = cgroup_apply_control(cgrp);
 	if (!ret)
 		parent->nr_threaded_children++;
-	else
-		cgrp->dom_cgrp = cgrp;
 
 	cgroup_finalize_control(cgrp, ret);
 	return ret;
-- 
cgit v1.2.3


From d67f34c19a679436dd2963b588015e119279e7a8 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Mon, 17 Sep 2018 14:45:34 +0200
Subject: clocksource: Provide clocksource_arch_init()

Architectures have extra archdata in the clocksource, e.g. for VDSO
support. There are no sanity checks or general initializations for this
available. Add support for that.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Andy Lutomirski <luto@kernel.org>
Acked-by: John Stultz <john.stultz@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Matt Rickard <matt@softrans.com.au>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Florian Weimer <fweimer@redhat.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: devel@linuxdriverproject.org
Cc: virtualization@lists.linux-foundation.org
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Juergen Gross <jgross@suse.com>
Link: https://lkml.kernel.org/r/20180917130706.973042587@linutronix.de
---
 include/linux/clocksource.h | 5 +++++
 kernel/time/Kconfig         | 4 ++++
 kernel/time/clocksource.c   | 2 ++
 3 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 308918928767..6e6b86f9046d 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -241,6 +241,11 @@ static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz
 	__clocksource_update_freq_scale(cs, 1000, khz);
 }
 
+#ifdef CONFIG_ARCH_CLOCKSOURCE_INIT
+extern void clocksource_arch_init(struct clocksource *cs);
+#else
+static inline void clocksource_arch_init(struct clocksource *cs) { }
+#endif
 
 extern int timekeeping_notify(struct clocksource *clock);
 
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 78eabc41eaa6..58b981f4bb5d 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -12,6 +12,10 @@ config CLOCKSOURCE_WATCHDOG
 config ARCH_CLOCKSOURCE_DATA
 	bool
 
+# Architecture has extra clocksource init called from registration
+config ARCH_CLOCKSOURCE_INIT
+	bool
+
 # Clocksources require validation of the clocksource against the last
 # cycle update - x86/TSC misfeature
 config CLOCKSOURCE_VALIDATE_LAST_CYCLE
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 0e6e97a01942..ffe081623aec 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -937,6 +937,8 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
 	unsigned long flags;
 
+	clocksource_arch_init(cs);
+
 	/* Initialize mult/shift and max_idle_ns */
 	__clocksource_update_freq_scale(cs, scale, freq);
 
-- 
cgit v1.2.3


From fae29f135ef4a86228d3ce13b61babf0d3031118 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 25 Sep 2018 12:35:18 -0400
Subject: hwrng: core - document the quality field

quality field is currently documented as being 'per mill'.  In fact the
math involved is:

                add_hwgenerator_randomness((void *)rng_fillbuf, rc,
                                           rc * current_quality * 8 >> 10);

thus the actual definition is "bits of entropy per 1024 bits of input".

The current documentation seems to have confused multiple people
in the past, let's fix the documentation to match code.

An alternative is to change core to match driver expectations, replacing
	rc * current_quality * 8 >> 10
with
	rc * current_quality / 1000
but that has performance costs, so probably isn't a good option.

Fixes: 0f734e6e768 ("hwrng: add per-device entropy derating")
Reported-by: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/core.c | 4 ++--
 include/linux/hw_random.h     | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index aaf9e5afaad4..95be7228f327 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -44,10 +44,10 @@ static unsigned short default_quality; /* = 0; default to "off" */
 
 module_param(current_quality, ushort, 0644);
 MODULE_PARM_DESC(current_quality,
-		 "current hwrng entropy estimation per mill");
+		 "current hwrng entropy estimation per 1024 bits of input");
 module_param(default_quality, ushort, 0644);
 MODULE_PARM_DESC(default_quality,
-		 "default entropy content of hwrng per mill");
+		 "default entropy content of hwrng per 1024 bits of input");
 
 static void drop_current_rng(void);
 static int hwrng_init(struct hwrng *rng);
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index bee0827766a3..c0b93e0ff0c0 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -33,7 +33,8 @@
  *			and max is a multiple of 4 and >= 32 bytes.
  * @priv:		Private data, for use by the RNG driver.
  * @quality:		Estimation of true entropy in RNG's bitstream
- *			(per mill).
+ *			(in bits of entropy per 1024 bits of input;
+ *			valid values: 1 to 1024, or 0 for unknown).
  */
 struct hwrng {
 	const char *name;
-- 
cgit v1.2.3


From 9d2f67e43b73e8af7438be219b66a5de0cfa8bd9 Mon Sep 17 00:00:00 2001
From: Jianfeng Tan <jianfeng.tan@linux.alibaba.com>
Date: Sat, 29 Sep 2018 15:41:27 +0000
Subject: net/packet: fix packet drop as of virtio gso

When we use raw socket as the vhost backend, a packet from virito with
gso offloading information, cannot be sent out in later validaton at
xmit path, as we did not set correct skb->protocol which is further used
for looking up the gso function.

To fix this, we set this field according to virito hdr information.

Fixes: e858fae2b0b8f4 ("virtio_net: use common code for virtio_net_hdr and skb GSO conversion")
Signed-off-by: Jianfeng Tan <jianfeng.tan@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_net.h | 18 ++++++++++++++++++
 net/packet/af_packet.c     | 11 +++++++----
 2 files changed, 25 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 9397628a1967..cb462f9ab7dd 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -5,6 +5,24 @@
 #include <linux/if_vlan.h>
 #include <uapi/linux/virtio_net.h>
 
+static inline int virtio_net_hdr_set_proto(struct sk_buff *skb,
+					   const struct virtio_net_hdr *hdr)
+{
+	switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+	case VIRTIO_NET_HDR_GSO_TCPV4:
+	case VIRTIO_NET_HDR_GSO_UDP:
+		skb->protocol = cpu_to_be16(ETH_P_IP);
+		break;
+	case VIRTIO_NET_HDR_GSO_TCPV6:
+		skb->protocol = cpu_to_be16(ETH_P_IPV6);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 					const struct virtio_net_hdr *hdr,
 					bool little_endian)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 75c92a87e7b2..d6e94dc7e290 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2715,10 +2715,12 @@ tpacket_error:
 			}
 		}
 
-		if (po->has_vnet_hdr && virtio_net_hdr_to_skb(skb, vnet_hdr,
-							      vio_le())) {
-			tp_len = -EINVAL;
-			goto tpacket_error;
+		if (po->has_vnet_hdr) {
+			if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
+				tp_len = -EINVAL;
+				goto tpacket_error;
+			}
+			virtio_net_hdr_set_proto(skb, vnet_hdr);
 		}
 
 		skb->destructor = tpacket_destruct_skb;
@@ -2915,6 +2917,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 		if (err)
 			goto out_free;
 		len += sizeof(vnet_hdr);
+		virtio_net_hdr_set_proto(skb, &vnet_hdr);
 	}
 
 	skb_probe_transport_header(skb, reserve);
-- 
cgit v1.2.3


From 661b8d1b0e3a745e25f05adef2ebd00d830eeea7 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Mon, 1 Oct 2018 14:51:33 +0200
Subject: net: add umem reference in netdev{_rx}_queue

These references to the umem will be used to store information
on what kind of AF_XDP umem that is bound to a queue id, if any.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/netdevice.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1cbbf77a685f..8318f79586c2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -609,6 +609,9 @@ struct netdev_queue {
 
 	/* Subordinate device that the queue has been assigned to */
 	struct net_device	*sb_dev;
+#ifdef CONFIG_XDP_SOCKETS
+	struct xdp_umem         *umem;
+#endif
 /*
  * write-mostly part
  */
@@ -738,6 +741,9 @@ struct netdev_rx_queue {
 	struct kobject			kobj;
 	struct net_device		*dev;
 	struct xdp_rxq_info		xdp_rxq;
+#ifdef CONFIG_XDP_SOCKETS
+	struct xdp_umem                 *umem;
+#endif
 } ____cacheline_aligned_in_smp;
 
 /*
-- 
cgit v1.2.3


From c2a90025ad09d830c8d8ae69f485eac6aaaa2472 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@bootlin.com>
Date: Thu, 4 Oct 2018 14:22:03 +0200
Subject: phy: add QSGMII and PCIE modes

Prepare for upcoming phys that'll handle QSGMII or PCIe.

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Quentin Schulz <quentin.schulz@bootlin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy/phy.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h
index 9713aebdd348..03b319f89a34 100644
--- a/include/linux/phy/phy.h
+++ b/include/linux/phy/phy.h
@@ -37,9 +37,11 @@ enum phy_mode {
 	PHY_MODE_USB_OTG,
 	PHY_MODE_SGMII,
 	PHY_MODE_2500SGMII,
+	PHY_MODE_QSGMII,
 	PHY_MODE_10GKR,
 	PHY_MODE_UFS_HS_A,
 	PHY_MODE_UFS_HS_B,
+	PHY_MODE_PCIE,
 };
 
 /**
-- 
cgit v1.2.3


From 017b1660df89f5fb4bfe66c34e35f7d2031100c7 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Fri, 5 Oct 2018 15:51:29 -0700
Subject: mm: migration: fix migration of huge PMD shared pages

The page migration code employs try_to_unmap() to try and unmap the source
page.  This is accomplished by using rmap_walk to find all vmas where the
page is mapped.  This search stops when page mapcount is zero.  For shared
PMD huge pages, the page map count is always 1 no matter the number of
mappings.  Shared mappings are tracked via the reference count of the PMD
page.  Therefore, try_to_unmap stops prematurely and does not completely
unmap all mappings of the source page.

This problem can result is data corruption as writes to the original
source page can happen after contents of the page are copied to the target
page.  Hence, data is lost.

This problem was originally seen as DB corruption of shared global areas
after a huge page was soft offlined due to ECC memory errors.  DB
developers noticed they could reproduce the issue by (hotplug) offlining
memory used to back huge pages.  A simple testcase can reproduce the
problem by creating a shared PMD mapping (note that this must be at least
PUD_SIZE in size and PUD_SIZE aligned (1GB on x86)), and using
migrate_pages() to migrate process pages between nodes while continually
writing to the huge pages being migrated.

To fix, have the try_to_unmap_one routine check for huge PMD sharing by
calling huge_pmd_unshare for hugetlbfs huge pages.  If it is a shared
mapping it will be 'unshared' which removes the page table entry and drops
the reference on the PMD page.  After this, flush caches and TLB.

mmu notifiers are called before locking page tables, but we can not be
sure of PMD sharing until page tables are locked.  Therefore, check for
the possibility of PMD sharing before locking so that notifiers can
prepare for the worst possible case.

Link: http://lkml.kernel.org/r/20180823205917.16297-2-mike.kravetz@oracle.com
[mike.kravetz@oracle.com: make _range_in_vma() a static inline]
  Link: http://lkml.kernel.org/r/6063f215-a5c8-2f0c-465a-2c515ddc952d@oracle.com
Fixes: 39dde65c9940 ("shared page table for hugetlb page")
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/hugetlb.h | 14 ++++++++++++++
 include/linux/mm.h      |  6 ++++++
 mm/hugetlb.c            | 37 +++++++++++++++++++++++++++++++++++--
 mm/rmap.c               | 42 +++++++++++++++++++++++++++++++++++++++---
 4 files changed, 94 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6b68e345f0ca..087fd5f48c91 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -140,6 +140,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 pte_t *huge_pte_offset(struct mm_struct *mm,
 		       unsigned long addr, unsigned long sz);
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+				unsigned long *start, unsigned long *end);
 struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
 			      int write);
 struct page *follow_huge_pd(struct vm_area_struct *vma,
@@ -170,6 +172,18 @@ static inline unsigned long hugetlb_total_pages(void)
 	return 0;
 }
 
+static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
+					pte_t *ptep)
+{
+	return 0;
+}
+
+static inline void adjust_range_if_pmd_sharing_possible(
+				struct vm_area_struct *vma,
+				unsigned long *start, unsigned long *end)
+{
+}
+
 #define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n)	({ BUG(); 0; })
 #define follow_huge_addr(mm, addr, write)	ERR_PTR(-EINVAL)
 #define copy_hugetlb_page_range(src, dst, vma)	({ BUG(); 0; })
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a61ebe8ad4ca..0416a7204be3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2455,6 +2455,12 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
 	return vma;
 }
 
+static inline bool range_in_vma(struct vm_area_struct *vma,
+				unsigned long start, unsigned long end)
+{
+	return (vma && vma->vm_start <= start && end <= vma->vm_end);
+}
+
 #ifdef CONFIG_MMU
 pgprot_t vm_get_page_prot(unsigned long vm_flags);
 void vma_set_page_prot(struct vm_area_struct *vma);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3c21775f196b..b903d746e132 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4545,12 +4545,40 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
 	/*
 	 * check on proper vm_flags and page table alignment
 	 */
-	if (vma->vm_flags & VM_MAYSHARE &&
-	    vma->vm_start <= base && end <= vma->vm_end)
+	if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))
 		return true;
 	return false;
 }
 
+/*
+ * Determine if start,end range within vma could be mapped by shared pmd.
+ * If yes, adjust start and end to cover range associated with possible
+ * shared pmd mappings.
+ */
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+				unsigned long *start, unsigned long *end)
+{
+	unsigned long check_addr = *start;
+
+	if (!(vma->vm_flags & VM_MAYSHARE))
+		return;
+
+	for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) {
+		unsigned long a_start = check_addr & PUD_MASK;
+		unsigned long a_end = a_start + PUD_SIZE;
+
+		/*
+		 * If sharing is possible, adjust start/end if necessary.
+		 */
+		if (range_in_vma(vma, a_start, a_end)) {
+			if (a_start < *start)
+				*start = a_start;
+			if (a_end > *end)
+				*end = a_end;
+		}
+	}
+}
+
 /*
  * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
  * and returns the corresponding pte. While this is not necessary for the
@@ -4648,6 +4676,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 {
 	return 0;
 }
+
+void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
+				unsigned long *start, unsigned long *end)
+{
+}
 #define want_pmd_share()	(0)
 #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
 
diff --git a/mm/rmap.c b/mm/rmap.c
index eb477809a5c0..1e79fac3186b 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1362,11 +1362,21 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 	}
 
 	/*
-	 * We have to assume the worse case ie pmd for invalidation. Note that
-	 * the page can not be free in this function as call of try_to_unmap()
-	 * must hold a reference on the page.
+	 * For THP, we have to assume the worse case ie pmd for invalidation.
+	 * For hugetlb, it could be much worse if we need to do pud
+	 * invalidation in the case of pmd sharing.
+	 *
+	 * Note that the page can not be free in this function as call of
+	 * try_to_unmap() must hold a reference on the page.
 	 */
 	end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
+	if (PageHuge(page)) {
+		/*
+		 * If sharing is possible, start and end will be adjusted
+		 * accordingly.
+		 */
+		adjust_range_if_pmd_sharing_possible(vma, &start, &end);
+	}
 	mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
 
 	while (page_vma_mapped_walk(&pvmw)) {
@@ -1409,6 +1419,32 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
 		address = pvmw.address;
 
+		if (PageHuge(page)) {
+			if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
+				/*
+				 * huge_pmd_unshare unmapped an entire PMD
+				 * page.  There is no way of knowing exactly
+				 * which PMDs may be cached for this mm, so
+				 * we must flush them all.  start/end were
+				 * already adjusted above to cover this range.
+				 */
+				flush_cache_range(vma, start, end);
+				flush_tlb_range(vma, start, end);
+				mmu_notifier_invalidate_range(mm, start, end);
+
+				/*
+				 * The ref count of the PMD page was dropped
+				 * which is part of the way map counting
+				 * is done for shared PMDs.  Return 'true'
+				 * here.  When there is no other sharing,
+				 * huge_pmd_unshare returns false and we will
+				 * unmap the actual page and drop map count
+				 * to zero.
+				 */
+				page_vma_mapped_walk_done(&pvmw);
+				break;
+			}
+		}
 
 		if (IS_ENABLED(CONFIG_MIGRATION) &&
 		    (flags & TTU_MIGRATION) &&
-- 
cgit v1.2.3


From 1bb89893d4fa8275333b3ed74fb0379d63025f9a Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Fri, 14 Sep 2018 19:37:23 -0500
Subject: remoteproc: Add missing kernel-doc comment for auto-boot

The commit ddf711872c9d ("remoteproc: Introduce auto-boot flag")
introduced the auto-boot flag but missed adding the corresponding
kernel-doc comment. Add the same.

Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 include/linux/remoteproc.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index e3c5d856b6da..75f9ca05b865 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -439,6 +439,7 @@ struct rproc_dump_segment {
  * @cached_table: copy of the resource table
  * @table_sz: size of @cached_table
  * @has_iommu: flag to indicate if remote processor is behind an MMU
+ * @auto_boot: flag to indicate if remote processor should be auto-started
  * @dump_segments: list of segments in the firmware
  */
 struct rproc {
-- 
cgit v1.2.3


From 992b649a3f013465d8128da02e5449def662a4c3 Mon Sep 17 00:00:00 2001
From: Lianbo Jiang <lijiang@redhat.com>
Date: Sun, 30 Sep 2018 16:37:41 +0800
Subject: kdump, proc/vmcore: Enable kdumping encrypted memory with SME enabled

In the kdump kernel, the memory of the first kernel needs to be dumped
into the vmcore file.

If SME is enabled in the first kernel, the old memory has to be remapped
with the memory encryption mask in order to access it properly.

Split copy_oldmem_page() functionality to handle encrypted memory
properly.

 [ bp: Heavily massage everything. ]

Signed-off-by: Lianbo Jiang <lijiang@redhat.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: kexec@lists.infradead.org
Cc: tglx@linutronix.de
Cc: mingo@redhat.com
Cc: hpa@zytor.com
Cc: akpm@linux-foundation.org
Cc: dan.j.williams@intel.com
Cc: bhelgaas@google.com
Cc: baiyaowei@cmss.chinamobile.com
Cc: tiwai@suse.de
Cc: brijesh.singh@amd.com
Cc: dyoung@redhat.com
Cc: bhe@redhat.com
Cc: jroedel@suse.de
Link: https://lkml.kernel.org/r/be7b47f9-6be6-e0d1-2c2a-9125bc74b818@redhat.com
---
 arch/x86/kernel/crash_dump_64.c | 60 ++++++++++++++++++++++++++++-------------
 fs/proc/vmcore.c                | 24 ++++++++++++-----
 include/linux/crash_dump.h      |  4 +++
 3 files changed, 63 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index 4f2e0778feac..eb8ab3915268 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -11,40 +11,62 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 
-/**
- * copy_oldmem_page - copy one page from "oldmem"
- * @pfn: page frame number to be copied
- * @buf: target memory address for the copy; this can be in kernel address
- *	space or user address space (see @userbuf)
- * @csize: number of bytes to copy
- * @offset: offset in bytes into the page (based on pfn) to begin the copy
- * @userbuf: if set, @buf is in user address space, use copy_to_user(),
- *	otherwise @buf is in kernel address space, use memcpy().
- *
- * Copy a page from "oldmem". For this page, there is no pte mapped
- * in the current kernel. We stitch up a pte, similar to kmap_atomic.
- */
-ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
-		size_t csize, unsigned long offset, int userbuf)
+static ssize_t __copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
+				  unsigned long offset, int userbuf,
+				  bool encrypted)
 {
 	void  *vaddr;
 
 	if (!csize)
 		return 0;
 
-	vaddr = ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE);
+	if (encrypted)
+		vaddr = (__force void *)ioremap_encrypted(pfn << PAGE_SHIFT, PAGE_SIZE);
+	else
+		vaddr = (__force void *)ioremap_cache(pfn << PAGE_SHIFT, PAGE_SIZE);
+
 	if (!vaddr)
 		return -ENOMEM;
 
 	if (userbuf) {
-		if (copy_to_user(buf, vaddr + offset, csize)) {
-			iounmap(vaddr);
+		if (copy_to_user((void __user *)buf, vaddr + offset, csize)) {
+			iounmap((void __iomem *)vaddr);
 			return -EFAULT;
 		}
 	} else
 		memcpy(buf, vaddr + offset, csize);
 
 	set_iounmap_nonlazy();
-	iounmap(vaddr);
+	iounmap((void __iomem *)vaddr);
 	return csize;
 }
+
+/**
+ * copy_oldmem_page - copy one page of memory
+ * @pfn: page frame number to be copied
+ * @buf: target memory address for the copy; this can be in kernel address
+ *	space or user address space (see @userbuf)
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page (based on pfn) to begin the copy
+ * @userbuf: if set, @buf is in user address space, use copy_to_user(),
+ *	otherwise @buf is in kernel address space, use memcpy().
+ *
+ * Copy a page from the old kernel's memory. For this page, there is no pte
+ * mapped in the current kernel. We stitch up a pte, similar to kmap_atomic.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
+			 unsigned long offset, int userbuf)
+{
+	return __copy_oldmem_page(pfn, buf, csize, offset, userbuf, false);
+}
+
+/**
+ * copy_oldmem_page_encrypted - same as copy_oldmem_page() above but ioremap the
+ * memory with the encryption mask set to accomodate kdump on SME-enabled
+ * machines.
+ */
+ssize_t copy_oldmem_page_encrypted(unsigned long pfn, char *buf, size_t csize,
+				   unsigned long offset, int userbuf)
+{
+	return __copy_oldmem_page(pfn, buf, csize, offset, userbuf, true);
+}
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index cbde728f8ac6..42c32d06f7da 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -24,6 +24,8 @@
 #include <linux/vmalloc.h>
 #include <linux/pagemap.h>
 #include <linux/uaccess.h>
+#include <linux/mem_encrypt.h>
+#include <asm/pgtable.h>
 #include <asm/io.h>
 #include "internal.h"
 
@@ -98,7 +100,8 @@ static int pfn_is_ram(unsigned long pfn)
 
 /* Reads a page from the oldmem device from given offset. */
 static ssize_t read_from_oldmem(char *buf, size_t count,
-				u64 *ppos, int userbuf)
+				u64 *ppos, int userbuf,
+				bool encrypted)
 {
 	unsigned long pfn, offset;
 	size_t nr_bytes;
@@ -120,8 +123,15 @@ static ssize_t read_from_oldmem(char *buf, size_t count,
 		if (pfn_is_ram(pfn) == 0)
 			memset(buf, 0, nr_bytes);
 		else {
-			tmp = copy_oldmem_page(pfn, buf, nr_bytes,
-						offset, userbuf);
+			if (encrypted)
+				tmp = copy_oldmem_page_encrypted(pfn, buf,
+								 nr_bytes,
+								 offset,
+								 userbuf);
+			else
+				tmp = copy_oldmem_page(pfn, buf, nr_bytes,
+						       offset, userbuf);
+
 			if (tmp < 0)
 				return tmp;
 		}
@@ -155,7 +165,7 @@ void __weak elfcorehdr_free(unsigned long long addr)
  */
 ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
 {
-	return read_from_oldmem(buf, count, ppos, 0);
+	return read_from_oldmem(buf, count, ppos, 0, false);
 }
 
 /*
@@ -163,7 +173,7 @@ ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
  */
 ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
 {
-	return read_from_oldmem(buf, count, ppos, 0);
+	return read_from_oldmem(buf, count, ppos, 0, sme_active());
 }
 
 /*
@@ -173,6 +183,7 @@ int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
 				  unsigned long from, unsigned long pfn,
 				  unsigned long size, pgprot_t prot)
 {
+	prot = pgprot_encrypted(prot);
 	return remap_pfn_range(vma, from, pfn, size, prot);
 }
 
@@ -351,7 +362,8 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
 					    m->offset + m->size - *fpos,
 					    buflen);
 			start = m->paddr + *fpos - m->offset;
-			tmp = read_from_oldmem(buffer, tsz, &start, userbuf);
+			tmp = read_from_oldmem(buffer, tsz, &start,
+					       userbuf, sme_active());
 			if (tmp < 0)
 				return tmp;
 			buflen -= tsz;
diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h
index 3e4ba9d753c8..f774c5eb9e3c 100644
--- a/include/linux/crash_dump.h
+++ b/include/linux/crash_dump.h
@@ -26,6 +26,10 @@ extern int remap_oldmem_pfn_range(struct vm_area_struct *vma,
 
 extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
 						unsigned long, int);
+extern ssize_t copy_oldmem_page_encrypted(unsigned long pfn, char *buf,
+					  size_t csize, unsigned long offset,
+					  int userbuf);
+
 void vmcore_cleanup(void);
 
 /* Architecture code defines this if there are other possible ELF
-- 
cgit v1.2.3


From 99c65fa7c59ff558e70db8aa61bbdece5d3a9588 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Mon, 8 Oct 2018 00:20:07 -0700
Subject: dma-debug: Check for drivers mapping invalid addresses in
 dma_map_single()

I recently debugged a DMA mapping oops where a driver was trying to map
a buffer returned from request_firmware() with dma_map_single(). Memory
returned from request_firmware() is mapped into the vmalloc region and
this isn't a valid region to map with dma_map_single() per the DMA
documentation's "What memory is DMA'able?" section.

Unfortunately, we don't really check that in the DMA debugging code, so
enabling DMA debugging doesn't help catch this problem. Let's add a new
DMA debug function to check for a vmalloc address or an invalid virtual
address and print a warning if this happens. This makes it a little
easier to debug these sorts of problems, instead of seeing odd behavior
or crashes when drivers attempt to map the vmalloc space for DMA.

Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/dma-debug.h   |  8 ++++++++
 include/linux/dma-mapping.h |  1 +
 kernel/dma/debug.c          | 16 ++++++++++++++++
 3 files changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/dma-debug.h b/include/linux/dma-debug.h
index a785f2507159..30213adbb6b9 100644
--- a/include/linux/dma-debug.h
+++ b/include/linux/dma-debug.h
@@ -32,6 +32,9 @@ extern void dma_debug_add_bus(struct bus_type *bus);
 
 extern int dma_debug_resize_entries(u32 num_entries);
 
+extern void debug_dma_map_single(struct device *dev, const void *addr,
+				 unsigned long len);
+
 extern void debug_dma_map_page(struct device *dev, struct page *page,
 			       size_t offset, size_t size,
 			       int direction, dma_addr_t dma_addr,
@@ -103,6 +106,11 @@ static inline int dma_debug_resize_entries(u32 num_entries)
 	return 0;
 }
 
+static inline void debug_dma_map_single(struct device *dev, const void *addr,
+					unsigned long len)
+{
+}
+
 static inline void debug_dma_map_page(struct device *dev, struct page *page,
 				      size_t offset, size_t size,
 				      int direction, dma_addr_t dma_addr,
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 562af6b45f23..547a48bcfa3d 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -229,6 +229,7 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
 	dma_addr_t addr;
 
 	BUG_ON(!valid_dma_direction(dir));
+	debug_dma_map_single(dev, ptr, size);
 	addr = ops->map_page(dev, virt_to_page(ptr),
 			     offset_in_page(ptr), size,
 			     dir, attrs);
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index c007d25bee09..231ca4628062 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -1312,6 +1312,22 @@ static void check_sg_segment(struct device *dev, struct scatterlist *sg)
 #endif
 }
 
+void debug_dma_map_single(struct device *dev, const void *addr,
+			    unsigned long len)
+{
+	if (unlikely(dma_debug_disabled()))
+		return;
+
+	if (!virt_addr_valid(addr))
+		err_printk(dev, NULL, "DMA-API: device driver maps memory from invalid area [addr=%p] [len=%lu]\n",
+			   addr, len);
+
+	if (is_vmalloc_addr(addr))
+		err_printk(dev, NULL, "DMA-API: device driver maps memory from vmalloc area [addr=%p] [len=%lu]\n",
+			   addr, len);
+}
+EXPORT_SYMBOL(debug_dma_map_single);
+
 void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
 			size_t size, int direction, dma_addr_t dma_addr,
 			bool map_single)
-- 
cgit v1.2.3


From c941ce9c282cc606e6517356fcc186a9da2b4ab9 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Sun, 7 Oct 2018 12:56:47 +0100
Subject: bpf: add verifier callback to get stack usage info for offloaded
 progs

In preparation for BPF-to-BPF calls in offloaded programs, add a new
function attribute to the struct bpf_prog_offload_ops so that drivers
supporting eBPF offload can hook at the end of program verification, and
potentially extract information collected by the verifier.

Implement a minimal callback (returning 0) in the drivers providing the
structs, namely netdevsim and nfp.

This will be useful in the nfp driver, in later commits, to extract the
number of subprograms as well as the stack depth for those subprograms.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jiong Wang <jiong.wang@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 drivers/net/ethernet/netronome/nfp/bpf/verifier.c |  8 +++++++-
 drivers/net/netdevsim/bpf.c                       |  8 +++++++-
 include/linux/bpf.h                               |  1 +
 include/linux/bpf_verifier.h                      |  1 +
 kernel/bpf/offload.c                              | 18 ++++++++++++++++++
 kernel/bpf/verifier.c                             |  3 +++
 6 files changed, 37 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index a6e9248669e1..e470489021e3 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -640,6 +640,12 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
 	return 0;
 }
 
+static int nfp_bpf_finalize(struct bpf_verifier_env *env)
+{
+	return 0;
+}
+
 const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops = {
-	.insn_hook = nfp_verify_insn,
+	.insn_hook	= nfp_verify_insn,
+	.finalize	= nfp_bpf_finalize,
 };
diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c
index 81444208b216..cb3518474f0e 100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@ -86,8 +86,14 @@ nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn)
 	return 0;
 }
 
+static int nsim_bpf_finalize(struct bpf_verifier_env *env)
+{
+	return 0;
+}
+
 static const struct bpf_prog_offload_ops nsim_bpf_analyzer_ops = {
-	.insn_hook = nsim_bpf_verify_insn,
+	.insn_hook	= nsim_bpf_verify_insn,
+	.finalize	= nsim_bpf_finalize,
 };
 
 static bool nsim_xdp_offload_active(struct netdevsim *ns)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 027697b6a22f..9b558713447f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -263,6 +263,7 @@ struct bpf_verifier_ops {
 struct bpf_prog_offload_ops {
 	int (*insn_hook)(struct bpf_verifier_env *env,
 			 int insn_idx, int prev_insn_idx);
+	int (*finalize)(struct bpf_verifier_env *env);
 };
 
 struct bpf_prog_offload {
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 7b6fd2ab3263..9e8056ec20fa 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -245,5 +245,6 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
 int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env);
 int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
 				 int insn_idx, int prev_insn_idx);
+int bpf_prog_offload_finalize(struct bpf_verifier_env *env);
 
 #endif /* _LINUX_BPF_VERIFIER_H */
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 177a52436394..8e93c47f0779 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -172,6 +172,24 @@ int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
 	return ret;
 }
 
+int bpf_prog_offload_finalize(struct bpf_verifier_env *env)
+{
+	struct bpf_prog_offload *offload;
+	int ret = -ENODEV;
+
+	down_read(&bpf_devs_lock);
+	offload = env->prog->aux->offload;
+	if (offload) {
+		if (offload->dev_ops->finalize)
+			ret = offload->dev_ops->finalize(env);
+		else
+			ret = 0;
+	}
+	up_read(&bpf_devs_lock);
+
+	return ret;
+}
+
 static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
 {
 	struct bpf_prog_offload *offload = prog->aux->offload;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 73c81bef6ae8..a0454cb299ba 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6309,6 +6309,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 		env->cur_state = NULL;
 	}
 
+	if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
+		ret = bpf_prog_offload_finalize(env);
+
 skip_full_check:
 	while (!pop_stack(env, NULL, NULL));
 	free_states(env);
-- 
cgit v1.2.3


From cbf6d82cad4a507475d92d380228302fe668dd94 Mon Sep 17 00:00:00 2001
From: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Date: Thu, 30 Aug 2018 01:32:05 +0200
Subject: mmc: core: add helper to see if a host is doing a retune
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a helper to allow host drivers checking if a retune is in progress.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 include/linux/mmc/host.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index beed7121c781..2a5fe75dd082 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -569,6 +569,11 @@ static inline bool mmc_can_retune(struct mmc_host *host)
 	return host->can_retune == 1;
 }
 
+static inline bool mmc_doing_retune(struct mmc_host *host)
+{
+	return host->doing_retune == 1;
+}
+
 static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data)
 {
 	return data->flags & MMC_DATA_WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
-- 
cgit v1.2.3


From 9ef986a697c6d733d86e76d9870b4f1f60512b7a Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Thu, 20 Sep 2018 16:01:10 -0700
Subject: mmc: mmci: Drop support for pdata GPIO numbers

All the machines using the MMCI are passing GPIOs for the
card detect and write protect using the device tree or
descriptor table (one single case, Integrator/AP IM-PD1).

Drop support for passing global GPIO numbers through
platform data, noone is using it.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 arch/arm/mach-integrator/integrator_cp.c |  2 --
 arch/arm/mach-versatile/versatile_dt.c   |  4 ----
 drivers/mmc/host/mmci.c                  | 34 ++++++--------------------------
 include/linux/amba/mmci.h                | 11 ++---------
 4 files changed, 8 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c
index 772a7cf2010e..976ded5c5916 100644
--- a/arch/arm/mach-integrator/integrator_cp.c
+++ b/arch/arm/mach-integrator/integrator_cp.c
@@ -80,8 +80,6 @@ static unsigned int mmc_status(struct device *dev)
 static struct mmci_platform_data mmc_data = {
 	.ocr_mask	= MMC_VDD_32_33|MMC_VDD_33_34,
 	.status		= mmc_status,
-	.gpio_wp	= -1,
-	.gpio_cd	= -1,
 };
 
 static u64 notrace intcp_read_sched_clock(void)
diff --git a/arch/arm/mach-versatile/versatile_dt.c b/arch/arm/mach-versatile/versatile_dt.c
index 3c8d39c12909..e9d60687e416 100644
--- a/arch/arm/mach-versatile/versatile_dt.c
+++ b/arch/arm/mach-versatile/versatile_dt.c
@@ -89,15 +89,11 @@ unsigned int mmc_status(struct device *dev)
 static struct mmci_platform_data mmc0_plat_data = {
 	.ocr_mask	= MMC_VDD_32_33|MMC_VDD_33_34,
 	.status		= mmc_status,
-	.gpio_wp	= -1,
-	.gpio_cd	= -1,
 };
 
 static struct mmci_platform_data mmc1_plat_data = {
 	.ocr_mask	= MMC_VDD_32_33|MMC_VDD_33_34,
 	.status		= mmc_status,
-	.gpio_wp	= -1,
-	.gpio_cd	= -1,
 };
 
 /*
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 1841d250e9e2..4b843712dc01 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -28,8 +28,7 @@
 #include <linux/amba/bus.h>
 #include <linux/clk.h>
 #include <linux/scatterlist.h>
-#include <linux/gpio.h>
-#include <linux/of_gpio.h>
+#include <linux/of.h>
 #include <linux/regulator/consumer.h>
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
@@ -1675,13 +1674,6 @@ static int mmci_probe(struct amba_device *dev,
 	else if (plat->ocr_mask)
 		dev_warn(mmc_dev(mmc), "Platform OCR mask is ignored\n");
 
-	/* DT takes precedence over platform data. */
-	if (!np) {
-		if (!plat->cd_invert)
-			mmc->caps2 |= MMC_CAP2_CD_ACTIVE_HIGH;
-		mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH;
-	}
-
 	/* We support these capabilities. */
 	mmc->caps |= MMC_CAP_CMD23;
 
@@ -1749,30 +1741,16 @@ static int mmci_probe(struct amba_device *dev,
 	 * - not using DT but using a descriptor table, or
 	 * - using a table of descriptors ALONGSIDE DT, or
 	 * look up these descriptors named "cd" and "wp" right here, fail
-	 * silently of these do not exist and proceed to try platform data
+	 * silently of these do not exist
 	 */
 	if (!np) {
 		ret = mmc_gpiod_request_cd(mmc, "cd", 0, false, 0, NULL);
-		if (ret < 0) {
-			if (ret == -EPROBE_DEFER)
-				goto clk_disable;
-			else if (gpio_is_valid(plat->gpio_cd)) {
-				ret = mmc_gpio_request_cd(mmc, plat->gpio_cd, 0);
-				if (ret)
-					goto clk_disable;
-			}
-		}
+		if (ret == -EPROBE_DEFER)
+			goto clk_disable;
 
 		ret = mmc_gpiod_request_ro(mmc, "wp", 0, false, 0, NULL);
-		if (ret < 0) {
-			if (ret == -EPROBE_DEFER)
-				goto clk_disable;
-			else if (gpio_is_valid(plat->gpio_wp)) {
-				ret = mmc_gpio_request_ro(mmc, plat->gpio_wp);
-				if (ret)
-					goto clk_disable;
-			}
-		}
+		if (ret == -EPROBE_DEFER)
+			goto clk_disable;
 	}
 
 	ret = devm_request_irq(&dev->dev, dev->irq[0], mmci_irq, IRQF_SHARED,
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index da8357ba11bc..c92ebc39fc1f 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -18,20 +18,13 @@
  * mask into a value to be binary (or set some other custom bits
  * in MMCIPWR) or:ed and written into the MMCIPWR register of the
  * block.  May also control external power based on the power_mode.
- * @status: if no GPIO read function was given to the block in
- * gpio_wp (below) this function will be called to determine
- * whether a card is present in the MMC slot or not
- * @gpio_wp: read this GPIO pin to see if the card is write protected
- * @gpio_cd: read this GPIO pin to detect card insertion
- * @cd_invert: true if the gpio_cd pin value is active low
+ * @status: if no GPIO line was given to the block in this function will
+ * be called to determine whether a card is present in the MMC slot or not
  */
 struct mmci_platform_data {
 	unsigned int ocr_mask;
 	int (*ios_handler)(struct device *, struct mmc_ios *);
 	unsigned int (*status)(struct device *);
-	int	gpio_wp;
-	int	gpio_cd;
-	bool	cd_invert;
 };
 
 #endif
-- 
cgit v1.2.3


From 43b7358df63adff21aa81b349d9080954bf1c372 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 23 Sep 2018 09:03:21 +0200
Subject: mmc: sdhci: pxav3: Delete GPIO handling

The platform data for the PXAv3 driver allows passing a card
detect GPIO, but this code is not used in the kernel.

In order to not encourage the use of the old global GPIO
numberspace we need to remove this.

Card detect (and write protect) GPIO can easily be added into
the driver using machine descriptor tables instead, and the
descriptor-based (gpiod) variants of the slot GPIO APIs.

Cc: Jisheng Zhang <jszhang@marvell.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Jisheng Zhang <Jisheng.Zhang@synaptics.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-pxav3.c          | 14 --------------
 include/linux/platform_data/pxa_sdhci.h |  4 ----
 2 files changed, 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
index b8e96f392428..1783e29eae04 100644
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c
@@ -21,17 +21,14 @@
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/io.h>
-#include <linux/gpio.h>
 #include <linux/mmc/card.h>
 #include <linux/mmc/host.h>
-#include <linux/mmc/slot-gpio.h>
 #include <linux/platform_data/pxa_sdhci.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
 #include <linux/pm.h>
 #include <linux/pm_runtime.h>
 #include <linux/mbus.h>
@@ -452,16 +449,6 @@ static int sdhci_pxav3_probe(struct platform_device *pdev)
 			host->mmc->caps2 |= pdata->host_caps2;
 		if (pdata->pm_caps)
 			host->mmc->pm_caps |= pdata->pm_caps;
-
-		if (gpio_is_valid(pdata->ext_cd_gpio)) {
-			ret = mmc_gpio_request_cd(host->mmc, pdata->ext_cd_gpio,
-						  0);
-			if (ret) {
-				dev_err(mmc_dev(host->mmc),
-					"failed to allocate card detect gpio\n");
-				goto err_cd_req;
-			}
-		}
 	}
 
 	pm_runtime_get_noresume(&pdev->dev);
@@ -486,7 +473,6 @@ err_add_host:
 	pm_runtime_disable(&pdev->dev);
 	pm_runtime_put_noidle(&pdev->dev);
 err_of_parse:
-err_cd_req:
 err_mbus_win:
 	clk_disable_unprepare(pxa->clk_io);
 	clk_disable_unprepare(pxa->clk_core);
diff --git a/include/linux/platform_data/pxa_sdhci.h b/include/linux/platform_data/pxa_sdhci.h
index 9e20c2fb4ffd..4977c06d8a86 100644
--- a/include/linux/platform_data/pxa_sdhci.h
+++ b/include/linux/platform_data/pxa_sdhci.h
@@ -33,8 +33,6 @@
  *	1: choose feedback clk + delay value
  *	2: choose internal clk
  * @clk_delay_enable: enable clk_delay or not, used on pxa910
- * @ext_cd_gpio: gpio pin used for external CD line
- * @ext_cd_gpio_invert: invert values for external CD gpio line
  * @max_speed: the maximum speed supported
  * @host_caps: Standard MMC host capabilities bit field.
  * @quirks: quirks of platfrom
@@ -46,8 +44,6 @@ struct sdhci_pxa_platdata {
 	unsigned int	clk_delay_cycles;
 	unsigned int	clk_delay_sel;
 	bool		clk_delay_enable;
-	unsigned int	ext_cd_gpio;
-	bool		ext_cd_gpio_invert;
 	unsigned int	max_speed;
 	u32		host_caps;
 	u32		host_caps2;
-- 
cgit v1.2.3


From 7838a8ddc80b2aa82d364d39042ca422f7748885 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 24 Sep 2018 13:30:50 +0200
Subject: mmc: omap_hsmmc: Kill off cover detection

Cover detection appears to be a feature protecting the SD
card on mobile phones with a slide-cover, such as some Nokia
phones. The idea seems to be to not allow access to the
SD card when the cover is open.

It is only usable with platform data from board files, but
no board file in the kernel is using it, yet it takes up
a sizeable chunk of code in the OMAP HSMMC driver.

Since we do not add new board files for the OMAPs any target
that need this should anyway reimplement it properly using
the device tree, so delete this legacy code.

The driver is marked as orphan in MAINTAINERS by the way.

Cc: Tony Lindgren <tony@atomide.com>
Cc: linux-omap@vger.kernel.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Tony Lindgren <tony@atomide.com>
Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/omap_hsmmc.c            | 121 +------------------------------
 include/linux/platform_data/hsmmc-omap.h |   1 -
 2 files changed, 2 insertions(+), 120 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 68760d4a5d3d..92aeb42708f3 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -198,7 +198,6 @@ struct omap_hsmmc_host {
 	struct dma_chan		*rx_chan;
 	int			response_busy;
 	int			context_loss;
-	int			protect_card;
 	int			reqs_blocked;
 	int			req_in_progress;
 	unsigned long		clk_rate;
@@ -207,15 +206,6 @@ struct omap_hsmmc_host {
 #define HSMMC_SDIO_IRQ_ENABLED	(1 << 1)        /* SDIO irq enabled */
 	struct omap_hsmmc_next	next_data;
 	struct	omap_hsmmc_platform_data	*pdata;
-
-	/* return MMC cover switch state, can be NULL if not supported.
-	 *
-	 * possible return values:
-	 *   0 - closed
-	 *   1 - open
-	 */
-	int (*get_cover_state)(struct device *dev);
-
 	int (*card_detect)(struct device *dev);
 };
 
@@ -233,13 +223,6 @@ static int omap_hsmmc_card_detect(struct device *dev)
 	return mmc_gpio_get_cd(host->mmc);
 }
 
-static int omap_hsmmc_get_cover_state(struct device *dev)
-{
-	struct omap_hsmmc_host *host = dev_get_drvdata(dev);
-
-	return mmc_gpio_get_cd(host->mmc);
-}
-
 static int omap_hsmmc_enable_supply(struct mmc_host *mmc)
 {
 	int ret;
@@ -484,22 +467,13 @@ static int omap_hsmmc_reg_get(struct omap_hsmmc_host *host)
 	return 0;
 }
 
-static irqreturn_t omap_hsmmc_cover_irq(int irq, void *dev_id);
-
 static int omap_hsmmc_gpio_init(struct mmc_host *mmc,
 				struct omap_hsmmc_host *host,
 				struct omap_hsmmc_platform_data *pdata)
 {
 	int ret;
 
-	if (gpio_is_valid(pdata->gpio_cod)) {
-		ret = mmc_gpio_request_cd(mmc, pdata->gpio_cod, 0);
-		if (ret)
-			return ret;
-
-		host->get_cover_state = omap_hsmmc_get_cover_state;
-		mmc_gpio_set_cd_isr(mmc, omap_hsmmc_cover_irq);
-	} else if (gpio_is_valid(pdata->gpio_cd)) {
+	if (gpio_is_valid(pdata->gpio_cd)) {
 		ret = mmc_gpio_request_cd(mmc, pdata->gpio_cd, 0);
 		if (ret)
 			return ret;
@@ -781,9 +755,6 @@ static void send_init_stream(struct omap_hsmmc_host *host)
 	int reg = 0;
 	unsigned long timeout;
 
-	if (host->protect_card)
-		return;
-
 	disable_irq(host->irq);
 
 	OMAP_HSMMC_WRITE(host->base, IE, INT_EN_MASK);
@@ -804,29 +775,6 @@ static void send_init_stream(struct omap_hsmmc_host *host)
 	enable_irq(host->irq);
 }
 
-static inline
-int omap_hsmmc_cover_is_closed(struct omap_hsmmc_host *host)
-{
-	int r = 1;
-
-	if (host->get_cover_state)
-		r = host->get_cover_state(host->dev);
-	return r;
-}
-
-static ssize_t
-omap_hsmmc_show_cover_switch(struct device *dev, struct device_attribute *attr,
-			   char *buf)
-{
-	struct mmc_host *mmc = container_of(dev, struct mmc_host, class_dev);
-	struct omap_hsmmc_host *host = mmc_priv(mmc);
-
-	return sprintf(buf, "%s\n",
-			omap_hsmmc_cover_is_closed(host) ? "closed" : "open");
-}
-
-static DEVICE_ATTR(cover_switch, S_IRUGO, omap_hsmmc_show_cover_switch, NULL);
-
 static ssize_t
 omap_hsmmc_show_slot_name(struct device *dev, struct device_attribute *attr,
 			char *buf)
@@ -1247,44 +1195,6 @@ err:
 	return ret;
 }
 
-/* Protect the card while the cover is open */
-static void omap_hsmmc_protect_card(struct omap_hsmmc_host *host)
-{
-	if (!host->get_cover_state)
-		return;
-
-	host->reqs_blocked = 0;
-	if (host->get_cover_state(host->dev)) {
-		if (host->protect_card) {
-			dev_info(host->dev, "%s: cover is closed, "
-					 "card is now accessible\n",
-					 mmc_hostname(host->mmc));
-			host->protect_card = 0;
-		}
-	} else {
-		if (!host->protect_card) {
-			dev_info(host->dev, "%s: cover is open, "
-					 "card is now inaccessible\n",
-					 mmc_hostname(host->mmc));
-			host->protect_card = 1;
-		}
-	}
-}
-
-/*
- * irq handler when (cell-phone) cover is mounted/removed
- */
-static irqreturn_t omap_hsmmc_cover_irq(int irq, void *dev_id)
-{
-	struct omap_hsmmc_host *host = dev_id;
-
-	sysfs_notify(&host->mmc->class_dev.kobj, NULL, "cover_switch");
-
-	omap_hsmmc_protect_card(host);
-	mmc_detect_change(host->mmc, (HZ * 200) / 1000);
-	return IRQ_HANDLED;
-}
-
 static void omap_hsmmc_dma_callback(void *param)
 {
 	struct omap_hsmmc_host *host = param;
@@ -1555,24 +1465,7 @@ static void omap_hsmmc_request(struct mmc_host *mmc, struct mmc_request *req)
 
 	BUG_ON(host->req_in_progress);
 	BUG_ON(host->dma_ch != -1);
-	if (host->protect_card) {
-		if (host->reqs_blocked < 3) {
-			/*
-			 * Ensure the controller is left in a consistent
-			 * state by resetting the command and data state
-			 * machines.
-			 */
-			omap_hsmmc_reset_controller_fsm(host, SRD);
-			omap_hsmmc_reset_controller_fsm(host, SRC);
-			host->reqs_blocked += 1;
-		}
-		req->cmd->error = -EBADF;
-		if (req->data)
-			req->data->error = -EBADF;
-		req->cmd->retries = 0;
-		mmc_request_done(mmc, req);
-		return;
-	} else if (host->reqs_blocked)
+	if (host->reqs_blocked)
 		host->reqs_blocked = 0;
 	WARN_ON(host->mrq != NULL);
 	host->mrq = req;
@@ -1921,7 +1814,6 @@ static struct omap_hsmmc_platform_data *of_get_hsmmc_pdata(struct device *dev)
 		pdata->controller_flags |= OMAP_HSMMC_SUPPORTS_DUAL_VOLT;
 
 	pdata->gpio_cd = -EINVAL;
-	pdata->gpio_cod = -EINVAL;
 	pdata->gpio_wp = -EINVAL;
 
 	if (of_find_property(np, "ti,non-removable", NULL)) {
@@ -2125,8 +2017,6 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
 	if (!ret)
 		mmc->caps |= MMC_CAP_SDIO_IRQ;
 
-	omap_hsmmc_protect_card(host);
-
 	mmc_add_host(mmc);
 
 	if (mmc_pdata(host)->name != NULL) {
@@ -2134,12 +2024,6 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
 		if (ret < 0)
 			goto err_slot_name;
 	}
-	if (host->get_cover_state) {
-		ret = device_create_file(&mmc->class_dev,
-					 &dev_attr_cover_switch);
-		if (ret < 0)
-			goto err_slot_name;
-	}
 
 	omap_hsmmc_debugfs(mmc);
 	pm_runtime_mark_last_busy(host->dev);
@@ -2231,7 +2115,6 @@ static int omap_hsmmc_resume(struct device *dev)
 	if (!(host->mmc->pm_flags & MMC_PM_KEEP_POWER))
 		omap_hsmmc_conf_bus_power(host);
 
-	omap_hsmmc_protect_card(host);
 	pm_runtime_mark_last_busy(host->dev);
 	pm_runtime_put_autosuspend(host->dev);
 	return 0;
diff --git a/include/linux/platform_data/hsmmc-omap.h b/include/linux/platform_data/hsmmc-omap.h
index 73d9098ada2d..c055d7eda085 100644
--- a/include/linux/platform_data/hsmmc-omap.h
+++ b/include/linux/platform_data/hsmmc-omap.h
@@ -71,7 +71,6 @@ struct omap_hsmmc_platform_data {
 	char *version;
 
 	int gpio_cd;			/* gpio (card detect) */
-	int gpio_cod;			/* gpio (cover detect) */
 	int gpio_wp;			/* gpio (write protect) */
 	/* if we have special card, init it using this callback */
 	void (*init_card)(struct mmc_card *card);
-- 
cgit v1.2.3


From e63201f19438372fcb45977d8e14c6ab5807d55b Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 24 Sep 2018 13:30:51 +0200
Subject: mmc: omap_hsmmc: Delete platform data GPIO CD and WP

The OMAP HSMMC driver has some elaborate and hairy handling for
passing GPIO card detect and write protect lines from a boardfile
into the driver: the machine defines a struct omap2_hsmmc_info
that is copied into struct omap_hsmmc_platform_data by
omap_hsmmc_pdata_init() in arch/arm/mach-omap2/hsmmc.c.

However the .gpio_cd and .gpio_wp fields are not copied from
omap2_hsmmc_info to omap_hsmmc_platform_data by
omap_hsmmc_pdata_init() so they remain unused. The only platform
defining omap2_hsmmc_info also define both to -1, unused.

It turn out there are no boardfiles passing any valid GPIO
lines into the OMAP HSMMC driver at all. And since we are not
going to add any more OMAP2 boardfiles, we can delete this
card detect and write protect handling altogether.

This seems to also fix a bug: the card detect callback
mmc_gpio_get_cd() in the slot GPIO core needs to be called
by drivers utilizing slot GPIO. It appears the the boardfile
quirks were not doing this right, so this would only get
called for boardfiles, i.e. since no boardfile was using it,
never.

Just assign mmc_gpio_get_cd() unconditionally to omap_hsmmc_ops
.get_cd() so card detects from the device tree works.
AFAICT card detect with GPIO lines assigned from
mmc_of_parse() are not working at the moment, but that is
no regression since it probably never worked.

Cc: Tony Lindgren <tony@atomide.com>
Cc: linux-omap@vger.kernel.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 arch/arm/mach-omap2/hsmmc.h              |  2 --
 arch/arm/mach-omap2/pdata-quirks.c       |  2 --
 drivers/mmc/host/omap_hsmmc.c            | 52 +-------------------------------
 include/linux/platform_data/hsmmc-omap.h |  2 --
 4 files changed, 1 insertion(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-omap2/hsmmc.h b/arch/arm/mach-omap2/hsmmc.h
index af9af5094ec3..bf99aec5a155 100644
--- a/arch/arm/mach-omap2/hsmmc.h
+++ b/arch/arm/mach-omap2/hsmmc.h
@@ -12,8 +12,6 @@ struct omap2_hsmmc_info {
 	u8	mmc;		/* controller 1/2/3 */
 	u32	caps;		/* 4/8 wires and any additional host
 				 * capabilities OR'd (ref. linux/mmc/host.h) */
-	int	gpio_cd;	/* or -EINVAL */
-	int	gpio_wp;	/* or -EINVAL */
 	struct platform_device *pdev;	/* mmc controller instance */
 	/* init some special card */
 	void (*init_card)(struct mmc_card *card);
diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c
index 7f02743edbe4..fe7c1fdb51d8 100644
--- a/arch/arm/mach-omap2/pdata-quirks.c
+++ b/arch/arm/mach-omap2/pdata-quirks.c
@@ -363,8 +363,6 @@ static struct omap2_hsmmc_info pandora_mmc3[] = {
 	{
 		.mmc		= 3,
 		.caps		= MMC_CAP_4_BIT_DATA | MMC_CAP_POWER_OFF_CARD,
-		.gpio_cd	= -EINVAL,
-		.gpio_wp	= -EINVAL,
 		.init_card	= pandora_wl1251_init_card,
 	},
 	{}	/* Terminator */
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 92aeb42708f3..467d889a1638 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -30,7 +30,6 @@
 #include <linux/clk.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
-#include <linux/of_gpio.h>
 #include <linux/of_device.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/core.h>
@@ -38,7 +37,6 @@
 #include <linux/mmc/slot-gpio.h>
 #include <linux/io.h>
 #include <linux/irq.h>
-#include <linux/gpio.h>
 #include <linux/regulator/consumer.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/pm_runtime.h>
@@ -206,7 +204,6 @@ struct omap_hsmmc_host {
 #define HSMMC_SDIO_IRQ_ENABLED	(1 << 1)        /* SDIO irq enabled */
 	struct omap_hsmmc_next	next_data;
 	struct	omap_hsmmc_platform_data	*pdata;
-	int (*card_detect)(struct device *dev);
 };
 
 struct omap_mmc_of_data {
@@ -216,13 +213,6 @@ struct omap_mmc_of_data {
 
 static void omap_hsmmc_start_dma_transfer(struct omap_hsmmc_host *host);
 
-static int omap_hsmmc_card_detect(struct device *dev)
-{
-	struct omap_hsmmc_host *host = dev_get_drvdata(dev);
-
-	return mmc_gpio_get_cd(host->mmc);
-}
-
 static int omap_hsmmc_enable_supply(struct mmc_host *mmc)
 {
 	int ret;
@@ -467,29 +457,6 @@ static int omap_hsmmc_reg_get(struct omap_hsmmc_host *host)
 	return 0;
 }
 
-static int omap_hsmmc_gpio_init(struct mmc_host *mmc,
-				struct omap_hsmmc_host *host,
-				struct omap_hsmmc_platform_data *pdata)
-{
-	int ret;
-
-	if (gpio_is_valid(pdata->gpio_cd)) {
-		ret = mmc_gpio_request_cd(mmc, pdata->gpio_cd, 0);
-		if (ret)
-			return ret;
-
-		host->card_detect = omap_hsmmc_card_detect;
-	}
-
-	if (gpio_is_valid(pdata->gpio_wp)) {
-		ret = mmc_gpio_request_ro(mmc, pdata->gpio_wp);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
 /*
  * Start clock to the card
  */
@@ -1539,15 +1506,6 @@ static void omap_hsmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 	omap_hsmmc_set_bus_mode(host);
 }
 
-static int omap_hsmmc_get_cd(struct mmc_host *mmc)
-{
-	struct omap_hsmmc_host *host = mmc_priv(mmc);
-
-	if (!host->card_detect)
-		return -ENOSYS;
-	return host->card_detect(host->dev);
-}
-
 static void omap_hsmmc_init_card(struct mmc_host *mmc, struct mmc_card *card)
 {
 	struct omap_hsmmc_host *host = mmc_priv(mmc);
@@ -1686,7 +1644,7 @@ static struct mmc_host_ops omap_hsmmc_ops = {
 	.pre_req = omap_hsmmc_pre_req,
 	.request = omap_hsmmc_request,
 	.set_ios = omap_hsmmc_set_ios,
-	.get_cd = omap_hsmmc_get_cd,
+	.get_cd = mmc_gpio_get_cd,
 	.get_ro = mmc_gpio_get_ro,
 	.init_card = omap_hsmmc_init_card,
 	.enable_sdio_irq = omap_hsmmc_enable_sdio_irq,
@@ -1813,9 +1771,6 @@ static struct omap_hsmmc_platform_data *of_get_hsmmc_pdata(struct device *dev)
 	if (of_find_property(np, "ti,dual-volt", NULL))
 		pdata->controller_flags |= OMAP_HSMMC_SUPPORTS_DUAL_VOLT;
 
-	pdata->gpio_cd = -EINVAL;
-	pdata->gpio_wp = -EINVAL;
-
 	if (of_find_property(np, "ti,non-removable", NULL)) {
 		pdata->nonremovable = true;
 		pdata->no_regulator_off_init = true;
@@ -1900,10 +1855,6 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
 	host->pbias_enabled = 0;
 	host->vqmmc_enabled = 0;
 
-	ret = omap_hsmmc_gpio_init(mmc, host, pdata);
-	if (ret)
-		goto err_gpio;
-
 	platform_set_drvdata(pdev, host);
 
 	if (pdev->dev.of_node)
@@ -2045,7 +1996,6 @@ err_irq:
 	if (host->dbclk)
 		clk_disable_unprepare(host->dbclk);
 err1:
-err_gpio:
 	mmc_free_host(mmc);
 err:
 	return ret;
diff --git a/include/linux/platform_data/hsmmc-omap.h b/include/linux/platform_data/hsmmc-omap.h
index c055d7eda085..85da11916bd5 100644
--- a/include/linux/platform_data/hsmmc-omap.h
+++ b/include/linux/platform_data/hsmmc-omap.h
@@ -70,8 +70,6 @@ struct omap_hsmmc_platform_data {
 	/* string specifying a particular variant of hardware */
 	char *version;
 
-	int gpio_cd;			/* gpio (card detect) */
-	int gpio_wp;			/* gpio (write protect) */
 	/* if we have special card, init it using this callback */
 	void (*init_card)(struct mmc_card *card);
 
-- 
cgit v1.2.3


From d0a6a87e40da49cfc7954c491d3065a25a641b29 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 4 Oct 2018 00:25:38 +0300
Subject: fanotify: support reporting thread id instead of process id

In order to identify which thread triggered the event in a
multi-threaded program, add the FAN_REPORT_TID flag in fanotify_init to
opt-in for reporting the event creator's thread id information.

Signed-off-by: nixiaoming <nixiaoming@huawei.com>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fanotify/fanotify.c      | 9 ++++++---
 fs/notify/fanotify/fanotify.h      | 2 +-
 fs/notify/fanotify/fanotify_user.c | 4 ++--
 include/linux/fanotify.h           | 1 +
 include/uapi/linux/fanotify.h      | 3 +++
 5 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 361e3a0a445c..5769cf3ff035 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -25,7 +25,7 @@ static bool should_merge(struct fsnotify_event *old_fsn,
 	old = FANOTIFY_E(old_fsn);
 	new = FANOTIFY_E(new_fsn);
 
-	if (old_fsn->inode == new_fsn->inode && old->tgid == new->tgid &&
+	if (old_fsn->inode == new_fsn->inode && old->pid == new->pid &&
 	    old->path.mnt == new->path.mnt &&
 	    old->path.dentry == new->path.dentry)
 		return true;
@@ -171,7 +171,10 @@ struct fanotify_event_info *fanotify_alloc_event(struct fsnotify_group *group,
 		goto out;
 init: __maybe_unused
 	fsnotify_init_event(&event->fse, inode, mask);
-	event->tgid = get_pid(task_tgid(current));
+	if (FAN_GROUP_FLAG(group, FAN_REPORT_TID))
+		event->pid = get_pid(task_pid(current));
+	else
+		event->pid = get_pid(task_tgid(current));
 	if (path) {
 		event->path = *path;
 		path_get(&event->path);
@@ -270,7 +273,7 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event)
 
 	event = FANOTIFY_E(fsn_event);
 	path_put(&event->path);
-	put_pid(event->tgid);
+	put_pid(event->pid);
 	if (fanotify_is_perm_event(fsn_event->mask)) {
 		kmem_cache_free(fanotify_perm_event_cachep,
 				FANOTIFY_PE(fsn_event));
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 88a8290a61cb..ea05b8a401e7 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -19,7 +19,7 @@ struct fanotify_event_info {
 	 * during this object's lifetime
 	 */
 	struct path path;
-	struct pid *tgid;
+	struct pid *pid;
 };
 
 /*
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 14594e491d2b..e03be5071362 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -132,7 +132,7 @@ static int fill_event_metadata(struct fsnotify_group *group,
 	metadata->vers = FANOTIFY_METADATA_VERSION;
 	metadata->reserved = 0;
 	metadata->mask = fsn_event->mask & FANOTIFY_OUTGOING_EVENTS;
-	metadata->pid = pid_vnr(event->tgid);
+	metadata->pid = pid_vnr(event->pid);
 	if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW))
 		metadata->fd = FAN_NOFD;
 	else {
@@ -944,7 +944,7 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark,
  */
 static int __init fanotify_user_setup(void)
 {
-	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 6);
+	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 7);
 	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
 
 	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index caf55c67fc6c..a5a60691e48b 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -19,6 +19,7 @@
 				 FAN_CLASS_PRE_CONTENT)
 
 #define FANOTIFY_INIT_FLAGS	(FANOTIFY_CLASS_BITS | \
+				 FAN_REPORT_TID | \
 				 FAN_CLOEXEC | FAN_NONBLOCK | \
 				 FAN_UNLIMITED_QUEUE | FAN_UNLIMITED_MARKS)
 
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index d0c05de670ef..b86740d1c50a 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -40,6 +40,9 @@
 #define FAN_UNLIMITED_MARKS	0x00000020
 #define FAN_ENABLE_AUDIT	0x00000040
 
+/* Flags to determine fanotify event format */
+#define FAN_REPORT_TID		0x00000100	/* event->pid is thread id */
+
 /* Deprecated - do not use this in programs and do not add new flags here! */
 #define FAN_ALL_INIT_FLAGS	(FAN_CLOEXEC | FAN_NONBLOCK | \
 				 FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\
-- 
cgit v1.2.3


From 0792a2c8e0bbda3605b8d42c6b9635be7b19982a Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Tue, 11 Sep 2018 20:18:44 -0400
Subject: macintosh: Use common code to access RTC

Now that the 68k Mac port has adopted the via-pmu driver, the same RTC
code can be shared between m68k and powerpc. Replace duplicated code in
arch/powerpc and arch/m68k with common RTC accessors for Cuda and PMU.

Drop the problematic WARN_ON which was introduced in commit 22db552b50fa
("powerpc/powermac: Fix rtc read/write functions").

Tested-by: Stan Johnson <userm57@yahoo.com>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/m68k/mac/misc.c                   |  75 +++-----------------
 arch/powerpc/platforms/powermac/time.c | 126 ++++++---------------------------
 drivers/macintosh/via-cuda.c           |  35 +++++++++
 drivers/macintosh/via-pmu.c            |  33 +++++++++
 include/linux/cuda.h                   |   4 ++
 include/linux/pmu.h                    |   4 ++
 6 files changed, 106 insertions(+), 171 deletions(-)

(limited to 'include/linux')

diff --git a/arch/m68k/mac/misc.c b/arch/m68k/mac/misc.c
index 1b083c500b9a..ebb3b6d169ea 100644
--- a/arch/m68k/mac/misc.c
+++ b/arch/m68k/mac/misc.c
@@ -37,35 +37,6 @@
 static void (*rom_reset)(void);
 
 #ifdef CONFIG_ADB_CUDA
-static time64_t cuda_read_time(void)
-{
-	struct adb_request req;
-	time64_t time;
-
-	if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0)
-		return 0;
-	while (!req.complete)
-		cuda_poll();
-
-	time = (u32)((req.reply[3] << 24) | (req.reply[4] << 16) |
-		     (req.reply[5] << 8) | req.reply[6]);
-
-	return time - RTC_OFFSET;
-}
-
-static void cuda_write_time(time64_t time)
-{
-	struct adb_request req;
-	u32 data = lower_32_bits(time + RTC_OFFSET);
-
-	if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME,
-			 (data >> 24) & 0xFF, (data >> 16) & 0xFF,
-			 (data >> 8) & 0xFF, data & 0xFF) < 0)
-		return;
-	while (!req.complete)
-		cuda_poll();
-}
-
 static __u8 cuda_read_pram(int offset)
 {
 	struct adb_request req;
@@ -91,33 +62,6 @@ static void cuda_write_pram(int offset, __u8 data)
 #endif /* CONFIG_ADB_CUDA */
 
 #ifdef CONFIG_ADB_PMU
-static time64_t pmu_read_time(void)
-{
-	struct adb_request req;
-	time64_t time;
-
-	if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
-		return 0;
-	pmu_wait_complete(&req);
-
-	time = (u32)((req.reply[0] << 24) | (req.reply[1] << 16) |
-		     (req.reply[2] << 8) | req.reply[3]);
-
-	return time - RTC_OFFSET;
-}
-
-static void pmu_write_time(time64_t time)
-{
-	struct adb_request req;
-	u32 data = lower_32_bits(time + RTC_OFFSET);
-
-	if (pmu_request(&req, NULL, 5, PMU_SET_RTC,
-			(data >> 24) & 0xFF, (data >> 16) & 0xFF,
-			(data >> 8) & 0xFF, data & 0xFF) < 0)
-		return;
-	pmu_wait_complete(&req);
-}
-
 static __u8 pmu_read_pram(int offset)
 {
 	struct adb_request req;
@@ -295,13 +239,17 @@ static time64_t via_read_time(void)
  * is basically any machine with Mac II-style ADB.
  */
 
-static void via_write_time(time64_t time)
+static void via_set_rtc_time(struct rtc_time *tm)
 {
 	union {
 		__u8 cdata[4];
 		__u32 idata;
 	} data;
 	__u8 temp;
+	time64_t time;
+
+	time = mktime64(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
+	                tm->tm_hour, tm->tm_min, tm->tm_sec);
 
 	/* Clear the write protect bit */
 
@@ -641,12 +589,12 @@ int mac_hwclk(int op, struct rtc_time *t)
 #ifdef CONFIG_ADB_CUDA
 		case MAC_ADB_EGRET:
 		case MAC_ADB_CUDA:
-			now = cuda_read_time();
+			now = cuda_get_time();
 			break;
 #endif
 #ifdef CONFIG_ADB_PMU
 		case MAC_ADB_PB2:
-			now = pmu_read_time();
+			now = pmu_get_time();
 			break;
 #endif
 		default:
@@ -665,24 +613,21 @@ int mac_hwclk(int op, struct rtc_time *t)
 		         __func__, t->tm_year + 1900, t->tm_mon + 1, t->tm_mday,
 		         t->tm_hour, t->tm_min, t->tm_sec);
 
-		now = mktime64(t->tm_year + 1900, t->tm_mon + 1, t->tm_mday,
-			       t->tm_hour, t->tm_min, t->tm_sec);
-
 		switch (macintosh_config->adb_type) {
 		case MAC_ADB_IOP:
 		case MAC_ADB_II:
 		case MAC_ADB_PB1:
-			via_write_time(now);
+			via_set_rtc_time(t);
 			break;
 #ifdef CONFIG_ADB_CUDA
 		case MAC_ADB_EGRET:
 		case MAC_ADB_CUDA:
-			cuda_write_time(now);
+			cuda_set_rtc_time(t);
 			break;
 #endif
 #ifdef CONFIG_ADB_PMU
 		case MAC_ADB_PB2:
-			pmu_write_time(now);
+			pmu_set_rtc_time(t);
 			break;
 #endif
 		default:
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index f92c1918fb56..f157e3d071f2 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -44,13 +44,6 @@
 #define DBG(x...)
 #endif
 
-/*
- * Offset between Unix time (1970-based) and Mac time (1904-based). Cuda and PMU
- * times wrap in 2040. If we need to handle later times, the read_time functions
- * need to be changed to interpret wrapped times as post-2040.
- */
-#define RTC_OFFSET	2082844800
-
 /*
  * Calibrate the decrementer frequency with the VIA timer 1.
  */
@@ -90,98 +83,6 @@ long __init pmac_time_init(void)
 	return delta;
 }
 
-#ifdef CONFIG_ADB_CUDA
-static time64_t cuda_get_time(void)
-{
-	struct adb_request req;
-	time64_t now;
-
-	if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0)
-		return 0;
-	while (!req.complete)
-		cuda_poll();
-	if (req.reply_len != 7)
-		printk(KERN_ERR "cuda_get_time: got %d byte reply\n",
-		       req.reply_len);
-	now = (u32)((req.reply[3] << 24) + (req.reply[4] << 16) +
-		    (req.reply[5] << 8) + req.reply[6]);
-	/* it's either after year 2040, or the RTC has gone backwards */
-	WARN_ON(now < RTC_OFFSET);
-
-	return now - RTC_OFFSET;
-}
-
-#define cuda_get_rtc_time(tm)	rtc_time64_to_tm(cuda_get_time(), (tm))
-
-static int cuda_set_rtc_time(struct rtc_time *tm)
-{
-	u32 nowtime;
-	struct adb_request req;
-
-	nowtime = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET);
-	if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME,
-			 nowtime >> 24, nowtime >> 16, nowtime >> 8,
-			 nowtime) < 0)
-		return -ENXIO;
-	while (!req.complete)
-		cuda_poll();
-	if ((req.reply_len != 3) && (req.reply_len != 7))
-		printk(KERN_ERR "cuda_set_rtc_time: got %d byte reply\n",
-		       req.reply_len);
-	return 0;
-}
-
-#else
-#define cuda_get_time()		0
-#define cuda_get_rtc_time(tm)
-#define cuda_set_rtc_time(tm)	0
-#endif
-
-#ifdef CONFIG_ADB_PMU
-static time64_t pmu_get_time(void)
-{
-	struct adb_request req;
-	time64_t now;
-
-	if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
-		return 0;
-	pmu_wait_complete(&req);
-	if (req.reply_len != 4)
-		printk(KERN_ERR "pmu_get_time: got %d byte reply from PMU\n",
-		       req.reply_len);
-	now = (u32)((req.reply[0] << 24) + (req.reply[1] << 16)	+
-		    (req.reply[2] << 8) + req.reply[3]);
-
-	/* it's either after year 2040, or the RTC has gone backwards */
-	WARN_ON(now < RTC_OFFSET);
-
-	return now - RTC_OFFSET;
-}
-
-#define pmu_get_rtc_time(tm)	rtc_time64_to_tm(pmu_get_time(), (tm))
-
-static int pmu_set_rtc_time(struct rtc_time *tm)
-{
-	u32 nowtime;
-	struct adb_request req;
-
-	nowtime = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET);
-	if (pmu_request(&req, NULL, 5, PMU_SET_RTC, nowtime >> 24,
-			nowtime >> 16, nowtime >> 8, nowtime) < 0)
-		return -ENXIO;
-	pmu_wait_complete(&req);
-	if (req.reply_len != 0)
-		printk(KERN_ERR "pmu_set_rtc_time: %d byte reply from PMU\n",
-		       req.reply_len);
-	return 0;
-}
-
-#else
-#define pmu_get_time()		0
-#define pmu_get_rtc_time(tm)
-#define pmu_set_rtc_time(tm)	0
-#endif
-
 #ifdef CONFIG_PMAC_SMU
 static time64_t smu_get_time(void)
 {
@@ -191,11 +92,6 @@ static time64_t smu_get_time(void)
 		return 0;
 	return rtc_tm_to_time64(&tm);
 }
-
-#else
-#define smu_get_time()			0
-#define smu_get_rtc_time(tm, spin)
-#define smu_set_rtc_time(tm, spin)	0
 #endif
 
 /* Can't be __init, it's called when suspending and resuming */
@@ -203,12 +99,18 @@ time64_t pmac_get_boot_time(void)
 {
 	/* Get the time from the RTC, used only at boot time */
 	switch (sys_ctrler) {
+#ifdef CONFIG_ADB_CUDA
 	case SYS_CTRLER_CUDA:
 		return cuda_get_time();
+#endif
+#ifdef CONFIG_ADB_PMU
 	case SYS_CTRLER_PMU:
 		return pmu_get_time();
+#endif
+#ifdef CONFIG_PMAC_SMU
 	case SYS_CTRLER_SMU:
 		return smu_get_time();
+#endif
 	default:
 		return 0;
 	}
@@ -218,15 +120,21 @@ void pmac_get_rtc_time(struct rtc_time *tm)
 {
 	/* Get the time from the RTC, used only at boot time */
 	switch (sys_ctrler) {
+#ifdef CONFIG_ADB_CUDA
 	case SYS_CTRLER_CUDA:
-		cuda_get_rtc_time(tm);
+		rtc_time64_to_tm(cuda_get_time(), tm);
 		break;
+#endif
+#ifdef CONFIG_ADB_PMU
 	case SYS_CTRLER_PMU:
-		pmu_get_rtc_time(tm);
+		rtc_time64_to_tm(pmu_get_time(), tm);
 		break;
+#endif
+#ifdef CONFIG_PMAC_SMU
 	case SYS_CTRLER_SMU:
 		smu_get_rtc_time(tm, 1);
 		break;
+#endif
 	default:
 		;
 	}
@@ -235,12 +143,18 @@ void pmac_get_rtc_time(struct rtc_time *tm)
 int pmac_set_rtc_time(struct rtc_time *tm)
 {
 	switch (sys_ctrler) {
+#ifdef CONFIG_ADB_CUDA
 	case SYS_CTRLER_CUDA:
 		return cuda_set_rtc_time(tm);
+#endif
+#ifdef CONFIG_ADB_PMU
 	case SYS_CTRLER_PMU:
 		return pmu_set_rtc_time(tm);
+#endif
+#ifdef CONFIG_PMAC_SMU
 	case SYS_CTRLER_SMU:
 		return smu_set_rtc_time(tm, 1);
+#endif
 	default:
 		return -ENODEV;
 	}
diff --git a/drivers/macintosh/via-cuda.c b/drivers/macintosh/via-cuda.c
index 98dd702eb867..bbec6ac0a966 100644
--- a/drivers/macintosh/via-cuda.c
+++ b/drivers/macintosh/via-cuda.c
@@ -766,3 +766,38 @@ cuda_input(unsigned char *buf, int nb)
 	               buf, nb, false);
     }
 }
+
+/* Offset between Unix time (1970-based) and Mac time (1904-based) */
+#define RTC_OFFSET	2082844800
+
+time64_t cuda_get_time(void)
+{
+	struct adb_request req;
+	u32 now;
+
+	if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0)
+		return 0;
+	while (!req.complete)
+		cuda_poll();
+	if (req.reply_len != 7)
+		pr_err("%s: got %d byte reply\n", __func__, req.reply_len);
+	now = (req.reply[3] << 24) + (req.reply[4] << 16) +
+	      (req.reply[5] << 8) + req.reply[6];
+	return (time64_t)now - RTC_OFFSET;
+}
+
+int cuda_set_rtc_time(struct rtc_time *tm)
+{
+	u32 now;
+	struct adb_request req;
+
+	now = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET);
+	if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME,
+	                 now >> 24, now >> 16, now >> 8, now) < 0)
+		return -ENXIO;
+	while (!req.complete)
+		cuda_poll();
+	if ((req.reply_len != 3) && (req.reply_len != 7))
+		pr_err("%s: got %d byte reply\n", __func__, req.reply_len);
+	return 0;
+}
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index d72c450aebe5..60f57e2abf21 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -1737,6 +1737,39 @@ pmu_enable_irled(int on)
 	pmu_wait_complete(&req);
 }
 
+/* Offset between Unix time (1970-based) and Mac time (1904-based) */
+#define RTC_OFFSET	2082844800
+
+time64_t pmu_get_time(void)
+{
+	struct adb_request req;
+	u32 now;
+
+	if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
+		return 0;
+	pmu_wait_complete(&req);
+	if (req.reply_len != 4)
+		pr_err("%s: got %d byte reply\n", __func__, req.reply_len);
+	now = (req.reply[0] << 24) + (req.reply[1] << 16) +
+	      (req.reply[2] << 8) + req.reply[3];
+	return (time64_t)now - RTC_OFFSET;
+}
+
+int pmu_set_rtc_time(struct rtc_time *tm)
+{
+	u32 now;
+	struct adb_request req;
+
+	now = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET);
+	if (pmu_request(&req, NULL, 5, PMU_SET_RTC,
+	                now >> 24, now >> 16, now >> 8, now) < 0)
+		return -ENXIO;
+	pmu_wait_complete(&req);
+	if (req.reply_len != 0)
+		pr_err("%s: got %d byte reply\n", __func__, req.reply_len);
+	return 0;
+}
+
 void
 pmu_restart(void)
 {
diff --git a/include/linux/cuda.h b/include/linux/cuda.h
index 056867f09a01..45bfe9d61271 100644
--- a/include/linux/cuda.h
+++ b/include/linux/cuda.h
@@ -8,6 +8,7 @@
 #ifndef _LINUX_CUDA_H
 #define _LINUX_CUDA_H
 
+#include <linux/rtc.h>
 #include <uapi/linux/cuda.h>
 
 
@@ -16,4 +17,7 @@ extern int cuda_request(struct adb_request *req,
 			void (*done)(struct adb_request *), int nbytes, ...);
 extern void cuda_poll(void);
 
+extern time64_t cuda_get_time(void);
+extern int cuda_set_rtc_time(struct rtc_time *tm);
+
 #endif /* _LINUX_CUDA_H */
diff --git a/include/linux/pmu.h b/include/linux/pmu.h
index 9ac8fc60ad49..52453a24a24f 100644
--- a/include/linux/pmu.h
+++ b/include/linux/pmu.h
@@ -9,6 +9,7 @@
 #ifndef _LINUX_PMU_H
 #define _LINUX_PMU_H
 
+#include <linux/rtc.h>
 #include <uapi/linux/pmu.h>
 
 
@@ -36,6 +37,9 @@ static inline void pmu_resume(void)
 
 extern void pmu_enable_irled(int on);
 
+extern time64_t pmu_get_time(void);
+extern int pmu_set_rtc_time(struct rtc_time *tm);
+
 extern void pmu_restart(void);
 extern void pmu_shutdown(void);
 extern void pmu_unlock(void);
-- 
cgit v1.2.3


From edbee095fafb4b727b51032bdc41e345f95bbc20 Mon Sep 17 00:00:00 2001
From: Dong Aisheng <aisheng.dong@nxp.com>
Date: Sun, 7 Oct 2018 21:04:42 +0800
Subject: firmware: imx: add SCU firmware driver support

The System Controller Firmware (SCFW) is a low-level system function
which runs on a dedicated Cortex-M core to provide power, clock, and
resource management. It exists on some i.MX8 processors. e.g. i.MX8QM
(QM, QP), and i.MX8QX (QXP, DX).

This patch implements the SCU firmware IPC function and the common
message sending API sc_call_rpc.

Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Fabio Estevam <fabio.estevam@nxp.com>
Cc: Jassi Brar <jassisinghbrar@gmail.com>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 drivers/firmware/Kconfig           |   1 +
 drivers/firmware/Makefile          |   1 +
 drivers/firmware/imx/Kconfig       |  11 +
 drivers/firmware/imx/Makefile      |   2 +
 drivers/firmware/imx/imx-scu.c     | 270 ++++++++++++++++
 include/linux/firmware/imx/ipc.h   |  59 ++++
 include/linux/firmware/imx/sci.h   |  16 +
 include/linux/firmware/imx/types.h | 617 +++++++++++++++++++++++++++++++++++++
 8 files changed, 977 insertions(+)
 create mode 100644 drivers/firmware/imx/Kconfig
 create mode 100644 drivers/firmware/imx/Makefile
 create mode 100644 drivers/firmware/imx/imx-scu.c
 create mode 100644 include/linux/firmware/imx/ipc.h
 create mode 100644 include/linux/firmware/imx/sci.h
 create mode 100644 include/linux/firmware/imx/types.h

(limited to 'include/linux')

diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig
index 6e83880046d7..bd9e4e2c41db 100644
--- a/drivers/firmware/Kconfig
+++ b/drivers/firmware/Kconfig
@@ -289,6 +289,7 @@ config HAVE_ARM_SMCCC
 source "drivers/firmware/broadcom/Kconfig"
 source "drivers/firmware/google/Kconfig"
 source "drivers/firmware/efi/Kconfig"
+source "drivers/firmware/imx/Kconfig"
 source "drivers/firmware/meson/Kconfig"
 source "drivers/firmware/tegra/Kconfig"
 
diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile
index e18a041cfc53..e1281d6a03d4 100644
--- a/drivers/firmware/Makefile
+++ b/drivers/firmware/Makefile
@@ -31,4 +31,5 @@ obj-y				+= meson/
 obj-$(CONFIG_GOOGLE_FIRMWARE)	+= google/
 obj-$(CONFIG_EFI)		+= efi/
 obj-$(CONFIG_UEFI_CPER)		+= efi/
+obj-y				+= imx/
 obj-y				+= tegra/
diff --git a/drivers/firmware/imx/Kconfig b/drivers/firmware/imx/Kconfig
new file mode 100644
index 000000000000..b170c2851e48
--- /dev/null
+++ b/drivers/firmware/imx/Kconfig
@@ -0,0 +1,11 @@
+config IMX_SCU
+	bool "IMX SCU Protocol driver"
+	depends on IMX_MBOX
+	help
+	  The System Controller Firmware (SCFW) is a low-level system function
+	  which runs on a dedicated Cortex-M core to provide power, clock, and
+	  resource management. It exists on some i.MX8 processors. e.g. i.MX8QM
+	  (QM, QP), and i.MX8QX (QXP, DX).
+
+	  This driver manages the IPC interface between host CPU and the
+	  SCU firmware running on M4.
diff --git a/drivers/firmware/imx/Makefile b/drivers/firmware/imx/Makefile
new file mode 100644
index 000000000000..9b1e2febb1aa
--- /dev/null
+++ b/drivers/firmware/imx/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_IMX_SCU)	+= imx-scu.o
diff --git a/drivers/firmware/imx/imx-scu.c b/drivers/firmware/imx/imx-scu.c
new file mode 100644
index 000000000000..2bb1a19c413f
--- /dev/null
+++ b/drivers/firmware/imx/imx-scu.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2018 NXP
+ *  Author: Dong Aisheng <aisheng.dong@nxp.com>
+ *
+ * Implementation of the SCU IPC functions using MUs (client side).
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/firmware/imx/types.h>
+#include <linux/firmware/imx/ipc.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/mailbox_client.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+
+#define SCU_MU_CHAN_NUM		8
+#define MAX_RX_TIMEOUT		(msecs_to_jiffies(30))
+
+struct imx_sc_chan {
+	struct imx_sc_ipc *sc_ipc;
+
+	struct mbox_client cl;
+	struct mbox_chan *ch;
+	int idx;
+};
+
+struct imx_sc_ipc {
+	/* SCU uses 4 Tx and 4 Rx channels */
+	struct imx_sc_chan chans[SCU_MU_CHAN_NUM];
+	struct device *dev;
+	struct mutex lock;
+	struct completion done;
+
+	/* temporarily store the SCU msg */
+	u32 *msg;
+	u8 rx_size;
+	u8 count;
+};
+
+/*
+ * This type is used to indicate error response for most functions.
+ */
+enum imx_sc_error_codes {
+	IMX_SC_ERR_NONE = 0,	/* Success */
+	IMX_SC_ERR_VERSION = 1,	/* Incompatible API version */
+	IMX_SC_ERR_CONFIG = 2,	/* Configuration error */
+	IMX_SC_ERR_PARM = 3,	/* Bad parameter */
+	IMX_SC_ERR_NOACCESS = 4,	/* Permission error (no access) */
+	IMX_SC_ERR_LOCKED = 5,	/* Permission error (locked) */
+	IMX_SC_ERR_UNAVAILABLE = 6,	/* Unavailable (out of resources) */
+	IMX_SC_ERR_NOTFOUND = 7,	/* Not found */
+	IMX_SC_ERR_NOPOWER = 8,	/* No power */
+	IMX_SC_ERR_IPC = 9,		/* Generic IPC error */
+	IMX_SC_ERR_BUSY = 10,	/* Resource is currently busy/active */
+	IMX_SC_ERR_FAIL = 11,	/* General I/O failure */
+	IMX_SC_ERR_LAST
+};
+
+static int imx_sc_linux_errmap[IMX_SC_ERR_LAST] = {
+	0,	 /* IMX_SC_ERR_NONE */
+	-EINVAL, /* IMX_SC_ERR_VERSION */
+	-EINVAL, /* IMX_SC_ERR_CONFIG */
+	-EINVAL, /* IMX_SC_ERR_PARM */
+	-EACCES, /* IMX_SC_ERR_NOACCESS */
+	-EACCES, /* IMX_SC_ERR_LOCKED */
+	-ERANGE, /* IMX_SC_ERR_UNAVAILABLE */
+	-EEXIST, /* IMX_SC_ERR_NOTFOUND */
+	-EPERM,	 /* IMX_SC_ERR_NOPOWER */
+	-EPIPE,	 /* IMX_SC_ERR_IPC */
+	-EBUSY,	 /* IMX_SC_ERR_BUSY */
+	-EIO,	 /* IMX_SC_ERR_FAIL */
+};
+
+static struct imx_sc_ipc *imx_sc_ipc_handle;
+
+static inline int imx_sc_to_linux_errno(int errno)
+{
+	if (errno >= IMX_SC_ERR_NONE && errno < IMX_SC_ERR_LAST)
+		return imx_sc_linux_errmap[errno];
+	return -EIO;
+}
+
+/*
+ * Get the default handle used by SCU
+ */
+int imx_scu_get_handle(struct imx_sc_ipc **ipc)
+{
+	if (!imx_sc_ipc_handle)
+		return -EPROBE_DEFER;
+
+	*ipc = imx_sc_ipc_handle;
+	return 0;
+}
+EXPORT_SYMBOL(imx_scu_get_handle);
+
+static void imx_scu_rx_callback(struct mbox_client *c, void *msg)
+{
+	struct imx_sc_chan *sc_chan = container_of(c, struct imx_sc_chan, cl);
+	struct imx_sc_ipc *sc_ipc = sc_chan->sc_ipc;
+	struct imx_sc_rpc_msg *hdr;
+	u32 *data = msg;
+
+	if (sc_chan->idx == 0) {
+		hdr = msg;
+		sc_ipc->rx_size = hdr->size;
+		dev_dbg(sc_ipc->dev, "msg rx size %u\n", sc_ipc->rx_size);
+		if (sc_ipc->rx_size > 4)
+			dev_warn(sc_ipc->dev, "RPC does not support receiving over 4 words: %u\n",
+				 sc_ipc->rx_size);
+	}
+
+	sc_ipc->msg[sc_chan->idx] = *data;
+	sc_ipc->count++;
+
+	dev_dbg(sc_ipc->dev, "mu %u msg %u 0x%x\n", sc_chan->idx,
+		sc_ipc->count, *data);
+
+	if ((sc_ipc->rx_size != 0) && (sc_ipc->count == sc_ipc->rx_size))
+		complete(&sc_ipc->done);
+}
+
+static int imx_scu_ipc_write(struct imx_sc_ipc *sc_ipc, void *msg)
+{
+	struct imx_sc_rpc_msg *hdr = msg;
+	struct imx_sc_chan *sc_chan;
+	u32 *data = msg;
+	int ret;
+	int i;
+
+	/* Check size */
+	if (hdr->size > IMX_SC_RPC_MAX_MSG)
+		return -EINVAL;
+
+	dev_dbg(sc_ipc->dev, "RPC SVC %u FUNC %u SIZE %u\n", hdr->svc,
+		hdr->func, hdr->size);
+
+	for (i = 0; i < hdr->size; i++) {
+		sc_chan = &sc_ipc->chans[i % 4];
+		ret = mbox_send_message(sc_chan->ch, &data[i]);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * RPC command/response
+ */
+int imx_scu_call_rpc(struct imx_sc_ipc *sc_ipc, void *msg, bool have_resp)
+{
+	struct imx_sc_rpc_msg *hdr;
+	int ret;
+
+	if (WARN_ON(!sc_ipc || !msg))
+		return -EINVAL;
+
+	mutex_lock(&sc_ipc->lock);
+	reinit_completion(&sc_ipc->done);
+
+	sc_ipc->msg = msg;
+	sc_ipc->count = 0;
+	ret = imx_scu_ipc_write(sc_ipc, msg);
+	if (ret < 0) {
+		dev_err(sc_ipc->dev, "RPC send msg failed: %d\n", ret);
+		goto out;
+	}
+
+	if (have_resp) {
+		if (!wait_for_completion_timeout(&sc_ipc->done,
+						 MAX_RX_TIMEOUT)) {
+			dev_err(sc_ipc->dev, "RPC send msg timeout\n");
+			mutex_unlock(&sc_ipc->lock);
+			return -ETIMEDOUT;
+		}
+
+		/* response status is stored in hdr->func field */
+		hdr = msg;
+		ret = hdr->func;
+	}
+
+out:
+	mutex_unlock(&sc_ipc->lock);
+
+	dev_dbg(sc_ipc->dev, "RPC SVC done\n");
+
+	return imx_sc_to_linux_errno(ret);
+}
+EXPORT_SYMBOL(imx_scu_call_rpc);
+
+static int imx_scu_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct imx_sc_ipc *sc_ipc;
+	struct imx_sc_chan *sc_chan;
+	struct mbox_client *cl;
+	char *chan_name;
+	int ret;
+	int i;
+
+	sc_ipc = devm_kzalloc(dev, sizeof(*sc_ipc), GFP_KERNEL);
+	if (!sc_ipc)
+		return -ENOMEM;
+
+	for (i = 0; i < SCU_MU_CHAN_NUM; i++) {
+		if (i < 4)
+			chan_name = kasprintf(GFP_KERNEL, "tx%d", i);
+		else
+			chan_name = kasprintf(GFP_KERNEL, "rx%d", i - 4);
+
+		if (!chan_name)
+			return -ENOMEM;
+
+		sc_chan = &sc_ipc->chans[i];
+		cl = &sc_chan->cl;
+		cl->dev = dev;
+		cl->tx_block = false;
+		cl->knows_txdone = true;
+		cl->rx_callback = imx_scu_rx_callback;
+
+		sc_chan->sc_ipc = sc_ipc;
+		sc_chan->idx = i % 4;
+		sc_chan->ch = mbox_request_channel_byname(cl, chan_name);
+		if (IS_ERR(sc_chan->ch)) {
+			ret = PTR_ERR(sc_chan->ch);
+			if (ret != -EPROBE_DEFER)
+				dev_err(dev, "Failed to request mbox chan %s ret %d\n",
+					chan_name, ret);
+			return ret;
+		}
+
+		dev_dbg(dev, "request mbox chan %s\n", chan_name);
+		/* chan_name is not used anymore by framework */
+		kfree(chan_name);
+	}
+
+	sc_ipc->dev = dev;
+	mutex_init(&sc_ipc->lock);
+	init_completion(&sc_ipc->done);
+
+	imx_sc_ipc_handle = sc_ipc;
+
+	dev_info(dev, "NXP i.MX SCU Initialized\n");
+
+	return devm_of_platform_populate(dev);
+}
+
+static const struct of_device_id imx_scu_match[] = {
+	{ .compatible = "fsl,imx-scu", },
+	{ /* Sentinel */ }
+};
+
+static struct platform_driver imx_scu_driver = {
+	.driver = {
+		.name = "imx-scu",
+		.of_match_table = imx_scu_match,
+	},
+	.probe = imx_scu_probe,
+};
+builtin_platform_driver(imx_scu_driver);
+
+MODULE_AUTHOR("Dong Aisheng <aisheng.dong@nxp.com>");
+MODULE_DESCRIPTION("IMX SCU firmware protocol driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/firmware/imx/ipc.h b/include/linux/firmware/imx/ipc.h
new file mode 100644
index 000000000000..6312c8cb084a
--- /dev/null
+++ b/include/linux/firmware/imx/ipc.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright 2018 NXP
+ *
+ * Header file for the IPC implementation.
+ */
+
+#ifndef _SC_IPC_H
+#define _SC_IPC_H
+
+#include <linux/device.h>
+#include <linux/types.h>
+
+#define IMX_SC_RPC_VERSION	1
+#define IMX_SC_RPC_MAX_MSG	8
+
+struct imx_sc_ipc;
+
+enum imx_sc_rpc_svc {
+	IMX_SC_RPC_SVC_UNKNOWN = 0,
+	IMX_SC_RPC_SVC_RETURN = 1,
+	IMX_SC_RPC_SVC_PM = 2,
+	IMX_SC_RPC_SVC_RM = 3,
+	IMX_SC_RPC_SVC_TIMER = 5,
+	IMX_SC_RPC_SVC_PAD = 6,
+	IMX_SC_RPC_SVC_MISC = 7,
+	IMX_SC_RPC_SVC_IRQ = 8,
+	IMX_SC_RPC_SVC_ABORT = 9
+};
+
+struct imx_sc_rpc_msg {
+	uint8_t ver;
+	uint8_t size;
+	uint8_t svc;
+	uint8_t func;
+};
+
+/*
+ * This is an function to send an RPC message over an IPC channel.
+ * It is called by client-side SCFW API function shims.
+ *
+ * @param[in]     ipc         IPC handle
+ * @param[in,out] msg         handle to a message
+ * @param[in]     have_resp   response flag
+ *
+ * If have_resp is true then this function waits for a response
+ * and returns the result in msg.
+ */
+int imx_scu_call_rpc(struct imx_sc_ipc *ipc, void *msg, bool have_resp);
+
+/*
+ * This function gets the default ipc handle used by SCU
+ *
+ * @param[out]	ipc	sc ipc handle
+ *
+ * @return Returns an error code (0 = success, failed if < 0)
+ */
+int imx_scu_get_handle(struct imx_sc_ipc **ipc);
+#endif /* _SC_IPC_H */
diff --git a/include/linux/firmware/imx/sci.h b/include/linux/firmware/imx/sci.h
new file mode 100644
index 000000000000..ff3227ad8d7c
--- /dev/null
+++ b/include/linux/firmware/imx/sci.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2016 Freescale Semiconductor, Inc.
+ * Copyright 2017~2018 NXP
+ *
+ * Header file containing the public System Controller Interface (SCI)
+ * definitions.
+ */
+
+#ifndef _SC_SCI_H
+#define _SC_SCI_H
+
+#include <linux/firmware/imx/ipc.h>
+#include <linux/firmware/imx/types.h>
+
+#endif /* _SC_SCI_H */
diff --git a/include/linux/firmware/imx/types.h b/include/linux/firmware/imx/types.h
new file mode 100644
index 000000000000..9cbf0c4a6069
--- /dev/null
+++ b/include/linux/firmware/imx/types.h
@@ -0,0 +1,617 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2016 Freescale Semiconductor, Inc.
+ * Copyright 2017~2018 NXP
+ *
+ * Header file containing types used across multiple service APIs.
+ */
+
+#ifndef _SC_TYPES_H
+#define _SC_TYPES_H
+
+/*
+ * This type is used to indicate a resource. Resources include peripherals
+ * and bus masters (but not memory regions). Note items from list should
+ * never be changed or removed (only added to at the end of the list).
+ */
+enum imx_sc_rsrc {
+	IMX_SC_R_A53 = 0,
+	IMX_SC_R_A53_0 = 1,
+	IMX_SC_R_A53_1 = 2,
+	IMX_SC_R_A53_2 = 3,
+	IMX_SC_R_A53_3 = 4,
+	IMX_SC_R_A72 = 5,
+	IMX_SC_R_A72_0 = 6,
+	IMX_SC_R_A72_1 = 7,
+	IMX_SC_R_A72_2 = 8,
+	IMX_SC_R_A72_3 = 9,
+	IMX_SC_R_CCI = 10,
+	IMX_SC_R_DB = 11,
+	IMX_SC_R_DRC_0 = 12,
+	IMX_SC_R_DRC_1 = 13,
+	IMX_SC_R_GIC_SMMU = 14,
+	IMX_SC_R_IRQSTR_M4_0 = 15,
+	IMX_SC_R_IRQSTR_M4_1 = 16,
+	IMX_SC_R_SMMU = 17,
+	IMX_SC_R_GIC = 18,
+	IMX_SC_R_DC_0_BLIT0 = 19,
+	IMX_SC_R_DC_0_BLIT1 = 20,
+	IMX_SC_R_DC_0_BLIT2 = 21,
+	IMX_SC_R_DC_0_BLIT_OUT = 22,
+	IMX_SC_R_DC_0_CAPTURE0 = 23,
+	IMX_SC_R_DC_0_CAPTURE1 = 24,
+	IMX_SC_R_DC_0_WARP = 25,
+	IMX_SC_R_DC_0_INTEGRAL0 = 26,
+	IMX_SC_R_DC_0_INTEGRAL1 = 27,
+	IMX_SC_R_DC_0_VIDEO0 = 28,
+	IMX_SC_R_DC_0_VIDEO1 = 29,
+	IMX_SC_R_DC_0_FRAC0 = 30,
+	IMX_SC_R_DC_0_FRAC1 = 31,
+	IMX_SC_R_DC_0 = 32,
+	IMX_SC_R_GPU_2_PID0 = 33,
+	IMX_SC_R_DC_0_PLL_0 = 34,
+	IMX_SC_R_DC_0_PLL_1 = 35,
+	IMX_SC_R_DC_1_BLIT0 = 36,
+	IMX_SC_R_DC_1_BLIT1 = 37,
+	IMX_SC_R_DC_1_BLIT2 = 38,
+	IMX_SC_R_DC_1_BLIT_OUT = 39,
+	IMX_SC_R_DC_1_CAPTURE0 = 40,
+	IMX_SC_R_DC_1_CAPTURE1 = 41,
+	IMX_SC_R_DC_1_WARP = 42,
+	IMX_SC_R_DC_1_INTEGRAL0 = 43,
+	IMX_SC_R_DC_1_INTEGRAL1 = 44,
+	IMX_SC_R_DC_1_VIDEO0 = 45,
+	IMX_SC_R_DC_1_VIDEO1 = 46,
+	IMX_SC_R_DC_1_FRAC0 = 47,
+	IMX_SC_R_DC_1_FRAC1 = 48,
+	IMX_SC_R_DC_1 = 49,
+	IMX_SC_R_GPU_3_PID0 = 50,
+	IMX_SC_R_DC_1_PLL_0 = 51,
+	IMX_SC_R_DC_1_PLL_1 = 52,
+	IMX_SC_R_SPI_0 = 53,
+	IMX_SC_R_SPI_1 = 54,
+	IMX_SC_R_SPI_2 = 55,
+	IMX_SC_R_SPI_3 = 56,
+	IMX_SC_R_UART_0 = 57,
+	IMX_SC_R_UART_1 = 58,
+	IMX_SC_R_UART_2 = 59,
+	IMX_SC_R_UART_3 = 60,
+	IMX_SC_R_UART_4 = 61,
+	IMX_SC_R_EMVSIM_0 = 62,
+	IMX_SC_R_EMVSIM_1 = 63,
+	IMX_SC_R_DMA_0_CH0 = 64,
+	IMX_SC_R_DMA_0_CH1 = 65,
+	IMX_SC_R_DMA_0_CH2 = 66,
+	IMX_SC_R_DMA_0_CH3 = 67,
+	IMX_SC_R_DMA_0_CH4 = 68,
+	IMX_SC_R_DMA_0_CH5 = 69,
+	IMX_SC_R_DMA_0_CH6 = 70,
+	IMX_SC_R_DMA_0_CH7 = 71,
+	IMX_SC_R_DMA_0_CH8 = 72,
+	IMX_SC_R_DMA_0_CH9 = 73,
+	IMX_SC_R_DMA_0_CH10 = 74,
+	IMX_SC_R_DMA_0_CH11 = 75,
+	IMX_SC_R_DMA_0_CH12 = 76,
+	IMX_SC_R_DMA_0_CH13 = 77,
+	IMX_SC_R_DMA_0_CH14 = 78,
+	IMX_SC_R_DMA_0_CH15 = 79,
+	IMX_SC_R_DMA_0_CH16 = 80,
+	IMX_SC_R_DMA_0_CH17 = 81,
+	IMX_SC_R_DMA_0_CH18 = 82,
+	IMX_SC_R_DMA_0_CH19 = 83,
+	IMX_SC_R_DMA_0_CH20 = 84,
+	IMX_SC_R_DMA_0_CH21 = 85,
+	IMX_SC_R_DMA_0_CH22 = 86,
+	IMX_SC_R_DMA_0_CH23 = 87,
+	IMX_SC_R_DMA_0_CH24 = 88,
+	IMX_SC_R_DMA_0_CH25 = 89,
+	IMX_SC_R_DMA_0_CH26 = 90,
+	IMX_SC_R_DMA_0_CH27 = 91,
+	IMX_SC_R_DMA_0_CH28 = 92,
+	IMX_SC_R_DMA_0_CH29 = 93,
+	IMX_SC_R_DMA_0_CH30 = 94,
+	IMX_SC_R_DMA_0_CH31 = 95,
+	IMX_SC_R_I2C_0 = 96,
+	IMX_SC_R_I2C_1 = 97,
+	IMX_SC_R_I2C_2 = 98,
+	IMX_SC_R_I2C_3 = 99,
+	IMX_SC_R_I2C_4 = 100,
+	IMX_SC_R_ADC_0 = 101,
+	IMX_SC_R_ADC_1 = 102,
+	IMX_SC_R_FTM_0 = 103,
+	IMX_SC_R_FTM_1 = 104,
+	IMX_SC_R_CAN_0 = 105,
+	IMX_SC_R_CAN_1 = 106,
+	IMX_SC_R_CAN_2 = 107,
+	IMX_SC_R_DMA_1_CH0 = 108,
+	IMX_SC_R_DMA_1_CH1 = 109,
+	IMX_SC_R_DMA_1_CH2 = 110,
+	IMX_SC_R_DMA_1_CH3 = 111,
+	IMX_SC_R_DMA_1_CH4 = 112,
+	IMX_SC_R_DMA_1_CH5 = 113,
+	IMX_SC_R_DMA_1_CH6 = 114,
+	IMX_SC_R_DMA_1_CH7 = 115,
+	IMX_SC_R_DMA_1_CH8 = 116,
+	IMX_SC_R_DMA_1_CH9 = 117,
+	IMX_SC_R_DMA_1_CH10 = 118,
+	IMX_SC_R_DMA_1_CH11 = 119,
+	IMX_SC_R_DMA_1_CH12 = 120,
+	IMX_SC_R_DMA_1_CH13 = 121,
+	IMX_SC_R_DMA_1_CH14 = 122,
+	IMX_SC_R_DMA_1_CH15 = 123,
+	IMX_SC_R_DMA_1_CH16 = 124,
+	IMX_SC_R_DMA_1_CH17 = 125,
+	IMX_SC_R_DMA_1_CH18 = 126,
+	IMX_SC_R_DMA_1_CH19 = 127,
+	IMX_SC_R_DMA_1_CH20 = 128,
+	IMX_SC_R_DMA_1_CH21 = 129,
+	IMX_SC_R_DMA_1_CH22 = 130,
+	IMX_SC_R_DMA_1_CH23 = 131,
+	IMX_SC_R_DMA_1_CH24 = 132,
+	IMX_SC_R_DMA_1_CH25 = 133,
+	IMX_SC_R_DMA_1_CH26 = 134,
+	IMX_SC_R_DMA_1_CH27 = 135,
+	IMX_SC_R_DMA_1_CH28 = 136,
+	IMX_SC_R_DMA_1_CH29 = 137,
+	IMX_SC_R_DMA_1_CH30 = 138,
+	IMX_SC_R_DMA_1_CH31 = 139,
+	IMX_SC_R_UNUSED1 = 140,
+	IMX_SC_R_UNUSED2 = 141,
+	IMX_SC_R_UNUSED3 = 142,
+	IMX_SC_R_UNUSED4 = 143,
+	IMX_SC_R_GPU_0_PID0 = 144,
+	IMX_SC_R_GPU_0_PID1 = 145,
+	IMX_SC_R_GPU_0_PID2 = 146,
+	IMX_SC_R_GPU_0_PID3 = 147,
+	IMX_SC_R_GPU_1_PID0 = 148,
+	IMX_SC_R_GPU_1_PID1 = 149,
+	IMX_SC_R_GPU_1_PID2 = 150,
+	IMX_SC_R_GPU_1_PID3 = 151,
+	IMX_SC_R_PCIE_A = 152,
+	IMX_SC_R_SERDES_0 = 153,
+	IMX_SC_R_MATCH_0 = 154,
+	IMX_SC_R_MATCH_1 = 155,
+	IMX_SC_R_MATCH_2 = 156,
+	IMX_SC_R_MATCH_3 = 157,
+	IMX_SC_R_MATCH_4 = 158,
+	IMX_SC_R_MATCH_5 = 159,
+	IMX_SC_R_MATCH_6 = 160,
+	IMX_SC_R_MATCH_7 = 161,
+	IMX_SC_R_MATCH_8 = 162,
+	IMX_SC_R_MATCH_9 = 163,
+	IMX_SC_R_MATCH_10 = 164,
+	IMX_SC_R_MATCH_11 = 165,
+	IMX_SC_R_MATCH_12 = 166,
+	IMX_SC_R_MATCH_13 = 167,
+	IMX_SC_R_MATCH_14 = 168,
+	IMX_SC_R_PCIE_B = 169,
+	IMX_SC_R_SATA_0 = 170,
+	IMX_SC_R_SERDES_1 = 171,
+	IMX_SC_R_HSIO_GPIO = 172,
+	IMX_SC_R_MATCH_15 = 173,
+	IMX_SC_R_MATCH_16 = 174,
+	IMX_SC_R_MATCH_17 = 175,
+	IMX_SC_R_MATCH_18 = 176,
+	IMX_SC_R_MATCH_19 = 177,
+	IMX_SC_R_MATCH_20 = 178,
+	IMX_SC_R_MATCH_21 = 179,
+	IMX_SC_R_MATCH_22 = 180,
+	IMX_SC_R_MATCH_23 = 181,
+	IMX_SC_R_MATCH_24 = 182,
+	IMX_SC_R_MATCH_25 = 183,
+	IMX_SC_R_MATCH_26 = 184,
+	IMX_SC_R_MATCH_27 = 185,
+	IMX_SC_R_MATCH_28 = 186,
+	IMX_SC_R_LCD_0 = 187,
+	IMX_SC_R_LCD_0_PWM_0 = 188,
+	IMX_SC_R_LCD_0_I2C_0 = 189,
+	IMX_SC_R_LCD_0_I2C_1 = 190,
+	IMX_SC_R_PWM_0 = 191,
+	IMX_SC_R_PWM_1 = 192,
+	IMX_SC_R_PWM_2 = 193,
+	IMX_SC_R_PWM_3 = 194,
+	IMX_SC_R_PWM_4 = 195,
+	IMX_SC_R_PWM_5 = 196,
+	IMX_SC_R_PWM_6 = 197,
+	IMX_SC_R_PWM_7 = 198,
+	IMX_SC_R_GPIO_0 = 199,
+	IMX_SC_R_GPIO_1 = 200,
+	IMX_SC_R_GPIO_2 = 201,
+	IMX_SC_R_GPIO_3 = 202,
+	IMX_SC_R_GPIO_4 = 203,
+	IMX_SC_R_GPIO_5 = 204,
+	IMX_SC_R_GPIO_6 = 205,
+	IMX_SC_R_GPIO_7 = 206,
+	IMX_SC_R_GPT_0 = 207,
+	IMX_SC_R_GPT_1 = 208,
+	IMX_SC_R_GPT_2 = 209,
+	IMX_SC_R_GPT_3 = 210,
+	IMX_SC_R_GPT_4 = 211,
+	IMX_SC_R_KPP = 212,
+	IMX_SC_R_MU_0A = 213,
+	IMX_SC_R_MU_1A = 214,
+	IMX_SC_R_MU_2A = 215,
+	IMX_SC_R_MU_3A = 216,
+	IMX_SC_R_MU_4A = 217,
+	IMX_SC_R_MU_5A = 218,
+	IMX_SC_R_MU_6A = 219,
+	IMX_SC_R_MU_7A = 220,
+	IMX_SC_R_MU_8A = 221,
+	IMX_SC_R_MU_9A = 222,
+	IMX_SC_R_MU_10A = 223,
+	IMX_SC_R_MU_11A = 224,
+	IMX_SC_R_MU_12A = 225,
+	IMX_SC_R_MU_13A = 226,
+	IMX_SC_R_MU_5B = 227,
+	IMX_SC_R_MU_6B = 228,
+	IMX_SC_R_MU_7B = 229,
+	IMX_SC_R_MU_8B = 230,
+	IMX_SC_R_MU_9B = 231,
+	IMX_SC_R_MU_10B = 232,
+	IMX_SC_R_MU_11B = 233,
+	IMX_SC_R_MU_12B = 234,
+	IMX_SC_R_MU_13B = 235,
+	IMX_SC_R_ROM_0 = 236,
+	IMX_SC_R_FSPI_0 = 237,
+	IMX_SC_R_FSPI_1 = 238,
+	IMX_SC_R_IEE = 239,
+	IMX_SC_R_IEE_R0 = 240,
+	IMX_SC_R_IEE_R1 = 241,
+	IMX_SC_R_IEE_R2 = 242,
+	IMX_SC_R_IEE_R3 = 243,
+	IMX_SC_R_IEE_R4 = 244,
+	IMX_SC_R_IEE_R5 = 245,
+	IMX_SC_R_IEE_R6 = 246,
+	IMX_SC_R_IEE_R7 = 247,
+	IMX_SC_R_SDHC_0 = 248,
+	IMX_SC_R_SDHC_1 = 249,
+	IMX_SC_R_SDHC_2 = 250,
+	IMX_SC_R_ENET_0 = 251,
+	IMX_SC_R_ENET_1 = 252,
+	IMX_SC_R_MLB_0 = 253,
+	IMX_SC_R_DMA_2_CH0 = 254,
+	IMX_SC_R_DMA_2_CH1 = 255,
+	IMX_SC_R_DMA_2_CH2 = 256,
+	IMX_SC_R_DMA_2_CH3 = 257,
+	IMX_SC_R_DMA_2_CH4 = 258,
+	IMX_SC_R_USB_0 = 259,
+	IMX_SC_R_USB_1 = 260,
+	IMX_SC_R_USB_0_PHY = 261,
+	IMX_SC_R_USB_2 = 262,
+	IMX_SC_R_USB_2_PHY = 263,
+	IMX_SC_R_DTCP = 264,
+	IMX_SC_R_NAND = 265,
+	IMX_SC_R_LVDS_0 = 266,
+	IMX_SC_R_LVDS_0_PWM_0 = 267,
+	IMX_SC_R_LVDS_0_I2C_0 = 268,
+	IMX_SC_R_LVDS_0_I2C_1 = 269,
+	IMX_SC_R_LVDS_1 = 270,
+	IMX_SC_R_LVDS_1_PWM_0 = 271,
+	IMX_SC_R_LVDS_1_I2C_0 = 272,
+	IMX_SC_R_LVDS_1_I2C_1 = 273,
+	IMX_SC_R_LVDS_2 = 274,
+	IMX_SC_R_LVDS_2_PWM_0 = 275,
+	IMX_SC_R_LVDS_2_I2C_0 = 276,
+	IMX_SC_R_LVDS_2_I2C_1 = 277,
+	IMX_SC_R_M4_0_PID0 = 278,
+	IMX_SC_R_M4_0_PID1 = 279,
+	IMX_SC_R_M4_0_PID2 = 280,
+	IMX_SC_R_M4_0_PID3 = 281,
+	IMX_SC_R_M4_0_PID4 = 282,
+	IMX_SC_R_M4_0_RGPIO = 283,
+	IMX_SC_R_M4_0_SEMA42 = 284,
+	IMX_SC_R_M4_0_TPM = 285,
+	IMX_SC_R_M4_0_PIT = 286,
+	IMX_SC_R_M4_0_UART = 287,
+	IMX_SC_R_M4_0_I2C = 288,
+	IMX_SC_R_M4_0_INTMUX = 289,
+	IMX_SC_R_M4_0_SIM = 290,
+	IMX_SC_R_M4_0_WDOG = 291,
+	IMX_SC_R_M4_0_MU_0B = 292,
+	IMX_SC_R_M4_0_MU_0A0 = 293,
+	IMX_SC_R_M4_0_MU_0A1 = 294,
+	IMX_SC_R_M4_0_MU_0A2 = 295,
+	IMX_SC_R_M4_0_MU_0A3 = 296,
+	IMX_SC_R_M4_0_MU_1A = 297,
+	IMX_SC_R_M4_1_PID0 = 298,
+	IMX_SC_R_M4_1_PID1 = 299,
+	IMX_SC_R_M4_1_PID2 = 300,
+	IMX_SC_R_M4_1_PID3 = 301,
+	IMX_SC_R_M4_1_PID4 = 302,
+	IMX_SC_R_M4_1_RGPIO = 303,
+	IMX_SC_R_M4_1_SEMA42 = 304,
+	IMX_SC_R_M4_1_TPM = 305,
+	IMX_SC_R_M4_1_PIT = 306,
+	IMX_SC_R_M4_1_UART = 307,
+	IMX_SC_R_M4_1_I2C = 308,
+	IMX_SC_R_M4_1_INTMUX = 309,
+	IMX_SC_R_M4_1_SIM = 310,
+	IMX_SC_R_M4_1_WDOG = 311,
+	IMX_SC_R_M4_1_MU_0B = 312,
+	IMX_SC_R_M4_1_MU_0A0 = 313,
+	IMX_SC_R_M4_1_MU_0A1 = 314,
+	IMX_SC_R_M4_1_MU_0A2 = 315,
+	IMX_SC_R_M4_1_MU_0A3 = 316,
+	IMX_SC_R_M4_1_MU_1A = 317,
+	IMX_SC_R_SAI_0 = 318,
+	IMX_SC_R_SAI_1 = 319,
+	IMX_SC_R_SAI_2 = 320,
+	IMX_SC_R_IRQSTR_SCU2 = 321,
+	IMX_SC_R_IRQSTR_DSP = 322,
+	IMX_SC_R_UNUSED5 = 323,
+	IMX_SC_R_UNUSED6 = 324,
+	IMX_SC_R_AUDIO_PLL_0 = 325,
+	IMX_SC_R_PI_0 = 326,
+	IMX_SC_R_PI_0_PWM_0 = 327,
+	IMX_SC_R_PI_0_PWM_1 = 328,
+	IMX_SC_R_PI_0_I2C_0 = 329,
+	IMX_SC_R_PI_0_PLL = 330,
+	IMX_SC_R_PI_1 = 331,
+	IMX_SC_R_PI_1_PWM_0 = 332,
+	IMX_SC_R_PI_1_PWM_1 = 333,
+	IMX_SC_R_PI_1_I2C_0 = 334,
+	IMX_SC_R_PI_1_PLL = 335,
+	IMX_SC_R_SC_PID0 = 336,
+	IMX_SC_R_SC_PID1 = 337,
+	IMX_SC_R_SC_PID2 = 338,
+	IMX_SC_R_SC_PID3 = 339,
+	IMX_SC_R_SC_PID4 = 340,
+	IMX_SC_R_SC_SEMA42 = 341,
+	IMX_SC_R_SC_TPM = 342,
+	IMX_SC_R_SC_PIT = 343,
+	IMX_SC_R_SC_UART = 344,
+	IMX_SC_R_SC_I2C = 345,
+	IMX_SC_R_SC_MU_0B = 346,
+	IMX_SC_R_SC_MU_0A0 = 347,
+	IMX_SC_R_SC_MU_0A1 = 348,
+	IMX_SC_R_SC_MU_0A2 = 349,
+	IMX_SC_R_SC_MU_0A3 = 350,
+	IMX_SC_R_SC_MU_1A = 351,
+	IMX_SC_R_SYSCNT_RD = 352,
+	IMX_SC_R_SYSCNT_CMP = 353,
+	IMX_SC_R_DEBUG = 354,
+	IMX_SC_R_SYSTEM = 355,
+	IMX_SC_R_SNVS = 356,
+	IMX_SC_R_OTP = 357,
+	IMX_SC_R_VPU_PID0 = 358,
+	IMX_SC_R_VPU_PID1 = 359,
+	IMX_SC_R_VPU_PID2 = 360,
+	IMX_SC_R_VPU_PID3 = 361,
+	IMX_SC_R_VPU_PID4 = 362,
+	IMX_SC_R_VPU_PID5 = 363,
+	IMX_SC_R_VPU_PID6 = 364,
+	IMX_SC_R_VPU_PID7 = 365,
+	IMX_SC_R_VPU_UART = 366,
+	IMX_SC_R_VPUCORE = 367,
+	IMX_SC_R_VPUCORE_0 = 368,
+	IMX_SC_R_VPUCORE_1 = 369,
+	IMX_SC_R_VPUCORE_2 = 370,
+	IMX_SC_R_VPUCORE_3 = 371,
+	IMX_SC_R_DMA_4_CH0 = 372,
+	IMX_SC_R_DMA_4_CH1 = 373,
+	IMX_SC_R_DMA_4_CH2 = 374,
+	IMX_SC_R_DMA_4_CH3 = 375,
+	IMX_SC_R_DMA_4_CH4 = 376,
+	IMX_SC_R_ISI_CH0 = 377,
+	IMX_SC_R_ISI_CH1 = 378,
+	IMX_SC_R_ISI_CH2 = 379,
+	IMX_SC_R_ISI_CH3 = 380,
+	IMX_SC_R_ISI_CH4 = 381,
+	IMX_SC_R_ISI_CH5 = 382,
+	IMX_SC_R_ISI_CH6 = 383,
+	IMX_SC_R_ISI_CH7 = 384,
+	IMX_SC_R_MJPEG_DEC_S0 = 385,
+	IMX_SC_R_MJPEG_DEC_S1 = 386,
+	IMX_SC_R_MJPEG_DEC_S2 = 387,
+	IMX_SC_R_MJPEG_DEC_S3 = 388,
+	IMX_SC_R_MJPEG_ENC_S0 = 389,
+	IMX_SC_R_MJPEG_ENC_S1 = 390,
+	IMX_SC_R_MJPEG_ENC_S2 = 391,
+	IMX_SC_R_MJPEG_ENC_S3 = 392,
+	IMX_SC_R_MIPI_0 = 393,
+	IMX_SC_R_MIPI_0_PWM_0 = 394,
+	IMX_SC_R_MIPI_0_I2C_0 = 395,
+	IMX_SC_R_MIPI_0_I2C_1 = 396,
+	IMX_SC_R_MIPI_1 = 397,
+	IMX_SC_R_MIPI_1_PWM_0 = 398,
+	IMX_SC_R_MIPI_1_I2C_0 = 399,
+	IMX_SC_R_MIPI_1_I2C_1 = 400,
+	IMX_SC_R_CSI_0 = 401,
+	IMX_SC_R_CSI_0_PWM_0 = 402,
+	IMX_SC_R_CSI_0_I2C_0 = 403,
+	IMX_SC_R_CSI_1 = 404,
+	IMX_SC_R_CSI_1_PWM_0 = 405,
+	IMX_SC_R_CSI_1_I2C_0 = 406,
+	IMX_SC_R_HDMI = 407,
+	IMX_SC_R_HDMI_I2S = 408,
+	IMX_SC_R_HDMI_I2C_0 = 409,
+	IMX_SC_R_HDMI_PLL_0 = 410,
+	IMX_SC_R_HDMI_RX = 411,
+	IMX_SC_R_HDMI_RX_BYPASS = 412,
+	IMX_SC_R_HDMI_RX_I2C_0 = 413,
+	IMX_SC_R_ASRC_0 = 414,
+	IMX_SC_R_ESAI_0 = 415,
+	IMX_SC_R_SPDIF_0 = 416,
+	IMX_SC_R_SPDIF_1 = 417,
+	IMX_SC_R_SAI_3 = 418,
+	IMX_SC_R_SAI_4 = 419,
+	IMX_SC_R_SAI_5 = 420,
+	IMX_SC_R_GPT_5 = 421,
+	IMX_SC_R_GPT_6 = 422,
+	IMX_SC_R_GPT_7 = 423,
+	IMX_SC_R_GPT_8 = 424,
+	IMX_SC_R_GPT_9 = 425,
+	IMX_SC_R_GPT_10 = 426,
+	IMX_SC_R_DMA_2_CH5 = 427,
+	IMX_SC_R_DMA_2_CH6 = 428,
+	IMX_SC_R_DMA_2_CH7 = 429,
+	IMX_SC_R_DMA_2_CH8 = 430,
+	IMX_SC_R_DMA_2_CH9 = 431,
+	IMX_SC_R_DMA_2_CH10 = 432,
+	IMX_SC_R_DMA_2_CH11 = 433,
+	IMX_SC_R_DMA_2_CH12 = 434,
+	IMX_SC_R_DMA_2_CH13 = 435,
+	IMX_SC_R_DMA_2_CH14 = 436,
+	IMX_SC_R_DMA_2_CH15 = 437,
+	IMX_SC_R_DMA_2_CH16 = 438,
+	IMX_SC_R_DMA_2_CH17 = 439,
+	IMX_SC_R_DMA_2_CH18 = 440,
+	IMX_SC_R_DMA_2_CH19 = 441,
+	IMX_SC_R_DMA_2_CH20 = 442,
+	IMX_SC_R_DMA_2_CH21 = 443,
+	IMX_SC_R_DMA_2_CH22 = 444,
+	IMX_SC_R_DMA_2_CH23 = 445,
+	IMX_SC_R_DMA_2_CH24 = 446,
+	IMX_SC_R_DMA_2_CH25 = 447,
+	IMX_SC_R_DMA_2_CH26 = 448,
+	IMX_SC_R_DMA_2_CH27 = 449,
+	IMX_SC_R_DMA_2_CH28 = 450,
+	IMX_SC_R_DMA_2_CH29 = 451,
+	IMX_SC_R_DMA_2_CH30 = 452,
+	IMX_SC_R_DMA_2_CH31 = 453,
+	IMX_SC_R_ASRC_1 = 454,
+	IMX_SC_R_ESAI_1 = 455,
+	IMX_SC_R_SAI_6 = 456,
+	IMX_SC_R_SAI_7 = 457,
+	IMX_SC_R_AMIX = 458,
+	IMX_SC_R_MQS_0 = 459,
+	IMX_SC_R_DMA_3_CH0 = 460,
+	IMX_SC_R_DMA_3_CH1 = 461,
+	IMX_SC_R_DMA_3_CH2 = 462,
+	IMX_SC_R_DMA_3_CH3 = 463,
+	IMX_SC_R_DMA_3_CH4 = 464,
+	IMX_SC_R_DMA_3_CH5 = 465,
+	IMX_SC_R_DMA_3_CH6 = 466,
+	IMX_SC_R_DMA_3_CH7 = 467,
+	IMX_SC_R_DMA_3_CH8 = 468,
+	IMX_SC_R_DMA_3_CH9 = 469,
+	IMX_SC_R_DMA_3_CH10 = 470,
+	IMX_SC_R_DMA_3_CH11 = 471,
+	IMX_SC_R_DMA_3_CH12 = 472,
+	IMX_SC_R_DMA_3_CH13 = 473,
+	IMX_SC_R_DMA_3_CH14 = 474,
+	IMX_SC_R_DMA_3_CH15 = 475,
+	IMX_SC_R_DMA_3_CH16 = 476,
+	IMX_SC_R_DMA_3_CH17 = 477,
+	IMX_SC_R_DMA_3_CH18 = 478,
+	IMX_SC_R_DMA_3_CH19 = 479,
+	IMX_SC_R_DMA_3_CH20 = 480,
+	IMX_SC_R_DMA_3_CH21 = 481,
+	IMX_SC_R_DMA_3_CH22 = 482,
+	IMX_SC_R_DMA_3_CH23 = 483,
+	IMX_SC_R_DMA_3_CH24 = 484,
+	IMX_SC_R_DMA_3_CH25 = 485,
+	IMX_SC_R_DMA_3_CH26 = 486,
+	IMX_SC_R_DMA_3_CH27 = 487,
+	IMX_SC_R_DMA_3_CH28 = 488,
+	IMX_SC_R_DMA_3_CH29 = 489,
+	IMX_SC_R_DMA_3_CH30 = 490,
+	IMX_SC_R_DMA_3_CH31 = 491,
+	IMX_SC_R_AUDIO_PLL_1 = 492,
+	IMX_SC_R_AUDIO_CLK_0 = 493,
+	IMX_SC_R_AUDIO_CLK_1 = 494,
+	IMX_SC_R_MCLK_OUT_0 = 495,
+	IMX_SC_R_MCLK_OUT_1 = 496,
+	IMX_SC_R_PMIC_0 = 497,
+	IMX_SC_R_PMIC_1 = 498,
+	IMX_SC_R_SECO = 499,
+	IMX_SC_R_CAAM_JR1 = 500,
+	IMX_SC_R_CAAM_JR2 = 501,
+	IMX_SC_R_CAAM_JR3 = 502,
+	IMX_SC_R_SECO_MU_2 = 503,
+	IMX_SC_R_SECO_MU_3 = 504,
+	IMX_SC_R_SECO_MU_4 = 505,
+	IMX_SC_R_HDMI_RX_PWM_0 = 506,
+	IMX_SC_R_A35 = 507,
+	IMX_SC_R_A35_0 = 508,
+	IMX_SC_R_A35_1 = 509,
+	IMX_SC_R_A35_2 = 510,
+	IMX_SC_R_A35_3 = 511,
+	IMX_SC_R_DSP = 512,
+	IMX_SC_R_DSP_RAM = 513,
+	IMX_SC_R_CAAM_JR1_OUT = 514,
+	IMX_SC_R_CAAM_JR2_OUT = 515,
+	IMX_SC_R_CAAM_JR3_OUT = 516,
+	IMX_SC_R_VPU_DEC_0 = 517,
+	IMX_SC_R_VPU_ENC_0 = 518,
+	IMX_SC_R_CAAM_JR0 = 519,
+	IMX_SC_R_CAAM_JR0_OUT = 520,
+	IMX_SC_R_PMIC_2 = 521,
+	IMX_SC_R_DBLOGIC = 522,
+	IMX_SC_R_HDMI_PLL_1 = 523,
+	IMX_SC_R_BOARD_R0 = 524,
+	IMX_SC_R_BOARD_R1 = 525,
+	IMX_SC_R_BOARD_R2 = 526,
+	IMX_SC_R_BOARD_R3 = 527,
+	IMX_SC_R_BOARD_R4 = 528,
+	IMX_SC_R_BOARD_R5 = 529,
+	IMX_SC_R_BOARD_R6 = 530,
+	IMX_SC_R_BOARD_R7 = 531,
+	IMX_SC_R_MJPEG_DEC_MP = 532,
+	IMX_SC_R_MJPEG_ENC_MP = 533,
+	IMX_SC_R_VPU_TS_0 = 534,
+	IMX_SC_R_VPU_MU_0 = 535,
+	IMX_SC_R_VPU_MU_1 = 536,
+	IMX_SC_R_VPU_MU_2 = 537,
+	IMX_SC_R_VPU_MU_3 = 538,
+	IMX_SC_R_VPU_ENC_1 = 539,
+	IMX_SC_R_VPU = 540,
+	IMX_SC_R_LAST
+};
+
+/* NOTE - please add by replacing some of the UNUSED from above! */
+
+/*
+ * This type is used to indicate a control.
+ */
+enum imx_sc_ctrl {
+	IMX_SC_C_TEMP = 0,
+	IMX_SC_C_TEMP_HI = 1,
+	IMX_SC_C_TEMP_LOW = 2,
+	IMX_SC_C_PXL_LINK_MST1_ADDR = 3,
+	IMX_SC_C_PXL_LINK_MST2_ADDR = 4,
+	IMX_SC_C_PXL_LINK_MST_ENB = 5,
+	IMX_SC_C_PXL_LINK_MST1_ENB = 6,
+	IMX_SC_C_PXL_LINK_MST2_ENB = 7,
+	IMX_SC_C_PXL_LINK_SLV1_ADDR = 8,
+	IMX_SC_C_PXL_LINK_SLV2_ADDR = 9,
+	IMX_SC_C_PXL_LINK_MST_VLD = 10,
+	IMX_SC_C_PXL_LINK_MST1_VLD = 11,
+	IMX_SC_C_PXL_LINK_MST2_VLD = 12,
+	IMX_SC_C_SINGLE_MODE = 13,
+	IMX_SC_C_ID = 14,
+	IMX_SC_C_PXL_CLK_POLARITY = 15,
+	IMX_SC_C_LINESTATE = 16,
+	IMX_SC_C_PCIE_G_RST = 17,
+	IMX_SC_C_PCIE_BUTTON_RST = 18,
+	IMX_SC_C_PCIE_PERST = 19,
+	IMX_SC_C_PHY_RESET = 20,
+	IMX_SC_C_PXL_LINK_RATE_CORRECTION = 21,
+	IMX_SC_C_PANIC = 22,
+	IMX_SC_C_PRIORITY_GROUP = 23,
+	IMX_SC_C_TXCLK = 24,
+	IMX_SC_C_CLKDIV = 25,
+	IMX_SC_C_DISABLE_50 = 26,
+	IMX_SC_C_DISABLE_125 = 27,
+	IMX_SC_C_SEL_125 = 28,
+	IMX_SC_C_MODE = 29,
+	IMX_SC_C_SYNC_CTRL0 = 30,
+	IMX_SC_C_KACHUNK_CNT = 31,
+	IMX_SC_C_KACHUNK_SEL = 32,
+	IMX_SC_C_SYNC_CTRL1 = 33,
+	IMX_SC_C_DPI_RESET = 34,
+	IMX_SC_C_MIPI_RESET = 35,
+	IMX_SC_C_DUAL_MODE = 36,
+	IMX_SC_C_VOLTAGE = 37,
+	IMX_SC_C_PXL_LINK_SEL = 38,
+	IMX_SC_C_OFS_SEL = 39,
+	IMX_SC_C_OFS_AUDIO = 40,
+	IMX_SC_C_OFS_PERIPH = 41,
+	IMX_SC_C_OFS_IRQ = 42,
+	IMX_SC_C_RST0 = 43,
+	IMX_SC_C_RST1 = 44,
+	IMX_SC_C_SEL0 = 45,
+	IMX_SC_C_LAST
+};
+
+#endif /* _SC_TYPES_H */
-- 
cgit v1.2.3


From 15e1f2bc8b3b2d238b9e06b128d4a09d28f11733 Mon Sep 17 00:00:00 2001
From: Dong Aisheng <aisheng.dong@nxp.com>
Date: Sun, 7 Oct 2018 21:04:43 +0800
Subject: firmware: imx: add misc svc support

Add SCU MISC SVC support which provides misc control get/set functions.

Cc: Shawn Guo <shawnguo@kernel.org>
Reviewed-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
 drivers/firmware/imx/Makefile         |  2 +-
 drivers/firmware/imx/misc.c           | 99 +++++++++++++++++++++++++++++++++++
 include/linux/firmware/imx/sci.h      |  1 +
 include/linux/firmware/imx/svc/misc.h | 55 +++++++++++++++++++
 4 files changed, 156 insertions(+), 1 deletion(-)
 create mode 100644 drivers/firmware/imx/misc.c
 create mode 100644 include/linux/firmware/imx/svc/misc.h

(limited to 'include/linux')

diff --git a/drivers/firmware/imx/Makefile b/drivers/firmware/imx/Makefile
index 9b1e2febb1aa..0ac04dfda8d4 100644
--- a/drivers/firmware/imx/Makefile
+++ b/drivers/firmware/imx/Makefile
@@ -1,2 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_IMX_SCU)	+= imx-scu.o
+obj-$(CONFIG_IMX_SCU)	+= imx-scu.o misc.o
diff --git a/drivers/firmware/imx/misc.c b/drivers/firmware/imx/misc.c
new file mode 100644
index 000000000000..97f5424dbac9
--- /dev/null
+++ b/drivers/firmware/imx/misc.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2016 Freescale Semiconductor, Inc.
+ * Copyright 2017~2018 NXP
+ *  Author: Dong Aisheng <aisheng.dong@nxp.com>
+ *
+ * File containing client-side RPC functions for the MISC service. These
+ * function are ported to clients that communicate to the SC.
+ *
+ */
+
+#include <linux/firmware/imx/svc/misc.h>
+
+struct imx_sc_msg_req_misc_set_ctrl {
+	struct imx_sc_rpc_msg hdr;
+	u32 ctrl;
+	u32 val;
+	u16 resource;
+} __packed;
+
+struct imx_sc_msg_req_misc_get_ctrl {
+	struct imx_sc_rpc_msg hdr;
+	u32 ctrl;
+	u16 resource;
+} __packed;
+
+struct imx_sc_msg_resp_misc_get_ctrl {
+	struct imx_sc_rpc_msg hdr;
+	u32 val;
+} __packed;
+
+/*
+ * This function sets a miscellaneous control value.
+ *
+ * @param[in]     ipc         IPC handle
+ * @param[in]     resource    resource the control is associated with
+ * @param[in]     ctrl        control to change
+ * @param[in]     val         value to apply to the control
+ *
+ * @return Returns 0 for success and < 0 for errors.
+ */
+
+int imx_sc_misc_set_control(struct imx_sc_ipc *ipc, u32 resource,
+			    u8 ctrl, u32 val)
+{
+	struct imx_sc_msg_req_misc_set_ctrl msg;
+	struct imx_sc_rpc_msg *hdr = &msg.hdr;
+
+	hdr->ver = IMX_SC_RPC_VERSION;
+	hdr->svc = (uint8_t)IMX_SC_RPC_SVC_MISC;
+	hdr->func = (uint8_t)IMX_SC_MISC_FUNC_SET_CONTROL;
+	hdr->size = 4;
+
+	msg.ctrl = ctrl;
+	msg.val = val;
+	msg.resource = resource;
+
+	return imx_scu_call_rpc(ipc, &msg, true);
+}
+EXPORT_SYMBOL(imx_sc_misc_set_control);
+
+/*
+ * This function gets a miscellaneous control value.
+ *
+ * @param[in]     ipc         IPC handle
+ * @param[in]     resource    resource the control is associated with
+ * @param[in]     ctrl        control to get
+ * @param[out]    val         pointer to return the control value
+ *
+ * @return Returns 0 for success and < 0 for errors.
+ */
+
+int imx_sc_misc_get_control(struct imx_sc_ipc *ipc, u32 resource,
+			    u8 ctrl, u32 *val)
+{
+	struct imx_sc_msg_req_misc_get_ctrl msg;
+	struct imx_sc_msg_resp_misc_get_ctrl *resp;
+	struct imx_sc_rpc_msg *hdr = &msg.hdr;
+	int ret;
+
+	hdr->ver = IMX_SC_RPC_VERSION;
+	hdr->svc = (uint8_t)IMX_SC_RPC_SVC_MISC;
+	hdr->func = (uint8_t)IMX_SC_MISC_FUNC_GET_CONTROL;
+	hdr->size = 3;
+
+	msg.ctrl = ctrl;
+	msg.resource = resource;
+
+	ret = imx_scu_call_rpc(ipc, &msg, true);
+	if (ret)
+		return ret;
+
+	resp = (struct imx_sc_msg_resp_misc_get_ctrl *)&msg;
+	if (val != NULL)
+		*val = resp->val;
+
+	return 0;
+}
+EXPORT_SYMBOL(imx_sc_misc_get_control);
diff --git a/include/linux/firmware/imx/sci.h b/include/linux/firmware/imx/sci.h
index ff3227ad8d7c..29ada609de03 100644
--- a/include/linux/firmware/imx/sci.h
+++ b/include/linux/firmware/imx/sci.h
@@ -13,4 +13,5 @@
 #include <linux/firmware/imx/ipc.h>
 #include <linux/firmware/imx/types.h>
 
+#include <linux/firmware/imx/svc/misc.h>
 #endif /* _SC_SCI_H */
diff --git a/include/linux/firmware/imx/svc/misc.h b/include/linux/firmware/imx/svc/misc.h
new file mode 100644
index 000000000000..e21c49aba92f
--- /dev/null
+++ b/include/linux/firmware/imx/svc/misc.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2016 Freescale Semiconductor, Inc.
+ * Copyright 2017~2018 NXP
+ *
+ * Header file containing the public API for the System Controller (SC)
+ * Miscellaneous (MISC) function.
+ *
+ * MISC_SVC (SVC) Miscellaneous Service
+ *
+ * Module for the Miscellaneous (MISC) service.
+ */
+
+#ifndef _SC_MISC_API_H
+#define _SC_MISC_API_H
+
+#include <linux/firmware/imx/sci.h>
+
+/*
+ * This type is used to indicate RPC MISC function calls.
+ */
+enum imx_misc_func {
+	IMX_SC_MISC_FUNC_UNKNOWN = 0,
+	IMX_SC_MISC_FUNC_SET_CONTROL = 1,
+	IMX_SC_MISC_FUNC_GET_CONTROL = 2,
+	IMX_SC_MISC_FUNC_SET_MAX_DMA_GROUP = 4,
+	IMX_SC_MISC_FUNC_SET_DMA_GROUP = 5,
+	IMX_SC_MISC_FUNC_SECO_IMAGE_LOAD = 8,
+	IMX_SC_MISC_FUNC_SECO_AUTHENTICATE = 9,
+	IMX_SC_MISC_FUNC_DEBUG_OUT = 10,
+	IMX_SC_MISC_FUNC_WAVEFORM_CAPTURE = 6,
+	IMX_SC_MISC_FUNC_BUILD_INFO = 15,
+	IMX_SC_MISC_FUNC_UNIQUE_ID = 19,
+	IMX_SC_MISC_FUNC_SET_ARI = 3,
+	IMX_SC_MISC_FUNC_BOOT_STATUS = 7,
+	IMX_SC_MISC_FUNC_BOOT_DONE = 14,
+	IMX_SC_MISC_FUNC_OTP_FUSE_READ = 11,
+	IMX_SC_MISC_FUNC_OTP_FUSE_WRITE = 17,
+	IMX_SC_MISC_FUNC_SET_TEMP = 12,
+	IMX_SC_MISC_FUNC_GET_TEMP = 13,
+	IMX_SC_MISC_FUNC_GET_BOOT_DEV = 16,
+	IMX_SC_MISC_FUNC_GET_BUTTON_STATUS = 18,
+};
+
+/*
+ * Control Functions
+ */
+
+int imx_sc_misc_set_control(struct imx_sc_ipc *ipc, u32 resource,
+			    u8 ctrl, u32 val);
+
+int imx_sc_misc_get_control(struct imx_sc_ipc *ipc, u32 resource,
+			    u8 ctrl, u32 *val);
+
+#endif /* _SC_MISC_API_H */
-- 
cgit v1.2.3


From 4a19edb60d0203cd5bf95a8b46ea8f63fd41194c Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sun, 7 Oct 2018 20:16:22 -0700
Subject: netlink: Pass extack to dump handlers

Declare extack in netlink_dump and pass to dump handlers via
netlink_callback. Add any extack message after the dump_done_errno
allowing error messages to be returned. This will be useful when
strict checking is done on dump requests, returning why the dump
fails EINVAL.

Signed-off-by: David Ahern <dsahern@gmail.com>
Acked-by: Christian Brauner <christian@brauner.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  |  1 +
 net/netlink/af_netlink.c | 12 +++++++++++-
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 71f121b66ca8..88c8a2d83eb3 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -176,6 +176,7 @@ struct netlink_callback {
 	void			*data;
 	/* the module that dump function belong to */
 	struct module		*module;
+	struct netlink_ext_ack	*extack;
 	u16			family;
 	u16			min_dump_alloc;
 	unsigned int		prev_seq, seq;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e3a0538ec0be..7ac585f33a9e 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2171,6 +2171,7 @@ EXPORT_SYMBOL(__nlmsg_put);
 static int netlink_dump(struct sock *sk)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
+	struct netlink_ext_ack extack = {};
 	struct netlink_callback *cb;
 	struct sk_buff *skb = NULL;
 	struct nlmsghdr *nlh;
@@ -2222,8 +2223,11 @@ static int netlink_dump(struct sock *sk)
 	skb_reserve(skb, skb_tailroom(skb) - alloc_size);
 	netlink_skb_set_owner_r(skb, sk);
 
-	if (nlk->dump_done_errno > 0)
+	if (nlk->dump_done_errno > 0) {
+		cb->extack = &extack;
 		nlk->dump_done_errno = cb->dump(skb, cb);
+		cb->extack = NULL;
+	}
 
 	if (nlk->dump_done_errno > 0 ||
 	    skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
@@ -2246,6 +2250,12 @@ static int netlink_dump(struct sock *sk)
 	memcpy(nlmsg_data(nlh), &nlk->dump_done_errno,
 	       sizeof(nlk->dump_done_errno));
 
+	if (extack._msg && nlk->flags & NETLINK_F_EXT_ACK) {
+		nlh->nlmsg_flags |= NLM_F_ACK_TLVS;
+		if (!nla_put_string(skb, NLMSGERR_ATTR_MSG, extack._msg))
+			nlmsg_end(skb, nlh);
+	}
+
 	if (sk_filter(sk, skb))
 		kfree_skb(skb);
 	else
-- 
cgit v1.2.3


From 89d35528d17d25819a755a2b52931e911baebc66 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sun, 7 Oct 2018 20:16:27 -0700
Subject: netlink: Add new socket option to enable strict checking on dumps

Add a new socket option, NETLINK_DUMP_STRICT_CHK, that userspace
can use via setsockopt to request strict checking of headers and
attributes on dump requests.

To get dump features such as kernel side filtering based on data in
the header or attributes appended to the dump request, userspace
must call setsockopt() for NETLINK_DUMP_STRICT_CHK and a non-zero
value. Since the netlink sock and its flags are private to the
af_netlink code, the strict checking flag is passed to dump handlers
via a flag in the netlink_callback struct.

For old userspace on new kernel there is no impact as all of the data
checks in later patches are wrapped in a check on the new strict flag.

For new userspace on old kernel, the setsockopt will fail and even if
new userspace sets data in the headers and appended attributes the
kernel will silently ignore it. Moving forward when the setsockopt
succeeds, the new userspace on old kernel means the dump request can
pass an attribute the kernel does not understand. The dump will then
fail as the older kernel does not understand it.

New userspace on new kernel setting the socket option gets the benefit
of the improved data dump.

Kernel side the NETLINK_DUMP_STRICT_CHK uapi is converted to a generic
NETLINK_F_STRICT_CHK flag which can potentially be leveraged for tighter
checking on the NEW, DEL, and SET commands.

Signed-off-by: David Ahern <dsahern@gmail.com>
Acked-by: Christian Brauner <christian@brauner.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h      |  1 +
 include/uapi/linux/netlink.h |  1 +
 net/netlink/af_netlink.c     | 21 ++++++++++++++++++++-
 net/netlink/af_netlink.h     |  1 +
 4 files changed, 23 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 88c8a2d83eb3..72580f1a72a2 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -179,6 +179,7 @@ struct netlink_callback {
 	struct netlink_ext_ack	*extack;
 	u16			family;
 	u16			min_dump_alloc;
+	bool			strict_check;
 	unsigned int		prev_seq, seq;
 	long			args[6];
 };
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 776bc92e9118..486ed1f0c0bc 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -155,6 +155,7 @@ enum nlmsgerr_attrs {
 #define NETLINK_LIST_MEMBERSHIPS	9
 #define NETLINK_CAP_ACK			10
 #define NETLINK_EXT_ACK			11
+#define NETLINK_DUMP_STRICT_CHK		12
 
 struct nl_pktinfo {
 	__u32	group;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7ac585f33a9e..e613a9f89600 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1706,6 +1706,13 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 			nlk->flags &= ~NETLINK_F_EXT_ACK;
 		err = 0;
 		break;
+	case NETLINK_DUMP_STRICT_CHK:
+		if (val)
+			nlk->flags |= NETLINK_F_STRICT_CHK;
+		else
+			nlk->flags &= ~NETLINK_F_STRICT_CHK;
+		err = 0;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 	}
@@ -1799,6 +1806,15 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
 			return -EFAULT;
 		err = 0;
 		break;
+	case NETLINK_DUMP_STRICT_CHK:
+		if (len < sizeof(int))
+			return -EINVAL;
+		len = sizeof(int);
+		val = nlk->flags & NETLINK_F_STRICT_CHK ? 1 : 0;
+		if (put_user(len, optlen) || put_user(val, optval))
+			return -EFAULT;
+		err = 0;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 	}
@@ -2282,9 +2298,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 			 const struct nlmsghdr *nlh,
 			 struct netlink_dump_control *control)
 {
+	struct netlink_sock *nlk, *nlk2;
 	struct netlink_callback *cb;
 	struct sock *sk;
-	struct netlink_sock *nlk;
 	int ret;
 
 	refcount_inc(&skb->users);
@@ -2318,6 +2334,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	cb->min_dump_alloc = control->min_dump_alloc;
 	cb->skb = skb;
 
+	nlk2 = nlk_sk(NETLINK_CB(skb).sk);
+	cb->strict_check = !!(nlk2->flags & NETLINK_F_STRICT_CHK);
+
 	if (control->start) {
 		ret = control->start(cb);
 		if (ret)
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 962de7b3c023..5f454c8de6a4 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -15,6 +15,7 @@
 #define NETLINK_F_LISTEN_ALL_NSID	0x10
 #define NETLINK_F_CAP_ACK		0x20
 #define NETLINK_F_EXT_ACK		0x40
+#define NETLINK_F_STRICT_CHK		0x80
 
 #define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)
 #define NLGRPLONGS(x)	(NLGRPSZ(x)/sizeof(unsigned long))
-- 
cgit v1.2.3


From 5390a8df769ec9ba9c995191bb0867430f602ebb Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 11 Sep 2018 18:40:06 +0300
Subject: mtd: spi-nor: add support to non-uniform SFDP SPI NOR flash memories

Based on Cyrille Pitchen's patch https://lkml.org/lkml/2017/3/22/935.

This patch is a transitional patch in introducing  the support of
SFDP SPI memories with non-uniform erase sizes like Spansion s25fs512s.
Non-uniform erase maps will be used later when initialized based on the
SFDP data.

Introduce the memory erase map which splits the memory array into one
or many erase regions. Each erase region supports up to 4 erase types,
as defined by the JEDEC JESD216B (SFDP) specification.

To be backward compatible, the erase map of uniform SPI NOR flash memories
is initialized so it contains only one erase region and this erase region
supports only one erase command. Hence a single size is used to erase any
sector/block of the memory.

Besides, since the algorithm used to erase sectors on non-uniform SPI NOR
flash memories is quite expensive, when possible, the erase map is tuned
to come back to the uniform case.

The 'erase with the best command, move forward and repeat' approach was
suggested by Cristian Birsan in a brainstorm session, so:

Suggested-by: Cristian Birsan <cristian.birsan@microchip.com>
Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 599 +++++++++++++++++++++++++++++++++++++++---
 include/linux/mtd/spi-nor.h   | 107 ++++++++
 2 files changed, 664 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index f028277fb1ce..c53885971b67 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -18,6 +18,7 @@
 #include <linux/math64.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
+#include <linux/sort.h>
 
 #include <linux/mtd/mtd.h>
 #include <linux/of_platform.h>
@@ -260,6 +261,18 @@ static void spi_nor_set_4byte_opcodes(struct spi_nor *nor,
 	nor->read_opcode = spi_nor_convert_3to4_read(nor->read_opcode);
 	nor->program_opcode = spi_nor_convert_3to4_program(nor->program_opcode);
 	nor->erase_opcode = spi_nor_convert_3to4_erase(nor->erase_opcode);
+
+	if (!spi_nor_has_uniform_erase(nor)) {
+		struct spi_nor_erase_map *map = &nor->erase_map;
+		struct spi_nor_erase_type *erase;
+		int i;
+
+		for (i = 0; i < SNOR_ERASE_TYPE_MAX; i++) {
+			erase = &map->erase_type[i];
+			erase->opcode =
+				spi_nor_convert_3to4_erase(erase->opcode);
+		}
+	}
 }
 
 /* Enable/disable 4-byte addressing mode. */
@@ -497,6 +510,277 @@ static int spi_nor_erase_sector(struct spi_nor *nor, u32 addr)
 	return nor->write_reg(nor, nor->erase_opcode, buf, nor->addr_width);
 }
 
+/**
+ * spi_nor_div_by_erase_size() - calculate remainder and update new dividend
+ * @erase:	pointer to a structure that describes a SPI NOR erase type
+ * @dividend:	dividend value
+ * @remainder:	pointer to u32 remainder (will be updated)
+ *
+ * Return: the result of the division
+ */
+static u64 spi_nor_div_by_erase_size(const struct spi_nor_erase_type *erase,
+				     u64 dividend, u32 *remainder)
+{
+	/* JEDEC JESD216B Standard imposes erase sizes to be power of 2. */
+	*remainder = (u32)dividend & erase->size_mask;
+	return dividend >> erase->size_shift;
+}
+
+/**
+ * spi_nor_find_best_erase_type() - find the best erase type for the given
+ *				    offset in the serial flash memory and the
+ *				    number of bytes to erase. The region in
+ *				    which the address fits is expected to be
+ *				    provided.
+ * @map:	the erase map of the SPI NOR
+ * @region:	pointer to a structure that describes a SPI NOR erase region
+ * @addr:	offset in the serial flash memory
+ * @len:	number of bytes to erase
+ *
+ * Return: a pointer to the best fitted erase type, NULL otherwise.
+ */
+static const struct spi_nor_erase_type *
+spi_nor_find_best_erase_type(const struct spi_nor_erase_map *map,
+			     const struct spi_nor_erase_region *region,
+			     u64 addr, u32 len)
+{
+	const struct spi_nor_erase_type *erase;
+	u32 rem;
+	int i;
+	u8 erase_mask = region->offset & SNOR_ERASE_TYPE_MASK;
+
+	/*
+	 * Erase types are ordered by size, with the biggest erase type at
+	 * index 0.
+	 */
+	for (i = SNOR_ERASE_TYPE_MAX - 1; i >= 0; i--) {
+		/* Does the erase region support the tested erase type? */
+		if (!(erase_mask & BIT(i)))
+			continue;
+
+		erase = &map->erase_type[i];
+
+		/* Don't erase more than what the user has asked for. */
+		if (erase->size > len)
+			continue;
+
+		/* Alignment is not mandatory for overlaid regions */
+		if (region->offset & SNOR_OVERLAID_REGION)
+			return erase;
+
+		spi_nor_div_by_erase_size(erase, addr, &rem);
+		if (rem)
+			continue;
+		else
+			return erase;
+	}
+
+	return NULL;
+}
+
+/**
+ * spi_nor_region_next() - get the next spi nor region
+ * @region:	pointer to a structure that describes a SPI NOR erase region
+ *
+ * Return: the next spi nor region or NULL if last region.
+ */
+static struct spi_nor_erase_region *
+spi_nor_region_next(struct spi_nor_erase_region *region)
+{
+	if (spi_nor_region_is_last(region))
+		return NULL;
+	region++;
+	return region;
+}
+
+/**
+ * spi_nor_find_erase_region() - find the region of the serial flash memory in
+ *				 which the offset fits
+ * @map:	the erase map of the SPI NOR
+ * @addr:	offset in the serial flash memory
+ *
+ * Return: a pointer to the spi_nor_erase_region struct, ERR_PTR(-errno)
+ *	   otherwise.
+ */
+static struct spi_nor_erase_region *
+spi_nor_find_erase_region(const struct spi_nor_erase_map *map, u64 addr)
+{
+	struct spi_nor_erase_region *region = map->regions;
+	u64 region_start = region->offset & ~SNOR_ERASE_FLAGS_MASK;
+	u64 region_end = region_start + region->size;
+
+	while (addr < region_start || addr >= region_end) {
+		region = spi_nor_region_next(region);
+		if (!region)
+			return ERR_PTR(-EINVAL);
+
+		region_start = region->offset & ~SNOR_ERASE_FLAGS_MASK;
+		region_end = region_start + region->size;
+	}
+
+	return region;
+}
+
+/**
+ * spi_nor_init_erase_cmd() - initialize an erase command
+ * @region:	pointer to a structure that describes a SPI NOR erase region
+ * @erase:	pointer to a structure that describes a SPI NOR erase type
+ *
+ * Return: the pointer to the allocated erase command, ERR_PTR(-errno)
+ *	   otherwise.
+ */
+static struct spi_nor_erase_command *
+spi_nor_init_erase_cmd(const struct spi_nor_erase_region *region,
+		       const struct spi_nor_erase_type *erase)
+{
+	struct spi_nor_erase_command *cmd;
+
+	cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+	if (!cmd)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&cmd->list);
+	cmd->opcode = erase->opcode;
+	cmd->count = 1;
+
+	if (region->offset & SNOR_OVERLAID_REGION)
+		cmd->size = region->size;
+	else
+		cmd->size = erase->size;
+
+	return cmd;
+}
+
+/**
+ * spi_nor_destroy_erase_cmd_list() - destroy erase command list
+ * @erase_list:	list of erase commands
+ */
+static void spi_nor_destroy_erase_cmd_list(struct list_head *erase_list)
+{
+	struct spi_nor_erase_command *cmd, *next;
+
+	list_for_each_entry_safe(cmd, next, erase_list, list) {
+		list_del(&cmd->list);
+		kfree(cmd);
+	}
+}
+
+/**
+ * spi_nor_init_erase_cmd_list() - initialize erase command list
+ * @nor:	pointer to a 'struct spi_nor'
+ * @erase_list:	list of erase commands to be executed once we validate that the
+ *		erase can be performed
+ * @addr:	offset in the serial flash memory
+ * @len:	number of bytes to erase
+ *
+ * Builds the list of best fitted erase commands and verifies if the erase can
+ * be performed.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_init_erase_cmd_list(struct spi_nor *nor,
+				       struct list_head *erase_list,
+				       u64 addr, u32 len)
+{
+	const struct spi_nor_erase_map *map = &nor->erase_map;
+	const struct spi_nor_erase_type *erase, *prev_erase = NULL;
+	struct spi_nor_erase_region *region;
+	struct spi_nor_erase_command *cmd = NULL;
+	u64 region_end;
+	int ret = -EINVAL;
+
+	region = spi_nor_find_erase_region(map, addr);
+	if (IS_ERR(region))
+		return PTR_ERR(region);
+
+	region_end = spi_nor_region_end(region);
+
+	while (len) {
+		erase = spi_nor_find_best_erase_type(map, region, addr, len);
+		if (!erase)
+			goto destroy_erase_cmd_list;
+
+		if (prev_erase != erase ||
+		    region->offset & SNOR_OVERLAID_REGION) {
+			cmd = spi_nor_init_erase_cmd(region, erase);
+			if (IS_ERR(cmd)) {
+				ret = PTR_ERR(cmd);
+				goto destroy_erase_cmd_list;
+			}
+
+			list_add_tail(&cmd->list, erase_list);
+		} else {
+			cmd->count++;
+		}
+
+		addr += cmd->size;
+		len -= cmd->size;
+
+		if (len && addr >= region_end) {
+			region = spi_nor_region_next(region);
+			if (!region)
+				goto destroy_erase_cmd_list;
+			region_end = spi_nor_region_end(region);
+		}
+
+		prev_erase = erase;
+	}
+
+	return 0;
+
+destroy_erase_cmd_list:
+	spi_nor_destroy_erase_cmd_list(erase_list);
+	return ret;
+}
+
+/**
+ * spi_nor_erase_multi_sectors() - perform a non-uniform erase
+ * @nor:	pointer to a 'struct spi_nor'
+ * @addr:	offset in the serial flash memory
+ * @len:	number of bytes to erase
+ *
+ * Build a list of best fitted erase commands and execute it once we validate
+ * that the erase can be performed.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_erase_multi_sectors(struct spi_nor *nor, u64 addr, u32 len)
+{
+	LIST_HEAD(erase_list);
+	struct spi_nor_erase_command *cmd, *next;
+	int ret;
+
+	ret = spi_nor_init_erase_cmd_list(nor, &erase_list, addr, len);
+	if (ret)
+		return ret;
+
+	list_for_each_entry_safe(cmd, next, &erase_list, list) {
+		nor->erase_opcode = cmd->opcode;
+		while (cmd->count) {
+			write_enable(nor);
+
+			ret = spi_nor_erase_sector(nor, addr);
+			if (ret)
+				goto destroy_erase_cmd_list;
+
+			addr += cmd->size;
+			cmd->count--;
+
+			ret = spi_nor_wait_till_ready(nor);
+			if (ret)
+				goto destroy_erase_cmd_list;
+		}
+		list_del(&cmd->list);
+		kfree(cmd);
+	}
+
+	return 0;
+
+destroy_erase_cmd_list:
+	spi_nor_destroy_erase_cmd_list(&erase_list);
+	return ret;
+}
+
 /*
  * Erase an address range on the nor chip.  The address range may extend
  * one or more erase sectors.  Return an error is there is a problem erasing.
@@ -511,9 +795,11 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
 	dev_dbg(nor->dev, "at 0x%llx, len %lld\n", (long long)instr->addr,
 			(long long)instr->len);
 
-	div_u64_rem(instr->len, mtd->erasesize, &rem);
-	if (rem)
-		return -EINVAL;
+	if (spi_nor_has_uniform_erase(nor)) {
+		div_u64_rem(instr->len, mtd->erasesize, &rem);
+		if (rem)
+			return -EINVAL;
+	}
 
 	addr = instr->addr;
 	len = instr->len;
@@ -552,7 +838,7 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
 	 */
 
 	/* "sector"-at-a-time erase */
-	} else {
+	} else if (spi_nor_has_uniform_erase(nor)) {
 		while (len) {
 			write_enable(nor);
 
@@ -567,6 +853,12 @@ static int spi_nor_erase(struct mtd_info *mtd, struct erase_info *instr)
 			if (ret)
 				goto erase_err;
 		}
+
+	/* erase multiple sectors */
+	} else {
+		ret = spi_nor_erase_multi_sectors(nor, addr, len);
+		if (ret)
+			goto erase_err;
 	}
 
 	write_disable(nor);
@@ -2165,6 +2457,116 @@ static const struct sfdp_bfpt_erase sfdp_bfpt_erases[] = {
 
 static int spi_nor_hwcaps_read2cmd(u32 hwcaps);
 
+/**
+ * spi_nor_set_erase_type() - set a SPI NOR erase type
+ * @erase:	pointer to a structure that describes a SPI NOR erase type
+ * @size:	the size of the sector/block erased by the erase type
+ * @opcode:	the SPI command op code to erase the sector/block
+ */
+static void spi_nor_set_erase_type(struct spi_nor_erase_type *erase,
+				   u32 size, u8 opcode)
+{
+	erase->size = size;
+	erase->opcode = opcode;
+	/* JEDEC JESD216B Standard imposes erase sizes to be power of 2. */
+	erase->size_shift = ffs(erase->size) - 1;
+	erase->size_mask = (1 << erase->size_shift) - 1;
+}
+
+/**
+ * spi_nor_set_erase_settings_from_bfpt() - set erase type settings from BFPT
+ * @erase:	pointer to a structure that describes a SPI NOR erase type
+ * @size:	the size of the sector/block erased by the erase type
+ * @opcode:	the SPI command op code to erase the sector/block
+ * @i:		erase type index as sorted in the Basic Flash Parameter Table
+ *
+ * The supported Erase Types will be sorted at init in ascending order, with
+ * the smallest Erase Type size being the first member in the erase_type array
+ * of the spi_nor_erase_map structure. Save the Erase Type index as sorted in
+ * the Basic Flash Parameter Table since it will be used later on to
+ * synchronize with the supported Erase Types defined in SFDP optional tables.
+ */
+static void
+spi_nor_set_erase_settings_from_bfpt(struct spi_nor_erase_type *erase,
+				     u32 size, u8 opcode, u8 i)
+{
+	erase->idx = i;
+	spi_nor_set_erase_type(erase, size, opcode);
+}
+
+/**
+ * spi_nor_map_cmp_erase_type() - compare the map's erase types by size
+ * @l:	member in the left half of the map's erase_type array
+ * @r:	member in the right half of the map's erase_type array
+ *
+ * Comparison function used in the sort() call to sort in ascending order the
+ * map's erase types, the smallest erase type size being the first member in the
+ * sorted erase_type array.
+ *
+ * Return: the result of @l->size - @r->size
+ */
+static int spi_nor_map_cmp_erase_type(const void *l, const void *r)
+{
+	const struct spi_nor_erase_type *left = l, *right = r;
+
+	return left->size - right->size;
+}
+
+/**
+ * spi_nor_regions_sort_erase_types() - sort erase types in each region
+ * @map:	the erase map of the SPI NOR
+ *
+ * Function assumes that the erase types defined in the erase map are already
+ * sorted in ascending order, with the smallest erase type size being the first
+ * member in the erase_type array. It replicates the sort done for the map's
+ * erase types. Each region's erase bitmask will indicate which erase types are
+ * supported from the sorted erase types defined in the erase map.
+ * Sort the all region's erase type at init in order to speed up the process of
+ * finding the best erase command at runtime.
+ */
+static void spi_nor_regions_sort_erase_types(struct spi_nor_erase_map *map)
+{
+	struct spi_nor_erase_region *region = map->regions;
+	struct spi_nor_erase_type *erase_type = map->erase_type;
+	int i;
+	u8 region_erase_mask, sorted_erase_mask;
+
+	while (region) {
+		region_erase_mask = region->offset & SNOR_ERASE_TYPE_MASK;
+
+		/* Replicate the sort done for the map's erase types. */
+		sorted_erase_mask = 0;
+		for (i = 0; i < SNOR_ERASE_TYPE_MAX; i++)
+			if (erase_type[i].size &&
+			    region_erase_mask & BIT(erase_type[i].idx))
+				sorted_erase_mask |= BIT(i);
+
+		/* Overwrite erase mask. */
+		region->offset = (region->offset & ~SNOR_ERASE_TYPE_MASK) |
+				 sorted_erase_mask;
+
+		region = spi_nor_region_next(region);
+	}
+}
+
+/**
+ * spi_nor_init_uniform_erase_map() - Initialize uniform erase map
+ * @map:		the erase map of the SPI NOR
+ * @erase_mask:		bitmask encoding erase types that can erase the entire
+ *			flash memory
+ * @flash_size:		the spi nor flash memory size
+ */
+static void spi_nor_init_uniform_erase_map(struct spi_nor_erase_map *map,
+					   u8 erase_mask, u64 flash_size)
+{
+	/* Offset 0 with erase_mask and SNOR_LAST_REGION bit set */
+	map->uniform_region.offset = (erase_mask & SNOR_ERASE_TYPE_MASK) |
+				     SNOR_LAST_REGION;
+	map->uniform_region.size = flash_size;
+	map->regions = &map->uniform_region;
+	map->uniform_erase_type = erase_mask;
+}
+
 /**
  * spi_nor_parse_bfpt() - read and parse the Basic Flash Parameter Table.
  * @nor:		pointer to a 'struct spi_nor'
@@ -2199,12 +2601,14 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor,
 			      const struct sfdp_parameter_header *bfpt_header,
 			      struct spi_nor_flash_parameter *params)
 {
-	struct mtd_info *mtd = &nor->mtd;
+	struct spi_nor_erase_map *map = &nor->erase_map;
+	struct spi_nor_erase_type *erase_type = map->erase_type;
 	struct sfdp_bfpt bfpt;
 	size_t len;
 	int i, cmd, err;
 	u32 addr;
 	u16 half;
+	u8 erase_mask;
 
 	/* JESD216 Basic Flash Parameter Table length is at least 9 DWORDs. */
 	if (bfpt_header->length < BFPT_DWORD_MAX_JESD216)
@@ -2273,7 +2677,12 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor,
 		spi_nor_set_read_settings_from_bfpt(read, half, rd->proto);
 	}
 
-	/* Sector Erase settings. */
+	/*
+	 * Sector Erase settings. Reinitialize the uniform erase map using the
+	 * Erase Types defined in the bfpt table.
+	 */
+	erase_mask = 0;
+	memset(&nor->erase_map, 0, sizeof(nor->erase_map));
 	for (i = 0; i < ARRAY_SIZE(sfdp_bfpt_erases); i++) {
 		const struct sfdp_bfpt_erase *er = &sfdp_bfpt_erases[i];
 		u32 erasesize;
@@ -2288,18 +2697,25 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor,
 
 		erasesize = 1U << erasesize;
 		opcode = (half >> 8) & 0xff;
-#ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS
-		if (erasesize == SZ_4K) {
-			nor->erase_opcode = opcode;
-			mtd->erasesize = erasesize;
-			break;
-		}
-#endif
-		if (!mtd->erasesize || mtd->erasesize < erasesize) {
-			nor->erase_opcode = opcode;
-			mtd->erasesize = erasesize;
-		}
+		erase_mask |= BIT(i);
+		spi_nor_set_erase_settings_from_bfpt(&erase_type[i], erasesize,
+						     opcode, i);
 	}
+	spi_nor_init_uniform_erase_map(map, erase_mask, params->size);
+	/*
+	 * Sort all the map's Erase Types in ascending order with the smallest
+	 * erase size being the first member in the erase_type array.
+	 */
+	sort(erase_type, SNOR_ERASE_TYPE_MAX, sizeof(erase_type[0]),
+	     spi_nor_map_cmp_erase_type, NULL);
+	/*
+	 * Sort the erase types in the uniform region in order to update the
+	 * uniform_erase_type bitmask. The bitmask will be used later on when
+	 * selecting the uniform erase.
+	 */
+	spi_nor_regions_sort_erase_types(map);
+	map->uniform_erase_type = map->uniform_region.offset &
+				  SNOR_ERASE_TYPE_MASK;
 
 	/* Stop here if not JESD216 rev A or later. */
 	if (bfpt_header->length < BFPT_DWORD_MAX)
@@ -2455,6 +2871,9 @@ static int spi_nor_init_params(struct spi_nor *nor,
 			       const struct flash_info *info,
 			       struct spi_nor_flash_parameter *params)
 {
+	struct spi_nor_erase_map *map = &nor->erase_map;
+	u8 i, erase_mask;
+
 	/* Set legacy flash parameters as default. */
 	memset(params, 0, sizeof(*params));
 
@@ -2494,6 +2913,28 @@ static int spi_nor_init_params(struct spi_nor *nor,
 	spi_nor_set_pp_settings(&params->page_programs[SNOR_CMD_PP],
 				SPINOR_OP_PP, SNOR_PROTO_1_1_1);
 
+	/*
+	 * Sector Erase settings. Sort Erase Types in ascending order, with the
+	 * smallest erase size starting at BIT(0).
+	 */
+	erase_mask = 0;
+	i = 0;
+	if (info->flags & SECT_4K_PMC) {
+		erase_mask |= BIT(i);
+		spi_nor_set_erase_type(&map->erase_type[i], 4096u,
+				       SPINOR_OP_BE_4K_PMC);
+		i++;
+	} else if (info->flags & SECT_4K) {
+		erase_mask |= BIT(i);
+		spi_nor_set_erase_type(&map->erase_type[i], 4096u,
+				       SPINOR_OP_BE_4K);
+		i++;
+	}
+	erase_mask |= BIT(i);
+	spi_nor_set_erase_type(&map->erase_type[i], info->sector_size,
+			       SPINOR_OP_SE);
+	spi_nor_init_uniform_erase_map(map, erase_mask, params->size);
+
 	/* Select the procedure to set the Quad Enable bit. */
 	if (params->hwcaps.mask & (SNOR_HWCAPS_READ_QUAD |
 				   SNOR_HWCAPS_PP_QUAD)) {
@@ -2521,20 +2962,20 @@ static int spi_nor_init_params(struct spi_nor *nor,
 			params->quad_enable = info->quad_enable;
 	}
 
-	/* Override the parameters with data read from SFDP tables. */
-	nor->addr_width = 0;
-	nor->mtd.erasesize = 0;
 	if ((info->flags & (SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ)) &&
 	    !(info->flags & SPI_NOR_SKIP_SFDP)) {
 		struct spi_nor_flash_parameter sfdp_params;
+		struct spi_nor_erase_map prev_map;
 
 		memcpy(&sfdp_params, params, sizeof(sfdp_params));
-		if (spi_nor_parse_sfdp(nor, &sfdp_params)) {
-			nor->addr_width = 0;
-			nor->mtd.erasesize = 0;
-		} else {
+		memcpy(&prev_map, &nor->erase_map, sizeof(prev_map));
+
+		if (spi_nor_parse_sfdp(nor, &sfdp_params))
+			/* restore previous erase map */
+			memcpy(&nor->erase_map, &prev_map,
+			       sizeof(nor->erase_map));
+		else
 			memcpy(params, &sfdp_params, sizeof(*params));
-		}
 	}
 
 	return 0;
@@ -2643,29 +3084,103 @@ static int spi_nor_select_pp(struct spi_nor *nor,
 	return 0;
 }
 
-static int spi_nor_select_erase(struct spi_nor *nor,
-				const struct flash_info *info)
+/**
+ * spi_nor_select_uniform_erase() - select optimum uniform erase type
+ * @map:		the erase map of the SPI NOR
+ * @wanted_size:	the erase type size to search for. Contains the value of
+ *			info->sector_size or of the "small sector" size in case
+ *			CONFIG_MTD_SPI_NOR_USE_4K_SECTORS is defined.
+ *
+ * Once the optimum uniform sector erase command is found, disable all the
+ * other.
+ *
+ * Return: pointer to erase type on success, NULL otherwise.
+ */
+static const struct spi_nor_erase_type *
+spi_nor_select_uniform_erase(struct spi_nor_erase_map *map,
+			     const u32 wanted_size)
 {
-	struct mtd_info *mtd = &nor->mtd;
+	const struct spi_nor_erase_type *tested_erase, *erase = NULL;
+	int i;
+	u8 uniform_erase_type = map->uniform_erase_type;
 
-	/* Do nothing if already configured from SFDP. */
-	if (mtd->erasesize)
-		return 0;
+	for (i = SNOR_ERASE_TYPE_MAX - 1; i >= 0; i--) {
+		if (!(uniform_erase_type & BIT(i)))
+			continue;
+
+		tested_erase = &map->erase_type[i];
+
+		/*
+		 * If the current erase size is the one, stop here:
+		 * we have found the right uniform Sector Erase command.
+		 */
+		if (tested_erase->size == wanted_size) {
+			erase = tested_erase;
+			break;
+		}
 
+		/*
+		 * Otherwise, the current erase size is still a valid canditate.
+		 * Select the biggest valid candidate.
+		 */
+		if (!erase && tested_erase->size)
+			erase = tested_erase;
+			/* keep iterating to find the wanted_size */
+	}
+
+	if (!erase)
+		return NULL;
+
+	/* Disable all other Sector Erase commands. */
+	map->uniform_erase_type &= ~SNOR_ERASE_TYPE_MASK;
+	map->uniform_erase_type |= BIT(erase - map->erase_type);
+	return erase;
+}
+
+static int spi_nor_select_erase(struct spi_nor *nor, u32 wanted_size)
+{
+	struct spi_nor_erase_map *map = &nor->erase_map;
+	const struct spi_nor_erase_type *erase = NULL;
+	struct mtd_info *mtd = &nor->mtd;
+	int i;
+
+	/*
+	 * The previous implementation handling Sector Erase commands assumed
+	 * that the SPI flash memory has an uniform layout then used only one
+	 * of the supported erase sizes for all Sector Erase commands.
+	 * So to be backward compatible, the new implementation also tries to
+	 * manage the SPI flash memory as uniform with a single erase sector
+	 * size, when possible.
+	 */
 #ifdef CONFIG_MTD_SPI_NOR_USE_4K_SECTORS
 	/* prefer "small sector" erase if possible */
-	if (info->flags & SECT_4K) {
-		nor->erase_opcode = SPINOR_OP_BE_4K;
-		mtd->erasesize = 4096;
-	} else if (info->flags & SECT_4K_PMC) {
-		nor->erase_opcode = SPINOR_OP_BE_4K_PMC;
-		mtd->erasesize = 4096;
-	} else
+	wanted_size = 4096u;
 #endif
-	{
-		nor->erase_opcode = SPINOR_OP_SE;
-		mtd->erasesize = info->sector_size;
+
+	if (spi_nor_has_uniform_erase(nor)) {
+		erase = spi_nor_select_uniform_erase(map, wanted_size);
+		if (!erase)
+			return -EINVAL;
+		nor->erase_opcode = erase->opcode;
+		mtd->erasesize = erase->size;
+		return 0;
 	}
+
+	/*
+	 * For non-uniform SPI flash memory, set mtd->erasesize to the
+	 * maximum erase sector size. No need to set nor->erase_opcode.
+	 */
+	for (i = SNOR_ERASE_TYPE_MAX - 1; i >= 0; i--) {
+		if (map->erase_type[i].size) {
+			erase = &map->erase_type[i];
+			break;
+		}
+	}
+
+	if (!erase)
+		return -EINVAL;
+
+	mtd->erasesize = erase->size;
 	return 0;
 }
 
@@ -2712,7 +3227,7 @@ static int spi_nor_setup(struct spi_nor *nor, const struct flash_info *info,
 	}
 
 	/* Select the Sector Erase command. */
-	err = spi_nor_select_erase(nor, info);
+	err = spi_nor_select_erase(nor, info->sector_size);
 	if (err) {
 		dev_err(nor->dev,
 			"can't select erase settings supported by both the SPI controller and memory.\n");
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index c922e97f205a..894cbf88bf2e 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -238,6 +238,94 @@ enum spi_nor_option_flags {
 	SNOR_F_BROKEN_RESET	= BIT(6),
 };
 
+/**
+ * struct spi_nor_erase_type - Structure to describe a SPI NOR erase type
+ * @size:		the size of the sector/block erased by the erase type.
+ *			JEDEC JESD216B imposes erase sizes to be a power of 2.
+ * @size_shift:		@size is a power of 2, the shift is stored in
+ *			@size_shift.
+ * @size_mask:		the size mask based on @size_shift.
+ * @opcode:		the SPI command op code to erase the sector/block.
+ * @idx:		Erase Type index as sorted in the Basic Flash Parameter
+ *			Table. It will be used to synchronize the supported
+ *			Erase Types with the ones identified in the SFDP
+ *			optional tables.
+ */
+struct spi_nor_erase_type {
+	u32	size;
+	u32	size_shift;
+	u32	size_mask;
+	u8	opcode;
+	u8	idx;
+};
+
+/**
+ * struct spi_nor_erase_command - Used for non-uniform erases
+ * The structure is used to describe a list of erase commands to be executed
+ * once we validate that the erase can be performed. The elements in the list
+ * are run-length encoded.
+ * @list:		for inclusion into the list of erase commands.
+ * @count:		how many times the same erase command should be
+ *			consecutively used.
+ * @size:		the size of the sector/block erased by the command.
+ * @opcode:		the SPI command op code to erase the sector/block.
+ */
+struct spi_nor_erase_command {
+	struct list_head	list;
+	u32			count;
+	u32			size;
+	u8			opcode;
+};
+
+/**
+ * struct spi_nor_erase_region - Structure to describe a SPI NOR erase region
+ * @offset:		the offset in the data array of erase region start.
+ *			LSB bits are used as a bitmask encoding flags to
+ *			determine if this region is overlaid, if this region is
+ *			the last in the SPI NOR flash memory and to indicate
+ *			all the supported erase commands inside this region.
+ *			The erase types are sorted in ascending order with the
+ *			smallest Erase Type size being at BIT(0).
+ * @size:		the size of the region in bytes.
+ */
+struct spi_nor_erase_region {
+	u64		offset;
+	u64		size;
+};
+
+#define SNOR_ERASE_TYPE_MAX	4
+#define SNOR_ERASE_TYPE_MASK	GENMASK_ULL(SNOR_ERASE_TYPE_MAX - 1, 0)
+
+#define SNOR_LAST_REGION	BIT(4)
+#define SNOR_OVERLAID_REGION	BIT(5)
+
+#define SNOR_ERASE_FLAGS_MAX	6
+#define SNOR_ERASE_FLAGS_MASK	GENMASK_ULL(SNOR_ERASE_FLAGS_MAX - 1, 0)
+
+/**
+ * struct spi_nor_erase_map - Structure to describe the SPI NOR erase map
+ * @regions:		array of erase regions. The regions are consecutive in
+ *			address space. Walking through the regions is done
+ *			incrementally.
+ * @uniform_region:	a pre-allocated erase region for SPI NOR with a uniform
+ *			sector size (legacy implementation).
+ * @erase_type:		an array of erase types shared by all the regions.
+ *			The erase types are sorted in ascending order, with the
+ *			smallest Erase Type size being the first member in the
+ *			erase_type array.
+ * @uniform_erase_type:	bitmask encoding erase types that can erase the
+ *			entire memory. This member is completed at init by
+ *			uniform and non-uniform SPI NOR flash memories if they
+ *			support at least one erase type that can erase the
+ *			entire memory.
+ */
+struct spi_nor_erase_map {
+	struct spi_nor_erase_region	*regions;
+	struct spi_nor_erase_region	uniform_region;
+	struct spi_nor_erase_type	erase_type[SNOR_ERASE_TYPE_MAX];
+	u8				uniform_erase_type;
+};
+
 /**
  * struct flash_info - Forward declaration of a structure used internally by
  *		       spi_nor_scan()
@@ -262,6 +350,7 @@ struct flash_info;
  * @write_proto:	the SPI protocol for write operations
  * @reg_proto		the SPI protocol for read_reg/write_reg/erase operations
  * @cmd_buf:		used by the write_reg
+ * @erase_map:		the erase map of the SPI NOR
  * @prepare:		[OPTIONAL] do some preparations for the
  *			read/write/erase/lock/unlock operations
  * @unprepare:		[OPTIONAL] do some post work after the
@@ -297,6 +386,7 @@ struct spi_nor {
 	bool			sst_write_second;
 	u32			flags;
 	u8			cmd_buf[SPI_NOR_MAX_CMD_SIZE];
+	struct spi_nor_erase_map	erase_map;
 
 	int (*prepare)(struct spi_nor *nor, enum spi_nor_ops ops);
 	void (*unprepare)(struct spi_nor *nor, enum spi_nor_ops ops);
@@ -317,6 +407,23 @@ struct spi_nor {
 	void *priv;
 };
 
+static u64 __maybe_unused
+spi_nor_region_is_last(const struct spi_nor_erase_region *region)
+{
+	return region->offset & SNOR_LAST_REGION;
+}
+
+static u64 __maybe_unused
+spi_nor_region_end(const struct spi_nor_erase_region *region)
+{
+	return (region->offset & ~SNOR_ERASE_FLAGS_MASK) + region->size;
+}
+
+static bool __maybe_unused spi_nor_has_uniform_erase(const struct spi_nor *nor)
+{
+	return !!nor->erase_map.uniform_erase_type;
+}
+
 static inline void spi_nor_set_flash_node(struct spi_nor *nor,
 					  struct device_node *np)
 {
-- 
cgit v1.2.3


From b038e8e3be724dd293a0849f5fe267fdddcca9dd Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 11 Sep 2018 18:40:07 +0300
Subject: mtd: spi-nor: parse SFDP Sector Map Parameter Table

Add support for the SFDP (JESD216B) Sector Map Parameter Table. This
table is optional, but when available, we parse it to identify the
location and size of sectors within the main data array of the
flash memory device and to identify which Erase Types are supported by
each sector.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Reviewed-by: Marek Vasut <marek.vasut@gmail.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
---
 drivers/mtd/spi-nor/spi-nor.c | 319 +++++++++++++++++++++++++++++++++++++++---
 include/linux/mtd/spi-nor.h   |  12 ++
 2 files changed, 315 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c
index c53885971b67..7993dd5c0c21 100644
--- a/drivers/mtd/spi-nor/spi-nor.c
+++ b/drivers/mtd/spi-nor/spi-nor.c
@@ -2155,6 +2155,36 @@ spi_nor_set_pp_settings(struct spi_nor_pp_command *pp,
  * Serial Flash Discoverable Parameters (SFDP) parsing.
  */
 
+/**
+ * spi_nor_read_raw() - raw read of serial flash memory. read_opcode,
+ *			addr_width and read_dummy members of the struct spi_nor
+ *			should be previously
+ * set.
+ * @nor:	pointer to a 'struct spi_nor'
+ * @addr:	offset in the serial flash memory
+ * @len:	number of bytes to read
+ * @buf:	buffer where the data is copied into
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_read_raw(struct spi_nor *nor, u32 addr, size_t len, u8 *buf)
+{
+	int ret;
+
+	while (len) {
+		ret = nor->read(nor, addr, len, buf);
+		if (!ret || ret > len)
+			return -EIO;
+		if (ret < 0)
+			return ret;
+
+		buf += ret;
+		addr += ret;
+		len -= ret;
+	}
+	return 0;
+}
+
 /**
  * spi_nor_read_sfdp() - read Serial Flash Discoverable Parameters.
  * @nor:	pointer to a 'struct spi_nor'
@@ -2182,22 +2212,8 @@ static int spi_nor_read_sfdp(struct spi_nor *nor, u32 addr,
 	nor->addr_width = 3;
 	nor->read_dummy = 8;
 
-	while (len) {
-		ret = nor->read(nor, addr, len, (u8 *)buf);
-		if (!ret || ret > len) {
-			ret = -EIO;
-			goto read_err;
-		}
-		if (ret < 0)
-			goto read_err;
-
-		buf += ret;
-		addr += ret;
-		len -= ret;
-	}
-	ret = 0;
+	ret = spi_nor_read_raw(nor, addr, len, buf);
 
-read_err:
 	nor->read_opcode = read_opcode;
 	nor->addr_width = addr_width;
 	nor->read_dummy = read_dummy;
@@ -2757,6 +2773,277 @@ static int spi_nor_parse_bfpt(struct spi_nor *nor,
 	return 0;
 }
 
+#define SMPT_CMD_ADDRESS_LEN_MASK		GENMASK(23, 22)
+#define SMPT_CMD_ADDRESS_LEN_0			(0x0UL << 22)
+#define SMPT_CMD_ADDRESS_LEN_3			(0x1UL << 22)
+#define SMPT_CMD_ADDRESS_LEN_4			(0x2UL << 22)
+#define SMPT_CMD_ADDRESS_LEN_USE_CURRENT	(0x3UL << 22)
+
+#define SMPT_CMD_READ_DUMMY_MASK		GENMASK(19, 16)
+#define SMPT_CMD_READ_DUMMY_SHIFT		16
+#define SMPT_CMD_READ_DUMMY(_cmd) \
+	(((_cmd) & SMPT_CMD_READ_DUMMY_MASK) >> SMPT_CMD_READ_DUMMY_SHIFT)
+#define SMPT_CMD_READ_DUMMY_IS_VARIABLE		0xfUL
+
+#define SMPT_CMD_READ_DATA_MASK			GENMASK(31, 24)
+#define SMPT_CMD_READ_DATA_SHIFT		24
+#define SMPT_CMD_READ_DATA(_cmd) \
+	(((_cmd) & SMPT_CMD_READ_DATA_MASK) >> SMPT_CMD_READ_DATA_SHIFT)
+
+#define SMPT_CMD_OPCODE_MASK			GENMASK(15, 8)
+#define SMPT_CMD_OPCODE_SHIFT			8
+#define SMPT_CMD_OPCODE(_cmd) \
+	(((_cmd) & SMPT_CMD_OPCODE_MASK) >> SMPT_CMD_OPCODE_SHIFT)
+
+#define SMPT_MAP_REGION_COUNT_MASK		GENMASK(23, 16)
+#define SMPT_MAP_REGION_COUNT_SHIFT		16
+#define SMPT_MAP_REGION_COUNT(_header) \
+	((((_header) & SMPT_MAP_REGION_COUNT_MASK) >> \
+	  SMPT_MAP_REGION_COUNT_SHIFT) + 1)
+
+#define SMPT_MAP_ID_MASK			GENMASK(15, 8)
+#define SMPT_MAP_ID_SHIFT			8
+#define SMPT_MAP_ID(_header) \
+	(((_header) & SMPT_MAP_ID_MASK) >> SMPT_MAP_ID_SHIFT)
+
+#define SMPT_MAP_REGION_SIZE_MASK		GENMASK(31, 8)
+#define SMPT_MAP_REGION_SIZE_SHIFT		8
+#define SMPT_MAP_REGION_SIZE(_region) \
+	(((((_region) & SMPT_MAP_REGION_SIZE_MASK) >> \
+	   SMPT_MAP_REGION_SIZE_SHIFT) + 1) * 256)
+
+#define SMPT_MAP_REGION_ERASE_TYPE_MASK		GENMASK(3, 0)
+#define SMPT_MAP_REGION_ERASE_TYPE(_region) \
+	((_region) & SMPT_MAP_REGION_ERASE_TYPE_MASK)
+
+#define SMPT_DESC_TYPE_MAP			BIT(1)
+#define SMPT_DESC_END				BIT(0)
+
+/**
+ * spi_nor_smpt_addr_width() - return the address width used in the
+ *			       configuration detection command.
+ * @nor:	pointer to a 'struct spi_nor'
+ * @settings:	configuration detection command descriptor, dword1
+ */
+static u8 spi_nor_smpt_addr_width(const struct spi_nor *nor, const u32 settings)
+{
+	switch (settings & SMPT_CMD_ADDRESS_LEN_MASK) {
+	case SMPT_CMD_ADDRESS_LEN_0:
+		return 0;
+	case SMPT_CMD_ADDRESS_LEN_3:
+		return 3;
+	case SMPT_CMD_ADDRESS_LEN_4:
+		return 4;
+	case SMPT_CMD_ADDRESS_LEN_USE_CURRENT:
+		/* fall through */
+	default:
+		return nor->addr_width;
+	}
+}
+
+/**
+ * spi_nor_smpt_read_dummy() - return the configuration detection command read
+ *			       latency, in clock cycles.
+ * @nor:	pointer to a 'struct spi_nor'
+ * @settings:	configuration detection command descriptor, dword1
+ *
+ * Return: the number of dummy cycles for an SMPT read
+ */
+static u8 spi_nor_smpt_read_dummy(const struct spi_nor *nor, const u32 settings)
+{
+	u8 read_dummy = SMPT_CMD_READ_DUMMY(settings);
+
+	if (read_dummy == SMPT_CMD_READ_DUMMY_IS_VARIABLE)
+		return nor->read_dummy;
+	return read_dummy;
+}
+
+/**
+ * spi_nor_get_map_in_use() - get the configuration map in use
+ * @nor:	pointer to a 'struct spi_nor'
+ * @smpt:	pointer to the sector map parameter table
+ */
+static const u32 *spi_nor_get_map_in_use(struct spi_nor *nor, const u32 *smpt)
+{
+	const u32 *ret = NULL;
+	u32 i, addr;
+	int err;
+	u8 addr_width, read_opcode, read_dummy;
+	u8 read_data_mask, data_byte, map_id;
+
+	addr_width = nor->addr_width;
+	read_dummy = nor->read_dummy;
+	read_opcode = nor->read_opcode;
+
+	map_id = 0;
+	i = 0;
+	/* Determine if there are any optional Detection Command Descriptors */
+	while (!(smpt[i] & SMPT_DESC_TYPE_MAP)) {
+		read_data_mask = SMPT_CMD_READ_DATA(smpt[i]);
+		nor->addr_width = spi_nor_smpt_addr_width(nor, smpt[i]);
+		nor->read_dummy = spi_nor_smpt_read_dummy(nor, smpt[i]);
+		nor->read_opcode = SMPT_CMD_OPCODE(smpt[i]);
+		addr = smpt[i + 1];
+
+		err = spi_nor_read_raw(nor, addr, 1, &data_byte);
+		if (err)
+			goto out;
+
+		/*
+		 * Build an index value that is used to select the Sector Map
+		 * Configuration that is currently in use.
+		 */
+		map_id = map_id << 1 | !!(data_byte & read_data_mask);
+		i = i + 2;
+	}
+
+	/* Find the matching configuration map */
+	while (SMPT_MAP_ID(smpt[i]) != map_id) {
+		if (smpt[i] & SMPT_DESC_END)
+			goto out;
+		/* increment the table index to the next map */
+		i += SMPT_MAP_REGION_COUNT(smpt[i]) + 1;
+	}
+
+	ret = smpt + i;
+	/* fall through */
+out:
+	nor->addr_width = addr_width;
+	nor->read_dummy = read_dummy;
+	nor->read_opcode = read_opcode;
+	return ret;
+}
+
+/**
+ * spi_nor_region_check_overlay() - set overlay bit when the region is overlaid
+ * @region:	pointer to a structure that describes a SPI NOR erase region
+ * @erase:	pointer to a structure that describes a SPI NOR erase type
+ * @erase_type:	erase type bitmask
+ */
+static void
+spi_nor_region_check_overlay(struct spi_nor_erase_region *region,
+			     const struct spi_nor_erase_type *erase,
+			     const u8 erase_type)
+{
+	int i;
+
+	for (i = 0; i < SNOR_ERASE_TYPE_MAX; i++) {
+		if (!(erase_type & BIT(i)))
+			continue;
+		if (region->size & erase[i].size_mask) {
+			spi_nor_region_mark_overlay(region);
+			return;
+		}
+	}
+}
+
+/**
+ * spi_nor_init_non_uniform_erase_map() - initialize the non-uniform erase map
+ * @nor:	pointer to a 'struct spi_nor'
+ * @smpt:	pointer to the sector map parameter table
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_init_non_uniform_erase_map(struct spi_nor *nor,
+					      const u32 *smpt)
+{
+	struct spi_nor_erase_map *map = &nor->erase_map;
+	const struct spi_nor_erase_type *erase = map->erase_type;
+	struct spi_nor_erase_region *region;
+	u64 offset;
+	u32 region_count;
+	int i, j;
+	u8 erase_type;
+
+	region_count = SMPT_MAP_REGION_COUNT(*smpt);
+	/*
+	 * The regions will be freed when the driver detaches from the
+	 * device.
+	 */
+	region = devm_kcalloc(nor->dev, region_count, sizeof(*region),
+			      GFP_KERNEL);
+	if (!region)
+		return -ENOMEM;
+	map->regions = region;
+
+	map->uniform_erase_type = 0xff;
+	offset = 0;
+	/* Populate regions. */
+	for (i = 0; i < region_count; i++) {
+		j = i + 1; /* index for the region dword */
+		region[i].size = SMPT_MAP_REGION_SIZE(smpt[j]);
+		erase_type = SMPT_MAP_REGION_ERASE_TYPE(smpt[j]);
+		region[i].offset = offset | erase_type;
+
+		spi_nor_region_check_overlay(&region[i], erase, erase_type);
+
+		/*
+		 * Save the erase types that are supported in all regions and
+		 * can erase the entire flash memory.
+		 */
+		map->uniform_erase_type &= erase_type;
+
+		offset = (region[i].offset & ~SNOR_ERASE_FLAGS_MASK) +
+			 region[i].size;
+	}
+
+	spi_nor_region_mark_end(&region[i - 1]);
+
+	return 0;
+}
+
+/**
+ * spi_nor_parse_smpt() - parse Sector Map Parameter Table
+ * @nor:		pointer to a 'struct spi_nor'
+ * @smpt_header:	sector map parameter table header
+ *
+ * This table is optional, but when available, we parse it to identify the
+ * location and size of sectors within the main data array of the flash memory
+ * device and to identify which Erase Types are supported by each sector.
+ *
+ * Return: 0 on success, -errno otherwise.
+ */
+static int spi_nor_parse_smpt(struct spi_nor *nor,
+			      const struct sfdp_parameter_header *smpt_header)
+{
+	const u32 *sector_map;
+	u32 *smpt;
+	size_t len;
+	u32 addr;
+	int i, ret;
+
+	/* Read the Sector Map Parameter Table. */
+	len = smpt_header->length * sizeof(*smpt);
+	smpt = kzalloc(len, GFP_KERNEL);
+	if (!smpt)
+		return -ENOMEM;
+
+	addr = SFDP_PARAM_HEADER_PTP(smpt_header);
+	ret = spi_nor_read_sfdp(nor, addr, len, smpt);
+	if (ret)
+		goto out;
+
+	/* Fix endianness of the SMPT DWORDs. */
+	for (i = 0; i < smpt_header->length; i++)
+		smpt[i] = le32_to_cpu(smpt[i]);
+
+	sector_map = spi_nor_get_map_in_use(nor, smpt);
+	if (!sector_map) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = spi_nor_init_non_uniform_erase_map(nor, sector_map);
+	if (ret)
+		goto out;
+
+	spi_nor_regions_sort_erase_types(&nor->erase_map);
+	/* fall through */
+out:
+	kfree(smpt);
+	return ret;
+}
+
 /**
  * spi_nor_parse_sfdp() - parse the Serial Flash Discoverable Parameters.
  * @nor:		pointer to a 'struct spi_nor'
@@ -2851,7 +3138,7 @@ static int spi_nor_parse_sfdp(struct spi_nor *nor,
 
 		switch (SFDP_PARAM_HEADER_ID(param_header)) {
 		case SFDP_SECTOR_MAP_ID:
-			dev_info(dev, "non-uniform erase sector maps are not supported yet.\n");
+			err = spi_nor_parse_smpt(nor, param_header);
 			break;
 
 		default:
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 894cbf88bf2e..7f0c7303575e 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -419,6 +419,18 @@ spi_nor_region_end(const struct spi_nor_erase_region *region)
 	return (region->offset & ~SNOR_ERASE_FLAGS_MASK) + region->size;
 }
 
+static void __maybe_unused
+spi_nor_region_mark_end(struct spi_nor_erase_region *region)
+{
+	region->offset |= SNOR_LAST_REGION;
+}
+
+static void __maybe_unused
+spi_nor_region_mark_overlay(struct spi_nor_erase_region *region)
+{
+	region->offset |= SNOR_OVERLAID_REGION;
+}
+
 static bool __maybe_unused spi_nor_has_uniform_erase(const struct spi_nor *nor)
 {
 	return !!nor->erase_map.uniform_erase_type;
-- 
cgit v1.2.3


From b8d62f33b7b225935649ab165d901fe8dd7f95e5 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 8 Oct 2018 13:17:26 +0200
Subject: genirq: Fix grammar s/an /a /

Fix a grammar mistake in <linux/interrupt.h>.

[ mingo: While at it also fix another similar error in another comment as well. ]

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Jiri Kosina <trivial@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20181008111726.26286-1-geert%2Brenesas@glider.be
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/interrupt.h | 2 +-
 kernel/irq/irqdomain.c    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index eeceac3376fc..1d6711c28271 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -45,7 +45,7 @@
  * IRQF_PERCPU - Interrupt is per cpu
  * IRQF_NOBALANCING - Flag to exclude this interrupt from irq balancing
  * IRQF_IRQPOLL - Interrupt is used for polling (only the interrupt that is
- *                registered first in an shared interrupt is considered for
+ *                registered first in a shared interrupt is considered for
  *                performance reasons)
  * IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished.
  *                Used by threaded interrupts which need to keep the
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 3b30a4aeb0db..3366d11c3e02 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -867,7 +867,7 @@ void irq_dispose_mapping(unsigned int virq)
 EXPORT_SYMBOL_GPL(irq_dispose_mapping);
 
 /**
- * irq_find_mapping() - Find a linux irq from an hw irq number.
+ * irq_find_mapping() - Find a linux irq from a hw irq number.
  * @domain: domain owning this hardware interrupt
  * @hwirq: hardware irq number in that domain space
  */
-- 
cgit v1.2.3


From e054637597ba36d3729ba6a3a3dd7aad8e2a3003 Mon Sep 17 00:00:00 2001
From: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Date: Sat, 6 Oct 2018 16:53:19 +0530
Subject: mm, sched/numa: Remove remaining traces of NUMA rate-limiting

Remove the leftover pglist_data::numabalancing_migrate_lock and its
initialization, we stopped using this lock with:

  efaffc5e40ae ("mm, sched/numa: Remove rate-limiting of automatic NUMA balancing migration")

[ mingo: Rewrote the changelog. ]

Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Linux-MM <linux-mm@kvack.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1538824999-31230-1-git-send-email-srikar@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/mmzone.h |  4 ----
 mm/page_alloc.c        | 10 ----------
 2 files changed, 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3f4c0b167333..d4b0c79d2924 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -667,10 +667,6 @@ typedef struct pglist_data {
 	enum zone_type kcompactd_classzone_idx;
 	wait_queue_head_t kcompactd_wait;
 	struct task_struct *kcompactd;
-#endif
-#ifdef CONFIG_NUMA_BALANCING
-	/* Lock serializing the migrate rate limiting window */
-	spinlock_t numabalancing_migrate_lock;
 #endif
 	/*
 	 * This is a per-node reserve of pages that are not available
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 706a738c0aee..e2ef1c17942f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6193,15 +6193,6 @@ static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
 	return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static void pgdat_init_numabalancing(struct pglist_data *pgdat)
-{
-	spin_lock_init(&pgdat->numabalancing_migrate_lock);
-}
-#else
-static void pgdat_init_numabalancing(struct pglist_data *pgdat) {}
-#endif
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void pgdat_init_split_queue(struct pglist_data *pgdat)
 {
@@ -6226,7 +6217,6 @@ static void __meminit pgdat_init_internals(struct pglist_data *pgdat)
 {
 	pgdat_resize_init(pgdat);
 
-	pgdat_init_numabalancing(pgdat);
 	pgdat_init_split_queue(pgdat);
 	pgdat_init_kcompactd(pgdat);
 
-- 
cgit v1.2.3


From 8ca2b56cd7da98fc8f8d787bb706b9d6c8674a3b Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Wed, 3 Oct 2018 13:07:18 -0400
Subject: locking/lockdep: Make class->ops a percpu counter and move it under
 CONFIG_DEBUG_LOCKDEP=y

A sizable portion of the CPU cycles spent on the __lock_acquire() is used
up by the atomic increment of the class->ops stat counter. By taking it out
from the lock_class structure and changing it to a per-cpu per-lock-class
counter, we can reduce the amount of cacheline contention on the class
structure when multiple CPUs are trying to acquire locks of the same
class simultaneously.

To limit the increase in memory consumption because of the percpu nature
of that counter, it is now put back under the CONFIG_DEBUG_LOCKDEP
config option. So the memory consumption increase will only occur if
CONFIG_DEBUG_LOCKDEP is defined. The lock_class structure, however,
is reduced in size by 16 bytes on 64-bit archs after ops removal and
a minor restructuring of the fields.

This patch also fixes a bug in the increment code as the counter is of
the 'unsigned long' type, but atomic_inc() was used to increment it.

Signed-off-by: Waiman Long <longman@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Link: http://lkml.kernel.org/r/d66681f3-8781-9793-1dcf-2436a284550b@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/lockdep.h            |  7 +------
 kernel/locking/lockdep.c           | 11 ++++++++---
 kernel/locking/lockdep_internals.h | 27 +++++++++++++++++++++++++++
 kernel/locking/lockdep_proc.c      |  2 +-
 4 files changed, 37 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index b0d0b51c4d85..1fd82ff99c65 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -99,13 +99,8 @@ struct lock_class {
 	 */
 	unsigned int			version;
 
-	/*
-	 * Statistics counter:
-	 */
-	unsigned long			ops;
-
-	const char			*name;
 	int				name_version;
+	const char			*name;
 
 #ifdef CONFIG_LOCK_STAT
 	unsigned long			contention_point[LOCKSTAT_POINTS];
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 511d30f88bce..a0f83058d6aa 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -139,7 +139,7 @@ static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES];
  * get freed - this significantly simplifies the debugging code.
  */
 unsigned long nr_lock_classes;
-static struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
+struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
 
 static inline struct lock_class *hlock_class(struct held_lock *hlock)
 {
@@ -436,6 +436,7 @@ unsigned int max_lockdep_depth;
  * Various lockdep statistics:
  */
 DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
+DEFINE_PER_CPU(unsigned long [MAX_LOCKDEP_KEYS], lock_class_ops);
 #endif
 
 /*
@@ -1392,7 +1393,9 @@ static void print_lock_class_header(struct lock_class *class, int depth)
 
 	printk("%*s->", depth, "");
 	print_lock_name(class);
-	printk(KERN_CONT " ops: %lu", class->ops);
+#ifdef CONFIG_DEBUG_LOCKDEP
+	printk(KERN_CONT " ops: %lu", debug_class_ops_read(class));
+#endif
 	printk(KERN_CONT " {\n");
 
 	for (bit = 0; bit < LOCK_USAGE_STATES; bit++) {
@@ -3227,7 +3230,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 		if (!class)
 			return 0;
 	}
-	atomic_inc((atomic_t *)&class->ops);
+
+	debug_class_ops_inc(class);
+
 	if (very_verbose(class)) {
 		printk("\nacquire class [%px] %s", class->key, class->name);
 		if (class->name_version > 1)
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index d459d624ba2a..88c847a41c8a 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -152,9 +152,15 @@ struct lockdep_stats {
 	int	nr_find_usage_forwards_recursions;
 	int	nr_find_usage_backwards_checks;
 	int	nr_find_usage_backwards_recursions;
+
+	/*
+	 * Per lock class locking operation stat counts
+	 */
+	unsigned long lock_class_ops[MAX_LOCKDEP_KEYS];
 };
 
 DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);
+extern struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
 
 #define __debug_atomic_inc(ptr)					\
 	this_cpu_inc(lockdep_stats.ptr);
@@ -179,9 +185,30 @@ DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);
 	}								\
 	__total;							\
 })
+
+static inline void debug_class_ops_inc(struct lock_class *class)
+{
+	int idx;
+
+	idx = class - lock_classes;
+	__debug_atomic_inc(lock_class_ops[idx]);
+}
+
+static inline unsigned long debug_class_ops_read(struct lock_class *class)
+{
+	int idx, cpu;
+	unsigned long ops = 0;
+
+	idx = class - lock_classes;
+	for_each_possible_cpu(cpu)
+		ops += per_cpu(lockdep_stats.lock_class_ops[idx], cpu);
+	return ops;
+}
+
 #else
 # define __debug_atomic_inc(ptr)	do { } while (0)
 # define debug_atomic_inc(ptr)		do { } while (0)
 # define debug_atomic_dec(ptr)		do { } while (0)
 # define debug_atomic_read(ptr)		0
+# define debug_class_ops_inc(ptr)	do { } while (0)
 #endif
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c
index 3dd980dfba2d..3d31f9b0059e 100644
--- a/kernel/locking/lockdep_proc.c
+++ b/kernel/locking/lockdep_proc.c
@@ -68,7 +68,7 @@ static int l_show(struct seq_file *m, void *v)
 
 	seq_printf(m, "%p", class->key);
 #ifdef CONFIG_DEBUG_LOCKDEP
-	seq_printf(m, " OPS:%8ld", class->ops);
+	seq_printf(m, " OPS:%8ld", debug_class_ops_read(class));
 #endif
 #ifdef CONFIG_PROVE_LOCKING
 	seq_printf(m, " FD:%5ld", lockdep_count_forward_deps(class));
-- 
cgit v1.2.3


From 78d8f8f1a1800cf6068ffcac7a14cda19ae693d8 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor@chromium.org>
Date: Fri, 5 Oct 2018 11:41:13 -0700
Subject: mfd: cros: add "base attached" MKBP switch definition

This adds a "base attached" switch definition to the MKBP protocol that
is used by Whiskers driver to properly determine device state (clamshell
vs tablet mode).

Signed-off-by: Dmitry Torokhov <dtor@chromium.org>
Acked-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/mfd/cros_ec_commands.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/cros_ec_commands.h b/include/linux/mfd/cros_ec_commands.h
index 6e1ab9bead28..7690f0793965 100644
--- a/include/linux/mfd/cros_ec_commands.h
+++ b/include/linux/mfd/cros_ec_commands.h
@@ -2132,6 +2132,7 @@ struct ec_response_get_next_event_v1 {
 /* Switches */
 #define EC_MKBP_LID_OPEN	0
 #define EC_MKBP_TABLET_MODE	1
+#define EC_MKBP_BASE_ATTACHED	2
 
 /*****************************************************************************/
 /* Temperature sensor commands */
-- 
cgit v1.2.3


From 53c99bd665a2649341ed6aed358ab56a3eedcd00 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 31 Aug 2018 10:42:24 +0200
Subject: init: add arch_call_rest_init to allow stack switching

With CONFIG_VMAP_STACK=y the kernel stack of all tasks should be
allocated in the vmalloc space. The initial stack used for all
the early init code is in the init_thread_union. To be able to
switch from this early stack to a properly allocated stack
from vmalloc the architecture needs a switch-over point.

Introduce the arch_call_rest_init() function with a weak definition
in init/main.c with the only purpose to call rest_init() from the
end of start_kernel(). The architecture override can then do the
necessary magic to switch to the new vmalloc'ed stack.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 include/linux/start_kernel.h | 2 ++
 init/main.c                  | 9 +++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/start_kernel.h b/include/linux/start_kernel.h
index 4b268d86a784..8b369a41c03c 100644
--- a/include/linux/start_kernel.h
+++ b/include/linux/start_kernel.h
@@ -9,5 +9,7 @@
    up something else. */
 
 extern asmlinkage void __init start_kernel(void);
+extern void __init arch_call_rest_init(void);
+extern void __ref rest_init(void);
 
 #endif /* _LINUX_START_KERNEL_H */
diff --git a/init/main.c b/init/main.c
index 18f8f0140fa0..78b714a5fa94 100644
--- a/init/main.c
+++ b/init/main.c
@@ -394,7 +394,7 @@ static void __init setup_command_line(char *command_line)
 
 static __initdata DECLARE_COMPLETION(kthreadd_done);
 
-static noinline void __ref rest_init(void)
+noinline void __ref rest_init(void)
 {
 	struct task_struct *tsk;
 	int pid;
@@ -528,6 +528,11 @@ static void __init mm_init(void)
 	pti_init();
 }
 
+void __init __weak arch_call_rest_init(void)
+{
+	rest_init();
+}
+
 asmlinkage __visible void __init start_kernel(void)
 {
 	char *command_line;
@@ -736,7 +741,7 @@ asmlinkage __visible void __init start_kernel(void)
 	}
 
 	/* Do the rest non-__init'ed, we're now alive */
-	rest_init();
+	arch_call_rest_init();
 }
 
 /* Call all constructor functions linked into the kernel. */
-- 
cgit v1.2.3


From dde709d1361ab50d3b9f2824f72b4374f5582e84 Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Mon, 20 Nov 2017 11:16:14 +0100
Subject: compiler: introduce __no_sanitize_address_or_inline

Due to conflict between kasan instrumentation and inlining
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 functions which are
defined as inline could not be called from functions defined with
__no_sanitize_address.

Introduce __no_sanitize_address_or_inline which would expand to
__no_sanitize_address when the kernel is built with kasan support and
to inline otherwise. This helps to avoid disabling kasan
instrumentation for entire files.

Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 include/linux/compiler-gcc.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 763bbad1e258..997ade08a79d 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -208,6 +208,12 @@
  * Conflicts with inlining: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
  */
 #define __no_sanitize_address __attribute__((no_sanitize_address))
+#ifdef CONFIG_KASAN
+#define __no_sanitize_address_or_inline					\
+	__no_sanitize_address __maybe_unused notrace
+#else
+#define __no_sanitize_address_or_inline inline
+#endif
 #endif
 
 #if GCC_VERSION >= 50100
@@ -225,6 +231,7 @@
 
 #if !defined(__no_sanitize_address)
 #define __no_sanitize_address
+#define __no_sanitize_address_or_inline inline
 #endif
 
 /*
-- 
cgit v1.2.3


From 3b0296b8c893adb17b422179b9e779e4c32aa347 Mon Sep 17 00:00:00 2001
From: Rajan Vaja <rajan.vaja@xilinx.com>
Date: Mon, 8 Oct 2018 11:21:44 -0700
Subject: firmware: xilinx: Add zynqmp IOCTL API for device control

Add ZynqMP firmware IOCTL API to control and configure
devices like PLLs, SD, Gem, etc.

Signed-off-by: Rajan Vaja <rajan.vaja@xilinx.com>
Signed-off-by: Jolly Shah <jollys@xilinx.com>
Acked-by: Olof Johansson <olof@lixom.net>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 drivers/firmware/xilinx/zynqmp.c     | 42 ++++++++++++++++++++++++++++++++++++
 include/linux/firmware/xlnx-zynqmp.h |  4 +++-
 2 files changed, 45 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index 84b3fd2eca8b..9a1c72a9280f 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -428,6 +428,47 @@ static int zynqmp_pm_clock_getparent(u32 clock_id, u32 *parent_id)
 	return ret;
 }
 
+/**
+ * zynqmp_is_valid_ioctl() - Check whether IOCTL ID is valid or not
+ * @ioctl_id:	IOCTL ID
+ *
+ * Return: 1 if IOCTL is valid else 0
+ */
+static inline int zynqmp_is_valid_ioctl(u32 ioctl_id)
+{
+	switch (ioctl_id) {
+	case IOCTL_SET_PLL_FRAC_MODE:
+	case IOCTL_GET_PLL_FRAC_MODE:
+	case IOCTL_SET_PLL_FRAC_DATA:
+	case IOCTL_GET_PLL_FRAC_DATA:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/**
+ * zynqmp_pm_ioctl() - PM IOCTL API for device control and configs
+ * @node_id:	Node ID of the device
+ * @ioctl_id:	ID of the requested IOCTL
+ * @arg1:	Argument 1 to requested IOCTL call
+ * @arg2:	Argument 2 to requested IOCTL call
+ * @out:	Returned output value
+ *
+ * This function calls IOCTL to firmware for device control and configuration.
+ *
+ * Return: Returns status, either success or error+reason
+ */
+static int zynqmp_pm_ioctl(u32 node_id, u32 ioctl_id, u32 arg1, u32 arg2,
+			   u32 *out)
+{
+	if (!zynqmp_is_valid_ioctl(ioctl_id))
+		return -EINVAL;
+
+	return zynqmp_pm_invoke_fn(PM_IOCTL, node_id, ioctl_id,
+				   arg1, arg2, out);
+}
+
 static const struct zynqmp_eemi_ops eemi_ops = {
 	.get_api_version = zynqmp_pm_get_api_version,
 	.query_data = zynqmp_pm_query_data,
@@ -440,6 +481,7 @@ static const struct zynqmp_eemi_ops eemi_ops = {
 	.clock_getrate = zynqmp_pm_clock_getrate,
 	.clock_setparent = zynqmp_pm_clock_setparent,
 	.clock_getparent = zynqmp_pm_clock_getparent,
+	.ioctl = zynqmp_pm_ioctl,
 };
 
 /**
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 015e130431e6..7a9db0861803 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -34,7 +34,8 @@
 
 enum pm_api_id {
 	PM_GET_API_VERSION = 1,
-	PM_QUERY_DATA = 35,
+	PM_IOCTL = 34,
+	PM_QUERY_DATA,
 	PM_CLOCK_ENABLE,
 	PM_CLOCK_DISABLE,
 	PM_CLOCK_GETSTATE,
@@ -99,6 +100,7 @@ struct zynqmp_eemi_ops {
 	int (*clock_getrate)(u32 clock_id, u64 *rate);
 	int (*clock_setparent)(u32 clock_id, u32 parent_id);
 	int (*clock_getparent)(u32 clock_id, u32 *parent_id);
+	int (*ioctl)(u32 node_id, u32 ioctl_id, u32 arg1, u32 arg2, u32 *out);
 };
 
 #if IS_REACHABLE(CONFIG_ARCH_ZYNQMP)
-- 
cgit v1.2.3


From 3fde0e16d016ecb273f0fa404b5d56b947fc0576 Mon Sep 17 00:00:00 2001
From: Jolly Shah <jolly.shah@xilinx.com>
Date: Mon, 8 Oct 2018 11:21:46 -0700
Subject: drivers: clk: Add ZynqMP clock driver

This patch adds CCF compliant clock driver for ZynqMP.
Clock driver queries supported clock information from
firmware and regiters pll and output clocks with CCF.

Signed-off-by: Rajan Vaja <rajan.vaja@xilinx.com>
Signed-off-by: Tejas Patel <tejasp@xilinx.com>
Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@xilinx.com>
Signed-off-by: Jolly Shah <jolly.shah@xilinx.com>
Acked-by: Olof Johansson <olof@lixom.net>
Reviewed-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
---
 drivers/clk/Kconfig                  |   1 +
 drivers/clk/Makefile                 |   1 +
 drivers/clk/zynqmp/Kconfig           |  10 +
 drivers/clk/zynqmp/Makefile          |   4 +
 drivers/clk/zynqmp/clk-gate-zynqmp.c | 144 +++++++
 drivers/clk/zynqmp/clk-mux-zynqmp.c  | 141 +++++++
 drivers/clk/zynqmp/clk-zynqmp.h      |  68 ++++
 drivers/clk/zynqmp/clkc.c            | 716 +++++++++++++++++++++++++++++++++++
 drivers/clk/zynqmp/divider.c         | 217 +++++++++++
 drivers/clk/zynqmp/pll.c             | 335 ++++++++++++++++
 include/linux/firmware/xlnx-zynqmp.h |   1 +
 11 files changed, 1638 insertions(+)
 create mode 100644 drivers/clk/zynqmp/Kconfig
 create mode 100644 drivers/clk/zynqmp/Makefile
 create mode 100644 drivers/clk/zynqmp/clk-gate-zynqmp.c
 create mode 100644 drivers/clk/zynqmp/clk-mux-zynqmp.c
 create mode 100644 drivers/clk/zynqmp/clk-zynqmp.h
 create mode 100644 drivers/clk/zynqmp/clkc.c
 create mode 100644 drivers/clk/zynqmp/divider.c
 create mode 100644 drivers/clk/zynqmp/pll.c

(limited to 'include/linux')

diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index 292056bbb30e..1deafb4db60c 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -299,5 +299,6 @@ source "drivers/clk/sunxi-ng/Kconfig"
 source "drivers/clk/tegra/Kconfig"
 source "drivers/clk/ti/Kconfig"
 source "drivers/clk/uniphier/Kconfig"
+source "drivers/clk/zynqmp/Kconfig"
 
 endmenu
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index a84c5573cabe..ad11421bdacd 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -108,3 +108,4 @@ obj-$(CONFIG_X86)			+= x86/
 endif
 obj-$(CONFIG_ARCH_ZX)			+= zte/
 obj-$(CONFIG_ARCH_ZYNQ)			+= zynq/
+obj-$(CONFIG_COMMON_CLK_ZYNQMP)         += zynqmp/
diff --git a/drivers/clk/zynqmp/Kconfig b/drivers/clk/zynqmp/Kconfig
new file mode 100644
index 000000000000..17086059be8b
--- /dev/null
+++ b/drivers/clk/zynqmp/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config COMMON_CLK_ZYNQMP
+	bool "Support for Xilinx ZynqMP Ultrascale+ clock controllers"
+	depends on ARCH_ZYNQMP || COMPILE_TEST
+	depends on ZYNQMP_FIRMWARE
+	help
+	  Support for the Zynqmp Ultrascale clock controller.
+	  It has a dependency on the PMU firmware.
+	  Say Y if you want to include clock support.
diff --git a/drivers/clk/zynqmp/Makefile b/drivers/clk/zynqmp/Makefile
new file mode 100644
index 000000000000..0ec24bfe0f18
--- /dev/null
+++ b/drivers/clk/zynqmp/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+# Zynq Ultrascale+ MPSoC clock specific Makefile
+
+obj-$(CONFIG_ARCH_ZYNQMP)	+= pll.o clk-gate-zynqmp.o divider.o clk-mux-zynqmp.o clkc.o
diff --git a/drivers/clk/zynqmp/clk-gate-zynqmp.c b/drivers/clk/zynqmp/clk-gate-zynqmp.c
new file mode 100644
index 000000000000..83b236f20fff
--- /dev/null
+++ b/drivers/clk/zynqmp/clk-gate-zynqmp.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Zynq UltraScale+ MPSoC clock controller
+ *
+ *  Copyright (C) 2016-2018 Xilinx
+ *
+ * Gated clock implementation
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/slab.h>
+#include "clk-zynqmp.h"
+
+/**
+ * struct clk_gate - gating clock
+ * @hw:		handle between common and hardware-specific interfaces
+ * @flags:	hardware-specific flags
+ * @clk_id:	Id of clock
+ */
+struct zynqmp_clk_gate {
+	struct clk_hw hw;
+	u8 flags;
+	u32 clk_id;
+};
+
+#define to_zynqmp_clk_gate(_hw) container_of(_hw, struct zynqmp_clk_gate, hw)
+
+/**
+ * zynqmp_clk_gate_enable() - Enable clock
+ * @hw:		handle between common and hardware-specific interfaces
+ *
+ * Return: 0 on success else error code
+ */
+static int zynqmp_clk_gate_enable(struct clk_hw *hw)
+{
+	struct zynqmp_clk_gate *gate = to_zynqmp_clk_gate(hw);
+	const char *clk_name = clk_hw_get_name(hw);
+	u32 clk_id = gate->clk_id;
+	int ret;
+	const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops();
+
+	ret = eemi_ops->clock_enable(clk_id);
+
+	if (ret)
+		pr_warn_once("%s() clock enabled failed for %s, ret = %d\n",
+			     __func__, clk_name, ret);
+
+	return ret;
+}
+
+/*
+ * zynqmp_clk_gate_disable() - Disable clock
+ * @hw:		handle between common and hardware-specific interfaces
+ */
+static void zynqmp_clk_gate_disable(struct clk_hw *hw)
+{
+	struct zynqmp_clk_gate *gate = to_zynqmp_clk_gate(hw);
+	const char *clk_name = clk_hw_get_name(hw);
+	u32 clk_id = gate->clk_id;
+	int ret;
+	const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops();
+
+	ret = eemi_ops->clock_disable(clk_id);
+
+	if (ret)
+		pr_warn_once("%s() clock disable failed for %s, ret = %d\n",
+			     __func__, clk_name, ret);
+}
+
+/**
+ * zynqmp_clk_gate_is_enable() - Check clock state
+ * @hw:		handle between common and hardware-specific interfaces
+ *
+ * Return: 1 if enabled, 0 if disabled else error code
+ */
+static int zynqmp_clk_gate_is_enabled(struct clk_hw *hw)
+{
+	struct zynqmp_clk_gate *gate = to_zynqmp_clk_gate(hw);
+	const char *clk_name = clk_hw_get_name(hw);
+	u32 clk_id = gate->clk_id;
+	int state, ret;
+	const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops();
+
+	ret = eemi_ops->clock_getstate(clk_id, &state);
+	if (ret) {
+		pr_warn_once("%s() clock get state failed for %s, ret = %d\n",
+			     __func__, clk_name, ret);
+		return -EIO;
+	}
+
+	return state ? 1 : 0;
+}
+
+static const struct clk_ops zynqmp_clk_gate_ops = {
+	.enable = zynqmp_clk_gate_enable,
+	.disable = zynqmp_clk_gate_disable,
+	.is_enabled = zynqmp_clk_gate_is_enabled,
+};
+
+/**
+ * zynqmp_clk_register_gate() - Register a gate clock with the clock framework
+ * @name:		Name of this clock
+ * @clk_id:		Id of this clock
+ * @parents:		Name of this clock's parents
+ * @num_parents:	Number of parents
+ * @nodes:		Clock topology node
+ *
+ * Return: clock hardware of the registered clock gate
+ */
+struct clk_hw *zynqmp_clk_register_gate(const char *name, u32 clk_id,
+					const char * const *parents,
+					u8 num_parents,
+					const struct clock_topology *nodes)
+{
+	struct zynqmp_clk_gate *gate;
+	struct clk_hw *hw;
+	int ret;
+	struct clk_init_data init;
+
+	/* allocate the gate */
+	gate = kzalloc(sizeof(*gate), GFP_KERNEL);
+	if (!gate)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &zynqmp_clk_gate_ops;
+	init.flags = nodes->flag;
+	init.parent_names = parents;
+	init.num_parents = 1;
+
+	/* struct clk_gate assignments */
+	gate->flags = nodes->type_flag;
+	gate->hw.init = &init;
+	gate->clk_id = clk_id;
+
+	hw = &gate->hw;
+	ret = clk_hw_register(NULL, hw);
+	if (ret) {
+		kfree(gate);
+		hw = ERR_PTR(ret);
+	}
+
+	return hw;
+}
diff --git a/drivers/clk/zynqmp/clk-mux-zynqmp.c b/drivers/clk/zynqmp/clk-mux-zynqmp.c
new file mode 100644
index 000000000000..4143f560c28d
--- /dev/null
+++ b/drivers/clk/zynqmp/clk-mux-zynqmp.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Zynq UltraScale+ MPSoC mux
+ *
+ *  Copyright (C) 2016-2018 Xilinx
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/slab.h>
+#include "clk-zynqmp.h"
+
+/*
+ * DOC: basic adjustable multiplexer clock that cannot gate
+ *
+ * Traits of this clock:
+ * prepare - clk_prepare only ensures that parents are prepared
+ * enable - clk_enable only ensures that parents are enabled
+ * rate - rate is only affected by parent switching.  No clk_set_rate support
+ * parent - parent is adjustable through clk_set_parent
+ */
+
+/**
+ * struct zynqmp_clk_mux - multiplexer clock
+ *
+ * @hw:		handle between common and hardware-specific interfaces
+ * @flags:	hardware-specific flags
+ * @clk_id:	Id of clock
+ */
+struct zynqmp_clk_mux {
+	struct clk_hw hw;
+	u8 flags;
+	u32 clk_id;
+};
+
+#define to_zynqmp_clk_mux(_hw) container_of(_hw, struct zynqmp_clk_mux, hw)
+
+/**
+ * zynqmp_clk_mux_get_parent() - Get parent of clock
+ * @hw:		handle between common and hardware-specific interfaces
+ *
+ * Return: Parent index
+ */
+static u8 zynqmp_clk_mux_get_parent(struct clk_hw *hw)
+{
+	struct zynqmp_clk_mux *mux = to_zynqmp_clk_mux(hw);
+	const char *clk_name = clk_hw_get_name(hw);
+	u32 clk_id = mux->clk_id;
+	u32 val;
+	int ret;
+	const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops();
+
+	ret = eemi_ops->clock_getparent(clk_id, &val);
+
+	if (ret)
+		pr_warn_once("%s() getparent failed for clock: %s, ret = %d\n",
+			     __func__, clk_name, ret);
+
+	return val;
+}
+
+/**
+ * zynqmp_clk_mux_set_parent() - Set parent of clock
+ * @hw:		handle between common and hardware-specific interfaces
+ * @index:	Parent index
+ *
+ * Return: 0 on success else error+reason
+ */
+static int zynqmp_clk_mux_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct zynqmp_clk_mux *mux = to_zynqmp_clk_mux(hw);
+	const char *clk_name = clk_hw_get_name(hw);
+	u32 clk_id = mux->clk_id;
+	int ret;
+	const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops();
+
+	ret = eemi_ops->clock_setparent(clk_id, index);
+
+	if (ret)
+		pr_warn_once("%s() set parent failed for clock: %s, ret = %d\n",
+			     __func__, clk_name, ret);
+
+	return ret;
+}
+
+static const struct clk_ops zynqmp_clk_mux_ops = {
+	.get_parent = zynqmp_clk_mux_get_parent,
+	.set_parent = zynqmp_clk_mux_set_parent,
+	.determine_rate = __clk_mux_determine_rate,
+};
+
+static const struct clk_ops zynqmp_clk_mux_ro_ops = {
+	.get_parent = zynqmp_clk_mux_get_parent,
+};
+
+/**
+ * zynqmp_clk_register_mux() - Register a mux table with the clock
+ *			       framework
+ * @name:		Name of this clock
+ * @clk_id:		Id of this clock
+ * @parents:		Name of this clock's parents
+ * @num_parents:	Number of parents
+ * @nodes:		Clock topology node
+ *
+ * Return: clock hardware of the registered clock mux
+ */
+struct clk_hw *zynqmp_clk_register_mux(const char *name, u32 clk_id,
+				       const char * const *parents,
+				       u8 num_parents,
+				       const struct clock_topology *nodes)
+{
+	struct zynqmp_clk_mux *mux;
+	struct clk_hw *hw;
+	struct clk_init_data init;
+	int ret;
+
+	mux = kzalloc(sizeof(*mux), GFP_KERNEL);
+	if (!mux)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	if (nodes->type_flag & CLK_MUX_READ_ONLY)
+		init.ops = &zynqmp_clk_mux_ro_ops;
+	else
+		init.ops = &zynqmp_clk_mux_ops;
+	init.flags = nodes->flag;
+	init.parent_names = parents;
+	init.num_parents = num_parents;
+	mux->flags = nodes->type_flag;
+	mux->hw.init = &init;
+	mux->clk_id = clk_id;
+
+	hw = &mux->hw;
+	ret = clk_hw_register(NULL, hw);
+	if (ret) {
+		kfree(hw);
+		hw = ERR_PTR(ret);
+	}
+
+	return hw;
+}
+EXPORT_SYMBOL_GPL(zynqmp_clk_register_mux);
diff --git a/drivers/clk/zynqmp/clk-zynqmp.h b/drivers/clk/zynqmp/clk-zynqmp.h
new file mode 100644
index 000000000000..7ab163b67249
--- /dev/null
+++ b/drivers/clk/zynqmp/clk-zynqmp.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Copyright (C) 2016-2018 Xilinx
+ */
+
+#ifndef __LINUX_CLK_ZYNQMP_H_
+#define __LINUX_CLK_ZYNQMP_H_
+
+#include <linux/spinlock.h>
+
+#include <linux/firmware/xlnx-zynqmp.h>
+
+/* Clock APIs payload parameters */
+#define CLK_GET_NAME_RESP_LEN				16
+#define CLK_GET_TOPOLOGY_RESP_WORDS			3
+#define CLK_GET_PARENTS_RESP_WORDS			3
+#define CLK_GET_ATTR_RESP_WORDS				1
+
+enum topology_type {
+	TYPE_INVALID,
+	TYPE_MUX,
+	TYPE_PLL,
+	TYPE_FIXEDFACTOR,
+	TYPE_DIV1,
+	TYPE_DIV2,
+	TYPE_GATE,
+};
+
+/**
+ * struct clock_topology - Clock topology
+ * @type:	Type of topology
+ * @flag:	Topology flags
+ * @type_flag:	Topology type specific flag
+ */
+struct clock_topology {
+	u32 type;
+	u32 flag;
+	u32 type_flag;
+};
+
+struct clk_hw *zynqmp_clk_register_pll(const char *name, u32 clk_id,
+				       const char * const *parents,
+				       u8 num_parents,
+				       const struct clock_topology *nodes);
+
+struct clk_hw *zynqmp_clk_register_gate(const char *name, u32 clk_id,
+					const char * const *parents,
+					u8 num_parents,
+					const struct clock_topology *nodes);
+
+struct clk_hw *zynqmp_clk_register_divider(const char *name,
+					   u32 clk_id,
+					   const char * const *parents,
+					   u8 num_parents,
+					   const struct clock_topology *nodes);
+
+struct clk_hw *zynqmp_clk_register_mux(const char *name, u32 clk_id,
+				       const char * const *parents,
+				       u8 num_parents,
+				       const struct clock_topology *nodes);
+
+struct clk_hw *zynqmp_clk_register_fixed_factor(const char *name,
+					u32 clk_id,
+					const char * const *parents,
+					u8 num_parents,
+					const struct clock_topology *nodes);
+
+#endif
diff --git a/drivers/clk/zynqmp/clkc.c b/drivers/clk/zynqmp/clkc.c
new file mode 100644
index 000000000000..9d7d297f0ea8
--- /dev/null
+++ b/drivers/clk/zynqmp/clkc.c
@@ -0,0 +1,716 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Zynq UltraScale+ MPSoC clock controller
+ *
+ *  Copyright (C) 2016-2018 Xilinx
+ *
+ * Based on drivers/clk/zynq/clkc.c
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+
+#include "clk-zynqmp.h"
+
+#define MAX_PARENT			100
+#define MAX_NODES			6
+#define MAX_NAME_LEN			50
+
+#define CLK_TYPE_SHIFT			2
+
+#define PM_API_PAYLOAD_LEN		3
+
+#define NA_PARENT			0xFFFFFFFF
+#define DUMMY_PARENT			0xFFFFFFFE
+
+#define CLK_TYPE_FIELD_LEN		4
+#define CLK_TOPOLOGY_NODE_OFFSET	16
+#define NODES_PER_RESP			3
+
+#define CLK_TYPE_FIELD_MASK		0xF
+#define CLK_FLAG_FIELD_MASK		GENMASK(21, 8)
+#define CLK_TYPE_FLAG_FIELD_MASK	GENMASK(31, 24)
+
+#define CLK_PARENTS_ID_LEN		16
+#define CLK_PARENTS_ID_MASK		0xFFFF
+
+/* Flags for parents */
+#define PARENT_CLK_SELF			0
+#define PARENT_CLK_NODE1		1
+#define PARENT_CLK_NODE2		2
+#define PARENT_CLK_NODE3		3
+#define PARENT_CLK_NODE4		4
+#define PARENT_CLK_EXTERNAL		5
+
+#define END_OF_CLK_NAME			"END_OF_CLK"
+#define END_OF_TOPOLOGY_NODE		1
+#define END_OF_PARENTS			1
+#define RESERVED_CLK_NAME		""
+
+#define CLK_VALID_MASK			0x1
+
+enum clk_type {
+	CLK_TYPE_OUTPUT,
+	CLK_TYPE_EXTERNAL,
+};
+
+/**
+ * struct clock_parent - Clock parent
+ * @name:	Parent name
+ * @id:		Parent clock ID
+ * @flag:	Parent flags
+ */
+struct clock_parent {
+	char name[MAX_NAME_LEN];
+	int id;
+	u32 flag;
+};
+
+/**
+ * struct zynqmp_clock - Clock
+ * @clk_name:		Clock name
+ * @valid:		Validity flag of clock
+ * @type:		Clock type (Output/External)
+ * @node:		Clock topology nodes
+ * @num_nodes:		Number of nodes present in topology
+ * @parent:		Parent of clock
+ * @num_parents:	Number of parents of clock
+ */
+struct zynqmp_clock {
+	char clk_name[MAX_NAME_LEN];
+	u32 valid;
+	enum clk_type type;
+	struct clock_topology node[MAX_NODES];
+	u32 num_nodes;
+	struct clock_parent parent[MAX_PARENT];
+	u32 num_parents;
+};
+
+static const char clk_type_postfix[][10] = {
+	[TYPE_INVALID] = "",
+	[TYPE_MUX] = "_mux",
+	[TYPE_GATE] = "",
+	[TYPE_DIV1] = "_div1",
+	[TYPE_DIV2] = "_div2",
+	[TYPE_FIXEDFACTOR] = "_ff",
+	[TYPE_PLL] = ""
+};
+
+static struct clk_hw *(* const clk_topology[]) (const char *name, u32 clk_id,
+					const char * const *parents,
+					u8 num_parents,
+					const struct clock_topology *nodes)
+					= {
+	[TYPE_INVALID] = NULL,
+	[TYPE_MUX] = zynqmp_clk_register_mux,
+	[TYPE_PLL] = zynqmp_clk_register_pll,
+	[TYPE_FIXEDFACTOR] = zynqmp_clk_register_fixed_factor,
+	[TYPE_DIV1] = zynqmp_clk_register_divider,
+	[TYPE_DIV2] = zynqmp_clk_register_divider,
+	[TYPE_GATE] = zynqmp_clk_register_gate
+};
+
+static struct zynqmp_clock *clock;
+static struct clk_hw_onecell_data *zynqmp_data;
+static unsigned int clock_max_idx;
+static const struct zynqmp_eemi_ops *eemi_ops;
+
+/**
+ * zynqmp_is_valid_clock() - Check whether clock is valid or not
+ * @clk_id:	Clock index
+ *
+ * Return: 1 if clock is valid, 0 if clock is invalid else error code
+ */
+static inline int zynqmp_is_valid_clock(u32 clk_id)
+{
+	if (clk_id > clock_max_idx)
+		return -ENODEV;
+
+	return clock[clk_id].valid;
+}
+
+/**
+ * zynqmp_get_clock_name() - Get name of clock from Clock index
+ * @clk_id:	Clock index
+ * @clk_name:	Name of clock
+ *
+ * Return: 0 on success else error code
+ */
+static int zynqmp_get_clock_name(u32 clk_id, char *clk_name)
+{
+	int ret;
+
+	ret = zynqmp_is_valid_clock(clk_id);
+	if (ret == 1) {
+		strncpy(clk_name, clock[clk_id].clk_name, MAX_NAME_LEN);
+		return 0;
+	}
+
+	return ret == 0 ? -EINVAL : ret;
+}
+
+/**
+ * zynqmp_get_clock_type() - Get type of clock
+ * @clk_id:	Clock index
+ * @type:	Clock type: CLK_TYPE_OUTPUT or CLK_TYPE_EXTERNAL
+ *
+ * Return: 0 on success else error code
+ */
+static int zynqmp_get_clock_type(u32 clk_id, u32 *type)
+{
+	int ret;
+
+	ret = zynqmp_is_valid_clock(clk_id);
+	if (ret == 1) {
+		*type = clock[clk_id].type;
+		return 0;
+	}
+
+	return ret == 0 ? -EINVAL : ret;
+}
+
+/**
+ * zynqmp_pm_clock_get_num_clocks() - Get number of clocks in system
+ * @nclocks:	Number of clocks in system/board.
+ *
+ * Call firmware API to get number of clocks.
+ *
+ * Return: 0 on success else error code.
+ */
+static int zynqmp_pm_clock_get_num_clocks(u32 *nclocks)
+{
+	struct zynqmp_pm_query_data qdata = {0};
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	qdata.qid = PM_QID_CLOCK_GET_NUM_CLOCKS;
+
+	ret = eemi_ops->query_data(qdata, ret_payload);
+	*nclocks = ret_payload[1];
+
+	return ret;
+}
+
+/**
+ * zynqmp_pm_clock_get_name() - Get the name of clock for given id
+ * @clock_id:	ID of the clock to be queried
+ * @name:	Name of given clock
+ *
+ * This function is used to get name of clock specified by given
+ * clock ID.
+ *
+ * Return: Returns 0, in case of error name would be 0
+ */
+static int zynqmp_pm_clock_get_name(u32 clock_id, char *name)
+{
+	struct zynqmp_pm_query_data qdata = {0};
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+
+	qdata.qid = PM_QID_CLOCK_GET_NAME;
+	qdata.arg1 = clock_id;
+
+	eemi_ops->query_data(qdata, ret_payload);
+	memcpy(name, ret_payload, CLK_GET_NAME_RESP_LEN);
+
+	return 0;
+}
+
+/**
+ * zynqmp_pm_clock_get_topology() - Get the topology of clock for given id
+ * @clock_id:	ID of the clock to be queried
+ * @index:	Node index of clock topology
+ * @topology:	Buffer to store nodes in topology and flags
+ *
+ * This function is used to get topology information for the clock
+ * specified by given clock ID.
+ *
+ * This API will return 3 node of topology with a single response. To get
+ * other nodes, master should call same API in loop with new
+ * index till error is returned. E.g First call should have
+ * index 0 which will return nodes 0,1 and 2. Next call, index
+ * should be 3 which will return nodes 3,4 and 5 and so on.
+ *
+ * Return: 0 on success else error+reason
+ */
+static int zynqmp_pm_clock_get_topology(u32 clock_id, u32 index, u32 *topology)
+{
+	struct zynqmp_pm_query_data qdata = {0};
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	qdata.qid = PM_QID_CLOCK_GET_TOPOLOGY;
+	qdata.arg1 = clock_id;
+	qdata.arg2 = index;
+
+	ret = eemi_ops->query_data(qdata, ret_payload);
+	memcpy(topology, &ret_payload[1], CLK_GET_TOPOLOGY_RESP_WORDS * 4);
+
+	return ret;
+}
+
+/**
+ * zynqmp_clk_register_fixed_factor() - Register fixed factor with the
+ *					clock framework
+ * @name:		Name of this clock
+ * @clk_id:		Clock ID
+ * @parents:		Name of this clock's parents
+ * @num_parents:	Number of parents
+ * @nodes:		Clock topology node
+ *
+ * Return: clock hardware to the registered clock
+ */
+struct clk_hw *zynqmp_clk_register_fixed_factor(const char *name, u32 clk_id,
+					const char * const *parents,
+					u8 num_parents,
+					const struct clock_topology *nodes)
+{
+	u32 mult, div;
+	struct clk_hw *hw;
+	struct zynqmp_pm_query_data qdata = {0};
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	qdata.qid = PM_QID_CLOCK_GET_FIXEDFACTOR_PARAMS;
+	qdata.arg1 = clk_id;
+
+	ret = eemi_ops->query_data(qdata, ret_payload);
+	mult = ret_payload[1];
+	div = ret_payload[2];
+
+	hw = clk_hw_register_fixed_factor(NULL, name,
+					  parents[0],
+					  nodes->flag, mult,
+					  div);
+
+	return hw;
+}
+
+/**
+ * zynqmp_pm_clock_get_parents() - Get the first 3 parents of clock for given id
+ * @clock_id:	Clock ID
+ * @index:	Parent index
+ * @parents:	3 parents of the given clock
+ *
+ * This function is used to get 3 parents for the clock specified by
+ * given clock ID.
+ *
+ * This API will return 3 parents with a single response. To get
+ * other parents, master should call same API in loop with new
+ * parent index till error is returned. E.g First call should have
+ * index 0 which will return parents 0,1 and 2. Next call, index
+ * should be 3 which will return parent 3,4 and 5 and so on.
+ *
+ * Return: 0 on success else error+reason
+ */
+static int zynqmp_pm_clock_get_parents(u32 clock_id, u32 index, u32 *parents)
+{
+	struct zynqmp_pm_query_data qdata = {0};
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	qdata.qid = PM_QID_CLOCK_GET_PARENTS;
+	qdata.arg1 = clock_id;
+	qdata.arg2 = index;
+
+	ret = eemi_ops->query_data(qdata, ret_payload);
+	memcpy(parents, &ret_payload[1], CLK_GET_PARENTS_RESP_WORDS * 4);
+
+	return ret;
+}
+
+/**
+ * zynqmp_pm_clock_get_attributes() - Get the attributes of clock for given id
+ * @clock_id:	Clock ID
+ * @attr:	Clock attributes
+ *
+ * This function is used to get clock's attributes(e.g. valid, clock type, etc).
+ *
+ * Return: 0 on success else error+reason
+ */
+static int zynqmp_pm_clock_get_attributes(u32 clock_id, u32 *attr)
+{
+	struct zynqmp_pm_query_data qdata = {0};
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+
+	qdata.qid = PM_QID_CLOCK_GET_ATTRIBUTES;
+	qdata.arg1 = clock_id;
+
+	ret = eemi_ops->query_data(qdata, ret_payload);
+	memcpy(attr, &ret_payload[1], CLK_GET_ATTR_RESP_WORDS * 4);
+
+	return ret;
+}
+
+/**
+ * __zynqmp_clock_get_topology() - Get topology data of clock from firmware
+ *				   response data
+ * @topology:		Clock topology
+ * @data:		Clock topology data received from firmware
+ * @nnodes:		Number of nodes
+ *
+ * Return: 0 on success else error+reason
+ */
+static int __zynqmp_clock_get_topology(struct clock_topology *topology,
+				       u32 *data, u32 *nnodes)
+{
+	int i;
+
+	for (i = 0; i < PM_API_PAYLOAD_LEN; i++) {
+		if (!(data[i] & CLK_TYPE_FIELD_MASK))
+			return END_OF_TOPOLOGY_NODE;
+		topology[*nnodes].type = data[i] & CLK_TYPE_FIELD_MASK;
+		topology[*nnodes].flag = FIELD_GET(CLK_FLAG_FIELD_MASK,
+						   data[i]);
+		topology[*nnodes].type_flag =
+				FIELD_GET(CLK_TYPE_FLAG_FIELD_MASK, data[i]);
+		(*nnodes)++;
+	}
+
+	return 0;
+}
+
+/**
+ * zynqmp_clock_get_topology() - Get topology of clock from firmware using
+ *				 PM_API
+ * @clk_id:		Clock index
+ * @topology:		Clock topology
+ * @num_nodes:		Number of nodes
+ *
+ * Return: 0 on success else error+reason
+ */
+static int zynqmp_clock_get_topology(u32 clk_id,
+				     struct clock_topology *topology,
+				     u32 *num_nodes)
+{
+	int j, ret;
+	u32 pm_resp[PM_API_PAYLOAD_LEN] = {0};
+
+	*num_nodes = 0;
+	for (j = 0; j <= MAX_NODES; j += 3) {
+		ret = zynqmp_pm_clock_get_topology(clk_id, j, pm_resp);
+		if (ret)
+			return ret;
+		ret = __zynqmp_clock_get_topology(topology, pm_resp, num_nodes);
+		if (ret == END_OF_TOPOLOGY_NODE)
+			return 0;
+	}
+
+	return 0;
+}
+
+/**
+ * __zynqmp_clock_get_topology() - Get parents info of clock from firmware
+ *				   response data
+ * @parents:		Clock parents
+ * @data:		Clock parents data received from firmware
+ * @nparent:		Number of parent
+ *
+ * Return: 0 on success else error+reason
+ */
+static int __zynqmp_clock_get_parents(struct clock_parent *parents, u32 *data,
+				      u32 *nparent)
+{
+	int i;
+	struct clock_parent *parent;
+
+	for (i = 0; i < PM_API_PAYLOAD_LEN; i++) {
+		if (data[i] == NA_PARENT)
+			return END_OF_PARENTS;
+
+		parent = &parents[i];
+		parent->id = data[i] & CLK_PARENTS_ID_MASK;
+		if (data[i] == DUMMY_PARENT) {
+			strcpy(parent->name, "dummy_name");
+			parent->flag = 0;
+		} else {
+			parent->flag = data[i] >> CLK_PARENTS_ID_LEN;
+			if (zynqmp_get_clock_name(parent->id, parent->name))
+				continue;
+		}
+		*nparent += 1;
+	}
+
+	return 0;
+}
+
+/**
+ * zynqmp_clock_get_parents() - Get parents info from firmware using PM_API
+ * @clk_id:		Clock index
+ * @parents:		Clock parents
+ * @num_parents:	Total number of parents
+ *
+ * Return: 0 on success else error+reason
+ */
+static int zynqmp_clock_get_parents(u32 clk_id, struct clock_parent *parents,
+				    u32 *num_parents)
+{
+	int j = 0, ret;
+	u32 pm_resp[PM_API_PAYLOAD_LEN] = {0};
+
+	*num_parents = 0;
+	do {
+		/* Get parents from firmware */
+		ret = zynqmp_pm_clock_get_parents(clk_id, j, pm_resp);
+		if (ret)
+			return ret;
+
+		ret = __zynqmp_clock_get_parents(&parents[j], pm_resp,
+						 num_parents);
+		if (ret == END_OF_PARENTS)
+			return 0;
+		j += PM_API_PAYLOAD_LEN;
+	} while (*num_parents <= MAX_PARENT);
+
+	return 0;
+}
+
+/**
+ * zynqmp_get_parent_list() - Create list of parents name
+ * @np:			Device node
+ * @clk_id:		Clock index
+ * @parent_list:	List of parent's name
+ * @num_parents:	Total number of parents
+ *
+ * Return: 0 on success else error+reason
+ */
+static int zynqmp_get_parent_list(struct device_node *np, u32 clk_id,
+				  const char **parent_list, u32 *num_parents)
+{
+	int i = 0, ret;
+	u32 total_parents = clock[clk_id].num_parents;
+	struct clock_topology *clk_nodes;
+	struct clock_parent *parents;
+
+	clk_nodes = clock[clk_id].node;
+	parents = clock[clk_id].parent;
+
+	for (i = 0; i < total_parents; i++) {
+		if (!parents[i].flag) {
+			parent_list[i] = parents[i].name;
+		} else if (parents[i].flag == PARENT_CLK_EXTERNAL) {
+			ret = of_property_match_string(np, "clock-names",
+						       parents[i].name);
+			if (ret < 0)
+				strcpy(parents[i].name, "dummy_name");
+			parent_list[i] = parents[i].name;
+		} else {
+			strcat(parents[i].name,
+			       clk_type_postfix[clk_nodes[parents[i].flag - 1].
+			       type]);
+			parent_list[i] = parents[i].name;
+		}
+	}
+
+	*num_parents = total_parents;
+	return 0;
+}
+
+/**
+ * zynqmp_register_clk_topology() - Register clock topology
+ * @clk_id:		Clock index
+ * @clk_name:		Clock Name
+ * @num_parents:	Total number of parents
+ * @parent_names:	List of parents name
+ *
+ * Return: Returns either clock hardware or error+reason
+ */
+static struct clk_hw *zynqmp_register_clk_topology(int clk_id, char *clk_name,
+						   int num_parents,
+						   const char **parent_names)
+{
+	int j;
+	u32 num_nodes;
+	char *clk_out = NULL;
+	struct clock_topology *nodes;
+	struct clk_hw *hw = NULL;
+
+	nodes = clock[clk_id].node;
+	num_nodes = clock[clk_id].num_nodes;
+
+	for (j = 0; j < num_nodes; j++) {
+		/*
+		 * Clock name received from firmware is output clock name.
+		 * Intermediate clock names are postfixed with type of clock.
+		 */
+		if (j != (num_nodes - 1)) {
+			clk_out = kasprintf(GFP_KERNEL, "%s%s", clk_name,
+					    clk_type_postfix[nodes[j].type]);
+		} else {
+			clk_out = kasprintf(GFP_KERNEL, "%s", clk_name);
+		}
+
+		if (!clk_topology[nodes[j].type])
+			continue;
+
+		hw = (*clk_topology[nodes[j].type])(clk_out, clk_id,
+						    parent_names,
+						    num_parents,
+						    &nodes[j]);
+		if (IS_ERR(hw))
+			pr_warn_once("%s() %s register fail with %ld\n",
+				     __func__, clk_name, PTR_ERR(hw));
+
+		parent_names[0] = clk_out;
+	}
+	kfree(clk_out);
+	return hw;
+}
+
+/**
+ * zynqmp_register_clocks() - Register clocks
+ * @np:		Device node
+ *
+ * Return: 0 on success else error code
+ */
+static int zynqmp_register_clocks(struct device_node *np)
+{
+	int ret;
+	u32 i, total_parents = 0, type = 0;
+	const char *parent_names[MAX_PARENT];
+
+	for (i = 0; i < clock_max_idx; i++) {
+		char clk_name[MAX_NAME_LEN];
+
+		/* get clock name, continue to next clock if name not found */
+		if (zynqmp_get_clock_name(i, clk_name))
+			continue;
+
+		/* Check if clock is valid and output clock.
+		 * Do not register invalid or external clock.
+		 */
+		ret = zynqmp_get_clock_type(i, &type);
+		if (ret || type != CLK_TYPE_OUTPUT)
+			continue;
+
+		/* Get parents of clock*/
+		if (zynqmp_get_parent_list(np, i, parent_names,
+					   &total_parents)) {
+			WARN_ONCE(1, "No parents found for %s\n",
+				  clock[i].clk_name);
+			continue;
+		}
+
+		zynqmp_data->hws[i] =
+			zynqmp_register_clk_topology(i, clk_name,
+						     total_parents,
+						     parent_names);
+	}
+
+	for (i = 0; i < clock_max_idx; i++) {
+		if (IS_ERR(zynqmp_data->hws[i])) {
+			pr_err("Zynq Ultrascale+ MPSoC clk %s: register failed with %ld\n",
+			       clock[i].clk_name, PTR_ERR(zynqmp_data->hws[i]));
+			WARN_ON(1);
+		}
+	}
+	return 0;
+}
+
+/**
+ * zynqmp_get_clock_info() - Get clock information from firmware using PM_API
+ */
+static void zynqmp_get_clock_info(void)
+{
+	int i, ret;
+	u32 attr, type = 0;
+
+	for (i = 0; i < clock_max_idx; i++) {
+		zynqmp_pm_clock_get_name(i, clock[i].clk_name);
+		if (!strcmp(clock[i].clk_name, RESERVED_CLK_NAME))
+			continue;
+
+		ret = zynqmp_pm_clock_get_attributes(i, &attr);
+		if (ret)
+			continue;
+
+		clock[i].valid = attr & CLK_VALID_MASK;
+		clock[i].type = attr >> CLK_TYPE_SHIFT ? CLK_TYPE_EXTERNAL :
+							CLK_TYPE_OUTPUT;
+	}
+
+	/* Get topology of all clock */
+	for (i = 0; i < clock_max_idx; i++) {
+		ret = zynqmp_get_clock_type(i, &type);
+		if (ret || type != CLK_TYPE_OUTPUT)
+			continue;
+
+		ret = zynqmp_clock_get_topology(i, clock[i].node,
+						&clock[i].num_nodes);
+		if (ret)
+			continue;
+
+		ret = zynqmp_clock_get_parents(i, clock[i].parent,
+					       &clock[i].num_parents);
+		if (ret)
+			continue;
+	}
+}
+
+/**
+ * zynqmp_clk_setup() - Setup the clock framework and register clocks
+ * @np:		Device node
+ *
+ * Return: 0 on success else error code
+ */
+static int zynqmp_clk_setup(struct device_node *np)
+{
+	int ret;
+
+	ret = zynqmp_pm_clock_get_num_clocks(&clock_max_idx);
+	if (ret)
+		return ret;
+
+	zynqmp_data = kzalloc(sizeof(*zynqmp_data) + sizeof(*zynqmp_data) *
+						clock_max_idx, GFP_KERNEL);
+	if (!zynqmp_data)
+		return -ENOMEM;
+
+	clock = kcalloc(clock_max_idx, sizeof(*clock), GFP_KERNEL);
+	if (!clock) {
+		kfree(zynqmp_data);
+		return -ENOMEM;
+	}
+
+	zynqmp_get_clock_info();
+	zynqmp_register_clocks(np);
+
+	zynqmp_data->num = clock_max_idx;
+	of_clk_add_hw_provider(np, of_clk_hw_onecell_get, zynqmp_data);
+
+	return 0;
+}
+
+static int zynqmp_clock_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct device *dev = &pdev->dev;
+
+	eemi_ops = zynqmp_pm_get_eemi_ops();
+	if (!eemi_ops)
+		return -ENXIO;
+
+	ret = zynqmp_clk_setup(dev->of_node);
+
+	return ret;
+}
+
+static const struct of_device_id zynqmp_clock_of_match[] = {
+	{.compatible = "xlnx,zynqmp-clk"},
+	{},
+};
+MODULE_DEVICE_TABLE(of, zynqmp_clock_of_match);
+
+static struct platform_driver zynqmp_clock_driver = {
+	.driver = {
+		.name = "zynqmp_clock",
+		.of_match_table = zynqmp_clock_of_match,
+	},
+	.probe = zynqmp_clock_probe,
+};
+module_platform_driver(zynqmp_clock_driver);
diff --git a/drivers/clk/zynqmp/divider.c b/drivers/clk/zynqmp/divider.c
new file mode 100644
index 000000000000..a371c66e72ef
--- /dev/null
+++ b/drivers/clk/zynqmp/divider.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Zynq UltraScale+ MPSoC Divider support
+ *
+ *  Copyright (C) 2016-2018 Xilinx
+ *
+ * Adjustable divider clock implementation
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/slab.h>
+#include "clk-zynqmp.h"
+
+/*
+ * DOC: basic adjustable divider clock that cannot gate
+ *
+ * Traits of this clock:
+ * prepare - clk_prepare only ensures that parents are prepared
+ * enable - clk_enable only ensures that parents are enabled
+ * rate - rate is adjustable.  clk->rate = ceiling(parent->rate / divisor)
+ * parent - fixed parent.  No clk_set_parent support
+ */
+
+#define to_zynqmp_clk_divider(_hw)		\
+	container_of(_hw, struct zynqmp_clk_divider, hw)
+
+#define CLK_FRAC	BIT(13) /* has a fractional parent */
+
+/**
+ * struct zynqmp_clk_divider - adjustable divider clock
+ * @hw:		handle between common and hardware-specific interfaces
+ * @flags:	Hardware specific flags
+ * @clk_id:	Id of clock
+ * @div_type:	divisor type (TYPE_DIV1 or TYPE_DIV2)
+ */
+struct zynqmp_clk_divider {
+	struct clk_hw hw;
+	u8 flags;
+	u32 clk_id;
+	u32 div_type;
+};
+
+static inline int zynqmp_divider_get_val(unsigned long parent_rate,
+					 unsigned long rate)
+{
+	return DIV_ROUND_CLOSEST(parent_rate, rate);
+}
+
+/**
+ * zynqmp_clk_divider_recalc_rate() - Recalc rate of divider clock
+ * @hw:			handle between common and hardware-specific interfaces
+ * @parent_rate:	rate of parent clock
+ *
+ * Return: 0 on success else error+reason
+ */
+static unsigned long zynqmp_clk_divider_recalc_rate(struct clk_hw *hw,
+						    unsigned long parent_rate)
+{
+	struct zynqmp_clk_divider *divider = to_zynqmp_clk_divider(hw);
+	const char *clk_name = clk_hw_get_name(hw);
+	u32 clk_id = divider->clk_id;
+	u32 div_type = divider->div_type;
+	u32 div, value;
+	int ret;
+	const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops();
+
+	ret = eemi_ops->clock_getdivider(clk_id, &div);
+
+	if (ret)
+		pr_warn_once("%s() get divider failed for %s, ret = %d\n",
+			     __func__, clk_name, ret);
+
+	if (div_type == TYPE_DIV1)
+		value = div & 0xFFFF;
+	else
+		value = div >> 16;
+
+	return DIV_ROUND_UP_ULL(parent_rate, value);
+}
+
+/**
+ * zynqmp_clk_divider_round_rate() - Round rate of divider clock
+ * @hw:			handle between common and hardware-specific interfaces
+ * @rate:		rate of clock to be set
+ * @prate:		rate of parent clock
+ *
+ * Return: 0 on success else error+reason
+ */
+static long zynqmp_clk_divider_round_rate(struct clk_hw *hw,
+					  unsigned long rate,
+					  unsigned long *prate)
+{
+	struct zynqmp_clk_divider *divider = to_zynqmp_clk_divider(hw);
+	const char *clk_name = clk_hw_get_name(hw);
+	u32 clk_id = divider->clk_id;
+	u32 div_type = divider->div_type;
+	u32 bestdiv;
+	int ret;
+	const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops();
+
+	/* if read only, just return current value */
+	if (divider->flags & CLK_DIVIDER_READ_ONLY) {
+		ret = eemi_ops->clock_getdivider(clk_id, &bestdiv);
+
+		if (ret)
+			pr_warn_once("%s() get divider failed for %s, ret = %d\n",
+				     __func__, clk_name, ret);
+		if (div_type == TYPE_DIV1)
+			bestdiv = bestdiv & 0xFFFF;
+		else
+			bestdiv  = bestdiv >> 16;
+
+		return DIV_ROUND_UP_ULL((u64)*prate, bestdiv);
+	}
+
+	bestdiv = zynqmp_divider_get_val(*prate, rate);
+
+	if ((clk_hw_get_flags(hw) & CLK_SET_RATE_PARENT) &&
+	    (divider->flags & CLK_FRAC))
+		bestdiv = rate % *prate ? 1 : bestdiv;
+	*prate = rate * bestdiv;
+
+	return rate;
+}
+
+/**
+ * zynqmp_clk_divider_set_rate() - Set rate of divider clock
+ * @hw:			handle between common and hardware-specific interfaces
+ * @rate:		rate of clock to be set
+ * @parent_rate:	rate of parent clock
+ *
+ * Return: 0 on success else error+reason
+ */
+static int zynqmp_clk_divider_set_rate(struct clk_hw *hw, unsigned long rate,
+				       unsigned long parent_rate)
+{
+	struct zynqmp_clk_divider *divider = to_zynqmp_clk_divider(hw);
+	const char *clk_name = clk_hw_get_name(hw);
+	u32 clk_id = divider->clk_id;
+	u32 div_type = divider->div_type;
+	u32 value, div;
+	int ret;
+	const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops();
+
+	value = zynqmp_divider_get_val(parent_rate, rate);
+	if (div_type == TYPE_DIV1) {
+		div = value & 0xFFFF;
+		div |= 0xffff << 16;
+	} else {
+		div = 0xffff;
+		div |= value << 16;
+	}
+
+	ret = eemi_ops->clock_setdivider(clk_id, div);
+
+	if (ret)
+		pr_warn_once("%s() set divider failed for %s, ret = %d\n",
+			     __func__, clk_name, ret);
+
+	return ret;
+}
+
+static const struct clk_ops zynqmp_clk_divider_ops = {
+	.recalc_rate = zynqmp_clk_divider_recalc_rate,
+	.round_rate = zynqmp_clk_divider_round_rate,
+	.set_rate = zynqmp_clk_divider_set_rate,
+};
+
+/**
+ * zynqmp_clk_register_divider() - Register a divider clock
+ * @name:		Name of this clock
+ * @clk_id:		Id of clock
+ * @parents:		Name of this clock's parents
+ * @num_parents:	Number of parents
+ * @nodes:		Clock topology node
+ *
+ * Return: clock hardware to registered clock divider
+ */
+struct clk_hw *zynqmp_clk_register_divider(const char *name,
+					   u32 clk_id,
+					   const char * const *parents,
+					   u8 num_parents,
+					   const struct clock_topology *nodes)
+{
+	struct zynqmp_clk_divider *div;
+	struct clk_hw *hw;
+	struct clk_init_data init;
+	int ret;
+
+	/* allocate the divider */
+	div = kzalloc(sizeof(*div), GFP_KERNEL);
+	if (!div)
+		return ERR_PTR(-ENOMEM);
+
+	init.name = name;
+	init.ops = &zynqmp_clk_divider_ops;
+	init.flags = nodes->flag;
+	init.parent_names = parents;
+	init.num_parents = 1;
+
+	/* struct clk_divider assignments */
+	div->flags = nodes->type_flag;
+	div->hw.init = &init;
+	div->clk_id = clk_id;
+	div->div_type = nodes->type;
+
+	hw = &div->hw;
+	ret = clk_hw_register(NULL, hw);
+	if (ret) {
+		kfree(div);
+		hw = ERR_PTR(ret);
+	}
+
+	return hw;
+}
+EXPORT_SYMBOL_GPL(zynqmp_clk_register_divider);
diff --git a/drivers/clk/zynqmp/pll.c b/drivers/clk/zynqmp/pll.c
new file mode 100644
index 000000000000..a541397a172c
--- /dev/null
+++ b/drivers/clk/zynqmp/pll.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Zynq UltraScale+ MPSoC PLL driver
+ *
+ *  Copyright (C) 2016-2018 Xilinx
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/slab.h>
+#include "clk-zynqmp.h"
+
+/**
+ * struct zynqmp_pll - PLL clock
+ * @hw:		Handle between common and hardware-specific interfaces
+ * @clk_id:	PLL clock ID
+ */
+struct zynqmp_pll {
+	struct clk_hw hw;
+	u32 clk_id;
+};
+
+#define to_zynqmp_pll(_hw)	container_of(_hw, struct zynqmp_pll, hw)
+
+#define PLL_FBDIV_MIN	25
+#define PLL_FBDIV_MAX	125
+
+#define PS_PLL_VCO_MIN 1500000000
+#define PS_PLL_VCO_MAX 3000000000UL
+
+enum pll_mode {
+	PLL_MODE_INT,
+	PLL_MODE_FRAC,
+};
+
+#define FRAC_OFFSET 0x8
+#define PLLFCFG_FRAC_EN	BIT(31)
+#define FRAC_DIV  BIT(16)  /* 2^16 */
+
+/**
+ * zynqmp_pll_get_mode() - Get mode of PLL
+ * @hw:		Handle between common and hardware-specific interfaces
+ *
+ * Return: Mode of PLL
+ */
+static inline enum pll_mode zynqmp_pll_get_mode(struct clk_hw *hw)
+{
+	struct zynqmp_pll *clk = to_zynqmp_pll(hw);
+	u32 clk_id = clk->clk_id;
+	const char *clk_name = clk_hw_get_name(hw);
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+	const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops();
+
+	ret = eemi_ops->ioctl(0, IOCTL_GET_PLL_FRAC_MODE, clk_id, 0,
+			      ret_payload);
+	if (ret)
+		pr_warn_once("%s() PLL get frac mode failed for %s, ret = %d\n",
+			     __func__, clk_name, ret);
+
+	return ret_payload[1];
+}
+
+/**
+ * zynqmp_pll_set_mode() - Set the PLL mode
+ * @hw:		Handle between common and hardware-specific interfaces
+ * @on:		Flag to determine the mode
+ */
+static inline void zynqmp_pll_set_mode(struct clk_hw *hw, bool on)
+{
+	struct zynqmp_pll *clk = to_zynqmp_pll(hw);
+	u32 clk_id = clk->clk_id;
+	const char *clk_name = clk_hw_get_name(hw);
+	int ret;
+	u32 mode;
+	const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops();
+
+	if (on)
+		mode = PLL_MODE_FRAC;
+	else
+		mode = PLL_MODE_INT;
+
+	ret = eemi_ops->ioctl(0, IOCTL_SET_PLL_FRAC_MODE, clk_id, mode, NULL);
+	if (ret)
+		pr_warn_once("%s() PLL set frac mode failed for %s, ret = %d\n",
+			     __func__, clk_name, ret);
+}
+
+/**
+ * zynqmp_pll_round_rate() - Round a clock frequency
+ * @hw:		Handle between common and hardware-specific interfaces
+ * @rate:	Desired clock frequency
+ * @prate:	Clock frequency of parent clock
+ *
+ * Return: Frequency closest to @rate the hardware can generate
+ */
+static long zynqmp_pll_round_rate(struct clk_hw *hw, unsigned long rate,
+				  unsigned long *prate)
+{
+	u32 fbdiv;
+	long rate_div, f;
+
+	/* Enable the fractional mode if needed */
+	rate_div = (rate * FRAC_DIV) / *prate;
+	f = rate_div % FRAC_DIV;
+	zynqmp_pll_set_mode(hw, !!f);
+
+	if (zynqmp_pll_get_mode(hw) == PLL_MODE_FRAC) {
+		if (rate > PS_PLL_VCO_MAX) {
+			fbdiv = rate / PS_PLL_VCO_MAX;
+			rate = rate / (fbdiv + 1);
+		}
+		if (rate < PS_PLL_VCO_MIN) {
+			fbdiv = DIV_ROUND_UP(PS_PLL_VCO_MIN, rate);
+			rate = rate * fbdiv;
+		}
+		return rate;
+	}
+
+	fbdiv = DIV_ROUND_CLOSEST(rate, *prate);
+	fbdiv = clamp_t(u32, fbdiv, PLL_FBDIV_MIN, PLL_FBDIV_MAX);
+	return *prate * fbdiv;
+}
+
+/**
+ * zynqmp_pll_recalc_rate() - Recalculate clock frequency
+ * @hw:			Handle between common and hardware-specific interfaces
+ * @parent_rate:	Clock frequency of parent clock
+ *
+ * Return: Current clock frequency
+ */
+static unsigned long zynqmp_pll_recalc_rate(struct clk_hw *hw,
+					    unsigned long parent_rate)
+{
+	struct zynqmp_pll *clk = to_zynqmp_pll(hw);
+	u32 clk_id = clk->clk_id;
+	const char *clk_name = clk_hw_get_name(hw);
+	u32 fbdiv, data;
+	unsigned long rate, frac;
+	u32 ret_payload[PAYLOAD_ARG_CNT];
+	int ret;
+	const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops();
+
+	ret = eemi_ops->clock_getdivider(clk_id, &fbdiv);
+	if (ret)
+		pr_warn_once("%s() get divider failed for %s, ret = %d\n",
+			     __func__, clk_name, ret);
+
+	rate =  parent_rate * fbdiv;
+	if (zynqmp_pll_get_mode(hw) == PLL_MODE_FRAC) {
+		eemi_ops->ioctl(0, IOCTL_GET_PLL_FRAC_DATA, clk_id, 0,
+				ret_payload);
+		data = ret_payload[1];
+		frac = (parent_rate * data) / FRAC_DIV;
+		rate = rate + frac;
+	}
+
+	return rate;
+}
+
+/**
+ * zynqmp_pll_set_rate() - Set rate of PLL
+ * @hw:			Handle between common and hardware-specific interfaces
+ * @rate:		Frequency of clock to be set
+ * @parent_rate:	Clock frequency of parent clock
+ *
+ * Set PLL divider to set desired rate.
+ *
+ * Returns:            rate which is set on success else error code
+ */
+static int zynqmp_pll_set_rate(struct clk_hw *hw, unsigned long rate,
+			       unsigned long parent_rate)
+{
+	struct zynqmp_pll *clk = to_zynqmp_pll(hw);
+	u32 clk_id = clk->clk_id;
+	const char *clk_name = clk_hw_get_name(hw);
+	u32 fbdiv;
+	long rate_div, frac, m, f;
+	int ret;
+	const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops();
+
+	if (zynqmp_pll_get_mode(hw) == PLL_MODE_FRAC) {
+		rate_div = (rate * FRAC_DIV) / parent_rate;
+		m = rate_div / FRAC_DIV;
+		f = rate_div % FRAC_DIV;
+		m = clamp_t(u32, m, (PLL_FBDIV_MIN), (PLL_FBDIV_MAX));
+		rate = parent_rate * m;
+		frac = (parent_rate * f) / FRAC_DIV;
+
+		ret = eemi_ops->clock_setdivider(clk_id, m);
+		if (ret)
+			pr_warn_once("%s() set divider failed for %s, ret = %d\n",
+				     __func__, clk_name, ret);
+
+		eemi_ops->ioctl(0, IOCTL_SET_PLL_FRAC_DATA, clk_id, f, NULL);
+
+		return rate + frac;
+	}
+
+	fbdiv = DIV_ROUND_CLOSEST(rate, parent_rate);
+	fbdiv = clamp_t(u32, fbdiv, PLL_FBDIV_MIN, PLL_FBDIV_MAX);
+	ret = eemi_ops->clock_setdivider(clk_id, fbdiv);
+	if (ret)
+		pr_warn_once("%s() set divider failed for %s, ret = %d\n",
+			     __func__, clk_name, ret);
+
+	return parent_rate * fbdiv;
+}
+
+/**
+ * zynqmp_pll_is_enabled() - Check if a clock is enabled
+ * @hw:		Handle between common and hardware-specific interfaces
+ *
+ * Return: 1 if the clock is enabled, 0 otherwise
+ */
+static int zynqmp_pll_is_enabled(struct clk_hw *hw)
+{
+	struct zynqmp_pll *clk = to_zynqmp_pll(hw);
+	const char *clk_name = clk_hw_get_name(hw);
+	u32 clk_id = clk->clk_id;
+	unsigned int state;
+	int ret;
+	const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops();
+
+	ret = eemi_ops->clock_getstate(clk_id, &state);
+	if (ret) {
+		pr_warn_once("%s() clock get state failed for %s, ret = %d\n",
+			     __func__, clk_name, ret);
+		return -EIO;
+	}
+
+	return state ? 1 : 0;
+}
+
+/**
+ * zynqmp_pll_enable() - Enable clock
+ * @hw:		Handle between common and hardware-specific interfaces
+ *
+ * Return: 0 on success else error code
+ */
+static int zynqmp_pll_enable(struct clk_hw *hw)
+{
+	struct zynqmp_pll *clk = to_zynqmp_pll(hw);
+	const char *clk_name = clk_hw_get_name(hw);
+	u32 clk_id = clk->clk_id;
+	int ret;
+	const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops();
+
+	if (zynqmp_pll_is_enabled(hw))
+		return 0;
+
+	ret = eemi_ops->clock_enable(clk_id);
+	if (ret)
+		pr_warn_once("%s() clock enable failed for %s, ret = %d\n",
+			     __func__, clk_name, ret);
+
+	return ret;
+}
+
+/**
+ * zynqmp_pll_disable() - Disable clock
+ * @hw:		Handle between common and hardware-specific interfaces
+ */
+static void zynqmp_pll_disable(struct clk_hw *hw)
+{
+	struct zynqmp_pll *clk = to_zynqmp_pll(hw);
+	const char *clk_name = clk_hw_get_name(hw);
+	u32 clk_id = clk->clk_id;
+	int ret;
+	const struct zynqmp_eemi_ops *eemi_ops = zynqmp_pm_get_eemi_ops();
+
+	if (!zynqmp_pll_is_enabled(hw))
+		return;
+
+	ret = eemi_ops->clock_disable(clk_id);
+	if (ret)
+		pr_warn_once("%s() clock disable failed for %s, ret = %d\n",
+			     __func__, clk_name, ret);
+}
+
+static const struct clk_ops zynqmp_pll_ops = {
+	.enable = zynqmp_pll_enable,
+	.disable = zynqmp_pll_disable,
+	.is_enabled = zynqmp_pll_is_enabled,
+	.round_rate = zynqmp_pll_round_rate,
+	.recalc_rate = zynqmp_pll_recalc_rate,
+	.set_rate = zynqmp_pll_set_rate,
+};
+
+/**
+ * zynqmp_clk_register_pll() - Register PLL with the clock framework
+ * @name:		PLL name
+ * @clk_id:		Clock ID
+ * @parents:		Name of this clock's parents
+ * @num_parents:	Number of parents
+ * @nodes:		Clock topology node
+ *
+ * Return: clock hardware to the registered clock
+ */
+struct clk_hw *zynqmp_clk_register_pll(const char *name, u32 clk_id,
+				       const char * const *parents,
+				       u8 num_parents,
+				       const struct clock_topology *nodes)
+{
+	struct zynqmp_pll *pll;
+	struct clk_hw *hw;
+	struct clk_init_data init;
+	int ret;
+
+	init.name = name;
+	init.ops = &zynqmp_pll_ops;
+	init.flags = nodes->flag;
+	init.parent_names = parents;
+	init.num_parents = 1;
+
+	pll = kzalloc(sizeof(*pll), GFP_KERNEL);
+	if (!pll)
+		return ERR_PTR(-ENOMEM);
+
+	pll->hw.init = &init;
+	pll->clk_id = clk_id;
+
+	hw = &pll->hw;
+	ret = clk_hw_register(NULL, hw);
+	if (ret) {
+		kfree(pll);
+		return ERR_PTR(ret);
+	}
+
+	clk_hw_set_rate_range(hw, PS_PLL_VCO_MIN, PS_PLL_VCO_MAX);
+	if (ret < 0)
+		pr_err("%s:ERROR clk_set_rate_range failed %d\n", name, ret);
+
+	return hw;
+}
diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h
index 7a9db0861803..3c3c28eff56a 100644
--- a/include/linux/firmware/xlnx-zynqmp.h
+++ b/include/linux/firmware/xlnx-zynqmp.h
@@ -72,6 +72,7 @@ enum pm_query_id {
 	PM_QID_CLOCK_GET_FIXEDFACTOR_PARAMS,
 	PM_QID_CLOCK_GET_PARENTS,
 	PM_QID_CLOCK_GET_ATTRIBUTES,
+	PM_QID_CLOCK_GET_NUM_CLOCKS = 12,
 };
 
 /**
-- 
cgit v1.2.3


From 7ed1d91a9ed61afe438ba51cbf49cb567ab7dca8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 24 Sep 2018 13:06:58 +0200
Subject: dma-mapping: translate __GFP_NOFAIL to DMA_ATTR_NO_WARN

This allows all dma_map_ops instances to entirely rely on
DMA_ATTR_NO_WARN going forward.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Robin Murphy <robin.murphy@arm.com>
---
 include/linux/dma-mapping.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 547a48bcfa3d..15bd41447025 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -558,9 +558,11 @@ static inline void dma_free_attrs(struct device *dev, size_t size,
 }
 
 static inline void *dma_alloc_coherent(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t flag)
+		dma_addr_t *dma_handle, gfp_t gfp)
 {
-	return dma_alloc_attrs(dev, size, dma_handle, flag, 0);
+
+	return dma_alloc_attrs(dev, size, dma_handle, gfp,
+			(gfp & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0);
 }
 
 static inline void dma_free_coherent(struct device *dev, size_t size,
@@ -794,8 +796,12 @@ static inline void dmam_release_declared_memory(struct device *dev)
 static inline void *dma_alloc_wc(struct device *dev, size_t size,
 				 dma_addr_t *dma_addr, gfp_t gfp)
 {
-	return dma_alloc_attrs(dev, size, dma_addr, gfp,
-			       DMA_ATTR_WRITE_COMBINE);
+	unsigned long attrs = DMA_ATTR_NO_WARN;
+
+	if (gfp & __GFP_NOWARN)
+		attrs |= DMA_ATTR_NO_WARN;
+
+	return dma_alloc_attrs(dev, size, dma_addr, gfp, attrs);
 }
 #ifndef dma_alloc_writecombine
 #define dma_alloc_writecombine dma_alloc_wc
-- 
cgit v1.2.3


From d7b6801673f95e5f72efd3ffba1bcbb606883049 Mon Sep 17 00:00:00 2001
From: Matias Bjørling <mb@lightnvm.io>
Date: Tue, 9 Oct 2018 13:11:32 +0200
Subject: lightnvm: combine 1.2 and 2.0 command flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add nvm_set_flags helper to enable core to appropriately
set the command flags for read/write/erase depending on which version
a drive supports.

The flags arguments can be distilled into the access hint,
scrambling, and program/erase suspend. Replace the access hint with
a "is_seq" parameter. The rest of the flags are dependent on the
command opcode, which is trivial to detect and set.

Signed-off-by: Matias Bjørling <mb@lightnvm.io>
Reviewed-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/lightnvm/core.c          | 20 ++++++++++++++++++++
 drivers/lightnvm/pblk-core.c     | 13 ++++---------
 drivers/lightnvm/pblk-read.c     |  8 +-------
 drivers/lightnvm/pblk-recovery.c | 14 ++++----------
 drivers/lightnvm/pblk-write.c    |  2 +-
 drivers/lightnvm/pblk.h          | 38 --------------------------------------
 include/linux/lightnvm.h         |  2 ++
 7 files changed, 32 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 60aa7bc5a630..68553c7ae937 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -752,6 +752,24 @@ int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
 }
 EXPORT_SYMBOL(nvm_set_tgt_bb_tbl);
 
+static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
+{
+	int flags = 0;
+
+	if (geo->version == NVM_OCSSD_SPEC_20)
+		return 0;
+
+	if (rqd->is_seq)
+		flags |= geo->pln_mode >> 1;
+
+	if (rqd->opcode == NVM_OP_PREAD)
+		flags |= (NVM_IO_SCRAMBLE_ENABLE | NVM_IO_SUSPEND);
+	else if (rqd->opcode == NVM_OP_PWRITE)
+		flags |= NVM_IO_SCRAMBLE_ENABLE;
+
+	return flags;
+}
+
 int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 {
 	struct nvm_dev *dev = tgt_dev->parent;
@@ -763,6 +781,7 @@ int nvm_submit_io(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 	nvm_rq_tgt_to_dev(tgt_dev, rqd);
 
 	rqd->dev = tgt_dev;
+	rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd);
 
 	/* In case of error, fail with right address format */
 	ret = dev->ops->submit_io(dev, rqd);
@@ -783,6 +802,7 @@ int nvm_submit_io_sync(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 	nvm_rq_tgt_to_dev(tgt_dev, rqd);
 
 	rqd->dev = tgt_dev;
+	rqd->flags = nvm_set_flags(&tgt_dev->geo, rqd);
 
 	/* In case of error, fail with right address format */
 	ret = dev->ops->submit_io_sync(dev, rqd);
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 00984b486fea..72acf2f6dbd6 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -688,7 +688,7 @@ next_rq:
 	if (dir == PBLK_WRITE) {
 		struct pblk_sec_meta *meta_list = rqd.meta_list;
 
-		rqd.flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
+		rqd.is_seq = 1;
 		for (i = 0; i < rqd.nr_ppas; ) {
 			spin_lock(&line->lock);
 			paddr = __pblk_alloc_page(pblk, line, min);
@@ -703,11 +703,9 @@ next_rq:
 		for (i = 0; i < rqd.nr_ppas; ) {
 			struct ppa_addr ppa = addr_to_gen_ppa(pblk, paddr, id);
 			int pos = pblk_ppa_to_pos(geo, ppa);
-			int read_type = PBLK_READ_RANDOM;
 
 			if (pblk_io_aligned(pblk, rq_ppas))
-				read_type = PBLK_READ_SEQUENTIAL;
-			rqd.flags = pblk_set_read_mode(pblk, read_type);
+				rqd.is_seq = 1;
 
 			while (test_bit(pos, line->blk_bitmap)) {
 				paddr += min;
@@ -787,17 +785,14 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
 	__le64 *lba_list = NULL;
 	int i, ret;
 	int cmd_op, bio_op;
-	int flags;
 
 	if (dir == PBLK_WRITE) {
 		bio_op = REQ_OP_WRITE;
 		cmd_op = NVM_OP_PWRITE;
-		flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
 		lba_list = emeta_to_lbas(pblk, line->emeta->buf);
 	} else if (dir == PBLK_READ_RECOV || dir == PBLK_READ) {
 		bio_op = REQ_OP_READ;
 		cmd_op = NVM_OP_PREAD;
-		flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
 	} else
 		return -EINVAL;
 
@@ -822,7 +817,7 @@ static int pblk_line_submit_smeta_io(struct pblk *pblk, struct pblk_line *line,
 
 	rqd.bio = bio;
 	rqd.opcode = cmd_op;
-	rqd.flags = flags;
+	rqd.is_seq = 1;
 	rqd.nr_ppas = lm->smeta_sec;
 
 	for (i = 0; i < lm->smeta_sec; i++, paddr++) {
@@ -885,7 +880,7 @@ static void pblk_setup_e_rq(struct pblk *pblk, struct nvm_rq *rqd,
 	rqd->opcode = NVM_OP_ERASE;
 	rqd->ppa_addr = ppa;
 	rqd->nr_ppas = 1;
-	rqd->flags = pblk_set_progr_mode(pblk, PBLK_ERASE);
+	rqd->is_seq = 1;
 	rqd->bio = NULL;
 }
 
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 5a46d7f9302f..07b7f82c7595 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -93,9 +93,7 @@ next:
 	}
 
 	if (pblk_io_aligned(pblk, nr_secs))
-		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
-	else
-		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+		rqd->is_seq = 1;
 
 #ifdef CONFIG_NVM_PBLK_DEBUG
 	atomic_long_add(nr_secs, &pblk->inflight_reads);
@@ -344,7 +342,6 @@ static int pblk_setup_partial_read(struct pblk *pblk, struct nvm_rq *rqd,
 
 	rqd->bio = new_bio;
 	rqd->nr_ppas = nr_holes;
-	rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
 
 	pr_ctx->ppa_ptr = NULL;
 	pr_ctx->orig_bio = bio;
@@ -438,8 +435,6 @@ retry:
 	} else {
 		rqd->ppa_addr = ppa;
 	}
-
-	rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
 }
 
 int pblk_submit_read(struct pblk *pblk, struct bio *bio)
@@ -663,7 +658,6 @@ int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
 
 	rqd.opcode = NVM_OP_PREAD;
 	rqd.nr_ppas = gc_rq->secs_to_gc;
-	rqd.flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
 	rqd.bio = bio;
 
 	if (pblk_submit_io_sync(pblk, &rqd)) {
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index e232e47e1353..cf629ab016ba 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -159,9 +159,7 @@ next_read_rq:
 	rqd->dma_meta_list = dma_meta_list;
 
 	if (pblk_io_aligned(pblk, rq_ppas))
-		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
-	else
-		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+		rqd->is_seq = 1;
 
 	for (i = 0; i < rqd->nr_ppas; ) {
 		struct ppa_addr ppa;
@@ -302,7 +300,7 @@ next_pad_rq:
 
 	rqd->bio = bio;
 	rqd->opcode = NVM_OP_PWRITE;
-	rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
+	rqd->is_seq = 1;
 	rqd->meta_list = meta_list;
 	rqd->nr_ppas = rq_ppas;
 	rqd->ppa_list = ppa_list;
@@ -436,9 +434,7 @@ next_rq:
 	rqd->dma_meta_list = dma_meta_list;
 
 	if (pblk_io_aligned(pblk, rq_ppas))
-		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
-	else
-		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+		rqd->is_seq = 1;
 
 	for (i = 0; i < rqd->nr_ppas; ) {
 		struct ppa_addr ppa;
@@ -567,9 +563,7 @@ next_rq:
 	rqd->dma_meta_list = dma_meta_list;
 
 	if (pblk_io_aligned(pblk, rq_ppas))
-		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_SEQUENTIAL);
-	else
-		rqd->flags = pblk_set_read_mode(pblk, PBLK_READ_RANDOM);
+		rqd->is_seq = 1;
 
 	for (i = 0; i < rqd->nr_ppas; ) {
 		struct ppa_addr ppa;
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index ee774a86cf1e..508c63701eda 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -302,7 +302,7 @@ static int pblk_alloc_w_rq(struct pblk *pblk, struct nvm_rq *rqd,
 	/* Setup write request */
 	rqd->opcode = NVM_OP_PWRITE;
 	rqd->nr_ppas = nr_secs;
-	rqd->flags = pblk_set_progr_mode(pblk, PBLK_WRITE);
+	rqd->is_seq = 1;
 	rqd->private = pblk;
 	rqd->end_io = end_io;
 
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 4760af7b6499..48b3035df3c4 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -1255,44 +1255,6 @@ static inline u32 pblk_calc_emeta_crc(struct pblk *pblk,
 	return crc;
 }
 
-static inline int pblk_set_progr_mode(struct pblk *pblk, int type)
-{
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
-	int flags;
-
-	if (geo->version == NVM_OCSSD_SPEC_20)
-		return 0;
-
-	flags = geo->pln_mode >> 1;
-
-	if (type == PBLK_WRITE)
-		flags |= NVM_IO_SCRAMBLE_ENABLE;
-
-	return flags;
-}
-
-enum {
-	PBLK_READ_RANDOM	= 0,
-	PBLK_READ_SEQUENTIAL	= 1,
-};
-
-static inline int pblk_set_read_mode(struct pblk *pblk, int type)
-{
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
-	int flags;
-
-	if (geo->version == NVM_OCSSD_SPEC_20)
-		return 0;
-
-	flags = NVM_IO_SUSPEND | NVM_IO_SCRAMBLE_ENABLE;
-	if (type == PBLK_READ_SEQUENTIAL)
-		flags |= geo->pln_mode >> 1;
-
-	return flags;
-}
-
 static inline int pblk_io_aligned(struct pblk *pblk, int nr_secs)
 {
 	return !(nr_secs % pblk->min_write_pgs);
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index e9e0d1c7eaf5..8acc2fe277d6 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -305,6 +305,8 @@ struct nvm_rq {
 	u64 ppa_status; /* ppa media status */
 	int error;
 
+	int is_seq; /* Sequential hint flag. 1.2 only */
+
 	void *private;
 };
 
-- 
cgit v1.2.3


From 656e33ca3d405196f94133babc4e38454a49cb73 Mon Sep 17 00:00:00 2001
From: Matias Bjørling <mb@lightnvm.io>
Date: Tue, 9 Oct 2018 13:11:34 +0200
Subject: lightnvm: move device L2P detection to core
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A 1.2 device is able to manage the logical to physical mapping
table internally or leave it to the host.

A target only supports one of those approaches, and therefore must
check on initialization. Move this check to core to avoid each target
implement the check.

Signed-off-by: Matias Bjørling <mb@lightnvm.io>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/lightnvm/core.c      | 5 +++++
 drivers/lightnvm/pblk-init.c | 7 -------
 include/linux/lightnvm.h     | 6 ++++++
 3 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 68553c7ae937..964352720a03 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -355,6 +355,11 @@ static int nvm_create_tgt(struct nvm_dev *dev, struct nvm_ioctl_create *create)
 		return -EINVAL;
 	}
 
+	if ((tt->flags & NVM_TGT_F_HOST_L2P) != (dev->geo.dom & NVM_RSP_L2P)) {
+		pr_err("nvm: device is incompatible with target L2P type.\n");
+		return -EINVAL;
+	}
+
 	if (nvm_target_exists(create->tgtname)) {
 		pr_err("nvm: target name already exists (%s)\n",
 							create->tgtname);
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 537e98f2b24a..039f62d05e84 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -1210,13 +1210,6 @@ static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (geo->version == NVM_OCSSD_SPEC_12 && geo->dom & NVM_RSP_L2P) {
-		pblk_err(pblk, "host-side L2P table not supported. (%x)\n",
-							geo->dom);
-		kfree(pblk);
-		return ERR_PTR(-EINVAL);
-	}
-
 	spin_lock_init(&pblk->resubmit_lock);
 	spin_lock_init(&pblk->trans_lock);
 	spin_lock_init(&pblk->lock);
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 8acc2fe277d6..f4a84694e5e2 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -495,9 +495,15 @@ typedef void (nvm_tgt_exit_fn)(void *, bool);
 typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *);
 typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *);
 
+enum {
+	NVM_TGT_F_DEV_L2P = 0,
+	NVM_TGT_F_HOST_L2P = 1 << 0,
+};
+
 struct nvm_tgt_type {
 	const char *name;
 	unsigned int version[3];
+	int flags;
 
 	/* target entry points */
 	nvm_tgt_make_rq_fn *make_rq;
-- 
cgit v1.2.3


From aff3fb18f957de93e629c7d3d2c4ef1f360aa511 Mon Sep 17 00:00:00 2001
From: Matias Bjørling <mb@lightnvm.io>
Date: Tue, 9 Oct 2018 13:11:36 +0200
Subject: lightnvm: move bad block and chunk state logic to core
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

pblk implements two data paths for recovery line state. One for 1.2
and another for 2.0, instead of having pblk implement these, combine
them in the core to reduce complexity and make available to other
targets.

The new interface will adhere to the 2.0 chunk definition,
including managing open chunks with an active write pointer. To provide
this interface, a 1.2 device recovers the state of the chunks by
manually detecting if a chunk is either free/open/close/offline, and if
open, scanning the flash pages sequentially to find the next writeable
page. This process takes on average ~10 seconds on a device with 64 dies,
1024 blocks and 60us read access time. The process can be parallelized
but is left out for maintenance simplicity, as the 1.2 specification is
deprecated. For 2.0 devices, the logic is maintained internally in the
drive and retrieved through the 2.0 interface.

Signed-off-by: Matias Bjørling <mb@lightnvm.io>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/lightnvm/core.c      | 309 +++++++++++++++++++++++++++++++++++--------
 drivers/lightnvm/pblk-core.c |   6 +-
 drivers/lightnvm/pblk-init.c | 116 +---------------
 drivers/lightnvm/pblk.h      |   2 +-
 drivers/nvme/host/lightnvm.c |   4 +-
 include/linux/lightnvm.h     |  15 +--
 6 files changed, 265 insertions(+), 187 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 964352720a03..8df188e0767e 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -717,46 +717,6 @@ static void nvm_free_rqd_ppalist(struct nvm_tgt_dev *tgt_dev,
 	nvm_dev_dma_free(tgt_dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
 }
 
-int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct nvm_chk_meta *meta,
-		struct ppa_addr ppa, int nchks)
-{
-	struct nvm_dev *dev = tgt_dev->parent;
-
-	nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1);
-
-	return dev->ops->get_chk_meta(tgt_dev->parent, meta,
-						(sector_t)ppa.ppa, nchks);
-}
-EXPORT_SYMBOL(nvm_get_chunk_meta);
-
-int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
-		       int nr_ppas, int type)
-{
-	struct nvm_dev *dev = tgt_dev->parent;
-	struct nvm_rq rqd;
-	int ret;
-
-	if (nr_ppas > NVM_MAX_VLBA) {
-		pr_err("nvm: unable to update all blocks atomically\n");
-		return -EINVAL;
-	}
-
-	memset(&rqd, 0, sizeof(struct nvm_rq));
-
-	nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas);
-	nvm_rq_tgt_to_dev(tgt_dev, &rqd);
-
-	ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
-	nvm_free_rqd_ppalist(tgt_dev, &rqd);
-	if (ret) {
-		pr_err("nvm: failed bb mark\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(nvm_set_tgt_bb_tbl);
-
 static int nvm_set_flags(struct nvm_geo *geo, struct nvm_rq *rqd)
 {
 	int flags = 0;
@@ -830,27 +790,159 @@ void nvm_end_io(struct nvm_rq *rqd)
 }
 EXPORT_SYMBOL(nvm_end_io);
 
+static int nvm_submit_io_sync_raw(struct nvm_dev *dev, struct nvm_rq *rqd)
+{
+	if (!dev->ops->submit_io_sync)
+		return -ENODEV;
+
+	rqd->flags = nvm_set_flags(&dev->geo, rqd);
+
+	return dev->ops->submit_io_sync(dev, rqd);
+}
+
+static int nvm_bb_chunk_sense(struct nvm_dev *dev, struct ppa_addr ppa)
+{
+	struct nvm_rq rqd = { NULL };
+	struct bio bio;
+	struct bio_vec bio_vec;
+	struct page *page;
+	int ret;
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
+	bio_init(&bio, &bio_vec, 1);
+	bio_add_page(&bio, page, PAGE_SIZE, 0);
+	bio_set_op_attrs(&bio, REQ_OP_READ, 0);
+
+	rqd.bio = &bio;
+	rqd.opcode = NVM_OP_PREAD;
+	rqd.is_seq = 1;
+	rqd.nr_ppas = 1;
+	rqd.ppa_addr = generic_to_dev_addr(dev, ppa);
+
+	ret = nvm_submit_io_sync_raw(dev, &rqd);
+	if (ret)
+		return ret;
+
+	__free_page(page);
+
+	return rqd.error;
+}
+
 /*
- * folds a bad block list from its plane representation to its virtual
- * block representation. The fold is done in place and reduced size is
- * returned.
- *
- * If any of the planes status are bad or grown bad block, the virtual block
- * is marked bad. If not bad, the first plane state acts as the block state.
+ * Scans a 1.2 chunk first and last page to determine if its state.
+ * If the chunk is found to be open, also scan it to update the write
+ * pointer.
  */
-int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
+static int nvm_bb_chunk_scan(struct nvm_dev *dev, struct ppa_addr ppa,
+			     struct nvm_chk_meta *meta)
 {
 	struct nvm_geo *geo = &dev->geo;
-	int blk, offset, pl, blktype;
+	int ret, pg, pl;
 
-	if (nr_blks != geo->num_chk * geo->pln_mode)
-		return -EINVAL;
+	/* sense first page */
+	ret = nvm_bb_chunk_sense(dev, ppa);
+	if (ret < 0) /* io error */
+		return ret;
+	else if (ret == 0) /* valid data */
+		meta->state = NVM_CHK_ST_OPEN;
+	else if (ret > 0) {
+		/*
+		 * If empty page, the chunk is free, else it is an
+		 * actual io error. In that case, mark it offline.
+		 */
+		switch (ret) {
+		case NVM_RSP_ERR_EMPTYPAGE:
+			meta->state = NVM_CHK_ST_FREE;
+			return 0;
+		case NVM_RSP_ERR_FAILCRC:
+		case NVM_RSP_ERR_FAILECC:
+		case NVM_RSP_WARN_HIGHECC:
+			meta->state = NVM_CHK_ST_OPEN;
+			goto scan;
+		default:
+			return -ret; /* other io error */
+		}
+	}
+
+	/* sense last page */
+	ppa.g.pg = geo->num_pg - 1;
+	ppa.g.pl = geo->num_pln - 1;
+
+	ret = nvm_bb_chunk_sense(dev, ppa);
+	if (ret < 0) /* io error */
+		return ret;
+	else if (ret == 0) { /* Chunk fully written */
+		meta->state = NVM_CHK_ST_CLOSED;
+		meta->wp = geo->clba;
+		return 0;
+	} else if (ret > 0) {
+		switch (ret) {
+		case NVM_RSP_ERR_EMPTYPAGE:
+		case NVM_RSP_ERR_FAILCRC:
+		case NVM_RSP_ERR_FAILECC:
+		case NVM_RSP_WARN_HIGHECC:
+			meta->state = NVM_CHK_ST_OPEN;
+			break;
+		default:
+			return -ret; /* other io error */
+		}
+	}
+
+scan:
+	/*
+	 * chunk is open, we scan sequentially to update the write pointer.
+	 * We make the assumption that targets write data across all planes
+	 * before moving to the next page.
+	 */
+	for (pg = 0; pg < geo->num_pg; pg++) {
+		for (pl = 0; pl < geo->num_pln; pl++) {
+			ppa.g.pg = pg;
+			ppa.g.pl = pl;
+
+			ret = nvm_bb_chunk_sense(dev, ppa);
+			if (ret < 0) /* io error */
+				return ret;
+			else if (ret == 0) {
+				meta->wp += geo->ws_min;
+			} else if (ret > 0) {
+				switch (ret) {
+				case NVM_RSP_ERR_EMPTYPAGE:
+					return 0;
+				case NVM_RSP_ERR_FAILCRC:
+				case NVM_RSP_ERR_FAILECC:
+				case NVM_RSP_WARN_HIGHECC:
+					meta->wp += geo->ws_min;
+					break;
+				default:
+					return -ret; /* other io error */
+				}
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * folds a bad block list from its plane representation to its
+ * chunk representation.
+ *
+ * If any of the planes status are bad or grown bad, the chunk is marked
+ * offline. If not bad, the first plane state acts as the chunk state.
+ */
+static int nvm_bb_to_chunk(struct nvm_dev *dev, struct ppa_addr ppa,
+			   u8 *blks, int nr_blks, struct nvm_chk_meta *meta)
+{
+	struct nvm_geo *geo = &dev->geo;
+	int ret, blk, pl, offset, blktype;
 
 	for (blk = 0; blk < geo->num_chk; blk++) {
 		offset = blk * geo->pln_mode;
 		blktype = blks[offset];
 
-		/* Bad blocks on any planes take precedence over other types */
 		for (pl = 0; pl < geo->pln_mode; pl++) {
 			if (blks[offset + pl] &
 					(NVM_BLK_T_BAD|NVM_BLK_T_GRWN_BAD)) {
@@ -859,23 +951,124 @@ int nvm_bb_tbl_fold(struct nvm_dev *dev, u8 *blks, int nr_blks)
 			}
 		}
 
-		blks[blk] = blktype;
+		ppa.g.blk = blk;
+
+		meta->wp = 0;
+		meta->type = NVM_CHK_TP_W_SEQ;
+		meta->wi = 0;
+		meta->slba = generic_to_dev_addr(dev, ppa).ppa;
+		meta->cnlb = dev->geo.clba;
+
+		if (blktype == NVM_BLK_T_FREE) {
+			ret = nvm_bb_chunk_scan(dev, ppa, meta);
+			if (ret)
+				return ret;
+		} else {
+			meta->state = NVM_CHK_ST_OFFLINE;
+		}
+
+		meta++;
 	}
 
-	return geo->num_chk;
+	return 0;
+}
+
+static int nvm_get_bb_meta(struct nvm_dev *dev, sector_t slba,
+			   int nchks, struct nvm_chk_meta *meta)
+{
+	struct nvm_geo *geo = &dev->geo;
+	struct ppa_addr ppa;
+	u8 *blks;
+	int ch, lun, nr_blks;
+	int ret;
+
+	ppa.ppa = slba;
+	ppa = dev_to_generic_addr(dev, ppa);
+
+	if (ppa.g.blk != 0)
+		return -EINVAL;
+
+	if ((nchks % geo->num_chk) != 0)
+		return -EINVAL;
+
+	nr_blks = geo->num_chk * geo->pln_mode;
+
+	blks = kmalloc(nr_blks, GFP_KERNEL);
+	if (!blks)
+		return -ENOMEM;
+
+	for (ch = ppa.g.ch; ch < geo->num_ch; ch++) {
+		for (lun = ppa.g.lun; lun < geo->num_lun; lun++) {
+			struct ppa_addr ppa_gen, ppa_dev;
+
+			if (!nchks)
+				goto done;
+
+			ppa_gen.ppa = 0;
+			ppa_gen.g.ch = ch;
+			ppa_gen.g.lun = lun;
+			ppa_dev = generic_to_dev_addr(dev, ppa_gen);
+
+			ret = dev->ops->get_bb_tbl(dev, ppa_dev, blks);
+			if (ret)
+				goto done;
+
+			ret = nvm_bb_to_chunk(dev, ppa_gen, blks, nr_blks,
+									meta);
+			if (ret)
+				goto done;
+
+			meta += geo->num_chk;
+			nchks -= geo->num_chk;
+		}
+	}
+done:
+	kfree(blks);
+	return ret;
 }
-EXPORT_SYMBOL(nvm_bb_tbl_fold);
 
-int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa,
-		       u8 *blks)
+int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr ppa,
+		       int nchks, struct nvm_chk_meta *meta)
 {
 	struct nvm_dev *dev = tgt_dev->parent;
 
 	nvm_ppa_tgt_to_dev(tgt_dev, &ppa, 1);
 
-	return dev->ops->get_bb_tbl(dev, ppa, blks);
+	if (dev->geo.version == NVM_OCSSD_SPEC_12)
+		return nvm_get_bb_meta(dev, (sector_t)ppa.ppa, nchks, meta);
+
+	return dev->ops->get_chk_meta(dev, (sector_t)ppa.ppa, nchks, meta);
+}
+EXPORT_SYMBOL_GPL(nvm_get_chunk_meta);
+
+int nvm_set_chunk_meta(struct nvm_tgt_dev *tgt_dev, struct ppa_addr *ppas,
+		       int nr_ppas, int type)
+{
+	struct nvm_dev *dev = tgt_dev->parent;
+	struct nvm_rq rqd;
+	int ret;
+
+	if (dev->geo.version == NVM_OCSSD_SPEC_20)
+		return 0;
+
+	if (nr_ppas > NVM_MAX_VLBA) {
+		pr_err("nvm: unable to update all blocks atomically\n");
+		return -EINVAL;
+	}
+
+	memset(&rqd, 0, sizeof(struct nvm_rq));
+
+	nvm_set_rqd_ppalist(tgt_dev, &rqd, ppas, nr_ppas);
+	nvm_rq_tgt_to_dev(tgt_dev, &rqd);
+
+	ret = dev->ops->set_bb_tbl(dev, &rqd.ppa_addr, rqd.nr_ppas, type);
+	nvm_free_rqd_ppalist(tgt_dev, &rqd);
+	if (ret)
+		return -EINVAL;
+
+	return 0;
 }
-EXPORT_SYMBOL(nvm_get_tgt_bb_tbl);
+EXPORT_SYMBOL_GPL(nvm_set_chunk_meta);
 
 static int nvm_core_init(struct nvm_dev *dev)
 {
diff --git a/drivers/lightnvm/pblk-core.c b/drivers/lightnvm/pblk-core.c
index 72de7456845b..e0b513d07e14 100644
--- a/drivers/lightnvm/pblk-core.c
+++ b/drivers/lightnvm/pblk-core.c
@@ -27,7 +27,7 @@ static void pblk_line_mark_bb(struct work_struct *work)
 	struct ppa_addr *ppa = line_ws->priv;
 	int ret;
 
-	ret = nvm_set_tgt_bb_tbl(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
+	ret = nvm_set_chunk_meta(dev, ppa, 1, NVM_BLK_T_GRWN_BAD);
 	if (ret) {
 		struct pblk_line *line;
 		int pos;
@@ -110,7 +110,7 @@ static void pblk_end_io_erase(struct nvm_rq *rqd)
  *
  * The caller is responsible for freeing the returned structure
  */
-struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk)
+struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
 	struct nvm_geo *geo = &dev->geo;
@@ -126,7 +126,7 @@ struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk)
 	if (!meta)
 		return ERR_PTR(-ENOMEM);
 
-	ret = nvm_get_chunk_meta(dev, meta, ppa, geo->all_chunks);
+	ret = nvm_get_chunk_meta(dev, ppa, geo->all_chunks, meta);
 	if (ret) {
 		kfree(meta);
 		return ERR_PTR(-EIO);
diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
index 039f62d05e84..53bd52114aee 100644
--- a/drivers/lightnvm/pblk-init.c
+++ b/drivers/lightnvm/pblk-init.c
@@ -540,67 +540,6 @@ static void pblk_lines_free(struct pblk *pblk)
 	kfree(pblk->lines);
 }
 
-static int pblk_bb_get_tbl(struct nvm_tgt_dev *dev, struct pblk_lun *rlun,
-			   u8 *blks, int nr_blks)
-{
-	struct ppa_addr ppa;
-	int ret;
-
-	ppa.ppa = 0;
-	ppa.g.ch = rlun->bppa.g.ch;
-	ppa.g.lun = rlun->bppa.g.lun;
-
-	ret = nvm_get_tgt_bb_tbl(dev, ppa, blks);
-	if (ret)
-		return ret;
-
-	nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);
-	if (nr_blks < 0)
-		return -EIO;
-
-	return 0;
-}
-
-static void *pblk_bb_get_meta(struct pblk *pblk)
-{
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
-	u8 *meta;
-	int i, nr_blks, blk_per_lun;
-	int ret;
-
-	blk_per_lun = geo->num_chk * geo->pln_mode;
-	nr_blks = blk_per_lun * geo->all_luns;
-
-	meta = kmalloc(nr_blks, GFP_KERNEL);
-	if (!meta)
-		return ERR_PTR(-ENOMEM);
-
-	for (i = 0; i < geo->all_luns; i++) {
-		struct pblk_lun *rlun = &pblk->luns[i];
-		u8 *meta_pos = meta + i * blk_per_lun;
-
-		ret = pblk_bb_get_tbl(dev, rlun, meta_pos, blk_per_lun);
-		if (ret) {
-			kfree(meta);
-			return ERR_PTR(-EIO);
-		}
-	}
-
-	return meta;
-}
-
-static void *pblk_chunk_get_meta(struct pblk *pblk)
-{
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
-
-	if (geo->version == NVM_OCSSD_SPEC_12)
-		return pblk_bb_get_meta(pblk);
-	else
-		return pblk_chunk_get_info(pblk);
-}
-
 static int pblk_luns_init(struct pblk *pblk)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
@@ -699,51 +638,7 @@ static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
 	atomic_set(&pblk->rl.free_user_blocks, nr_free_blks);
 }
 
-static int pblk_setup_line_meta_12(struct pblk *pblk, struct pblk_line *line,
-				   void *chunk_meta)
-{
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
-	struct pblk_line_meta *lm = &pblk->lm;
-	int i, chk_per_lun, nr_bad_chks = 0;
-
-	chk_per_lun = geo->num_chk * geo->pln_mode;
-
-	for (i = 0; i < lm->blk_per_line; i++) {
-		struct pblk_lun *rlun = &pblk->luns[i];
-		struct nvm_chk_meta *chunk;
-		int pos = pblk_ppa_to_pos(geo, rlun->bppa);
-		u8 *lun_bb_meta = chunk_meta + pos * chk_per_lun;
-
-		chunk = &line->chks[pos];
-
-		/*
-		 * In 1.2 spec. chunk state is not persisted by the device. Thus
-		 * some of the values are reset each time pblk is instantiated,
-		 * so we have to assume that the block is closed.
-		 */
-		if (lun_bb_meta[line->id] == NVM_BLK_T_FREE)
-			chunk->state =  NVM_CHK_ST_CLOSED;
-		else
-			chunk->state = NVM_CHK_ST_OFFLINE;
-
-		chunk->type = NVM_CHK_TP_W_SEQ;
-		chunk->wi = 0;
-		chunk->slba = -1;
-		chunk->cnlb = geo->clba;
-		chunk->wp = 0;
-
-		if (!(chunk->state & NVM_CHK_ST_OFFLINE))
-			continue;
-
-		set_bit(pos, line->blk_bitmap);
-		nr_bad_chks++;
-	}
-
-	return nr_bad_chks;
-}
-
-static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line,
+static int pblk_setup_line_meta_chk(struct pblk *pblk, struct pblk_line *line,
 				   struct nvm_chk_meta *meta)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
@@ -790,8 +685,6 @@ static int pblk_setup_line_meta_20(struct pblk *pblk, struct pblk_line *line,
 static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line,
 				 void *chunk_meta, int line_id)
 {
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line_meta *lm = &pblk->lm;
 	long nr_bad_chks, chk_in_line;
@@ -804,10 +697,7 @@ static long pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line,
 	line->vsc = &l_mg->vsc_list[line_id];
 	spin_lock_init(&line->lock);
 
-	if (geo->version == NVM_OCSSD_SPEC_12)
-		nr_bad_chks = pblk_setup_line_meta_12(pblk, line, chunk_meta);
-	else
-		nr_bad_chks = pblk_setup_line_meta_20(pblk, line, chunk_meta);
+	nr_bad_chks = pblk_setup_line_meta_chk(pblk, line, chunk_meta);
 
 	chk_in_line = lm->blk_per_line - nr_bad_chks;
 	if (nr_bad_chks < 0 || nr_bad_chks > lm->blk_per_line ||
@@ -1058,7 +948,7 @@ static int pblk_lines_init(struct pblk *pblk)
 	if (ret)
 		goto fail_free_meta;
 
-	chunk_meta = pblk_chunk_get_meta(pblk);
+	chunk_meta = pblk_get_chunk_meta(pblk);
 	if (IS_ERR(chunk_meta)) {
 		ret = PTR_ERR(chunk_meta);
 		goto fail_free_luns;
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 48b3035df3c4..579b4ea9716c 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -774,7 +774,7 @@ void pblk_set_sec_per_write(struct pblk *pblk, int sec_per_write);
 int pblk_setup_w_rec_rq(struct pblk *pblk, struct nvm_rq *rqd,
 			struct pblk_c_ctx *c_ctx);
 void pblk_discard(struct pblk *pblk, struct bio *bio);
-struct nvm_chk_meta *pblk_chunk_get_info(struct pblk *pblk);
+struct nvm_chk_meta *pblk_get_chunk_meta(struct pblk *pblk);
 struct nvm_chk_meta *pblk_chunk_get_off(struct pblk *pblk,
 					      struct nvm_chk_meta *lp,
 					      struct ppa_addr ppa);
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index 1e4f97538838..e42af7771fe5 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -567,8 +567,8 @@ static int nvme_nvm_set_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr *ppas,
  * Expect the lba in device format
  */
 static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
-				 struct nvm_chk_meta *meta,
-				 sector_t slba, int nchks)
+				 sector_t slba, int nchks,
+				 struct nvm_chk_meta *meta)
 {
 	struct nvm_geo *geo = &ndev->geo;
 	struct nvme_ns *ns = ndev->q->queuedata;
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index f4a84694e5e2..0106984400bc 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -86,8 +86,8 @@ struct nvm_chk_meta;
 typedef int (nvm_id_fn)(struct nvm_dev *);
 typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *);
 typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int);
-typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, struct nvm_chk_meta *,
-								sector_t, int);
+typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int,
+							struct nvm_chk_meta *);
 typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
 typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *);
 typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *);
@@ -532,18 +532,13 @@ extern struct nvm_dev *nvm_alloc_dev(int);
 extern int nvm_register(struct nvm_dev *);
 extern void nvm_unregister(struct nvm_dev *);
 
-
-extern int nvm_get_chunk_meta(struct nvm_tgt_dev *tgt_dev,
-			      struct nvm_chk_meta *meta, struct ppa_addr ppa,
-			      int nchks);
-
-extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *,
+extern int nvm_get_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr,
+			      int, struct nvm_chk_meta *);
+extern int nvm_set_chunk_meta(struct nvm_tgt_dev *, struct ppa_addr *,
 			      int, int);
 extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *);
 extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *);
 extern void nvm_end_io(struct nvm_rq *);
-extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int);
-extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *);
 
 #else /* CONFIG_NVM */
 struct nvm_dev_ops;
-- 
cgit v1.2.3


From 2cf99bbd106f89fc72f778e8ad9d5538f1ef939b Mon Sep 17 00:00:00 2001
From: Javier González <javier@javigon.com>
Date: Tue, 9 Oct 2018 13:11:41 +0200
Subject: lightnvm: pblk: add helpers for chunk addresses
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement helpers to go from ppas to a chunk within a line and an
address within a chunk.

These helpers will be used on the patches adding trace support in pblk,
which will be sent in this window.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <mb@lightnvm.io>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/lightnvm/pblk.h  | 19 +++++++++++++++++++
 include/linux/lightnvm.h | 19 +++++++++++++++++++
 2 files changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index be67bbfa3d0a..f95fe75fef6e 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -1034,6 +1034,25 @@ static inline struct ppa_addr addr_to_gen_ppa(struct pblk *pblk, u64 paddr,
 	return ppa;
 }
 
+static inline struct nvm_chk_meta *pblk_dev_ppa_to_chunk(struct pblk *pblk,
+							struct ppa_addr p)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+	struct nvm_geo *geo = &dev->geo;
+	struct pblk_line *line = &pblk->lines[pblk_ppa_to_line(p)];
+	int pos = pblk_ppa_to_pos(geo, p);
+
+	return &line->chks[pos];
+}
+
+static inline u64 pblk_dev_ppa_to_chunk_addr(struct pblk *pblk,
+							struct ppa_addr p)
+{
+	struct nvm_tgt_dev *dev = pblk->dev;
+
+	return dev_to_chunk_addr(dev->parent, &pblk->addrf, p);
+}
+
 static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk,
 							struct ppa_addr p)
 {
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 0106984400bc..77743a02ec0d 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -487,6 +487,25 @@ static inline struct ppa_addr dev_to_generic_addr(struct nvm_dev *dev,
 	return l;
 }
 
+static inline u64 dev_to_chunk_addr(struct nvm_dev *dev, void *addrf,
+				    struct ppa_addr p)
+{
+	struct nvm_geo *geo = &dev->geo;
+	u64 caddr;
+
+	if (geo->version == NVM_OCSSD_SPEC_12) {
+		struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)addrf;
+
+		caddr = (u64)p.g.pg << ppaf->pg_offset;
+		caddr |= (u64)p.g.pl << ppaf->pln_offset;
+		caddr |= (u64)p.g.sec << ppaf->sec_offset;
+	} else {
+		caddr = p.m.sec;
+	}
+
+	return caddr;
+}
+
 typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
 typedef sector_t (nvm_tgt_capacity_fn)(void *);
 typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *,
-- 
cgit v1.2.3


From d68a9344041b6dd304ff382d0c7805869f09944f Mon Sep 17 00:00:00 2001
From: Hans Holmberg <hans.holmberg@cnexlabs.com>
Date: Tue, 9 Oct 2018 13:11:46 +0200
Subject: lightnvm: introduce nvm_rq_to_ppa_list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is a number of places in the lightnvm subsystem where the user
iterates over the ppa list. Before iterating, the user must know if it
is a single or multiple LBAs due to vector commands using either the
nvm_rq ->ppa_addr or ->ppa_list fields on command submission, which
leads to open-coding the if/else statement.

Instead of having multiple if/else's, move it into a function that can
be called by its users.

A nice side effect of this cleanup is that this patch fixes up a
bunch of cases where we don't consider the single-ppa case in pblk.

Signed-off-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
Signed-off-by: Matias Bjørling <mb@lightnvm.io>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/lightnvm/core.c          | 14 ++++----------
 drivers/lightnvm/pblk-map.c      | 10 ++++++----
 drivers/lightnvm/pblk-read.c     | 11 ++++-------
 drivers/lightnvm/pblk-recovery.c |  9 ++++++---
 drivers/lightnvm/pblk-write.c    | 18 ++++++++----------
 drivers/lightnvm/pblk.h          |  4 +---
 include/linux/lightnvm.h         |  5 +++++
 7 files changed, 34 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/core.c b/drivers/lightnvm/core.c
index 8df188e0767e..efb976a863d2 100644
--- a/drivers/lightnvm/core.c
+++ b/drivers/lightnvm/core.c
@@ -603,22 +603,16 @@ static void nvm_ppa_dev_to_tgt(struct nvm_tgt_dev *tgt_dev,
 
 static void nvm_rq_tgt_to_dev(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 {
-	if (rqd->nr_ppas == 1) {
-		nvm_ppa_tgt_to_dev(tgt_dev, &rqd->ppa_addr, 1);
-		return;
-	}
+	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
 
-	nvm_ppa_tgt_to_dev(tgt_dev, rqd->ppa_list, rqd->nr_ppas);
+	nvm_ppa_tgt_to_dev(tgt_dev, ppa_list, rqd->nr_ppas);
 }
 
 static void nvm_rq_dev_to_tgt(struct nvm_tgt_dev *tgt_dev, struct nvm_rq *rqd)
 {
-	if (rqd->nr_ppas == 1) {
-		nvm_ppa_dev_to_tgt(tgt_dev, &rqd->ppa_addr, 1);
-		return;
-	}
+	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
 
-	nvm_ppa_dev_to_tgt(tgt_dev, rqd->ppa_list, rqd->nr_ppas);
+	nvm_ppa_dev_to_tgt(tgt_dev, ppa_list, rqd->nr_ppas);
 }
 
 int nvm_register_tgt_type(struct nvm_tgt_type *tt)
diff --git a/drivers/lightnvm/pblk-map.c b/drivers/lightnvm/pblk-map.c
index 953ca31dda68..dc0efb852475 100644
--- a/drivers/lightnvm/pblk-map.c
+++ b/drivers/lightnvm/pblk-map.c
@@ -88,13 +88,14 @@ void pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
 		 unsigned int off)
 {
 	struct pblk_sec_meta *meta_list = rqd->meta_list;
+	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
 	unsigned int map_secs;
 	int min = pblk->min_write_pgs;
 	int i;
 
 	for (i = off; i < rqd->nr_ppas; i += min) {
 		map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
-		if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
+		if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
 					lun_bitmap, &meta_list[i], map_secs)) {
 			bio_put(rqd->bio);
 			pblk_free_rqd(pblk, rqd, PBLK_WRITE);
@@ -112,6 +113,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
 	struct nvm_geo *geo = &dev->geo;
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_sec_meta *meta_list = rqd->meta_list;
+	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
 	struct pblk_line *e_line, *d_line;
 	unsigned int map_secs;
 	int min = pblk->min_write_pgs;
@@ -119,14 +121,14 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
 
 	for (i = 0; i < rqd->nr_ppas; i += min) {
 		map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
-		if (pblk_map_page_data(pblk, sentry + i, &rqd->ppa_list[i],
+		if (pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
 					lun_bitmap, &meta_list[i], map_secs)) {
 			bio_put(rqd->bio);
 			pblk_free_rqd(pblk, rqd, PBLK_WRITE);
 			pblk_pipeline_stop(pblk);
 		}
 
-		erase_lun = pblk_ppa_to_pos(geo, rqd->ppa_list[i]);
+		erase_lun = pblk_ppa_to_pos(geo, ppa_list[i]);
 
 		/* line can change after page map. We might also be writing the
 		 * last line.
@@ -141,7 +143,7 @@ void pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
 			set_bit(erase_lun, e_line->erase_bitmap);
 			atomic_dec(&e_line->left_eblks);
 
-			*erase_ppa = rqd->ppa_list[i];
+			*erase_ppa = ppa_list[i];
 			erase_ppa->a.blk = e_line->id;
 
 			spin_unlock(&e_line->lock);
diff --git a/drivers/lightnvm/pblk-read.c b/drivers/lightnvm/pblk-read.c
index 49744caaa300..d3ff8c3e9010 100644
--- a/drivers/lightnvm/pblk-read.c
+++ b/drivers/lightnvm/pblk-read.c
@@ -116,10 +116,9 @@ static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
 
 		if (lba != blba + i) {
 #ifdef CONFIG_NVM_PBLK_DEBUG
-			struct ppa_addr *p;
+			struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
 
-			p = (nr_lbas == 1) ? &rqd->ppa_list[i] : &rqd->ppa_addr;
-			print_ppa(pblk, p, "seq", i);
+			print_ppa(pblk, &ppa_list[i], "seq", i);
 #endif
 			pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n",
 							lba, (u64)blba + i);
@@ -148,11 +147,9 @@ static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
 
 		if (lba != meta_lba) {
 #ifdef CONFIG_NVM_PBLK_DEBUG
-			struct ppa_addr *p;
-			int nr_ppas = rqd->nr_ppas;
+			struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
 
-			p = (nr_ppas == 1) ? &rqd->ppa_list[j] : &rqd->ppa_addr;
-			print_ppa(pblk, p, "seq", j);
+			print_ppa(pblk, &ppa_list[j], "seq", j);
 #endif
 			pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n",
 								lba, meta_lba);
diff --git a/drivers/lightnvm/pblk-recovery.c b/drivers/lightnvm/pblk-recovery.c
index cf629ab016ba..3bd2b6b0a359 100644
--- a/drivers/lightnvm/pblk-recovery.c
+++ b/drivers/lightnvm/pblk-recovery.c
@@ -161,6 +161,8 @@ next_read_rq:
 	if (pblk_io_aligned(pblk, rq_ppas))
 		rqd->is_seq = 1;
 
+	ppa_list = nvm_rq_to_ppa_list(rqd);
+
 	for (i = 0; i < rqd->nr_ppas; ) {
 		struct ppa_addr ppa;
 		int pos;
@@ -175,7 +177,7 @@ next_read_rq:
 		}
 
 		for (j = 0; j < pblk->min_write_pgs; j++, i++, r_ptr_int++)
-			rqd->ppa_list[i] =
+			ppa_list[i] =
 				addr_to_gen_ppa(pblk, r_ptr_int, line->id);
 	}
 
@@ -202,7 +204,7 @@ next_read_rq:
 		if (lba == ADDR_EMPTY || lba > pblk->rl.nr_secs)
 			continue;
 
-		pblk_update_map(pblk, lba, rqd->ppa_list[i]);
+		pblk_update_map(pblk, lba, ppa_list[i]);
 	}
 
 	left_ppas -= rq_ppas;
@@ -221,10 +223,11 @@ static void pblk_recov_complete(struct kref *ref)
 
 static void pblk_end_io_recov(struct nvm_rq *rqd)
 {
+	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
 	struct pblk_pad_rq *pad_rq = rqd->private;
 	struct pblk *pblk = pad_rq->pblk;
 
-	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+	pblk_up_page(pblk, ppa_list, rqd->nr_ppas);
 
 	pblk_free_rqd(pblk, rqd, PBLK_WRITE_INT);
 
diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index 625ed5a3a020..c20bb7f6d703 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -208,15 +208,10 @@ static void pblk_submit_rec(struct work_struct *work)
 	struct pblk *pblk = recovery->pblk;
 	struct nvm_rq *rqd = recovery->rqd;
 	struct pblk_c_ctx *c_ctx = nvm_rq_to_pdu(rqd);
-	struct ppa_addr *ppa_list;
+	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
 
 	pblk_log_write_err(pblk, rqd);
 
-	if (rqd->nr_ppas == 1)
-		ppa_list = &rqd->ppa_addr;
-	else
-		ppa_list = rqd->ppa_list;
-
 	pblk_map_remaining(pblk, ppa_list);
 	pblk_queue_resubmit(pblk, c_ctx);
 
@@ -273,9 +268,10 @@ static void pblk_end_io_write_meta(struct nvm_rq *rqd)
 	struct pblk_g_ctx *m_ctx = nvm_rq_to_pdu(rqd);
 	struct pblk_line *line = m_ctx->private;
 	struct pblk_emeta *emeta = line->emeta;
+	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
 	int sync;
 
-	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+	pblk_up_page(pblk, ppa_list, rqd->nr_ppas);
 
 	if (rqd->error) {
 		pblk_log_write_err(pblk, rqd);
@@ -375,6 +371,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
 	struct pblk_line_meta *lm = &pblk->lm;
 	struct pblk_emeta *emeta = meta_line->emeta;
+	struct ppa_addr *ppa_list;
 	struct pblk_g_ctx *m_ctx;
 	struct bio *bio;
 	struct nvm_rq *rqd;
@@ -409,12 +406,13 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 	if (ret)
 		goto fail_free_bio;
 
+	ppa_list = nvm_rq_to_ppa_list(rqd);
 	for (i = 0; i < rqd->nr_ppas; ) {
 		spin_lock(&meta_line->lock);
 		paddr = __pblk_alloc_page(pblk, meta_line, rq_ppas);
 		spin_unlock(&meta_line->lock);
 		for (j = 0; j < rq_ppas; j++, i++, paddr++)
-			rqd->ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
+			ppa_list[i] = addr_to_gen_ppa(pblk, paddr, id);
 	}
 
 	spin_lock(&l_mg->close_lock);
@@ -423,7 +421,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 		list_del(&meta_line->list);
 	spin_unlock(&l_mg->close_lock);
 
-	pblk_down_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+	pblk_down_page(pblk, ppa_list, rqd->nr_ppas);
 
 	ret = pblk_submit_io(pblk, rqd);
 	if (ret) {
@@ -434,7 +432,7 @@ int pblk_submit_meta_io(struct pblk *pblk, struct pblk_line *meta_line)
 	return NVM_IO_OK;
 
 fail_rollback:
-	pblk_up_page(pblk, rqd->ppa_list, rqd->nr_ppas);
+	pblk_up_page(pblk, ppa_list, rqd->nr_ppas);
 	spin_lock(&l_mg->close_lock);
 	pblk_dealloc_page(pblk, meta_line, rq_ppas);
 	list_add(&meta_line->list, &meta_line->list);
diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 1865ac1560fa..60c509a00574 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -1362,9 +1362,7 @@ static inline int pblk_boundary_ppa_checks(struct nvm_tgt_dev *tgt_dev,
 static inline int pblk_check_io(struct pblk *pblk, struct nvm_rq *rqd)
 {
 	struct nvm_tgt_dev *dev = pblk->dev;
-	struct ppa_addr *ppa_list;
-
-	ppa_list = (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
+	struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
 
 	if (pblk_boundary_ppa_checks(dev, ppa_list, rqd->nr_ppas)) {
 		WARN_ON(1);
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 77743a02ec0d..50ac5b21297c 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -320,6 +320,11 @@ static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata)
 	return rqdata + 1;
 }
 
+static inline struct ppa_addr *nvm_rq_to_ppa_list(struct nvm_rq *rqd)
+{
+	return (rqd->nr_ppas > 1) ? rqd->ppa_list : &rqd->ppa_addr;
+}
+
 enum {
 	NVM_BLK_ST_FREE =	0x1,	/* Free block */
 	NVM_BLK_ST_TGT =	0x2,	/* Block in use by target */
-- 
cgit v1.2.3


From 7f985f9a691dc25ddcfc9dc7ff21199f5ee1569c Mon Sep 17 00:00:00 2001
From: Javier González <javier@javigon.com>
Date: Tue, 9 Oct 2018 13:11:56 +0200
Subject: lightnvm: move ppa transformations to core
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Continuing the effort of moving 1.2 and 2.0 specific code to core, move
64_to_32 and 32_to_64 ppa helpers from pblk to core.

Signed-off-by: Javier González <javier@cnexlabs.com>
Signed-off-by: Matias Bjørling <mb@lightnvm.io>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/lightnvm/pblk.h  | 78 +++------------------------------------------
 include/linux/lightnvm.h | 83 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+), 74 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
index 7f4e46dfb0d7..a2cc581951ef 100644
--- a/drivers/lightnvm/pblk.h
+++ b/drivers/lightnvm/pblk.h
@@ -1102,86 +1102,16 @@ static inline u64 pblk_dev_ppa_to_line_addr(struct pblk *pblk,
 
 static inline struct ppa_addr pblk_ppa32_to_ppa64(struct pblk *pblk, u32 ppa32)
 {
-	struct ppa_addr ppa64;
-
-	ppa64.ppa = 0;
-
-	if (ppa32 == -1) {
-		ppa64.ppa = ADDR_EMPTY;
-	} else if (ppa32 & (1U << 31)) {
-		ppa64.c.line = ppa32 & ((~0U) >> 1);
-		ppa64.c.is_cached = 1;
-	} else {
-		struct nvm_tgt_dev *dev = pblk->dev;
-		struct nvm_geo *geo = &dev->geo;
-
-		if (geo->version == NVM_OCSSD_SPEC_12) {
-			struct nvm_addrf_12 *ppaf =
-					(struct nvm_addrf_12 *)&pblk->addrf;
-
-			ppa64.g.ch = (ppa32 & ppaf->ch_mask) >>
-							ppaf->ch_offset;
-			ppa64.g.lun = (ppa32 & ppaf->lun_mask) >>
-							ppaf->lun_offset;
-			ppa64.g.blk = (ppa32 & ppaf->blk_mask) >>
-							ppaf->blk_offset;
-			ppa64.g.pg = (ppa32 & ppaf->pg_mask) >>
-							ppaf->pg_offset;
-			ppa64.g.pl = (ppa32 & ppaf->pln_mask) >>
-							ppaf->pln_offset;
-			ppa64.g.sec = (ppa32 & ppaf->sec_mask) >>
-							ppaf->sec_offset;
-		} else {
-			struct nvm_addrf *lbaf = &pblk->addrf;
-
-			ppa64.m.grp = (ppa32 & lbaf->ch_mask) >>
-							lbaf->ch_offset;
-			ppa64.m.pu = (ppa32 & lbaf->lun_mask) >>
-							lbaf->lun_offset;
-			ppa64.m.chk = (ppa32 & lbaf->chk_mask) >>
-							lbaf->chk_offset;
-			ppa64.m.sec = (ppa32 & lbaf->sec_mask) >>
-							lbaf->sec_offset;
-		}
-	}
+	struct nvm_tgt_dev *dev = pblk->dev;
 
-	return ppa64;
+	return nvm_ppa32_to_ppa64(dev->parent, &pblk->addrf, ppa32);
 }
 
 static inline u32 pblk_ppa64_to_ppa32(struct pblk *pblk, struct ppa_addr ppa64)
 {
-	u32 ppa32 = 0;
-
-	if (ppa64.ppa == ADDR_EMPTY) {
-		ppa32 = ~0U;
-	} else if (ppa64.c.is_cached) {
-		ppa32 |= ppa64.c.line;
-		ppa32 |= 1U << 31;
-	} else {
-		struct nvm_tgt_dev *dev = pblk->dev;
-		struct nvm_geo *geo = &dev->geo;
-
-		if (geo->version == NVM_OCSSD_SPEC_12) {
-			struct nvm_addrf_12 *ppaf =
-					(struct nvm_addrf_12 *)&pblk->addrf;
-
-			ppa32 |= ppa64.g.ch << ppaf->ch_offset;
-			ppa32 |= ppa64.g.lun << ppaf->lun_offset;
-			ppa32 |= ppa64.g.blk << ppaf->blk_offset;
-			ppa32 |= ppa64.g.pg << ppaf->pg_offset;
-			ppa32 |= ppa64.g.pl << ppaf->pln_offset;
-			ppa32 |= ppa64.g.sec << ppaf->sec_offset;
-		} else {
-			struct nvm_addrf *lbaf = &pblk->addrf;
-
-			ppa32 |= ppa64.m.grp << lbaf->ch_offset;
-			ppa32 |= ppa64.m.pu << lbaf->lun_offset;
-			ppa32 |= ppa64.m.chk << lbaf->chk_offset;
-			ppa32 |= ppa64.m.sec << lbaf->sec_offset;
-		}
-	}
+	struct nvm_tgt_dev *dev = pblk->dev;
 
-	return ppa32;
+	return nvm_ppa64_to_ppa32(dev->parent, &pblk->addrf, ppa64);
 }
 
 static inline struct ppa_addr pblk_trans_map_get(struct pblk *pblk,
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 50ac5b21297c..eb7300c20f24 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -511,6 +511,89 @@ static inline u64 dev_to_chunk_addr(struct nvm_dev *dev, void *addrf,
 	return caddr;
 }
 
+static inline struct ppa_addr nvm_ppa32_to_ppa64(struct nvm_dev *dev,
+						 void *addrf, u32 ppa32)
+{
+	struct ppa_addr ppa64;
+
+	ppa64.ppa = 0;
+
+	if (ppa32 == -1) {
+		ppa64.ppa = ADDR_EMPTY;
+	} else if (ppa32 & (1U << 31)) {
+		ppa64.c.line = ppa32 & ((~0U) >> 1);
+		ppa64.c.is_cached = 1;
+	} else {
+		struct nvm_geo *geo = &dev->geo;
+
+		if (geo->version == NVM_OCSSD_SPEC_12) {
+			struct nvm_addrf_12 *ppaf = addrf;
+
+			ppa64.g.ch = (ppa32 & ppaf->ch_mask) >>
+							ppaf->ch_offset;
+			ppa64.g.lun = (ppa32 & ppaf->lun_mask) >>
+							ppaf->lun_offset;
+			ppa64.g.blk = (ppa32 & ppaf->blk_mask) >>
+							ppaf->blk_offset;
+			ppa64.g.pg = (ppa32 & ppaf->pg_mask) >>
+							ppaf->pg_offset;
+			ppa64.g.pl = (ppa32 & ppaf->pln_mask) >>
+							ppaf->pln_offset;
+			ppa64.g.sec = (ppa32 & ppaf->sec_mask) >>
+							ppaf->sec_offset;
+		} else {
+			struct nvm_addrf *lbaf = addrf;
+
+			ppa64.m.grp = (ppa32 & lbaf->ch_mask) >>
+							lbaf->ch_offset;
+			ppa64.m.pu = (ppa32 & lbaf->lun_mask) >>
+							lbaf->lun_offset;
+			ppa64.m.chk = (ppa32 & lbaf->chk_mask) >>
+							lbaf->chk_offset;
+			ppa64.m.sec = (ppa32 & lbaf->sec_mask) >>
+							lbaf->sec_offset;
+		}
+	}
+
+	return ppa64;
+}
+
+static inline u32 nvm_ppa64_to_ppa32(struct nvm_dev *dev,
+				     void *addrf, struct ppa_addr ppa64)
+{
+	u32 ppa32 = 0;
+
+	if (ppa64.ppa == ADDR_EMPTY) {
+		ppa32 = ~0U;
+	} else if (ppa64.c.is_cached) {
+		ppa32 |= ppa64.c.line;
+		ppa32 |= 1U << 31;
+	} else {
+		struct nvm_geo *geo = &dev->geo;
+
+		if (geo->version == NVM_OCSSD_SPEC_12) {
+			struct nvm_addrf_12 *ppaf = addrf;
+
+			ppa32 |= ppa64.g.ch << ppaf->ch_offset;
+			ppa32 |= ppa64.g.lun << ppaf->lun_offset;
+			ppa32 |= ppa64.g.blk << ppaf->blk_offset;
+			ppa32 |= ppa64.g.pg << ppaf->pg_offset;
+			ppa32 |= ppa64.g.pl << ppaf->pln_offset;
+			ppa32 |= ppa64.g.sec << ppaf->sec_offset;
+		} else {
+			struct nvm_addrf *lbaf = addrf;
+
+			ppa32 |= ppa64.m.grp << lbaf->ch_offset;
+			ppa32 |= ppa64.m.pu << lbaf->lun_offset;
+			ppa32 |= ppa64.m.chk << lbaf->chk_offset;
+			ppa32 |= ppa64.m.sec << lbaf->sec_offset;
+		}
+	}
+
+	return ppa32;
+}
+
+
 typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
 typedef sector_t (nvm_tgt_capacity_fn)(void *);
 typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *,
-- 
cgit v1.2.3


From bf82fa2f584f1c6e12df8c04fca715d53e7f32d5 Mon Sep 17 00:00:00 2001
From: Hans Holmberg <hans.holmberg@cnexlabs.com>
Date: Tue, 9 Oct 2018 13:11:59 +0200
Subject: lightnvm: pblk: fix mapping issue on failed writes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On 1.2-devices, the mapping-out of remaning sectors in the
failed-write's block can result in an infinite loop,
stalling the write pipeline, fix this.

Fixes: 6a3abf5beef6 ("lightnvm: pblk: rework write error recovery path")
Signed-off-by: Hans Holmberg <hans.holmberg@cnexlabs.com>
Signed-off-by: Matias Bjørling <mb@lightnvm.io>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/lightnvm/pblk-write.c | 12 +-----------
 include/linux/lightnvm.h      | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/lightnvm/pblk-write.c b/drivers/lightnvm/pblk-write.c
index 61fe22ccc7a1..9554febee480 100644
--- a/drivers/lightnvm/pblk-write.c
+++ b/drivers/lightnvm/pblk-write.c
@@ -106,8 +106,6 @@ retry:
 /* Map remaining sectors in chunk, starting from ppa */
 static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa)
 {
-	struct nvm_tgt_dev *dev = pblk->dev;
-	struct nvm_geo *geo = &dev->geo;
 	struct pblk_line *line;
 	struct ppa_addr map_ppa = *ppa;
 	u64 paddr;
@@ -125,15 +123,7 @@ static void pblk_map_remaining(struct pblk *pblk, struct ppa_addr *ppa)
 		if (!test_and_set_bit(paddr, line->invalid_bitmap))
 			le32_add_cpu(line->vsc, -1);
 
-		if (geo->version == NVM_OCSSD_SPEC_12) {
-			map_ppa.ppa++;
-			if (map_ppa.g.pg == geo->num_pg)
-				done = 1;
-		} else {
-			map_ppa.m.sec++;
-			if (map_ppa.m.sec == geo->clba)
-				done = 1;
-		}
+		done = nvm_next_ppa_in_chk(pblk->dev, &map_ppa);
 	}
 
 	line->w_err_gc->has_write_err = 1;
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index eb7300c20f24..2fdeac1a420d 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -593,6 +593,42 @@ static inline u32 nvm_ppa64_to_ppa32(struct nvm_dev *dev,
 	return ppa32;
 }
 
+static inline int nvm_next_ppa_in_chk(struct nvm_tgt_dev *dev,
+				      struct ppa_addr *ppa)
+{
+	struct nvm_geo *geo = &dev->geo;
+	int last = 0;
+
+	if (geo->version == NVM_OCSSD_SPEC_12) {
+		int sec = ppa->g.sec;
+
+		sec++;
+		if (sec == geo->ws_min) {
+			int pg = ppa->g.pg;
+
+			sec = 0;
+			pg++;
+			if (pg == geo->num_pg) {
+				int pl = ppa->g.pl;
+
+				pg = 0;
+				pl++;
+				if (pl == geo->num_pln)
+					last = 1;
+
+				ppa->g.pl = pl;
+			}
+			ppa->g.pg = pg;
+		}
+		ppa->g.sec = sec;
+	} else {
+		ppa->m.sec++;
+		if (ppa->m.sec == geo->clba)
+			last = 1;
+	}
+
+	return last;
+}
 
 typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
 typedef sector_t (nvm_tgt_capacity_fn)(void *);
-- 
cgit v1.2.3


From 7d49b28a80b830c3ca876d33bedc58d62a78e16f Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@surriel.com>
Date: Tue, 25 Sep 2018 23:58:41 -0400
Subject: smp,cpumask: introduce on_each_cpu_cond_mask

Introduce a variant of on_each_cpu_cond that iterates only over the
CPUs in a cpumask, in order to avoid making callbacks for every single
CPU in the system when we only need to test a subset.

Cc: npiggin@gmail.com
Cc: mingo@kernel.org
Cc: will.deacon@arm.com
Cc: songliubraving@fb.com
Cc: kernel-team@fb.com
Cc: hpa@zytor.com
Cc: luto@kernel.org
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180926035844.1420-5-riel@surriel.com
---
 include/linux/smp.h |  4 ++++
 kernel/smp.c        | 17 +++++++++++++----
 kernel/up.c         | 14 +++++++++++---
 3 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/smp.h b/include/linux/smp.h
index 9fb239e12b82..a56f08ff3097 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -53,6 +53,10 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 		smp_call_func_t func, void *info, bool wait,
 		gfp_t gfp_flags);
 
+void on_each_cpu_cond_mask(bool (*cond_func)(int cpu, void *info),
+		smp_call_func_t func, void *info, bool wait,
+		gfp_t gfp_flags, const struct cpumask *mask);
+
 int smp_call_function_single_async(int cpu, call_single_data_t *csd);
 
 #ifdef CONFIG_SMP
diff --git a/kernel/smp.c b/kernel/smp.c
index a7d4f9f50a49..163c451af42e 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -669,9 +669,9 @@ EXPORT_SYMBOL(on_each_cpu_mask);
  * You must not call this function with disabled interrupts or
  * from a hardware interrupt handler or from a bottom half handler.
  */
-void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
+void on_each_cpu_cond_mask(bool (*cond_func)(int cpu, void *info),
 			smp_call_func_t func, void *info, bool wait,
-			gfp_t gfp_flags)
+			gfp_t gfp_flags, const struct cpumask *mask)
 {
 	cpumask_var_t cpus;
 	int cpu, ret;
@@ -680,7 +680,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 
 	if (likely(zalloc_cpumask_var(&cpus, (gfp_flags|__GFP_NOWARN)))) {
 		preempt_disable();
-		for_each_online_cpu(cpu)
+		for_each_cpu(cpu, mask)
 			if (cond_func(cpu, info))
 				__cpumask_set_cpu(cpu, cpus);
 		on_each_cpu_mask(cpus, func, info, wait);
@@ -692,7 +692,7 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 		 * just have to IPI them one by one.
 		 */
 		preempt_disable();
-		for_each_online_cpu(cpu)
+		for_each_cpu(cpu, mask)
 			if (cond_func(cpu, info)) {
 				ret = smp_call_function_single(cpu, func,
 								info, wait);
@@ -701,6 +701,15 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 		preempt_enable();
 	}
 }
+EXPORT_SYMBOL(on_each_cpu_cond_mask);
+
+void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
+			smp_call_func_t func, void *info, bool wait,
+			gfp_t gfp_flags)
+{
+	on_each_cpu_cond_mask(cond_func, func, info, wait, gfp_flags,
+				cpu_online_mask);
+}
 EXPORT_SYMBOL(on_each_cpu_cond);
 
 static void do_nothing(void *unused)
diff --git a/kernel/up.c b/kernel/up.c
index 42c46bf3e0a5..ff536f9cc8a2 100644
--- a/kernel/up.c
+++ b/kernel/up.c
@@ -68,9 +68,9 @@ EXPORT_SYMBOL(on_each_cpu_mask);
  * Preemption is disabled here to make sure the cond_func is called under the
  * same condtions in UP and SMP.
  */
-void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
-		      smp_call_func_t func, void *info, bool wait,
-		      gfp_t gfp_flags)
+void on_each_cpu_cond_mask(bool (*cond_func)(int cpu, void *info),
+			   smp_call_func_t func, void *info, bool wait,
+			   gfp_t gfp_flags, const struct cpumask *mask)
 {
 	unsigned long flags;
 
@@ -82,6 +82,14 @@ void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
 	}
 	preempt_enable();
 }
+EXPORT_SYMBOL(on_each_cpu_cond_mask);
+
+void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
+		      smp_call_func_t func, void *info, bool wait,
+		      gfp_t gfp_flags)
+{
+	on_each_cpu_cond_mask(cond_func, func, info, wait, gfp_flags, NULL);
+}
 EXPORT_SYMBOL(on_each_cpu_cond);
 
 int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
-- 
cgit v1.2.3


From f2e74abfaad446765ce0350aed1d9c5eed5b1b36 Mon Sep 17 00:00:00 2001
From: Loic Pallardy <loic.pallardy@st.com>
Date: Fri, 27 Jul 2018 15:14:38 +0200
Subject: remoteproc: add release ops in rproc_mem_entry struct

Memory entry could be allocated in different ways (ioremap,
dma_alloc_coherent, internal RAM allocator...).
This patch introduces a release ops in rproc_mem_entry structure
to associate dedicated release mechanism to each memory entry descriptor
in order to keep remoteproc core generic.

Signed-off-by: Loic Pallardy <loic.pallardy@st.com>
Acked-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 23 +++++++++++++++++++++--
 include/linux/remoteproc.h           |  5 ++++-
 2 files changed, 25 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index ebadaad070a5..674f88d237b8 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -599,6 +599,24 @@ out:
 	return ret;
 }
 
+/**
+ * rproc_release_carveout() - release acquired carveout
+ * @rproc: rproc handle
+ * @mem: the memory entry to release
+ *
+ * This function releases specified memory entry @mem allocated via
+ * dma_alloc_coherent() function by @rproc.
+ */
+static int rproc_release_carveout(struct rproc *rproc,
+				  struct rproc_mem_entry *mem)
+{
+	struct device *dev = &rproc->dev;
+
+	/* clean up carveout allocations */
+	dma_free_coherent(dev->parent, mem->len, mem->va, mem->dma);
+	return 0;
+}
+
 /**
  * rproc_handle_carveout() - handle phys contig memory allocation requests
  * @rproc: rproc handle
@@ -733,6 +751,7 @@ static int rproc_handle_carveout(struct rproc *rproc,
 	carveout->len = rsc->len;
 	carveout->dma = dma;
 	carveout->da = rsc->da;
+	carveout->release = rproc_release_carveout;
 
 	list_add_tail(&carveout->node, &rproc->carveouts);
 
@@ -920,8 +939,8 @@ static void rproc_resource_cleanup(struct rproc *rproc)
 
 	/* clean up carveout allocations */
 	list_for_each_entry_safe(entry, tmp, &rproc->carveouts, node) {
-		dma_free_coherent(dev->parent, entry->len, entry->va,
-				  entry->dma);
+		if (entry->release)
+			entry->release(rproc, entry);
 		list_del(&entry->node);
 		kfree(entry);
 	}
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 75f9ca05b865..4116e92d4b3d 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -305,12 +305,15 @@ struct fw_rsc_vdev {
 	struct fw_rsc_vdev_vring vring[0];
 } __packed;
 
+struct rproc;
+
 /**
  * struct rproc_mem_entry - memory entry descriptor
  * @va:	virtual address
  * @dma: dma address
  * @len: length, in bytes
  * @da: device address
+ * @release: release associated memory
  * @priv: associated data
  * @node: list node
  */
@@ -321,9 +324,9 @@ struct rproc_mem_entry {
 	u32 da;
 	void *priv;
 	struct list_head node;
+	int (*release)(struct rproc *rproc, struct rproc_mem_entry *mem);
 };
 
-struct rproc;
 struct firmware;
 
 /**
-- 
cgit v1.2.3


From 3265230c5b05fe919291d09e266a8aedc85ebad0 Mon Sep 17 00:00:00 2001
From: Loic Pallardy <loic.pallardy@st.com>
Date: Fri, 27 Jul 2018 15:14:39 +0200
Subject: remoteproc: add name in rproc_mem_entry struct

Add name field in struct rproc_mem_entry.
This new field will be used to match memory area
requested in resource table with pre-registered carveout.

Signed-off-by: Loic Pallardy <loic.pallardy@st.com>
Acked-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c    | 1 +
 drivers/remoteproc/remoteproc_debugfs.c | 1 +
 include/linux/remoteproc.h              | 2 ++
 3 files changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 674f88d237b8..a2d338fc8ddd 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -752,6 +752,7 @@ static int rproc_handle_carveout(struct rproc *rproc,
 	carveout->dma = dma;
 	carveout->da = rsc->da;
 	carveout->release = rproc_release_carveout;
+	strlcpy(carveout->name, rsc->name, sizeof(carveout->name));
 
 	list_add_tail(&carveout->node, &rproc->carveouts);
 
diff --git a/drivers/remoteproc/remoteproc_debugfs.c b/drivers/remoteproc/remoteproc_debugfs.c
index a5c29f2764a3..e90135c64af0 100644
--- a/drivers/remoteproc/remoteproc_debugfs.c
+++ b/drivers/remoteproc/remoteproc_debugfs.c
@@ -260,6 +260,7 @@ static int rproc_carveouts_show(struct seq_file *seq, void *p)
 
 	list_for_each_entry(carveout, &rproc->carveouts, node) {
 		seq_puts(seq, "Carveout memory entry:\n");
+		seq_printf(seq, "\tName: %s\n", carveout->name);
 		seq_printf(seq, "\tVirtual address: %pK\n", carveout->va);
 		seq_printf(seq, "\tDMA address: %pad\n", &carveout->dma);
 		seq_printf(seq, "\tDevice address: 0x%x\n", carveout->da);
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 4116e92d4b3d..19908c930f47 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -315,6 +315,7 @@ struct rproc;
  * @da: device address
  * @release: release associated memory
  * @priv: associated data
+ * @name: associated memory region name (optional)
  * @node: list node
  */
 struct rproc_mem_entry {
@@ -323,6 +324,7 @@ struct rproc_mem_entry {
 	int len;
 	u32 da;
 	void *priv;
+	char name[32];
 	struct list_head node;
 	int (*release)(struct rproc *rproc, struct rproc_mem_entry *mem);
 };
-- 
cgit v1.2.3


From 72029c901a0244ca2e1eb09e1c453413a17f5787 Mon Sep 17 00:00:00 2001
From: Loic Pallardy <loic.pallardy@st.com>
Date: Fri, 27 Jul 2018 15:14:40 +0200
Subject: remoteproc: add helper function to allocate and init rproc_mem_entry
 struct

This patch introduces rproc_mem_entry_init helper function to
simplify rproc_mem_entry structure allocation and filling by
client.

Signed-off-by: Loic Pallardy <loic.pallardy@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 65 +++++++++++++++++++++++++++---------
 include/linux/remoteproc.h           |  6 ++++
 2 files changed, 55 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index a2d338fc8ddd..9decc598944d 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -639,7 +639,7 @@ static int rproc_handle_carveout(struct rproc *rproc,
 				 struct fw_rsc_carveout *rsc,
 				 int offset, int avail)
 {
-	struct rproc_mem_entry *carveout, *mapping;
+	struct rproc_mem_entry *carveout, *mapping = NULL;
 	struct device *dev = &rproc->dev;
 	dma_addr_t dma;
 	void *va;
@@ -659,16 +659,11 @@ static int rproc_handle_carveout(struct rproc *rproc,
 	dev_dbg(dev, "carveout rsc: name: %s, da 0x%x, pa 0x%x, len 0x%x, flags 0x%x\n",
 		rsc->name, rsc->da, rsc->pa, rsc->len, rsc->flags);
 
-	carveout = kzalloc(sizeof(*carveout), GFP_KERNEL);
-	if (!carveout)
-		return -ENOMEM;
-
 	va = dma_alloc_coherent(dev->parent, rsc->len, &dma, GFP_KERNEL);
 	if (!va) {
 		dev_err(dev->parent,
 			"failed to allocate dma memory: len 0x%x\n", rsc->len);
-		ret = -ENOMEM;
-		goto free_carv;
+		return -ENOMEM;
 	}
 
 	dev_dbg(dev, "carveout va %pK, dma %pad, len 0x%x\n",
@@ -747,27 +742,65 @@ static int rproc_handle_carveout(struct rproc *rproc,
 	 */
 	rsc->pa = (u32)rproc_va_to_pa(va);
 
-	carveout->va = va;
-	carveout->len = rsc->len;
-	carveout->dma = dma;
-	carveout->da = rsc->da;
-	carveout->release = rproc_release_carveout;
-	strlcpy(carveout->name, rsc->name, sizeof(carveout->name));
+	carveout = rproc_mem_entry_init(dev, va, dma, rsc->len, rsc->da,
+					rproc_release_carveout, rsc->name);
+	if (!carveout)
+		goto free_carv;
 
 	list_add_tail(&carveout->node, &rproc->carveouts);
 
 	return 0;
 
+free_carv:
+	kfree(carveout);
 free_mapping:
 	kfree(mapping);
 dma_free:
 	dma_free_coherent(dev->parent, rsc->len, va, dma);
-free_carv:
-	kfree(carveout);
 	return ret;
 }
 
-/*
+/**
+ * rproc_mem_entry_init() - allocate and initialize rproc_mem_entry struct
+ * @dev: pointer on device struct
+ * @va: virtual address
+ * @dma: dma address
+ * @len: memory carveout length
+ * @da: device address
+ * @release: memory carveout function
+ * @name: carveout name
+ *
+ * This function allocates a rproc_mem_entry struct and fill it with parameters
+ * provided by client.
+ */
+struct rproc_mem_entry *
+rproc_mem_entry_init(struct device *dev,
+		     void *va, dma_addr_t dma, int len, u32 da,
+		     int (*release)(struct rproc *, struct rproc_mem_entry *),
+		     const char *name, ...)
+{
+	struct rproc_mem_entry *mem;
+	va_list args;
+
+	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+	if (!mem)
+		return mem;
+
+	mem->va = va;
+	mem->dma = dma;
+	mem->da = da;
+	mem->len = len;
+	mem->release = release;
+
+	va_start(args, name);
+	vsnprintf(mem->name, sizeof(mem->name), name, args);
+	va_end(args);
+
+	return mem;
+}
+EXPORT_SYMBOL(rproc_mem_entry_init);
+
+/**
  * A lookup table for resource handlers. The indices are defined in
  * enum fw_resource_type.
  */
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 19908c930f47..9e2b84fa2efa 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -559,6 +559,12 @@ int rproc_add(struct rproc *rproc);
 int rproc_del(struct rproc *rproc);
 void rproc_free(struct rproc *rproc);
 
+struct rproc_mem_entry *
+rproc_mem_entry_init(struct device *dev,
+		     void *va, dma_addr_t dma, int len, u32 da,
+		     int (*release)(struct rproc *, struct rproc_mem_entry *),
+		     const char *name, ...);
+
 int rproc_boot(struct rproc *rproc);
 void rproc_shutdown(struct rproc *rproc);
 void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type);
-- 
cgit v1.2.3


From 15c0b0258e4f1c3c817f34d092d2cc6ff5178bdd Mon Sep 17 00:00:00 2001
From: Loic Pallardy <loic.pallardy@st.com>
Date: Fri, 27 Jul 2018 15:14:41 +0200
Subject: remoteproc: introduce rproc_add_carveout function

This patch introduces a new API to allow platform driver to register
platform specific carveout regions.

Signed-off-by: Loic Pallardy <loic.pallardy@st.com>
Acked-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 16 +++++++++++++++-
 include/linux/remoteproc.h           |  2 ++
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 9decc598944d..db771e53f097 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -747,7 +747,7 @@ static int rproc_handle_carveout(struct rproc *rproc,
 	if (!carveout)
 		goto free_carv;
 
-	list_add_tail(&carveout->node, &rproc->carveouts);
+	rproc_add_carveout(rproc, carveout);
 
 	return 0;
 
@@ -760,6 +760,20 @@ dma_free:
 	return ret;
 }
 
+/**
+ * rproc_add_carveout() - register an allocated carveout region
+ * @rproc: rproc handle
+ * @mem: memory entry to register
+ *
+ * This function registers specified memory entry in @rproc carveouts list.
+ * Specified carveout should have been allocated before registering.
+ */
+void rproc_add_carveout(struct rproc *rproc, struct rproc_mem_entry *mem)
+{
+	list_add_tail(&mem->node, &rproc->carveouts);
+}
+EXPORT_SYMBOL(rproc_add_carveout);
+
 /**
  * rproc_mem_entry_init() - allocate and initialize rproc_mem_entry struct
  * @dev: pointer on device struct
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 9e2b84fa2efa..8a350265d883 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -559,6 +559,8 @@ int rproc_add(struct rproc *rproc);
 int rproc_del(struct rproc *rproc);
 void rproc_free(struct rproc *rproc);
 
+void rproc_add_carveout(struct rproc *rproc, struct rproc_mem_entry *mem);
+
 struct rproc_mem_entry *
 rproc_mem_entry_init(struct device *dev,
 		     void *va, dma_addr_t dma, int len, u32 da,
-- 
cgit v1.2.3


From d7c51706d0956472b7c0530b1bf8fba32d82ee6b Mon Sep 17 00:00:00 2001
From: Loic Pallardy <loic.pallardy@st.com>
Date: Fri, 27 Jul 2018 15:14:43 +0200
Subject: remoteproc: add alloc ops in rproc_mem_entry struct

Memory entry could be allocated in different ways (ioremap,
dma_alloc_coherent, internal RAM allocator...).
This patch introduces an alloc ops in rproc_mem_entry structure
to associate dedicated allocation mechanism to each memory entry
descriptor in order to do remote core agnostic from memory allocators.

The introduction of this ops allows to perform allocation of all registered
carveout at the same time, just before calling rproc_start().
It simplifies and makes uniform carveout management whatever origin.

Signed-off-by: Loic Pallardy <loic.pallardy@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 261 ++++++++++++++++++++++-------------
 include/linux/remoteproc.h           |   7 +
 2 files changed, 175 insertions(+), 93 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 800320d06cb8..9d17b3079506 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -642,74 +642,31 @@ out:
 }
 
 /**
- * rproc_release_carveout() - release acquired carveout
+ * rproc_alloc_carveout() - allocated specified carveout
  * @rproc: rproc handle
- * @mem: the memory entry to release
- *
- * This function releases specified memory entry @mem allocated via
- * dma_alloc_coherent() function by @rproc.
- */
-static int rproc_release_carveout(struct rproc *rproc,
-				  struct rproc_mem_entry *mem)
-{
-	struct device *dev = &rproc->dev;
-
-	/* clean up carveout allocations */
-	dma_free_coherent(dev->parent, mem->len, mem->va, mem->dma);
-	return 0;
-}
-
-/**
- * rproc_handle_carveout() - handle phys contig memory allocation requests
- * @rproc: rproc handle
- * @rsc: the resource entry
- * @avail: size of available data (for image validation)
- *
- * This function will handle firmware requests for allocation of physically
- * contiguous memory regions.
- *
- * These request entries should come first in the firmware's resource table,
- * as other firmware entries might request placing other data objects inside
- * these memory regions (e.g. data/code segments, trace resource entries, ...).
+ * @mem: the memory entry to allocate
  *
- * Allocating memory this way helps utilizing the reserved physical memory
- * (e.g. CMA) more efficiently, and also minimizes the number of TLB entries
- * needed to map it (in case @rproc is using an IOMMU). Reducing the TLB
- * pressure is important; it may have a substantial impact on performance.
+ * This function allocate specified memory entry @mem using
+ * dma_alloc_coherent() as default allocator
  */
-static int rproc_handle_carveout(struct rproc *rproc,
-				 struct fw_rsc_carveout *rsc,
-				 int offset, int avail)
+static int rproc_alloc_carveout(struct rproc *rproc,
+				struct rproc_mem_entry *mem)
 {
-	struct rproc_mem_entry *carveout, *mapping = NULL;
+	struct rproc_mem_entry *mapping = NULL;
 	struct device *dev = &rproc->dev;
 	dma_addr_t dma;
 	void *va;
 	int ret;
 
-	if (sizeof(*rsc) > avail) {
-		dev_err(dev, "carveout rsc is truncated\n");
-		return -EINVAL;
-	}
-
-	/* make sure reserved bytes are zeroes */
-	if (rsc->reserved) {
-		dev_err(dev, "carveout rsc has non zero reserved bytes\n");
-		return -EINVAL;
-	}
-
-	dev_dbg(dev, "carveout rsc: name: %s, da 0x%x, pa 0x%x, len 0x%x, flags 0x%x\n",
-		rsc->name, rsc->da, rsc->pa, rsc->len, rsc->flags);
-
-	va = dma_alloc_coherent(dev->parent, rsc->len, &dma, GFP_KERNEL);
+	va = dma_alloc_coherent(dev->parent, mem->len, &dma, GFP_KERNEL);
 	if (!va) {
 		dev_err(dev->parent,
-			"failed to allocate dma memory: len 0x%x\n", rsc->len);
+			"failed to allocate dma memory: len 0x%x\n", mem->len);
 		return -ENOMEM;
 	}
 
 	dev_dbg(dev, "carveout va %pK, dma %pad, len 0x%x\n",
-		va, &dma, rsc->len);
+		va, &dma, mem->len);
 
 	/*
 	 * Ok, this is non-standard.
@@ -729,22 +686,22 @@ static int rproc_handle_carveout(struct rproc *rproc,
 	 * physical address in this case.
 	 */
 
-	if (rsc->da != FW_RSC_ADDR_ANY && !rproc->domain) {
-		dev_err(dev->parent,
-			"Bad carveout rsc configuration\n");
-		ret = -ENOMEM;
-		goto dma_free;
-	}
+	if (mem->da != FW_RSC_ADDR_ANY) {
+		if (!rproc->domain) {
+			dev_err(dev->parent,
+				"Bad carveout rsc configuration\n");
+			ret = -ENOMEM;
+			goto dma_free;
+		}
 
-	if (rsc->da != FW_RSC_ADDR_ANY && rproc->domain) {
 		mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
 		if (!mapping) {
 			ret = -ENOMEM;
 			goto dma_free;
 		}
 
-		ret = iommu_map(rproc->domain, rsc->da, dma, rsc->len,
-				rsc->flags);
+		ret = iommu_map(rproc->domain, mem->da, dma, mem->len,
+				mem->flags);
 		if (ret) {
 			dev_err(dev, "iommu_map failed: %d\n", ret);
 			goto free_mapping;
@@ -757,51 +714,101 @@ static int rproc_handle_carveout(struct rproc *rproc,
 		 * We can't trust the remote processor not to change the
 		 * resource table, so we must maintain this info independently.
 		 */
-		mapping->da = rsc->da;
-		mapping->len = rsc->len;
+		mapping->da = mem->da;
+		mapping->len = mem->len;
 		list_add_tail(&mapping->node, &rproc->mappings);
 
 		dev_dbg(dev, "carveout mapped 0x%x to %pad\n",
-			rsc->da, &dma);
+			mem->da, &dma);
+	} else {
+		mem->da = (u32)dma;
 	}
 
-	/*
-	 * Some remote processors might need to know the pa
-	 * even though they are behind an IOMMU. E.g., OMAP4's
-	 * remote M3 processor needs this so it can control
-	 * on-chip hardware accelerators that are not behind
-	 * the IOMMU, and therefor must know the pa.
-	 *
-	 * Generally we don't want to expose physical addresses
-	 * if we don't have to (remote processors are generally
-	 * _not_ trusted), so we might want to do this only for
-	 * remote processor that _must_ have this (e.g. OMAP4's
-	 * dual M3 subsystem).
-	 *
-	 * Non-IOMMU processors might also want to have this info.
-	 * In this case, the device address and the physical address
-	 * are the same.
-	 */
-	rsc->pa = (u32)rproc_va_to_pa(va);
-
-	carveout = rproc_mem_entry_init(dev, va, dma, rsc->len, rsc->da,
-					rproc_release_carveout, rsc->name);
-	if (!carveout)
-		goto free_carv;
-
-	rproc_add_carveout(rproc, carveout);
+	mem->dma = (u32)dma;
+	mem->va = va;
 
 	return 0;
 
-free_carv:
-	kfree(carveout);
 free_mapping:
 	kfree(mapping);
 dma_free:
-	dma_free_coherent(dev->parent, rsc->len, va, dma);
+	dma_free_coherent(dev->parent, mem->len, va, dma);
 	return ret;
 }
 
+/**
+ * rproc_release_carveout() - release acquired carveout
+ * @rproc: rproc handle
+ * @mem: the memory entry to release
+ *
+ * This function releases specified memory entry @mem allocated via
+ * rproc_alloc_carveout() function by @rproc.
+ */
+static int rproc_release_carveout(struct rproc *rproc,
+				  struct rproc_mem_entry *mem)
+{
+	struct device *dev = &rproc->dev;
+
+	/* clean up carveout allocations */
+	dma_free_coherent(dev->parent, mem->len, mem->va, mem->dma);
+	return 0;
+}
+
+/**
+ * rproc_handle_carveout() - handle phys contig memory allocation requests
+ * @rproc: rproc handle
+ * @rsc: the resource entry
+ * @avail: size of available data (for image validation)
+ *
+ * This function will handle firmware requests for allocation of physically
+ * contiguous memory regions.
+ *
+ * These request entries should come first in the firmware's resource table,
+ * as other firmware entries might request placing other data objects inside
+ * these memory regions (e.g. data/code segments, trace resource entries, ...).
+ *
+ * Allocating memory this way helps utilizing the reserved physical memory
+ * (e.g. CMA) more efficiently, and also minimizes the number of TLB entries
+ * needed to map it (in case @rproc is using an IOMMU). Reducing the TLB
+ * pressure is important; it may have a substantial impact on performance.
+ */
+static int rproc_handle_carveout(struct rproc *rproc,
+				 struct fw_rsc_carveout *rsc,
+				 int offset, int avail)
+{
+	struct rproc_mem_entry *carveout;
+	struct device *dev = &rproc->dev;
+
+	if (sizeof(*rsc) > avail) {
+		dev_err(dev, "carveout rsc is truncated\n");
+		return -EINVAL;
+	}
+
+	/* make sure reserved bytes are zeroes */
+	if (rsc->reserved) {
+		dev_err(dev, "carveout rsc has non zero reserved bytes\n");
+		return -EINVAL;
+	}
+
+	dev_dbg(dev, "carveout rsc: name: %s, da 0x%x, pa 0x%x, len 0x%x, flags 0x%x\n",
+		rsc->name, rsc->da, rsc->pa, rsc->len, rsc->flags);
+
+	/* Register carveout in in list */
+	carveout = rproc_mem_entry_init(dev, 0, 0, rsc->len, rsc->da,
+					rproc_alloc_carveout,
+					rproc_release_carveout, rsc->name);
+	if (!carveout) {
+		dev_err(dev, "Can't allocate memory entry structure\n");
+		return -ENOMEM;
+	}
+
+	carveout->flags = rsc->flags;
+	carveout->rsc_offset = offset;
+	rproc_add_carveout(rproc, carveout);
+
+	return 0;
+}
+
 /**
  * rproc_add_carveout() - register an allocated carveout region
  * @rproc: rproc handle
@@ -832,6 +839,7 @@ EXPORT_SYMBOL(rproc_add_carveout);
 struct rproc_mem_entry *
 rproc_mem_entry_init(struct device *dev,
 		     void *va, dma_addr_t dma, int len, u32 da,
+		     int (*alloc)(struct rproc *, struct rproc_mem_entry *),
 		     int (*release)(struct rproc *, struct rproc_mem_entry *),
 		     const char *name, ...)
 {
@@ -846,7 +854,9 @@ rproc_mem_entry_init(struct device *dev,
 	mem->dma = dma;
 	mem->da = da;
 	mem->len = len;
+	mem->alloc = alloc;
 	mem->release = release;
+	mem->rsc_offset = FW_RSC_ADDR_ANY;
 
 	va_start(args, name);
 	vsnprintf(mem->name, sizeof(mem->name), name, args);
@@ -977,6 +987,63 @@ static void rproc_unprepare_subdevices(struct rproc *rproc)
 	}
 }
 
+/**
+ * rproc_alloc_registered_carveouts() - allocate all carveouts registered
+ * in the list
+ * @rproc: the remote processor handle
+ *
+ * This function parses registered carveout list, performs allocation
+ * if alloc() ops registered and updates resource table information
+ * if rsc_offset set.
+ *
+ * Return: 0 on success
+ */
+static int rproc_alloc_registered_carveouts(struct rproc *rproc)
+{
+	struct rproc_mem_entry *entry, *tmp;
+	struct fw_rsc_carveout *rsc;
+	struct device *dev = &rproc->dev;
+	int ret;
+
+	list_for_each_entry_safe(entry, tmp, &rproc->carveouts, node) {
+		if (entry->alloc) {
+			ret = entry->alloc(rproc, entry);
+			if (ret) {
+				dev_err(dev, "Unable to allocate carveout %s: %d\n",
+					entry->name, ret);
+				return -ENOMEM;
+			}
+		}
+
+		if (entry->rsc_offset != FW_RSC_ADDR_ANY) {
+			/* update resource table */
+			rsc = (void *)rproc->table_ptr + entry->rsc_offset;
+
+			/*
+			 * Some remote processors might need to know the pa
+			 * even though they are behind an IOMMU. E.g., OMAP4's
+			 * remote M3 processor needs this so it can control
+			 * on-chip hardware accelerators that are not behind
+			 * the IOMMU, and therefor must know the pa.
+			 *
+			 * Generally we don't want to expose physical addresses
+			 * if we don't have to (remote processors are generally
+			 * _not_ trusted), so we might want to do this only for
+			 * remote processor that _must_ have this (e.g. OMAP4's
+			 * dual M3 subsystem).
+			 *
+			 * Non-IOMMU processors might also want to have this info.
+			 * In this case, the device address and the physical address
+			 * are the same.
+			 */
+			if (entry->va)
+				rsc->pa = (u32)rproc_va_to_pa(entry->va);
+		}
+	}
+
+	return 0;
+}
+
 /**
  * rproc_coredump_cleanup() - clean up dump_segments list
  * @rproc: the remote processor handle
@@ -1149,6 +1216,14 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw)
 		goto clean_up_resources;
 	}
 
+	/* Allocate carveout resources associated to rproc */
+	ret = rproc_alloc_registered_carveouts(rproc);
+	if (ret) {
+		dev_err(dev, "Failed to allocate associated carveouts: %d\n",
+			ret);
+		goto clean_up_resources;
+	}
+
 	ret = rproc_start(rproc, fw);
 	if (ret)
 		goto clean_up_resources;
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 8a350265d883..d251c091303c 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -317,6 +317,9 @@ struct rproc;
  * @priv: associated data
  * @name: associated memory region name (optional)
  * @node: list node
+ * @rsc_offset: offset in resource table
+ * @flags: iommu protection flags
+ * @alloc: specific memory allocator function
  */
 struct rproc_mem_entry {
 	void *va;
@@ -326,6 +329,9 @@ struct rproc_mem_entry {
 	void *priv;
 	char name[32];
 	struct list_head node;
+	u32 rsc_offset;
+	u32 flags;
+	int (*alloc)(struct rproc *rproc, struct rproc_mem_entry *mem);
 	int (*release)(struct rproc *rproc, struct rproc_mem_entry *mem);
 };
 
@@ -564,6 +570,7 @@ void rproc_add_carveout(struct rproc *rproc, struct rproc_mem_entry *mem);
 struct rproc_mem_entry *
 rproc_mem_entry_init(struct device *dev,
 		     void *va, dma_addr_t dma, int len, u32 da,
+		     int (*alloc)(struct rproc *, struct rproc_mem_entry *),
 		     int (*release)(struct rproc *, struct rproc_mem_entry *),
 		     const char *name, ...);
 
-- 
cgit v1.2.3


From 1429cca1175f4cb64dd5d61ffd6037895a41d672 Mon Sep 17 00:00:00 2001
From: Loic Pallardy <loic.pallardy@st.com>
Date: Fri, 27 Jul 2018 15:14:44 +0200
Subject: remoteproc: add helper function to allocate rproc_mem_entry from
 reserved memory

This patch introduces rproc_res_mem_entry_init() helper function to
allocate a rproc_mem_entry structure from a reserved memory region.
In that case, rproc_mem_entry structure has no alloc and release ops.
It will be used to assigned the specified reserved memory to any
rproc sub device.
Relation between rproc_mem_entry and rproc sub device will be done
by name.

Signed-off-by: Loic Pallardy <loic.pallardy@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 37 ++++++++++++++++++++++++++++++++++++
 include/linux/remoteproc.h           |  6 ++++++
 2 files changed, 43 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 9d17b3079506..d7a623b8801c 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -857,6 +857,7 @@ rproc_mem_entry_init(struct device *dev,
 	mem->alloc = alloc;
 	mem->release = release;
 	mem->rsc_offset = FW_RSC_ADDR_ANY;
+	mem->of_resm_idx = -1;
 
 	va_start(args, name);
 	vsnprintf(mem->name, sizeof(mem->name), name, args);
@@ -866,6 +867,42 @@ rproc_mem_entry_init(struct device *dev,
 }
 EXPORT_SYMBOL(rproc_mem_entry_init);
 
+/**
+ * rproc_of_resm_mem_entry_init() - allocate and initialize rproc_mem_entry struct
+ * from a reserved memory phandle
+ * @dev: pointer on device struct
+ * @of_resm_idx: reserved memory phandle index in "memory-region"
+ * @len: memory carveout length
+ * @da: device address
+ * @name: carveout name
+ *
+ * This function allocates a rproc_mem_entry struct and fill it with parameters
+ * provided by client.
+ */
+struct rproc_mem_entry *
+rproc_of_resm_mem_entry_init(struct device *dev, u32 of_resm_idx, int len,
+			     u32 da, const char *name, ...)
+{
+	struct rproc_mem_entry *mem;
+	va_list args;
+
+	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+	if (!mem)
+		return mem;
+
+	mem->da = da;
+	mem->len = len;
+	mem->rsc_offset = FW_RSC_ADDR_ANY;
+	mem->of_resm_idx = of_resm_idx;
+
+	va_start(args, name);
+	vsnprintf(mem->name, sizeof(mem->name), name, args);
+	va_end(args);
+
+	return mem;
+}
+EXPORT_SYMBOL(rproc_of_resm_mem_entry_init);
+
 /**
  * A lookup table for resource handlers. The indices are defined in
  * enum fw_resource_type.
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index d251c091303c..d4cabe8da507 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -319,6 +319,7 @@ struct rproc;
  * @node: list node
  * @rsc_offset: offset in resource table
  * @flags: iommu protection flags
+ * @of_resm_idx: reserved memory phandle index
  * @alloc: specific memory allocator function
  */
 struct rproc_mem_entry {
@@ -331,6 +332,7 @@ struct rproc_mem_entry {
 	struct list_head node;
 	u32 rsc_offset;
 	u32 flags;
+	u32 of_resm_idx;
 	int (*alloc)(struct rproc *rproc, struct rproc_mem_entry *mem);
 	int (*release)(struct rproc *rproc, struct rproc_mem_entry *mem);
 };
@@ -574,6 +576,10 @@ rproc_mem_entry_init(struct device *dev,
 		     int (*release)(struct rproc *, struct rproc_mem_entry *),
 		     const char *name, ...);
 
+struct rproc_mem_entry *
+rproc_of_resm_mem_entry_init(struct device *dev, u32 of_resm_idx, int len,
+			     u32 da, const char *name, ...);
+
 int rproc_boot(struct rproc *rproc);
 void rproc_shutdown(struct rproc *rproc);
 void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type);
-- 
cgit v1.2.3


From f8ec92a9f63b3b11e399409141b7868bb405e6b5 Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Date: Fri, 5 Oct 2018 08:52:58 +0200
Subject: gpiolib: Add init_valid_mask exported function

Add a function that allows initializing the valid_mask from
gpiochip_add_data.

This prevents race conditions during gpiochip initialization.

If the function is not exported, then the old behaviour is respected,
this is, set all gpios as valid.

Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Tested-by: Jeffrey Hugo <jhugo@codeaurora.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c      | 16 ++++++++++++++--
 include/linux/gpio/driver.h |  7 ++++++-
 2 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 02660bf11189..b66aae75223b 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -360,7 +360,7 @@ static unsigned long *gpiochip_allocate_mask(struct gpio_chip *chip)
 	return p;
 }
 
-static int gpiochip_init_valid_mask(struct gpio_chip *gpiochip)
+static int gpiochip_alloc_valid_mask(struct gpio_chip *gpiochip)
 {
 #ifdef CONFIG_OF_GPIO
 	int size;
@@ -381,6 +381,14 @@ static int gpiochip_init_valid_mask(struct gpio_chip *gpiochip)
 	return 0;
 }
 
+static int gpiochip_init_valid_mask(struct gpio_chip *gpiochip)
+{
+	if (gpiochip->init_valid_mask)
+		return gpiochip->init_valid_mask(gpiochip);
+
+	return 0;
+}
+
 static void gpiochip_free_valid_mask(struct gpio_chip *gpiochip)
 {
 	kfree(gpiochip->valid_mask);
@@ -1369,7 +1377,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
 	if (status)
 		goto err_remove_from_list;
 
-	status = gpiochip_init_valid_mask(chip);
+	status = gpiochip_alloc_valid_mask(chip);
 	if (status)
 		goto err_remove_irqchip_mask;
 
@@ -1381,6 +1389,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data,
 	if (status)
 		goto err_remove_chip;
 
+	status = gpiochip_init_valid_mask(chip);
+	if (status)
+		goto err_remove_chip;
+
 	acpi_gpiochip_add(chip);
 
 	machine_gpiochip_add(chip);
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index f6b95734073f..c5a51af8f76e 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -280,6 +280,9 @@ struct gpio_chip {
 
 	void			(*dbg_show)(struct seq_file *s,
 						struct gpio_chip *chip);
+
+	int			(*init_valid_mask)(struct gpio_chip *chip);
+
 	int			base;
 	u16			ngpio;
 	const char		*const *names;
@@ -318,7 +321,9 @@ struct gpio_chip {
 	/**
 	 * @need_valid_mask:
 	 *
-	 * If set core allocates @valid_mask with all bits set to one.
+	 * If set core allocates @valid_mask with all its values initialized
+	 * with init_valid_mask() or set to one if init_valid_mask() is not
+	 * defined
 	 */
 	bool need_valid_mask;
 
-- 
cgit v1.2.3


From 3e779a2e7f909015f21428b66834127496110b6d Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Mon, 8 Oct 2018 09:32:13 -0700
Subject: gpio: Assign gpio_irq_chip::parents to non-stack pointer

gpiochip_set_cascaded_irqchip() is passed 'parent_irq' as an argument
and then the address of that argument is assigned to the gpio chips
gpio_irq_chip 'parents' pointer shortly thereafter. This can't ever
work, because we've just assigned some stack address to a pointer that
we plan to dereference later in gpiochip_irq_map(). I ran into this
issue with the KASAN report below when gpiochip_irq_map() tried to setup
the parent irq with a total junk pointer for the 'parents' array.

BUG: KASAN: stack-out-of-bounds in gpiochip_irq_map+0x228/0x248
Read of size 4 at addr ffffffc0dde472e0 by task swapper/0/1

CPU: 7 PID: 1 Comm: swapper/0 Not tainted 4.14.72 #34
Call trace:
[<ffffff9008093638>] dump_backtrace+0x0/0x718
[<ffffff9008093da4>] show_stack+0x20/0x2c
[<ffffff90096b9224>] __dump_stack+0x20/0x28
[<ffffff90096b91c8>] dump_stack+0x80/0xbc
[<ffffff900845a350>] print_address_description+0x70/0x238
[<ffffff900845a8e4>] kasan_report+0x1cc/0x260
[<ffffff900845aa14>] __asan_report_load4_noabort+0x2c/0x38
[<ffffff900897e098>] gpiochip_irq_map+0x228/0x248
[<ffffff900820cc08>] irq_domain_associate+0x114/0x2ec
[<ffffff900820d13c>] irq_create_mapping+0x120/0x234
[<ffffff900820da78>] irq_create_fwspec_mapping+0x4c8/0x88c
[<ffffff900820e2d8>] irq_create_of_mapping+0x180/0x210
[<ffffff900917114c>] of_irq_get+0x138/0x198
[<ffffff9008dc70ac>] spi_drv_probe+0x94/0x178
[<ffffff9008ca5168>] driver_probe_device+0x51c/0x824
[<ffffff9008ca6538>] __device_attach_driver+0x148/0x20c
[<ffffff9008ca14cc>] bus_for_each_drv+0x120/0x188
[<ffffff9008ca570c>] __device_attach+0x19c/0x2dc
[<ffffff9008ca586c>] device_initial_probe+0x20/0x2c
[<ffffff9008ca18bc>] bus_probe_device+0x80/0x154
[<ffffff9008c9b9b4>] device_add+0x9b8/0xbdc
[<ffffff9008dc7640>] spi_add_device+0x1b8/0x380
[<ffffff9008dcbaf0>] spi_register_controller+0x111c/0x1378
[<ffffff9008dd6b10>] spi_geni_probe+0x4dc/0x6f8
[<ffffff9008cab058>] platform_drv_probe+0xdc/0x130
[<ffffff9008ca5168>] driver_probe_device+0x51c/0x824
[<ffffff9008ca59cc>] __driver_attach+0x100/0x194
[<ffffff9008ca0ea8>] bus_for_each_dev+0x104/0x16c
[<ffffff9008ca58c0>] driver_attach+0x48/0x54
[<ffffff9008ca1edc>] bus_add_driver+0x274/0x498
[<ffffff9008ca8448>] driver_register+0x1ac/0x230
[<ffffff9008caaf6c>] __platform_driver_register+0xcc/0xdc
[<ffffff9009c4b33c>] spi_geni_driver_init+0x1c/0x24
[<ffffff9008084cb8>] do_one_initcall+0x240/0x3dc
[<ffffff9009c017d0>] kernel_init_freeable+0x378/0x468
[<ffffff90096e8240>] kernel_init+0x14/0x110
[<ffffff9008086fcc>] ret_from_fork+0x10/0x18

The buggy address belongs to the page:
page:ffffffbf037791c0 count:0 mapcount:0 mapping:          (null) index:0x0
flags: 0x4000000000000000()
raw: 4000000000000000 0000000000000000 0000000000000000 00000000ffffffff
raw: ffffffbf037791e0 ffffffbf037791e0 0000000000000000 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffffffc0dde47180: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 ffffffc0dde47200: f1 f1 f1 f1 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f2 f2
>ffffffc0dde47280: f2 f2 00 00 00 00 00 00 00 00 00 00 f3 f3 f3 f3
                                                       ^
 ffffffc0dde47300: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 ffffffc0dde47380: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00

Let's leave around one unsigned int in the gpio_irq_chip struct for the
single parent irq case and repoint the 'parents' array at it. This way
code is left mostly intact to setup parents and we waste an extra few
bytes per structure of which there should be only a handful in a system.

Cc: Evan Green <evgreen@chromium.org>
Cc: Thierry Reding <treding@nvidia.com>
Cc: Grygorii Strashko <grygorii.strashko@ti.com>
Fixes: e0d897289813 ("gpio: Implement tighter IRQ chip integration")
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib.c      | 3 ++-
 include/linux/gpio/driver.h | 7 +++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index a57300c1d649..25187403e3ac 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1682,7 +1682,8 @@ static void gpiochip_set_cascaded_irqchip(struct gpio_chip *gpiochip,
 		irq_set_chained_handler_and_data(parent_irq, parent_handler,
 						 gpiochip);
 
-		gpiochip->irq.parents = &parent_irq;
+		gpiochip->irq.parent_irq = parent_irq;
+		gpiochip->irq.parents = &gpiochip->irq.parent_irq;
 		gpiochip->irq.num_parents = 1;
 	}
 
diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h
index 0ea328e71ec9..a4d5eb37744a 100644
--- a/include/linux/gpio/driver.h
+++ b/include/linux/gpio/driver.h
@@ -94,6 +94,13 @@ struct gpio_irq_chip {
 	 */
 	unsigned int num_parents;
 
+	/**
+	 * @parent_irq:
+	 *
+	 * For use by gpiochip_set_cascaded_irqchip()
+	 */
+	unsigned int parent_irq;
+
 	/**
 	 * @parents:
 	 *
-- 
cgit v1.2.3


From 5039563e7c25eccd7fec1de6706011009d1c5665 Mon Sep 17 00:00:00 2001
From: Trent Piepho <tpiepho@impinj.com>
Date: Thu, 20 Sep 2018 19:18:32 +0000
Subject: spi: Add driver_override SPI device attribute
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This attribute works the same was as the identically named attribute
for PCI, AMBA, and platform devices.  For reference, see:

commit 3cf385713460 ("ARM: 8256/1: driver coamba: add device binding
path 'driver_override'")
commit 3d713e0e382e ("driver core: platform: add device binding path
'driver_override'")
commit 782a985d7af2 ("PCI: Introduce new device binding path using
pci_dev.driver_override")

If the name of a driver is written to this attribute, then the device
will bind to the named driver and only the named driver.

The device will bind to the driver even if the driver does not list the
device in its id table.  This behavior is different than the driver's
bind attribute, which only allows binding to devices that are listed as
supported by the driver.

It can be used to bind a generic driver, like spidev, to a device.

Signed-off-by: Trent Piepho <tpiepho@impinj.com>
Reviewed-by: Jan KundrÃ¡t <jan.kundrat@cesnet.cz>
Tested-by: Jan KundrÃ¡t <jan.kundrat@cesnet.cz>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi.c       | 51 +++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/spi/spi.h |  1 +
 2 files changed, 52 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 2cfc3df821f6..fcbd4cfd2818 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -51,6 +51,7 @@ static void spidev_release(struct device *dev)
 		spi->controller->cleanup(spi);
 
 	spi_controller_put(spi->controller);
+	kfree(spi->driver_override);
 	kfree(spi);
 }
 
@@ -68,6 +69,51 @@ modalias_show(struct device *dev, struct device_attribute *a, char *buf)
 }
 static DEVICE_ATTR_RO(modalias);
 
+static ssize_t driver_override_store(struct device *dev,
+				     struct device_attribute *a,
+				     const char *buf, size_t count)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	const char *end = memchr(buf, '\n', count);
+	const size_t len = end ? end - buf : count;
+	const char *driver_override, *old;
+
+	/* We need to keep extra room for a newline when displaying value */
+	if (len >= (PAGE_SIZE - 1))
+		return -EINVAL;
+
+	driver_override = kstrndup(buf, len, GFP_KERNEL);
+	if (!driver_override)
+		return -ENOMEM;
+
+	device_lock(dev);
+	old = spi->driver_override;
+	if (len) {
+		spi->driver_override = driver_override;
+	} else {
+		/* Emptry string, disable driver override */
+		spi->driver_override = NULL;
+		kfree(driver_override);
+	}
+	device_unlock(dev);
+	kfree(old);
+
+	return count;
+}
+
+static ssize_t driver_override_show(struct device *dev,
+				    struct device_attribute *a, char *buf)
+{
+	const struct spi_device *spi = to_spi_device(dev);
+	ssize_t len;
+
+	device_lock(dev);
+	len = snprintf(buf, PAGE_SIZE, "%s\n", spi->driver_override ? : "");
+	device_unlock(dev);
+	return len;
+}
+static DEVICE_ATTR_RW(driver_override);
+
 #define SPI_STATISTICS_ATTRS(field, file)				\
 static ssize_t spi_controller_##field##_show(struct device *dev,	\
 					     struct device_attribute *attr, \
@@ -149,6 +195,7 @@ SPI_STATISTICS_SHOW(transfers_split_maxsize, "%lu");
 
 static struct attribute *spi_dev_attrs[] = {
 	&dev_attr_modalias.attr,
+	&dev_attr_driver_override.attr,
 	NULL,
 };
 
@@ -296,6 +343,10 @@ static int spi_match_device(struct device *dev, struct device_driver *drv)
 	const struct spi_device	*spi = to_spi_device(dev);
 	const struct spi_driver	*sdrv = to_spi_driver(drv);
 
+	/* Check override first, and if set, only use the named driver */
+	if (spi->driver_override)
+		return strcmp(spi->driver_override, drv->name) == 0;
+
 	/* Attempt an OF style match */
 	if (of_driver_match_device(dev, drv))
 		return 1;
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index f08824ea1968..3de9958ca28b 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -159,6 +159,7 @@ struct spi_device {
 	void			*controller_state;
 	void			*controller_data;
 	char			modalias[SPI_NAME_SIZE];
+	const char		*driver_override;
 	int			cs_gpio;	/* chip select gpio */
 
 	/* the statistics */
-- 
cgit v1.2.3


From 52916982af48d9f9fc01ad825259de1eb3a9b25e Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Thu, 4 Oct 2018 15:27:35 -0600
Subject: PCI/P2PDMA: Support peer-to-peer memory

Some PCI devices may have memory mapped in a BAR space that's intended for
use in peer-to-peer transactions.  To enable such transactions the memory
must be registered with ZONE_DEVICE pages so it can be used by DMA
interfaces in existing drivers.

Add an interface for other subsystems to find and allocate chunks of P2P
memory as necessary to facilitate transfers between two PCI peers:

  struct pci_dev *pci_p2pmem_find[_many]();
  int pci_p2pdma_distance[_many]();
  void *pci_alloc_p2pmem();

The new interface requires a driver to collect a list of client devices
involved in the transaction then call pci_p2pmem_find() to obtain any
suitable P2P memory.  Alternatively, if the caller knows a device which
provides P2P memory, they can use pci_p2pdma_distance() to determine if it
is usable.  With a suitable p2pmem device, memory can then be allocated
with pci_alloc_p2pmem() for use in DMA transactions.

Depending on hardware, using peer-to-peer memory may reduce the bandwidth
of the transfer but can significantly reduce pressure on system memory.
This may be desirable in many cases: for example a system could be designed
with a small CPU connected to a PCIe switch by a small number of lanes
which would maximize the number of lanes available to connect to NVMe
devices.

The code is designed to only utilize the p2pmem device if all the devices
involved in a transfer are behind the same PCI bridge.  This is because we
have no way of knowing whether peer-to-peer routing between PCIe Root Ports
is supported (PCIe r4.0, sec 1.3.1).  Additionally, the benefits of P2P
transfers that go through the RC is limited to only reducing DRAM usage
and, in some cases, coding convenience.  The PCI-SIG may be exploring
adding a new capability bit to advertise whether this is possible for
future hardware.

This commit includes significant rework and feedback from Christoph
Hellwig.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
[bhelgaas: fold in fix from Keith Busch <keith.busch@intel.com>:
https://lore.kernel.org/linux-pci/20181012155920.15418-1-keith.busch@intel.com,
to address comment from Dan Carpenter <dan.carpenter@oracle.com>, fold in
https://lore.kernel.org/linux-pci/20181017160510.17926-1-logang@deltatee.com]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/Kconfig        |  17 ++
 drivers/pci/Makefile       |   1 +
 drivers/pci/p2pdma.c       | 626 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/memremap.h   |   5 +
 include/linux/mm.h         |  18 ++
 include/linux/pci-p2pdma.h |  92 +++++++
 include/linux/pci.h        |   4 +
 7 files changed, 763 insertions(+)
 create mode 100644 drivers/pci/p2pdma.c
 create mode 100644 include/linux/pci-p2pdma.h

(limited to 'include/linux')

diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 56ff8f6d31fc..deb68be4fdac 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -132,6 +132,23 @@ config PCI_PASID
 
 	  If unsure, say N.
 
+config PCI_P2PDMA
+	bool "PCI peer-to-peer transfer support"
+	depends on PCI && ZONE_DEVICE
+	select GENERIC_ALLOCATOR
+	help
+	  Enableѕ drivers to do PCI peer-to-peer transactions to and from
+	  BARs that are exposed in other devices that are the part of
+	  the hierarchy where peer-to-peer DMA is guaranteed by the PCI
+	  specification to work (ie. anything below a single PCI bridge).
+
+	  Many PCIe root complexes do not support P2P transactions and
+	  it's hard to tell which support it at all, so at this time,
+	  P2P DMA transations must be between devices behind the same root
+	  port.
+
+	  If unsure, say N.
+
 config PCI_LABEL
 	def_bool y if (DMI || ACPI)
 	depends on PCI
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 1b2cfe51e8d7..85f4a703b2be 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_PCI_SYSCALL)	+= syscall.o
 obj-$(CONFIG_PCI_STUB)		+= pci-stub.o
 obj-$(CONFIG_PCI_PF_STUB)	+= pci-pf-stub.o
 obj-$(CONFIG_PCI_ECAM)		+= ecam.o
+obj-$(CONFIG_PCI_P2PDMA)	+= p2pdma.o
 obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
 
 # Endpoint library must be initialized before its users
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
new file mode 100644
index 000000000000..24d0dbb36ba6
--- /dev/null
+++ b/drivers/pci/p2pdma.c
@@ -0,0 +1,626 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCI Peer 2 Peer DMA support.
+ *
+ * Copyright (c) 2016-2018, Logan Gunthorpe
+ * Copyright (c) 2016-2017, Microsemi Corporation
+ * Copyright (c) 2017, Christoph Hellwig
+ * Copyright (c) 2018, Eideticom Inc.
+ */
+
+#include <linux/pci-p2pdma.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/genalloc.h>
+#include <linux/memremap.h>
+#include <linux/percpu-refcount.h>
+#include <linux/random.h>
+#include <linux/seq_buf.h>
+
+struct pci_p2pdma {
+	struct percpu_ref devmap_ref;
+	struct completion devmap_ref_done;
+	struct gen_pool *pool;
+	bool p2pmem_published;
+};
+
+static void pci_p2pdma_percpu_release(struct percpu_ref *ref)
+{
+	struct pci_p2pdma *p2p =
+		container_of(ref, struct pci_p2pdma, devmap_ref);
+
+	complete_all(&p2p->devmap_ref_done);
+}
+
+static void pci_p2pdma_percpu_kill(void *data)
+{
+	struct percpu_ref *ref = data;
+
+	/*
+	 * pci_p2pdma_add_resource() may be called multiple times
+	 * by a driver and may register the percpu_kill devm action multiple
+	 * times. We only want the first action to actually kill the
+	 * percpu_ref.
+	 */
+	if (percpu_ref_is_dying(ref))
+		return;
+
+	percpu_ref_kill(ref);
+}
+
+static void pci_p2pdma_release(void *data)
+{
+	struct pci_dev *pdev = data;
+
+	if (!pdev->p2pdma)
+		return;
+
+	wait_for_completion(&pdev->p2pdma->devmap_ref_done);
+	percpu_ref_exit(&pdev->p2pdma->devmap_ref);
+
+	gen_pool_destroy(pdev->p2pdma->pool);
+	pdev->p2pdma = NULL;
+}
+
+static int pci_p2pdma_setup(struct pci_dev *pdev)
+{
+	int error = -ENOMEM;
+	struct pci_p2pdma *p2p;
+
+	p2p = devm_kzalloc(&pdev->dev, sizeof(*p2p), GFP_KERNEL);
+	if (!p2p)
+		return -ENOMEM;
+
+	p2p->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev));
+	if (!p2p->pool)
+		goto out;
+
+	init_completion(&p2p->devmap_ref_done);
+	error = percpu_ref_init(&p2p->devmap_ref,
+			pci_p2pdma_percpu_release, 0, GFP_KERNEL);
+	if (error)
+		goto out_pool_destroy;
+
+	error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev);
+	if (error)
+		goto out_pool_destroy;
+
+	pdev->p2pdma = p2p;
+
+	return 0;
+
+out_pool_destroy:
+	gen_pool_destroy(p2p->pool);
+out:
+	devm_kfree(&pdev->dev, p2p);
+	return error;
+}
+
+/**
+ * pci_p2pdma_add_resource - add memory for use as p2p memory
+ * @pdev: the device to add the memory to
+ * @bar: PCI BAR to add
+ * @size: size of the memory to add, may be zero to use the whole BAR
+ * @offset: offset into the PCI BAR
+ *
+ * The memory will be given ZONE_DEVICE struct pages so that it may
+ * be used with any DMA request.
+ */
+int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
+			    u64 offset)
+{
+	struct dev_pagemap *pgmap;
+	void *addr;
+	int error;
+
+	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM))
+		return -EINVAL;
+
+	if (offset >= pci_resource_len(pdev, bar))
+		return -EINVAL;
+
+	if (!size)
+		size = pci_resource_len(pdev, bar) - offset;
+
+	if (size + offset > pci_resource_len(pdev, bar))
+		return -EINVAL;
+
+	if (!pdev->p2pdma) {
+		error = pci_p2pdma_setup(pdev);
+		if (error)
+			return error;
+	}
+
+	pgmap = devm_kzalloc(&pdev->dev, sizeof(*pgmap), GFP_KERNEL);
+	if (!pgmap)
+		return -ENOMEM;
+
+	pgmap->res.start = pci_resource_start(pdev, bar) + offset;
+	pgmap->res.end = pgmap->res.start + size - 1;
+	pgmap->res.flags = pci_resource_flags(pdev, bar);
+	pgmap->ref = &pdev->p2pdma->devmap_ref;
+	pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
+
+	addr = devm_memremap_pages(&pdev->dev, pgmap);
+	if (IS_ERR(addr)) {
+		error = PTR_ERR(addr);
+		goto pgmap_free;
+	}
+
+	error = gen_pool_add_virt(pdev->p2pdma->pool, (unsigned long)addr,
+			pci_bus_address(pdev, bar) + offset,
+			resource_size(&pgmap->res), dev_to_node(&pdev->dev));
+	if (error)
+		goto pgmap_free;
+
+	error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_percpu_kill,
+					  &pdev->p2pdma->devmap_ref);
+	if (error)
+		goto pgmap_free;
+
+	pci_info(pdev, "added peer-to-peer DMA memory %pR\n",
+		 &pgmap->res);
+
+	return 0;
+
+pgmap_free:
+	devm_kfree(&pdev->dev, pgmap);
+	return error;
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource);
+
+/*
+ * Note this function returns the parent PCI device with a
+ * reference taken. It is the caller's responsibily to drop
+ * the reference.
+ */
+static struct pci_dev *find_parent_pci_dev(struct device *dev)
+{
+	struct device *parent;
+
+	dev = get_device(dev);
+
+	while (dev) {
+		if (dev_is_pci(dev))
+			return to_pci_dev(dev);
+
+		parent = get_device(dev->parent);
+		put_device(dev);
+		dev = parent;
+	}
+
+	return NULL;
+}
+
+/*
+ * Check if a PCI bridge has its ACS redirection bits set to redirect P2P
+ * TLPs upstream via ACS. Returns 1 if the packets will be redirected
+ * upstream, 0 otherwise.
+ */
+static int pci_bridge_has_acs_redir(struct pci_dev *pdev)
+{
+	int pos;
+	u16 ctrl;
+
+	pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ACS);
+	if (!pos)
+		return 0;
+
+	pci_read_config_word(pdev, pos + PCI_ACS_CTRL, &ctrl);
+
+	if (ctrl & (PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC))
+		return 1;
+
+	return 0;
+}
+
+static void seq_buf_print_bus_devfn(struct seq_buf *buf, struct pci_dev *pdev)
+{
+	if (!buf)
+		return;
+
+	seq_buf_printf(buf, "%s;", pci_name(pdev));
+}
+
+/*
+ * Find the distance through the nearest common upstream bridge between
+ * two PCI devices.
+ *
+ * If the two devices are the same device then 0 will be returned.
+ *
+ * If there are two virtual functions of the same device behind the same
+ * bridge port then 2 will be returned (one step down to the PCIe switch,
+ * then one step back to the same device).
+ *
+ * In the case where two devices are connected to the same PCIe switch, the
+ * value 4 will be returned. This corresponds to the following PCI tree:
+ *
+ *     -+  Root Port
+ *      \+ Switch Upstream Port
+ *       +-+ Switch Downstream Port
+ *       + \- Device A
+ *       \-+ Switch Downstream Port
+ *         \- Device B
+ *
+ * The distance is 4 because we traverse from Device A through the downstream
+ * port of the switch, to the common upstream port, back up to the second
+ * downstream port and then to Device B.
+ *
+ * Any two devices that don't have a common upstream bridge will return -1.
+ * In this way devices on separate PCIe root ports will be rejected, which
+ * is what we want for peer-to-peer seeing each PCIe root port defines a
+ * separate hierarchy domain and there's no way to determine whether the root
+ * complex supports forwarding between them.
+ *
+ * In the case where two devices are connected to different PCIe switches,
+ * this function will still return a positive distance as long as both
+ * switches eventually have a common upstream bridge. Note this covers
+ * the case of using multiple PCIe switches to achieve a desired level of
+ * fan-out from a root port. The exact distance will be a function of the
+ * number of switches between Device A and Device B.
+ *
+ * If a bridge which has any ACS redirection bits set is in the path
+ * then this functions will return -2. This is so we reject any
+ * cases where the TLPs are forwarded up into the root complex.
+ * In this case, a list of all infringing bridge addresses will be
+ * populated in acs_list (assuming it's non-null) for printk purposes.
+ */
+static int upstream_bridge_distance(struct pci_dev *a,
+				    struct pci_dev *b,
+				    struct seq_buf *acs_list)
+{
+	int dist_a = 0;
+	int dist_b = 0;
+	struct pci_dev *bb = NULL;
+	int acs_cnt = 0;
+
+	/*
+	 * Note, we don't need to take references to devices returned by
+	 * pci_upstream_bridge() seeing we hold a reference to a child
+	 * device which will already hold a reference to the upstream bridge.
+	 */
+
+	while (a) {
+		dist_b = 0;
+
+		if (pci_bridge_has_acs_redir(a)) {
+			seq_buf_print_bus_devfn(acs_list, a);
+			acs_cnt++;
+		}
+
+		bb = b;
+
+		while (bb) {
+			if (a == bb)
+				goto check_b_path_acs;
+
+			bb = pci_upstream_bridge(bb);
+			dist_b++;
+		}
+
+		a = pci_upstream_bridge(a);
+		dist_a++;
+	}
+
+	return -1;
+
+check_b_path_acs:
+	bb = b;
+
+	while (bb) {
+		if (a == bb)
+			break;
+
+		if (pci_bridge_has_acs_redir(bb)) {
+			seq_buf_print_bus_devfn(acs_list, bb);
+			acs_cnt++;
+		}
+
+		bb = pci_upstream_bridge(bb);
+	}
+
+	if (acs_cnt)
+		return -2;
+
+	return dist_a + dist_b;
+}
+
+static int upstream_bridge_distance_warn(struct pci_dev *provider,
+					 struct pci_dev *client)
+{
+	struct seq_buf acs_list;
+	int ret;
+
+	seq_buf_init(&acs_list, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
+	if (!acs_list.buffer)
+		return -ENOMEM;
+
+	ret = upstream_bridge_distance(provider, client, &acs_list);
+	if (ret == -2) {
+		pci_warn(client, "cannot be used for peer-to-peer DMA as ACS redirect is set between the client and provider (%s)\n",
+			 pci_name(provider));
+		/* Drop final semicolon */
+		acs_list.buffer[acs_list.len-1] = 0;
+		pci_warn(client, "to disable ACS redirect for this path, add the kernel parameter: pci=disable_acs_redir=%s\n",
+			 acs_list.buffer);
+
+	} else if (ret < 0) {
+		pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge\n",
+			 pci_name(provider));
+	}
+
+	kfree(acs_list.buffer);
+
+	return ret;
+}
+
+/**
+ * pci_p2pdma_distance_many - Determive the cumulative distance between
+ *	a p2pdma provider and the clients in use.
+ * @provider: p2pdma provider to check against the client list
+ * @clients: array of devices to check (NULL-terminated)
+ * @num_clients: number of clients in the array
+ * @verbose: if true, print warnings for devices when we return -1
+ *
+ * Returns -1 if any of the clients are not compatible (behind the same
+ * root port as the provider), otherwise returns a positive number where
+ * a lower number is the preferrable choice. (If there's one client
+ * that's the same as the provider it will return 0, which is best choice).
+ *
+ * For now, "compatible" means the provider and the clients are all behind
+ * the same PCI root port. This cuts out cases that may work but is safest
+ * for the user. Future work can expand this to white-list root complexes that
+ * can safely forward between each ports.
+ */
+int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients,
+			     int num_clients, bool verbose)
+{
+	bool not_supported = false;
+	struct pci_dev *pci_client;
+	int distance = 0;
+	int i, ret;
+
+	if (num_clients == 0)
+		return -1;
+
+	for (i = 0; i < num_clients; i++) {
+		pci_client = find_parent_pci_dev(clients[i]);
+		if (!pci_client) {
+			if (verbose)
+				dev_warn(clients[i],
+					 "cannot be used for peer-to-peer DMA as it is not a PCI device\n");
+			return -1;
+		}
+
+		if (verbose)
+			ret = upstream_bridge_distance_warn(provider,
+							    pci_client);
+		else
+			ret = upstream_bridge_distance(provider, pci_client,
+						       NULL);
+
+		pci_dev_put(pci_client);
+
+		if (ret < 0)
+			not_supported = true;
+
+		if (not_supported && !verbose)
+			break;
+
+		distance += ret;
+	}
+
+	if (not_supported)
+		return -1;
+
+	return distance;
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_distance_many);
+
+/**
+ * pci_has_p2pmem - check if a given PCI device has published any p2pmem
+ * @pdev: PCI device to check
+ */
+bool pci_has_p2pmem(struct pci_dev *pdev)
+{
+	return pdev->p2pdma && pdev->p2pdma->p2pmem_published;
+}
+EXPORT_SYMBOL_GPL(pci_has_p2pmem);
+
+/**
+ * pci_p2pmem_find - find a peer-to-peer DMA memory device compatible with
+ *	the specified list of clients and shortest distance (as determined
+ *	by pci_p2pmem_dma())
+ * @clients: array of devices to check (NULL-terminated)
+ * @num_clients: number of client devices in the list
+ *
+ * If multiple devices are behind the same switch, the one "closest" to the
+ * client devices in use will be chosen first. (So if one of the providers are
+ * the same as one of the clients, that provider will be used ahead of any
+ * other providers that are unrelated). If multiple providers are an equal
+ * distance away, one will be chosen at random.
+ *
+ * Returns a pointer to the PCI device with a reference taken (use pci_dev_put
+ * to return the reference) or NULL if no compatible device is found. The
+ * found provider will also be assigned to the client list.
+ */
+struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients)
+{
+	struct pci_dev *pdev = NULL;
+	int distance;
+	int closest_distance = INT_MAX;
+	struct pci_dev **closest_pdevs;
+	int dev_cnt = 0;
+	const int max_devs = PAGE_SIZE / sizeof(*closest_pdevs);
+	int i;
+
+	closest_pdevs = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!closest_pdevs)
+		return NULL;
+
+	while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) {
+		if (!pci_has_p2pmem(pdev))
+			continue;
+
+		distance = pci_p2pdma_distance_many(pdev, clients,
+						    num_clients, false);
+		if (distance < 0 || distance > closest_distance)
+			continue;
+
+		if (distance == closest_distance && dev_cnt >= max_devs)
+			continue;
+
+		if (distance < closest_distance) {
+			for (i = 0; i < dev_cnt; i++)
+				pci_dev_put(closest_pdevs[i]);
+
+			dev_cnt = 0;
+			closest_distance = distance;
+		}
+
+		closest_pdevs[dev_cnt++] = pci_dev_get(pdev);
+	}
+
+	if (dev_cnt)
+		pdev = pci_dev_get(closest_pdevs[prandom_u32_max(dev_cnt)]);
+
+	for (i = 0; i < dev_cnt; i++)
+		pci_dev_put(closest_pdevs[i]);
+
+	kfree(closest_pdevs);
+	return pdev;
+}
+EXPORT_SYMBOL_GPL(pci_p2pmem_find_many);
+
+/**
+ * pci_alloc_p2p_mem - allocate peer-to-peer DMA memory
+ * @pdev: the device to allocate memory from
+ * @size: number of bytes to allocate
+ *
+ * Returns the allocated memory or NULL on error.
+ */
+void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size)
+{
+	void *ret;
+
+	if (unlikely(!pdev->p2pdma))
+		return NULL;
+
+	if (unlikely(!percpu_ref_tryget_live(&pdev->p2pdma->devmap_ref)))
+		return NULL;
+
+	ret = (void *)gen_pool_alloc(pdev->p2pdma->pool, size);
+
+	if (unlikely(!ret))
+		percpu_ref_put(&pdev->p2pdma->devmap_ref);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);
+
+/**
+ * pci_free_p2pmem - free peer-to-peer DMA memory
+ * @pdev: the device the memory was allocated from
+ * @addr: address of the memory that was allocated
+ * @size: number of bytes that was allocated
+ */
+void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size)
+{
+	gen_pool_free(pdev->p2pdma->pool, (uintptr_t)addr, size);
+	percpu_ref_put(&pdev->p2pdma->devmap_ref);
+}
+EXPORT_SYMBOL_GPL(pci_free_p2pmem);
+
+/**
+ * pci_virt_to_bus - return the PCI bus address for a given virtual
+ *	address obtained with pci_alloc_p2pmem()
+ * @pdev: the device the memory was allocated from
+ * @addr: address of the memory that was allocated
+ */
+pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev, void *addr)
+{
+	if (!addr)
+		return 0;
+	if (!pdev->p2pdma)
+		return 0;
+
+	/*
+	 * Note: when we added the memory to the pool we used the PCI
+	 * bus address as the physical address. So gen_pool_virt_to_phys()
+	 * actually returns the bus address despite the misleading name.
+	 */
+	return gen_pool_virt_to_phys(pdev->p2pdma->pool, (unsigned long)addr);
+}
+EXPORT_SYMBOL_GPL(pci_p2pmem_virt_to_bus);
+
+/**
+ * pci_p2pmem_alloc_sgl - allocate peer-to-peer DMA memory in a scatterlist
+ * @pdev: the device to allocate memory from
+ * @nents: the number of SG entries in the list
+ * @length: number of bytes to allocate
+ *
+ * Returns 0 on success
+ */
+struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev,
+					 unsigned int *nents, u32 length)
+{
+	struct scatterlist *sg;
+	void *addr;
+
+	sg = kzalloc(sizeof(*sg), GFP_KERNEL);
+	if (!sg)
+		return NULL;
+
+	sg_init_table(sg, 1);
+
+	addr = pci_alloc_p2pmem(pdev, length);
+	if (!addr)
+		goto out_free_sg;
+
+	sg_set_buf(sg, addr, length);
+	*nents = 1;
+	return sg;
+
+out_free_sg:
+	kfree(sg);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(pci_p2pmem_alloc_sgl);
+
+/**
+ * pci_p2pmem_free_sgl - free a scatterlist allocated by pci_p2pmem_alloc_sgl()
+ * @pdev: the device to allocate memory from
+ * @sgl: the allocated scatterlist
+ */
+void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl)
+{
+	struct scatterlist *sg;
+	int count;
+
+	for_each_sg(sgl, sg, INT_MAX, count) {
+		if (!sg)
+			break;
+
+		pci_free_p2pmem(pdev, sg_virt(sg), sg->length);
+	}
+	kfree(sgl);
+}
+EXPORT_SYMBOL_GPL(pci_p2pmem_free_sgl);
+
+/**
+ * pci_p2pmem_publish - publish the peer-to-peer DMA memory for use by
+ *	other devices with pci_p2pmem_find()
+ * @pdev: the device with peer-to-peer DMA memory to publish
+ * @publish: set to true to publish the memory, false to unpublish it
+ *
+ * Published memory can be used by other PCI device drivers for
+ * peer-2-peer DMA operations. Non-published memory is reserved for
+ * exlusive use of the device driver that registers the peer-to-peer
+ * memory.
+ */
+void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
+{
+	if (pdev->p2pdma)
+		pdev->p2pdma->p2pmem_published = publish;
+}
+EXPORT_SYMBOL_GPL(pci_p2pmem_publish);
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index f91f9e763557..9553370ebdad 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -53,11 +53,16 @@ struct vmem_altmap {
  * wakeup event whenever a page is unpinned and becomes idle. This
  * wakeup is used to coordinate physical address space management (ex:
  * fs truncate/hole punch) vs pinned pages (ex: device dma).
+ *
+ * MEMORY_DEVICE_PCI_P2PDMA:
+ * Device memory residing in a PCI BAR intended for use with Peer-to-Peer
+ * transactions.
  */
 enum memory_type {
 	MEMORY_DEVICE_PRIVATE = 1,
 	MEMORY_DEVICE_PUBLIC,
 	MEMORY_DEVICE_FS_DAX,
+	MEMORY_DEVICE_PCI_P2PDMA,
 };
 
 /*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a61ebe8ad4ca..2055df412a77 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -890,6 +890,19 @@ static inline bool is_device_public_page(const struct page *page)
 		page->pgmap->type == MEMORY_DEVICE_PUBLIC;
 }
 
+#ifdef CONFIG_PCI_P2PDMA
+static inline bool is_pci_p2pdma_page(const struct page *page)
+{
+	return is_zone_device_page(page) &&
+		page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
+}
+#else /* CONFIG_PCI_P2PDMA */
+static inline bool is_pci_p2pdma_page(const struct page *page)
+{
+	return false;
+}
+#endif /* CONFIG_PCI_P2PDMA */
+
 #else /* CONFIG_DEV_PAGEMAP_OPS */
 static inline void dev_pagemap_get_ops(void)
 {
@@ -913,6 +926,11 @@ static inline bool is_device_public_page(const struct page *page)
 {
 	return false;
 }
+
+static inline bool is_pci_p2pdma_page(const struct page *page)
+{
+	return false;
+}
 #endif /* CONFIG_DEV_PAGEMAP_OPS */
 
 static inline void get_page(struct page *page)
diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h
new file mode 100644
index 000000000000..7bdaacfd5892
--- /dev/null
+++ b/include/linux/pci-p2pdma.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PCI Peer 2 Peer DMA support.
+ *
+ * Copyright (c) 2016-2018, Logan Gunthorpe
+ * Copyright (c) 2016-2017, Microsemi Corporation
+ * Copyright (c) 2017, Christoph Hellwig
+ * Copyright (c) 2018, Eideticom Inc.
+ */
+
+#ifndef _LINUX_PCI_P2PDMA_H
+#define _LINUX_PCI_P2PDMA_H
+
+#include <linux/pci.h>
+
+struct block_device;
+struct scatterlist;
+
+#ifdef CONFIG_PCI_P2PDMA
+int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
+		u64 offset);
+int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients,
+			     int num_clients, bool verbose);
+bool pci_has_p2pmem(struct pci_dev *pdev);
+struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients);
+void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size);
+void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size);
+pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev, void *addr);
+struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev,
+					 unsigned int *nents, u32 length);
+void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl);
+void pci_p2pmem_publish(struct pci_dev *pdev, bool publish);
+#else /* CONFIG_PCI_P2PDMA */
+static inline int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar,
+		size_t size, u64 offset)
+{
+	return -EOPNOTSUPP;
+}
+static inline int pci_p2pdma_distance_many(struct pci_dev *provider,
+	struct device **clients, int num_clients, bool verbose)
+{
+	return -1;
+}
+static inline bool pci_has_p2pmem(struct pci_dev *pdev)
+{
+	return false;
+}
+static inline struct pci_dev *pci_p2pmem_find_many(struct device **clients,
+						   int num_clients)
+{
+	return NULL;
+}
+static inline void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size)
+{
+	return NULL;
+}
+static inline void pci_free_p2pmem(struct pci_dev *pdev, void *addr,
+		size_t size)
+{
+}
+static inline pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev,
+						    void *addr)
+{
+	return 0;
+}
+static inline struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev,
+		unsigned int *nents, u32 length)
+{
+	return NULL;
+}
+static inline void pci_p2pmem_free_sgl(struct pci_dev *pdev,
+		struct scatterlist *sgl)
+{
+}
+static inline void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
+{
+}
+#endif /* CONFIG_PCI_P2PDMA */
+
+
+static inline int pci_p2pdma_distance(struct pci_dev *provider,
+	struct device *client, bool verbose)
+{
+	return pci_p2pdma_distance_many(provider, &client, 1, verbose);
+}
+
+static inline struct pci_dev *pci_p2pmem_find(struct device *client)
+{
+	return pci_p2pmem_find_many(&client, 1);
+}
+
+#endif /* _LINUX_PCI_P2P_H */
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 6925828f9f25..bf5277768f69 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -281,6 +281,7 @@ struct pcie_link_state;
 struct pci_vpd;
 struct pci_sriov;
 struct pci_ats;
+struct pci_p2pdma;
 
 /* The pci_dev structure describes PCI devices */
 struct pci_dev {
@@ -438,6 +439,9 @@ struct pci_dev {
 #endif
 #ifdef CONFIG_PCI_PASID
 	u16		pasid_features;
+#endif
+#ifdef CONFIG_PCI_P2PDMA
+	struct pci_p2pdma *p2pdma;
 #endif
 	phys_addr_t	rom;		/* Physical address if not from BAR */
 	size_t		romlen;		/* Length if not from BAR */
-- 
cgit v1.2.3


From df2fc43d09d3ee5ede82cab9299df5e78aa427b5 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 13 Sep 2018 11:17:23 +0200
Subject: list: introduce list_bulk_move_tail helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move all entries between @first and including @last before @head.

This is useful for LRU lists where a whole block of entries should be
moved to the end of the list.

Used as a band aid in TTM, but better placed in the common list headers.

Acked-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/ttm/ttm_bo.c | 25 +++++--------------------
 include/linux/list.h         | 23 +++++++++++++++++++++++
 2 files changed, 28 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index b2a33bf1ef10..26b889f86670 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -247,20 +247,6 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
 
-static void ttm_list_move_bulk_tail(struct list_head *list,
-				    struct list_head *first,
-				    struct list_head *last)
-{
-	first->prev->next = last->next;
-	last->next->prev = first->prev;
-
-	list->prev->next = first;
-	first->prev = list->prev;
-
-	last->next = list;
-	list->prev = last;
-}
-
 void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
 {
 	unsigned i;
@@ -276,8 +262,8 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
 		reservation_object_assert_held(pos->last->resv);
 
 		man = &pos->first->bdev->man[TTM_PL_TT];
-		ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
-					&pos->last->lru);
+		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
+				    &pos->last->lru);
 	}
 
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
@@ -291,8 +277,8 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
 		reservation_object_assert_held(pos->last->resv);
 
 		man = &pos->first->bdev->man[TTM_PL_VRAM];
-		ttm_list_move_bulk_tail(&man->lru[i], &pos->first->lru,
-					&pos->last->lru);
+		list_bulk_move_tail(&man->lru[i], &pos->first->lru,
+				    &pos->last->lru);
 	}
 
 	for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) {
@@ -306,8 +292,7 @@ void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk)
 		reservation_object_assert_held(pos->last->resv);
 
 		lru = &pos->first->bdev->glob->swap_lru[i];
-		ttm_list_move_bulk_tail(lru, &pos->first->swap,
-					&pos->last->swap);
+		list_bulk_move_tail(lru, &pos->first->swap, &pos->last->swap);
 	}
 }
 EXPORT_SYMBOL(ttm_bo_bulk_move_lru_tail);
diff --git a/include/linux/list.h b/include/linux/list.h
index de04cc5ed536..edb7628e46ed 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -183,6 +183,29 @@ static inline void list_move_tail(struct list_head *list,
 	list_add_tail(list, head);
 }
 
+/**
+ * list_bulk_move_tail - move a subsection of a list to its tail
+ * @head: the head that will follow our entry
+ * @first: first entry to move
+ * @last: last entry to move, can be the same as first
+ *
+ * Move all entries between @first and including @last before @head.
+ * All three entries must belong to the same linked list.
+ */
+static inline void list_bulk_move_tail(struct list_head *head,
+				       struct list_head *first,
+				       struct list_head *last)
+{
+	first->prev->next = last->next;
+	last->next->prev = first->prev;
+
+	head->prev->next = first;
+	first->prev = head->prev;
+
+	last->next = head;
+	head->prev = last;
+}
+
 /**
  * list_is_last - tests whether @list is the last entry in list @head
  * @list: the entry to test
-- 
cgit v1.2.3


From 18b01b16e8bae9cd227909f6e6d2783d74855f65 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 9 Oct 2018 16:08:22 +0200
Subject: PCI: Remove pci_unmap_addr() wrappers for DMA API

Only some of these were still used by the cxgb4 driver, and that despite
the fact that the driver otherwise uses the generic DMA API.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/infiniband/hw/cxgb4/qp.c | 10 +++++-----
 drivers/infiniband/hw/cxgb4/t4.h |  2 +-
 include/linux/pci-dma.h          | 12 ------------
 include/linux/pci.h              |  1 -
 4 files changed, 6 insertions(+), 19 deletions(-)
 delete mode 100644 include/linux/pci-dma.h

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 347fe18b1a41..62d6f197ec0b 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -99,7 +99,7 @@ static void dealloc_oc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
 static void dealloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
 {
 	dma_free_coherent(&(rdev->lldi.pdev->dev), sq->memsize, sq->queue,
-			  pci_unmap_addr(sq, mapping));
+			  dma_unmap_addr(sq, mapping));
 }
 
 static void dealloc_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
@@ -132,7 +132,7 @@ static int alloc_host_sq(struct c4iw_rdev *rdev, struct t4_sq *sq)
 	if (!sq->queue)
 		return -ENOMEM;
 	sq->phys_addr = virt_to_phys(sq->queue);
-	pci_unmap_addr_set(sq, mapping, sq->dma_addr);
+	dma_unmap_addr_set(sq, mapping, sq->dma_addr);
 	return 0;
 }
 
@@ -2521,7 +2521,7 @@ static void free_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
 
 	dma_free_coherent(&rdev->lldi.pdev->dev,
 			  wq->memsize, wq->queue,
-			pci_unmap_addr(wq, mapping));
+			dma_unmap_addr(wq, mapping));
 	c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
 	kfree(wq->sw_rq);
 	c4iw_put_qpid(rdev, wq->qid, uctx);
@@ -2570,7 +2570,7 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
 		goto err_free_rqtpool;
 
 	memset(wq->queue, 0, wq->memsize);
-	pci_unmap_addr_set(wq, mapping, wq->dma_addr);
+	dma_unmap_addr_set(wq, mapping, wq->dma_addr);
 
 	wq->bar2_va = c4iw_bar2_addrs(rdev, wq->qid, T4_BAR2_QTYPE_EGRESS,
 				      &wq->bar2_qid,
@@ -2649,7 +2649,7 @@ static int alloc_srq_queue(struct c4iw_srq *srq, struct c4iw_dev_ucontext *uctx,
 err_free_queue:
 	dma_free_coherent(&rdev->lldi.pdev->dev,
 			  wq->memsize, wq->queue,
-			pci_unmap_addr(wq, mapping));
+			dma_unmap_addr(wq, mapping));
 err_free_rqtpool:
 	c4iw_rqtpool_free(rdev, wq->rqt_hwaddr, wq->rqt_size);
 err_free_pending_wrs:
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index e42021fd6fd6..fff6d48d262f 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -397,7 +397,7 @@ struct t4_srq_pending_wr {
 struct t4_srq {
 	union t4_recv_wr *queue;
 	dma_addr_t dma_addr;
-	DECLARE_PCI_UNMAP_ADDR(mapping);
+	DEFINE_DMA_UNMAP_ADDR(mapping);
 	struct t4_swrqe *sw_rq;
 	void __iomem *bar2_va;
 	u64 bar2_pa;
diff --git a/include/linux/pci-dma.h b/include/linux/pci-dma.h
deleted file mode 100644
index 0f7aa7353ca3..000000000000
--- a/include/linux/pci-dma.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_PCI_DMA_H
-#define _LINUX_PCI_DMA_H
-
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) DEFINE_DMA_UNMAP_ADDR(ADDR_NAME);
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)   DEFINE_DMA_UNMAP_LEN(LEN_NAME);
-#define pci_unmap_addr             dma_unmap_addr
-#define pci_unmap_addr_set         dma_unmap_addr_set
-#define pci_unmap_len              dma_unmap_len
-#define pci_unmap_len_set          dma_unmap_len_set
-
-#endif
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 6925828f9f25..e938e80e59c1 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1342,7 +1342,6 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode,
 
 /* kmem_cache style wrapper around pci_alloc_consistent() */
 
-#include <linux/pci-dma.h>
 #include <linux/dmapool.h>
 
 #define	pci_pool dma_pool
-- 
cgit v1.2.3


From a6f44cf9f5cc60471cf06f3d5391fc6041eb37a5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 9 Oct 2018 16:08:23 +0200
Subject: PCI: Remove pci_set_dma_seg_boundary()

The two callers can just use dma_set_seg_boundary() directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/probe.c            | 2 +-
 drivers/s390/net/ism_drv.c     | 2 +-
 include/linux/pci-dma-compat.h | 9 ---------
 3 files changed, 2 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 7c422ccbf9b4..72dd926680be 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2398,7 +2398,7 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 	dev->dev.coherent_dma_mask = 0xffffffffull;
 
 	pci_set_dma_max_seg_size(dev, 65536);
-	pci_set_dma_seg_boundary(dev, 0xffffffff);
+	dma_set_seg_boundary(&dev->dev, 0xffffffff);
 
 	/* Fix up broken headers */
 	pci_fixup_device(pci_fixup_header, dev);
diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
index c0631895154e..8688c0fff761 100644
--- a/drivers/s390/net/ism_drv.c
+++ b/drivers/s390/net/ism_drv.c
@@ -515,7 +515,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (ret)
 		goto err_unmap;
 
-	pci_set_dma_seg_boundary(pdev, SZ_1M - 1);
+	dma_set_seg_boundary(&pdev->dev, SZ_1M - 1);
 	pci_set_dma_max_seg_size(pdev, SZ_1M);
 	pci_set_master(pdev);
 
diff --git a/include/linux/pci-dma-compat.h b/include/linux/pci-dma-compat.h
index c3f1b44ade29..558a109ab497 100644
--- a/include/linux/pci-dma-compat.h
+++ b/include/linux/pci-dma-compat.h
@@ -125,12 +125,6 @@ static inline int pci_set_dma_max_seg_size(struct pci_dev *dev,
 {
 	return dma_set_max_seg_size(&dev->dev, size);
 }
-
-static inline int pci_set_dma_seg_boundary(struct pci_dev *dev,
-					   unsigned long mask)
-{
-	return dma_set_seg_boundary(&dev->dev, mask);
-}
 #else
 static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask)
 { return -EIO; }
@@ -139,9 +133,6 @@ static inline int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask)
 static inline int pci_set_dma_max_seg_size(struct pci_dev *dev,
 					   unsigned int size)
 { return -EIO; }
-static inline int pci_set_dma_seg_boundary(struct pci_dev *dev,
-					   unsigned long mask)
-{ return -EIO; }
 #endif
 
 #endif
-- 
cgit v1.2.3


From b0da3498c587c20e64799c4c7ba65a31314b2182 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 9 Oct 2018 16:08:24 +0200
Subject: PCI: Remove pci_set_dma_max_seg_size()

The few callers can just use dma_set_max_seg_size ()directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/ata/sata_inic162x.c    | 2 +-
 drivers/block/rsxx/core.c      | 2 +-
 drivers/pci/probe.c            | 2 +-
 drivers/s390/net/ism_drv.c     | 2 +-
 drivers/scsi/aacraid/linit.c   | 2 +-
 include/linux/pci-dma-compat.h | 9 ---------
 6 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c
index 9b6d7930d1c7..e0bcf9b2dab0 100644
--- a/drivers/ata/sata_inic162x.c
+++ b/drivers/ata/sata_inic162x.c
@@ -873,7 +873,7 @@ static int inic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	 * like others but it will lock up the whole machine HARD if
 	 * 65536 byte PRD entry is fed. Reduce maximum segment size.
 	 */
-	rc = pci_set_dma_max_seg_size(pdev, 65536 - 512);
+	rc = dma_set_max_seg_size(&pdev->dev, 65536 - 512);
 	if (rc) {
 		dev_err(&pdev->dev, "failed to set the maximum segment size\n");
 		return rc;
diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index f2c631ce793c..37df486c7c3c 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -780,7 +780,7 @@ static int rsxx_pci_probe(struct pci_dev *dev,
 		goto failed_enable;
 
 	pci_set_master(dev);
-	pci_set_dma_max_seg_size(dev, RSXX_HW_BLK_SIZE);
+	dma_set_max_seg_size(&dev->dev, RSXX_HW_BLK_SIZE);
 
 	st = pci_set_dma_mask(dev, DMA_BIT_MASK(64));
 	if (st) {
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 72dd926680be..75d896549360 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2397,7 +2397,7 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus)
 	dev->dev.dma_parms = &dev->dma_parms;
 	dev->dev.coherent_dma_mask = 0xffffffffull;
 
-	pci_set_dma_max_seg_size(dev, 65536);
+	dma_set_max_seg_size(&dev->dev, 65536);
 	dma_set_seg_boundary(&dev->dev, 0xffffffff);
 
 	/* Fix up broken headers */
diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
index 8688c0fff761..f96ec68af2e5 100644
--- a/drivers/s390/net/ism_drv.c
+++ b/drivers/s390/net/ism_drv.c
@@ -516,7 +516,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err_unmap;
 
 	dma_set_seg_boundary(&pdev->dev, SZ_1M - 1);
-	pci_set_dma_max_seg_size(pdev, SZ_1M);
+	dma_set_max_seg_size(&pdev->dev, SZ_1M);
 	pci_set_master(pdev);
 
 	ism->smcd = smcd_alloc_dev(&pdev->dev, dev_name(&pdev->dev), &ism_ops,
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 04443577d48b..53eb2e9569b9 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -1747,7 +1747,7 @@ static int aac_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 		shost->max_sectors = (shost->sg_tablesize * 8) + 112;
 	}
 
-	error = pci_set_dma_max_seg_size(pdev,
+	error = dma_set_max_seg_size(&pdev->dev,
 		(aac->adapter_info.options & AAC_OPT_NEW_COMM) ?
 			(shost->max_sectors << 9) : 65536);
 	if (error)
diff --git a/include/linux/pci-dma-compat.h b/include/linux/pci-dma-compat.h
index 558a109ab497..cb1adf0b78a9 100644
--- a/include/linux/pci-dma-compat.h
+++ b/include/linux/pci-dma-compat.h
@@ -119,20 +119,11 @@ static inline int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask)
 {
 	return dma_set_coherent_mask(&dev->dev, mask);
 }
-
-static inline int pci_set_dma_max_seg_size(struct pci_dev *dev,
-					   unsigned int size)
-{
-	return dma_set_max_seg_size(&dev->dev, size);
-}
 #else
 static inline int pci_set_dma_mask(struct pci_dev *dev, u64 mask)
 { return -EIO; }
 static inline int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask)
 { return -EIO; }
-static inline int pci_set_dma_max_seg_size(struct pci_dev *dev,
-					   unsigned int size)
-{ return -EIO; }
 #endif
 
 #endif
-- 
cgit v1.2.3


From f6a8a19bb11b46d60250ddc4e3e1ba6aa166f488 Mon Sep 17 00:00:00 2001
From: Denis Drozdov <denisd@mellanox.com>
Date: Tue, 14 Aug 2018 14:08:51 +0300
Subject: RDMA/netdev: Hoist alloc_netdev_mqs out of the driver

netdev has several interfaces that expect to call alloc_netdev_mqs from
the core code, with the driver only providing the arguments.  This is
incompatible with the rdma_netdev interface that returns the netdev
directly.

Thus re-organize the API used by ipoib so that the verbs core code calls
alloc_netdev_mqs for the driver. This is done by allowing the drivers to
provide the allocation parameters via a 'get_params' callback and then
initializing an allocated netdev as a second step.

Fixes: cd565b4b51e5 ("IB/IPoIB: Support acceleration options callbacks")
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Denis Drozdov <denisd@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/core/verbs.c                    | 32 ++++++++
 drivers/infiniband/hw/mlx5/main.c                  | 23 ++----
 drivers/infiniband/ulp/ipoib/ipoib_main.c          | 21 ++----
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  | 87 ++++++++++++----------
 include/linux/mlx5/driver.h                        | 14 +---
 include/rdma/ib_verbs.h                            | 23 +++++-
 6 files changed, 119 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 6ee03d6089eb..2f34d3412097 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -2621,3 +2621,35 @@ void ib_drain_qp(struct ib_qp *qp)
 		ib_drain_rq(qp);
 }
 EXPORT_SYMBOL(ib_drain_qp);
+
+struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
+				     enum rdma_netdev_t type, const char *name,
+				     unsigned char name_assign_type,
+				     void (*setup)(struct net_device *))
+{
+	struct rdma_netdev_alloc_params params;
+	struct net_device *netdev;
+	int rc;
+
+	if (!device->rdma_netdev_get_params)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	rc = device->rdma_netdev_get_params(device, port_num, type, &params);
+	if (rc)
+		return ERR_PTR(rc);
+
+	netdev = alloc_netdev_mqs(params.sizeof_priv, name, name_assign_type,
+				  setup, params.txqs, params.rxqs);
+	if (!netdev)
+		return ERR_PTR(-ENOMEM);
+
+	rc = params.initialize_rdma_netdev(device, port_num, netdev,
+					   params.param);
+	if (rc) {
+		free_netdev(netdev);
+		return ERR_PTR(rc);
+	}
+
+	return netdev;
+}
+EXPORT_SYMBOL(rdma_alloc_netdev);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index c414f3809e5c..5d9b7f62a0ba 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -5163,22 +5163,14 @@ done:
 	return num_counters;
 }
 
-static struct net_device*
-mlx5_ib_alloc_rdma_netdev(struct ib_device *hca,
-			  u8 port_num,
-			  enum rdma_netdev_t type,
-			  const char *name,
-			  unsigned char name_assign_type,
-			  void (*setup)(struct net_device *))
+static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
+				 enum rdma_netdev_t type,
+				 struct rdma_netdev_alloc_params *params)
 {
-	struct net_device *netdev;
-
 	if (type != RDMA_NETDEV_IPOIB)
-		return ERR_PTR(-EOPNOTSUPP);
+		return -EOPNOTSUPP;
 
-	netdev = mlx5_rdma_netdev_alloc(to_mdev(hca)->mdev, hca,
-					name, setup);
-	return netdev;
+	return mlx5_rdma_rn_get_params(to_mdev(device)->mdev, device, params);
 }
 
 static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev)
@@ -5824,8 +5816,9 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 	dev->ib_dev.check_mr_status	= mlx5_ib_check_mr_status;
 	dev->ib_dev.get_dev_fw_str      = get_dev_fw_str;
 	dev->ib_dev.get_vector_affinity	= mlx5_ib_get_vector_affinity;
-	if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads))
-		dev->ib_dev.alloc_rdma_netdev	= mlx5_ib_alloc_rdma_netdev;
+	if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
+	    IS_ENABLED(CONFIG_MLX5_CORE_IPOIB))
+		dev->ib_dev.rdma_netdev_get_params = mlx5_ib_rn_get_params;
 
 	if (mlx5_core_is_pf(mdev)) {
 		dev->ib_dev.get_vf_config	= mlx5_ib_get_vf_config;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index e3d28f9ad9c0..9c816cd41724 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2146,20 +2146,15 @@ static struct net_device *ipoib_get_netdev(struct ib_device *hca, u8 port,
 {
 	struct net_device *dev;
 
-	if (hca->alloc_rdma_netdev) {
-		dev = hca->alloc_rdma_netdev(hca, port,
-					     RDMA_NETDEV_IPOIB, name,
-					     NET_NAME_UNKNOWN,
-					     ipoib_setup_common);
-		if (IS_ERR_OR_NULL(dev) && PTR_ERR(dev) != -EOPNOTSUPP)
-			return NULL;
-	}
-
-	if (!hca->alloc_rdma_netdev || PTR_ERR(dev) == -EOPNOTSUPP)
-		dev = ipoib_create_netdev_default(hca, name, NET_NAME_UNKNOWN,
-						  ipoib_setup_common);
+	dev = rdma_alloc_netdev(hca, port, RDMA_NETDEV_IPOIB, name,
+				NET_NAME_UNKNOWN, ipoib_setup_common);
+	if (!IS_ERR(dev))
+		return dev;
+	if (PTR_ERR(dev) != -EOPNOTSUPP)
+		return NULL;
 
-	return dev;
+	return ipoib_create_netdev_default(hca, name, NET_NAME_UNKNOWN,
+					   ipoib_setup_common);
 }
 
 struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 299e2a897f7e..af1a95f80404 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -658,53 +658,36 @@ static void mlx5_rdma_netdev_free(struct net_device *netdev)
 	}
 }
 
-struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
-					  struct ib_device *ibdev,
-					  const char *name,
-					  void (*setup)(struct net_device *))
+static bool mlx5_is_sub_interface(struct mlx5_core_dev *mdev)
 {
-	const struct mlx5e_profile *profile;
-	struct net_device *netdev;
+	return mdev->mlx5e_res.pdn != 0;
+}
+
+static const struct mlx5e_profile *mlx5_get_profile(struct mlx5_core_dev *mdev)
+{
+	if (mlx5_is_sub_interface(mdev))
+		return mlx5i_pkey_get_profile();
+	return &mlx5i_nic_profile;
+}
+
+static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u8 port_num,
+			      struct net_device *netdev, void *param)
+{
+	struct mlx5_core_dev *mdev = (struct mlx5_core_dev *)param;
+	const struct mlx5e_profile *prof = mlx5_get_profile(mdev);
 	struct mlx5i_priv *ipriv;
 	struct mlx5e_priv *epriv;
 	struct rdma_netdev *rn;
-	bool sub_interface;
-	int nch;
 	int err;
 
-	if (mlx5i_check_required_hca_cap(mdev)) {
-		mlx5_core_warn(mdev, "Accelerated mode is not supported\n");
-		return ERR_PTR(-EOPNOTSUPP);
-	}
-
-	/* TODO: Need to find a better way to check if child device*/
-	sub_interface = (mdev->mlx5e_res.pdn != 0);
-
-	if (sub_interface)
-		profile = mlx5i_pkey_get_profile();
-	else
-		profile = &mlx5i_nic_profile;
-
-	nch = profile->max_nch(mdev);
-
-	netdev = alloc_netdev_mqs(sizeof(struct mlx5i_priv) + sizeof(struct mlx5e_priv),
-				  name, NET_NAME_UNKNOWN,
-				  setup,
-				  nch * MLX5E_MAX_NUM_TC,
-				  nch);
-	if (!netdev) {
-		mlx5_core_warn(mdev, "alloc_netdev_mqs failed\n");
-		return NULL;
-	}
-
 	ipriv = netdev_priv(netdev);
 	epriv = mlx5i_epriv(netdev);
 
 	epriv->wq = create_singlethread_workqueue("mlx5i");
 	if (!epriv->wq)
-		goto err_free_netdev;
+		return -ENOMEM;
 
-	ipriv->sub_interface = sub_interface;
+	ipriv->sub_interface = mlx5_is_sub_interface(mdev);
 	if (!ipriv->sub_interface) {
 		err = mlx5i_pkey_qpn_ht_init(netdev);
 		if (err) {
@@ -718,7 +701,7 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 			goto destroy_ht;
 	}
 
-	profile->init(mdev, netdev, profile, ipriv);
+	prof->init(mdev, netdev, prof, ipriv);
 
 	mlx5e_attach_netdev(epriv);
 	netif_carrier_off(netdev);
@@ -734,15 +717,37 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 	netdev->priv_destructor = mlx5_rdma_netdev_free;
 	netdev->needs_free_netdev = 1;
 
-	return netdev;
+	return 0;
 
 destroy_ht:
 	mlx5i_pkey_qpn_ht_cleanup(netdev);
 destroy_wq:
 	destroy_workqueue(epriv->wq);
-err_free_netdev:
-	free_netdev(netdev);
+	return err;
+}
+
+int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev,
+			    struct ib_device *device,
+			    struct rdma_netdev_alloc_params *params)
+{
+	int nch;
+	int rc;
+
+	rc = mlx5i_check_required_hca_cap(mdev);
+	if (rc)
+		return rc;
 
-	return NULL;
+	nch = mlx5_get_profile(mdev)->max_nch(mdev);
+
+	*params = (struct rdma_netdev_alloc_params){
+		.sizeof_priv = sizeof(struct mlx5i_priv) +
+			       sizeof(struct mlx5e_priv),
+		.txqs = nch * MLX5E_MAX_NUM_TC,
+		.rxqs = nch,
+		.param = mdev,
+		.initialize_rdma_netdev = mlx5_rdma_setup_rn,
+	};
+
+	return 0;
 }
-EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);
+EXPORT_SYMBOL(mlx5_rdma_rn_get_params);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 26a92462f4ce..4b75796cac23 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1228,21 +1228,15 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
 void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
 
-#ifndef CONFIG_MLX5_CORE_IPOIB
-static inline
-struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
-					  struct ib_device *ibdev,
-					  const char *name,
-					  void (*setup)(struct net_device *))
-{
-	return ERR_PTR(-EOPNOTSUPP);
-}
-#else
+#ifdef CONFIG_MLX5_CORE_IPOIB
 struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
 					  struct ib_device *ibdev,
 					  const char *name,
 					  void (*setup)(struct net_device *));
 #endif /* CONFIG_MLX5_CORE_IPOIB */
+int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev,
+			    struct ib_device *device,
+			    struct rdma_netdev_alloc_params *params);
 
 struct mlx5_profile {
 	u64	mask;
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index e950c2a68f06..020216cee8f1 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2223,6 +2223,16 @@ struct rdma_netdev {
 			    union ib_gid *gid, u16 mlid);
 };
 
+struct rdma_netdev_alloc_params {
+	size_t sizeof_priv;
+	unsigned int txqs;
+	unsigned int rxqs;
+	void *param;
+
+	int (*initialize_rdma_netdev)(struct ib_device *device, u8 port_num,
+				      struct net_device *netdev, void *param);
+};
+
 struct ib_port_pkey_list {
 	/* Lock to hold while modifying the list. */
 	spinlock_t                    list_lock;
@@ -2523,8 +2533,8 @@ struct ib_device {
 	/**
 	 * rdma netdev operation
 	 *
-	 * Driver implementing alloc_rdma_netdev must return -EOPNOTSUPP if it
-	 * doesn't support the specified rdma netdev type.
+	 * Driver implementing alloc_rdma_netdev or rdma_netdev_get_params
+	 * must return -EOPNOTSUPP if it doesn't support the specified type.
 	 */
 	struct net_device *(*alloc_rdma_netdev)(
 					struct ib_device *device,
@@ -2534,6 +2544,10 @@ struct ib_device {
 					unsigned char name_assign_type,
 					void (*setup)(struct net_device *));
 
+	int (*rdma_netdev_get_params)(struct ib_device *device, u8 port_num,
+				      enum rdma_netdev_t type,
+				      struct rdma_netdev_alloc_params *params);
+
 	struct module               *owner;
 	struct device                dev;
 	struct kobject               *ports_parent;
@@ -4179,4 +4193,9 @@ struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile);
 
 int uverbs_destroy_def_handler(struct ib_uverbs_file *file,
 			       struct uverbs_attr_bundle *attrs);
+
+struct net_device *rdma_alloc_netdev(struct ib_device *device, u8 port_num,
+				     enum rdma_netdev_t type, const char *name,
+				     unsigned char name_assign_type,
+				     void (*setup)(struct net_device *));
 #endif /* IB_VERBS_H */
-- 
cgit v1.2.3


From 37fdffb217a45609edccbb8b407d031143f551c0 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Tue, 21 Aug 2018 14:41:41 +0300
Subject: net/mlx5: WQ, fixes for fragmented WQ buffers API

mlx5e netdevice used to calculate fragment edges by a call to
mlx5_wq_cyc_get_frag_size(). This calculation did not give the correct
indication for queues smaller than a PAGE_SIZE, (broken by default on
PowerPC, where PAGE_SIZE == 64KB).  Here it is replaced by the correct new
calls/API.

Since (TX/RX) Work Queues buffers are fragmented, here we introduce
changes to the API in core driver, so that it gets a stride index and
returns the index of last stride on same fragment, and an additional
wrapping function that returns the number of physically contiguous
strides that can be written contiguously to the work queue.

This obsoletes the following API functions, and their buggy
usage in EN driver:
* mlx5_wq_cyc_get_frag_size()
* mlx5_wq_cyc_ctr2fragix()

The new API improves modularity and hides the details of such
calculation for mlx5e netdevice and mlx5_ib rdma drivers.

New calculation is also more efficient, and improves performance
as follows:

Packet rate test: pktgen, UDP / IPv4, 64byte, single ring, 8K ring size.

Before: 16,477,619 pps
After:  17,085,793 pps

3.7% improvement

Fixes: 3a2f70331226 ("net/mlx5: Use order-0 allocations for all WQ types")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c    | 12 +++++-------
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c    | 22 +++++++++++-----------
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h  |  5 ++---
 drivers/net/ethernet/mellanox/mlx5/core/wq.c       |  5 -----
 drivers/net/ethernet/mellanox/mlx5/core/wq.h       | 11 +++++------
 include/linux/mlx5/driver.h                        |  8 ++++++++
 6 files changed, 31 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 15d8ae28c040..00172dee5339 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -432,10 +432,9 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
 
 static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
 					      struct mlx5_wq_cyc *wq,
-					      u16 pi, u16 frag_pi)
+					      u16 pi, u16 nnops)
 {
 	struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
-	u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
 
 	edge_wi = wi + nnops;
 
@@ -454,15 +453,14 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	struct mlx5_wq_cyc *wq = &sq->wq;
 	struct mlx5e_umr_wqe *umr_wqe;
 	u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
-	u16 pi, frag_pi;
+	u16 pi, contig_wqebbs_room;
 	int err;
 	int i;
 
 	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
-
-	if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_frag_size(wq))) {
-		mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi);
+	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
+		mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room);
 		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index ae73ea992845..6dacaeba2fbf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -290,10 +290,9 @@ dma_unmap_wqe_err:
 
 static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
 					   struct mlx5_wq_cyc *wq,
-					   u16 pi, u16 frag_pi)
+					   u16 pi, u16 nnops)
 {
 	struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-	u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
 
 	edge_wi = wi + nnops;
 
@@ -348,8 +347,8 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	struct mlx5e_tx_wqe_info *wi;
 
 	struct mlx5e_sq_stats *stats = sq->stats;
+	u16 headlen, ihs, contig_wqebbs_room;
 	u16 ds_cnt, ds_cnt_inl = 0;
-	u16 headlen, ihs, frag_pi;
 	u8 num_wqebbs, opcode;
 	u32 num_bytes;
 	int num_dma;
@@ -386,9 +385,9 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	}
 
 	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
-	if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
-		mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
+	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	if (unlikely(contig_wqebbs_room < num_wqebbs)) {
+		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
 		mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
 	}
 
@@ -636,7 +635,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	struct mlx5e_tx_wqe_info *wi;
 
 	struct mlx5e_sq_stats *stats = sq->stats;
-	u16 headlen, ihs, pi, frag_pi;
+	u16 headlen, ihs, pi, contig_wqebbs_room;
 	u16 ds_cnt, ds_cnt_inl = 0;
 	u8 num_wqebbs, opcode;
 	u32 num_bytes;
@@ -672,13 +671,14 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	}
 
 	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-	frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
-	if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
+	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	if (unlikely(contig_wqebbs_room < num_wqebbs)) {
+		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
 		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-		mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
 	}
 
-	mlx5i_sq_fetch_wqe(sq, &wqe, &pi);
+	mlx5i_sq_fetch_wqe(sq, &wqe, pi);
 
 	/* fill wqe */
 	wi       = &sq->db.wqe_info[pi];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
index 08eac92fc26c..0982c579ec74 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -109,12 +109,11 @@ struct mlx5i_tx_wqe {
 
 static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq,
 				      struct mlx5i_tx_wqe **wqe,
-				      u16 *pi)
+				      u16 pi)
 {
 	struct mlx5_wq_cyc *wq = &sq->wq;
 
-	*pi  = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	*wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
+	*wqe = mlx5_wq_cyc_get_wqe(wq, pi);
 	memset(*wqe, 0, sizeof(**wqe));
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 68e7f8df2a6d..ddca327e8950 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -39,11 +39,6 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq)
 	return (u32)wq->fbc.sz_m1 + 1;
 }
 
-u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq)
-{
-	return wq->fbc.frag_sz_m1 + 1;
-}
-
 u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq)
 {
 	return wq->fbc.sz_m1 + 1;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 3a1a170bb2d7..b1293d153a58 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -80,7 +80,6 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		       void *wqc, struct mlx5_wq_cyc *wq,
 		       struct mlx5_wq_ctrl *wq_ctrl);
 u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
-u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq);
 
 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *qpc, struct mlx5_wq_qp *wq,
@@ -140,11 +139,6 @@ static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
 	return ctr & wq->fbc.sz_m1;
 }
 
-static inline u16 mlx5_wq_cyc_ctr2fragix(struct mlx5_wq_cyc *wq, u16 ctr)
-{
-	return ctr & wq->fbc.frag_sz_m1;
-}
-
 static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq)
 {
 	return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr);
@@ -160,6 +154,11 @@ static inline void *mlx5_wq_cyc_get_wqe(struct mlx5_wq_cyc *wq, u16 ix)
 	return mlx5_frag_buf_get_wqe(&wq->fbc, ix);
 }
 
+static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix)
+{
+	return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1;
+}
+
 static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2)
 {
 	int equal   = (cc1 == cc2);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 66d94b4557cf..88a041b73abf 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1032,6 +1032,14 @@ static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
 		((fbc->frag_sz_m1 & ix) << fbc->log_stride);
 }
 
+static inline u32
+mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix)
+{
+	u32 last_frag_stride_idx = (ix + fbc->strides_offset) | fbc->frag_sz_m1;
+
+	return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1);
+}
+
 int mlx5_cmd_init(struct mlx5_core_dev *dev);
 void mlx5_cmd_cleanup(struct mlx5_core_dev *dev);
 void mlx5_cmd_use_events(struct mlx5_core_dev *dev);
-- 
cgit v1.2.3


From a36700589b85443e28170be59fa11c8a104130a5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 10 Oct 2018 20:29:44 -0500
Subject: signal: Guard against negative signal numbers in
 copy_siginfo_from_user32

While fixing an out of bounds array access in known_siginfo_layout
reported by the kernel test robot it became apparent that the same bug
exists in siginfo_layout and affects copy_siginfo_from_user32.

The straight forward fix that makes guards against making this mistake
in the future and should keep the code size small is to just take an
unsigned signal number instead of a signed signal number, as I did to
fix known_siginfo_layout.

Cc: stable@vger.kernel.org
Fixes: cc731525f26a ("signal: Remove kernel interal si_code magic")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 include/linux/signal.h | 2 +-
 kernel/signal.c        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 706a499d1eb1..200ed96a05af 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -40,7 +40,7 @@ enum siginfo_layout {
 	SIL_SYS,
 };
 
-enum siginfo_layout siginfo_layout(int sig, int si_code);
+enum siginfo_layout siginfo_layout(unsigned sig, int si_code);
 
 /*
  * Define some primitives to manipulate sigset_t.
diff --git a/kernel/signal.c b/kernel/signal.c
index 5f5bf374512b..4fd431ce4f91 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2879,7 +2879,7 @@ static bool known_siginfo_layout(unsigned sig, int si_code)
 	return false;
 }
 
-enum siginfo_layout siginfo_layout(int sig, int si_code)
+enum siginfo_layout siginfo_layout(unsigned sig, int si_code)
 {
 	enum siginfo_layout layout = SIL_KILL;
 	if ((si_code > SI_USER) && (si_code < SI_KERNEL)) {
-- 
cgit v1.2.3


From a6ca88b241d5e929e6e60b12ad8cd288f0ffa256 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Mon, 1 Oct 2018 22:36:36 -0700
Subject: trace_uprobe: support reference counter in fd-based uprobe

This patch enables uprobes with reference counter in fd-based uprobe.
Highest 32 bits of perf_event_attr.config is used to stored offset
of the reference count (semaphore).

Format information in /sys/bus/event_source/devices/uprobe/format/ is
updated to reflect this new feature.

Link: http://lkml.kernel.org/r/20181002053636.1896903-1-songliubraving@fb.com

Cc: Oleg Nesterov <oleg@redhat.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-and-tested-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/trace_events.h    |  3 ++-
 kernel/events/core.c            | 49 ++++++++++++++++++++++++++++++++---------
 kernel/trace/trace_event_perf.c |  7 +++---
 kernel/trace/trace_probe.h      |  3 ++-
 kernel/trace/trace_uprobe.c     |  4 +++-
 5 files changed, 50 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 78a010e19ed4..4130a5497d40 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -575,7 +575,8 @@ extern int bpf_get_kprobe_info(const struct perf_event *event,
 			       bool perf_type_tracepoint);
 #endif
 #ifdef CONFIG_UPROBE_EVENTS
-extern int  perf_uprobe_init(struct perf_event *event, bool is_retprobe);
+extern int  perf_uprobe_init(struct perf_event *event,
+			     unsigned long ref_ctr_offset, bool is_retprobe);
 extern void perf_uprobe_destroy(struct perf_event *event);
 extern int bpf_get_uprobe_info(const struct perf_event *event,
 			       u32 *fd_type, const char **filename,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c80549bf82c6..65b30773af3e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8368,30 +8368,39 @@ static struct pmu perf_tracepoint = {
  *
  * PERF_PROBE_CONFIG_IS_RETPROBE if set, create kretprobe/uretprobe
  *                               if not set, create kprobe/uprobe
+ *
+ * The following values specify a reference counter (or semaphore in the
+ * terminology of tools like dtrace, systemtap, etc.) Userspace Statically
+ * Defined Tracepoints (USDT). Currently, we use 40 bit for the offset.
+ *
+ * PERF_UPROBE_REF_CTR_OFFSET_BITS	# of bits in config as th offset
+ * PERF_UPROBE_REF_CTR_OFFSET_SHIFT	# of bits to shift left
  */
 enum perf_probe_config {
 	PERF_PROBE_CONFIG_IS_RETPROBE = 1U << 0,  /* [k,u]retprobe */
+	PERF_UPROBE_REF_CTR_OFFSET_BITS = 32,
+	PERF_UPROBE_REF_CTR_OFFSET_SHIFT = 64 - PERF_UPROBE_REF_CTR_OFFSET_BITS,
 };
 
 PMU_FORMAT_ATTR(retprobe, "config:0");
+#endif
 
-static struct attribute *probe_attrs[] = {
+#ifdef CONFIG_KPROBE_EVENTS
+static struct attribute *kprobe_attrs[] = {
 	&format_attr_retprobe.attr,
 	NULL,
 };
 
-static struct attribute_group probe_format_group = {
+static struct attribute_group kprobe_format_group = {
 	.name = "format",
-	.attrs = probe_attrs,
+	.attrs = kprobe_attrs,
 };
 
-static const struct attribute_group *probe_attr_groups[] = {
-	&probe_format_group,
+static const struct attribute_group *kprobe_attr_groups[] = {
+	&kprobe_format_group,
 	NULL,
 };
-#endif
 
-#ifdef CONFIG_KPROBE_EVENTS
 static int perf_kprobe_event_init(struct perf_event *event);
 static struct pmu perf_kprobe = {
 	.task_ctx_nr	= perf_sw_context,
@@ -8401,7 +8410,7 @@ static struct pmu perf_kprobe = {
 	.start		= perf_swevent_start,
 	.stop		= perf_swevent_stop,
 	.read		= perf_swevent_read,
-	.attr_groups	= probe_attr_groups,
+	.attr_groups	= kprobe_attr_groups,
 };
 
 static int perf_kprobe_event_init(struct perf_event *event)
@@ -8433,6 +8442,24 @@ static int perf_kprobe_event_init(struct perf_event *event)
 #endif /* CONFIG_KPROBE_EVENTS */
 
 #ifdef CONFIG_UPROBE_EVENTS
+PMU_FORMAT_ATTR(ref_ctr_offset, "config:32-63");
+
+static struct attribute *uprobe_attrs[] = {
+	&format_attr_retprobe.attr,
+	&format_attr_ref_ctr_offset.attr,
+	NULL,
+};
+
+static struct attribute_group uprobe_format_group = {
+	.name = "format",
+	.attrs = uprobe_attrs,
+};
+
+static const struct attribute_group *uprobe_attr_groups[] = {
+	&uprobe_format_group,
+	NULL,
+};
+
 static int perf_uprobe_event_init(struct perf_event *event);
 static struct pmu perf_uprobe = {
 	.task_ctx_nr	= perf_sw_context,
@@ -8442,12 +8469,13 @@ static struct pmu perf_uprobe = {
 	.start		= perf_swevent_start,
 	.stop		= perf_swevent_stop,
 	.read		= perf_swevent_read,
-	.attr_groups	= probe_attr_groups,
+	.attr_groups	= uprobe_attr_groups,
 };
 
 static int perf_uprobe_event_init(struct perf_event *event)
 {
 	int err;
+	unsigned long ref_ctr_offset;
 	bool is_retprobe;
 
 	if (event->attr.type != perf_uprobe.type)
@@ -8463,7 +8491,8 @@ static int perf_uprobe_event_init(struct perf_event *event)
 		return -EOPNOTSUPP;
 
 	is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
-	err = perf_uprobe_init(event, is_retprobe);
+	ref_ctr_offset = event->attr.config >> PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
+	err = perf_uprobe_init(event, ref_ctr_offset, is_retprobe);
 	if (err)
 		return err;
 
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 69a3fe926e8c..76217bbef815 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -290,7 +290,8 @@ void perf_kprobe_destroy(struct perf_event *p_event)
 #endif /* CONFIG_KPROBE_EVENTS */
 
 #ifdef CONFIG_UPROBE_EVENTS
-int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe)
+int perf_uprobe_init(struct perf_event *p_event,
+		     unsigned long ref_ctr_offset, bool is_retprobe)
 {
 	int ret;
 	char *path = NULL;
@@ -312,8 +313,8 @@ int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe)
 		goto out;
 	}
 
-	tp_event = create_local_trace_uprobe(
-		path, p_event->attr.probe_offset, is_retprobe);
+	tp_event = create_local_trace_uprobe(path, p_event->attr.probe_offset,
+					     ref_ctr_offset, is_retprobe);
 	if (IS_ERR(tp_event)) {
 		ret = PTR_ERR(tp_event);
 		goto out;
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 5f52668e165d..03b10f3201a5 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -412,6 +412,7 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
 extern void destroy_local_trace_kprobe(struct trace_event_call *event_call);
 
 extern struct trace_event_call *
-create_local_trace_uprobe(char *name, unsigned long offs, bool is_return);
+create_local_trace_uprobe(char *name, unsigned long offs,
+			  unsigned long ref_ctr_offset, bool is_return);
 extern void destroy_local_trace_uprobe(struct trace_event_call *event_call);
 #endif
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 3a7c73c40007..d09638706fe0 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1405,7 +1405,8 @@ static int unregister_uprobe_event(struct trace_uprobe *tu)
 
 #ifdef CONFIG_PERF_EVENTS
 struct trace_event_call *
-create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
+create_local_trace_uprobe(char *name, unsigned long offs,
+			  unsigned long ref_ctr_offset, bool is_return)
 {
 	struct trace_uprobe *tu;
 	struct path path;
@@ -1437,6 +1438,7 @@ create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
 
 	tu->offset = offs;
 	tu->path = path;
+	tu->ref_ctr_offset = ref_ctr_offset;
 	tu->filename = kstrdup(name, GFP_KERNEL);
 	init_trace_event_call(tu, &tu->tp.call);
 
-- 
cgit v1.2.3


From 68c0d69dee594e1488aebe12aa50fd79a3e5e5b5 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicoleotsuka@gmail.com>
Date: Fri, 5 Oct 2018 16:59:04 -0700
Subject: hwmon: (core) Add hwmon_in_enable attribute

According to hwmon ABI, in%d_enable is a sysfs interface that
allows user space to enable and disable the input sensor. So
this patch just simply adds the attribute to the list.

Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/hwmon.c | 1 +
 include/linux/hwmon.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c
index 33d51281272b..ac1cdf88840f 100644
--- a/drivers/hwmon/hwmon.c
+++ b/drivers/hwmon/hwmon.c
@@ -356,6 +356,7 @@ static const char * const hwmon_in_attr_templates[] = {
 	[hwmon_in_max_alarm] = "in%d_max_alarm",
 	[hwmon_in_lcrit_alarm] = "in%d_lcrit_alarm",
 	[hwmon_in_crit_alarm] = "in%d_crit_alarm",
+	[hwmon_in_enable] = "in%d_enable",
 };
 
 static const char * const hwmon_curr_attr_templates[] = {
diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h
index 9493d4a388db..99e0c1b0b5fb 100644
--- a/include/linux/hwmon.h
+++ b/include/linux/hwmon.h
@@ -118,6 +118,7 @@ enum hwmon_in_attributes {
 	hwmon_in_max_alarm,
 	hwmon_in_lcrit_alarm,
 	hwmon_in_crit_alarm,
+	hwmon_in_enable,
 };
 
 #define HWMON_I_INPUT		BIT(hwmon_in_input)
@@ -135,6 +136,7 @@ enum hwmon_in_attributes {
 #define HWMON_I_MAX_ALARM	BIT(hwmon_in_max_alarm)
 #define HWMON_I_LCRIT_ALARM	BIT(hwmon_in_lcrit_alarm)
 #define HWMON_I_CRIT_ALARM	BIT(hwmon_in_crit_alarm)
+#define HWMON_I_ENABLE		BIT(hwmon_in_enable)
 
 enum hwmon_curr_attributes {
 	hwmon_curr_input,
-- 
cgit v1.2.3


From af7d6cce53694a88d6a1bb60c9a239a6a5144459 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 9 Oct 2018 17:48:14 +0200
Subject: net: ipv4: update fnhe_pmtu when first hop's MTU changes

Since commit 5aad1de5ea2c ("ipv4: use separate genid for next hop
exceptions"), exceptions get deprecated separately from cached
routes. In particular, administrative changes don't clear PMTU anymore.

As Stefano described in commit e9fa1495d738 ("ipv6: Reflect MTU changes
on PMTU of exceptions for MTU-less routes"), the PMTU discovered before
the local MTU change can become stale:
 - if the local MTU is now lower than the PMTU, that PMTU is now
   incorrect
 - if the local MTU was the lowest value in the path, and is increased,
   we might discover a higher PMTU

Similarly to what commit e9fa1495d738 did for IPv6, update PMTU in those
cases.

If the exception was locked, the discovered PMTU was smaller than the
minimal accepted PMTU. In that case, if the new local MTU is smaller
than the current PMTU, let PMTU discovery figure out if locking of the
exception is still needed.

To do this, we need to know the old link MTU in the NETDEV_CHANGEMTU
notifier. By the time the notifier is called, dev->mtu has been
changed. This patch adds the old MTU as additional information in the
notifier structure, and a new call_netdevice_notifiers_u32() function.

Fixes: 5aad1de5ea2c ("ipv4: use separate genid for next hop exceptions")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  7 +++++++
 include/net/ip_fib.h      |  1 +
 net/core/dev.c            | 28 ++++++++++++++++++++++++--
 net/ipv4/fib_frontend.c   | 12 ++++++++----
 net/ipv4/fib_semantics.c  | 50 +++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 92 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c7861e4b402c..d837dad24b4c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2458,6 +2458,13 @@ struct netdev_notifier_info {
 	struct netlink_ext_ack	*extack;
 };
 
+struct netdev_notifier_info_ext {
+	struct netdev_notifier_info info; /* must be first */
+	union {
+		u32 mtu;
+	} ext;
+};
+
 struct netdev_notifier_change_info {
 	struct netdev_notifier_info info; /* must be first */
 	unsigned int flags_changed;
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 69c91d1934c1..c9b7b136939d 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -394,6 +394,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev);
 int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
 int fib_sync_down_addr(struct net_device *dev, __be32 local);
 int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
+void fib_sync_mtu(struct net_device *dev, u32 orig_mtu);
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
diff --git a/net/core/dev.c b/net/core/dev.c
index 82114e1111e6..93243479085f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1752,6 +1752,28 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 }
 EXPORT_SYMBOL(call_netdevice_notifiers);
 
+/**
+ *	call_netdevice_notifiers_mtu - call all network notifier blocks
+ *	@val: value passed unmodified to notifier function
+ *	@dev: net_device pointer passed unmodified to notifier function
+ *	@arg: additional u32 argument passed to the notifier function
+ *
+ *	Call all network notifier blocks.  Parameters and return value
+ *	are as for raw_notifier_call_chain().
+ */
+static int call_netdevice_notifiers_mtu(unsigned long val,
+					struct net_device *dev, u32 arg)
+{
+	struct netdev_notifier_info_ext info = {
+		.info.dev = dev,
+		.ext.mtu = arg,
+	};
+
+	BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0);
+
+	return call_netdevice_notifiers_info(val, &info.info);
+}
+
 #ifdef CONFIG_NET_INGRESS
 static DEFINE_STATIC_KEY_FALSE(ingress_needed_key);
 
@@ -7574,14 +7596,16 @@ int dev_set_mtu_ext(struct net_device *dev, int new_mtu,
 	err = __dev_set_mtu(dev, new_mtu);
 
 	if (!err) {
-		err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+		err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
+						   orig_mtu);
 		err = notifier_to_errno(err);
 		if (err) {
 			/* setting mtu back and notifying everyone again,
 			 * so that they have a chance to revert changes.
 			 */
 			__dev_set_mtu(dev, orig_mtu);
-			call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
+			call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev,
+						     new_mtu);
 		}
 	}
 	return err;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 2998b0e47d4b..0113993e9b2c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1243,7 +1243,8 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-	struct netdev_notifier_changeupper_info *info;
+	struct netdev_notifier_changeupper_info *upper_info = ptr;
+	struct netdev_notifier_info_ext *info_ext = ptr;
 	struct in_device *in_dev;
 	struct net *net = dev_net(dev);
 	unsigned int flags;
@@ -1278,16 +1279,19 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 			fib_sync_up(dev, RTNH_F_LINKDOWN);
 		else
 			fib_sync_down_dev(dev, event, false);
-		/* fall through */
+		rt_cache_flush(net);
+		break;
 	case NETDEV_CHANGEMTU:
+		fib_sync_mtu(dev, info_ext->ext.mtu);
 		rt_cache_flush(net);
 		break;
 	case NETDEV_CHANGEUPPER:
-		info = ptr;
+		upper_info = ptr;
 		/* flush all routes if dev is linked to or unlinked from
 		 * an L3 master device (e.g., VRF)
 		 */
-		if (info->upper_dev && netif_is_l3_master(info->upper_dev))
+		if (upper_info->upper_dev &&
+		    netif_is_l3_master(upper_info->upper_dev))
 			fib_disable_ip(dev, NETDEV_DOWN, true);
 		break;
 	}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f3c89ccf14c5..446204ca7406 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1470,6 +1470,56 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
 	return NOTIFY_DONE;
 }
 
+/* Update the PMTU of exceptions when:
+ * - the new MTU of the first hop becomes smaller than the PMTU
+ * - the old MTU was the same as the PMTU, and it limited discovery of
+ *   larger MTUs on the path. With that limit raised, we can now
+ *   discover larger MTUs
+ * A special case is locked exceptions, for which the PMTU is smaller
+ * than the minimal accepted PMTU:
+ * - if the new MTU is greater than the PMTU, don't make any change
+ * - otherwise, unlock and set PMTU
+ */
+static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig)
+{
+	struct fnhe_hash_bucket *bucket;
+	int i;
+
+	bucket = rcu_dereference_protected(nh->nh_exceptions, 1);
+	if (!bucket)
+		return;
+
+	for (i = 0; i < FNHE_HASH_SIZE; i++) {
+		struct fib_nh_exception *fnhe;
+
+		for (fnhe = rcu_dereference_protected(bucket[i].chain, 1);
+		     fnhe;
+		     fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) {
+			if (fnhe->fnhe_mtu_locked) {
+				if (new <= fnhe->fnhe_pmtu) {
+					fnhe->fnhe_pmtu = new;
+					fnhe->fnhe_mtu_locked = false;
+				}
+			} else if (new < fnhe->fnhe_pmtu ||
+				   orig == fnhe->fnhe_pmtu) {
+				fnhe->fnhe_pmtu = new;
+			}
+		}
+	}
+}
+
+void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
+{
+	unsigned int hash = fib_devindex_hashfn(dev->ifindex);
+	struct hlist_head *head = &fib_info_devhash[hash];
+	struct fib_nh *nh;
+
+	hlist_for_each_entry(nh, head, nh_hash) {
+		if (nh->nh_dev == dev)
+			nh_update_mtu(nh, dev->mtu, orig_mtu);
+	}
+}
+
 /* Event              force Flags           Description
  * NETDEV_CHANGE      0     LINKDOWN        Carrier OFF, not for scope host
  * NETDEV_DOWN        0     LINKDOWN|DEAD   Link down, not for scope host
-- 
cgit v1.2.3


From ce76d938dd98817f998c905e01fbb99b072c0bf6 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Fri, 5 Oct 2018 15:43:05 +0300
Subject: lib: Add memcat_p(): paste 2 pointer arrays together

This adds a helper to paste 2 pointer arrays together, useful for merging
various types of attribute arrays. There are a few places in the kernel
tree where this is open coded, and I just added one more in the STM class.

The naming is inspired by memset_p() and memcat(), and partial credit for
it goes to Andy Shevchenko.

This patch adds the function wrapped in a type-enforcing macro and a test
module.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Tested-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/string.h |   7 +++
 lib/Kconfig.debug      |   8 ++++
 lib/Makefile           |   1 +
 lib/string.c           |  31 +++++++++++++
 lib/test_memcat_p.c    | 115 +++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 162 insertions(+)
 create mode 100644 lib/test_memcat_p.c

(limited to 'include/linux')

diff --git a/include/linux/string.h b/include/linux/string.h
index 4a5a0eb7df51..27d0482e5e05 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -131,6 +131,13 @@ static inline void *memset_p(void **p, void *v, __kernel_size_t n)
 		return memset64((uint64_t *)p, (uintptr_t)v, n);
 }
 
+extern void **__memcat_p(void **a, void **b);
+#define memcat_p(a, b) ({					\
+	BUILD_BUG_ON_MSG(!__same_type(*(a), *(b)),		\
+			 "type mismatch in memcat_p()");	\
+	(typeof(*a) *)__memcat_p((void **)(a), (void **)(b));	\
+})
+
 #ifndef __HAVE_ARCH_MEMCPY
 extern void * memcpy(void *,const void *,__kernel_size_t);
 #endif
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 4966c4fbe7f7..c0176510262e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1965,6 +1965,14 @@ config TEST_DEBUG_VIRTUAL
 
 	  If unsure, say N.
 
+config TEST_MEMCAT_P
+	tristate "Test memcat_p() helper function"
+	help
+	  Test the memcat_p() helper for correctly merging two
+	  pointer arrays together.
+
+	  If unsure, say N.
+
 endif # RUNTIME_TESTING_MENU
 
 config MEMTEST
diff --git a/lib/Makefile b/lib/Makefile
index ca3f7ebb900d..c2588a2d7b1e 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -71,6 +71,7 @@ obj-$(CONFIG_TEST_UUID) += test_uuid.o
 obj-$(CONFIG_TEST_PARMAN) += test_parman.o
 obj-$(CONFIG_TEST_KMOD) += test_kmod.o
 obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o
+obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/string.c b/lib/string.c
index 2c0900a5d51a..453f35994eb6 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -27,6 +27,7 @@
 #include <linux/export.h>
 #include <linux/bug.h>
 #include <linux/errno.h>
+#include <linux/slab.h>
 
 #include <asm/byteorder.h>
 #include <asm/word-at-a-time.h>
@@ -890,6 +891,36 @@ void *memscan(void *addr, int c, size_t size)
 EXPORT_SYMBOL(memscan);
 #endif
 
+/*
+ * Merge two NULL-terminated pointer arrays into a newly allocated
+ * array, which is also NULL-terminated. Nomenclature is inspired by
+ * memset_p() and memcat() found elsewhere in the kernel source tree.
+ */
+void **__memcat_p(void **a, void **b)
+{
+	void **p = a, **new;
+	int nr;
+
+	/* count the elements in both arrays */
+	for (nr = 0, p = a; *p; nr++, p++)
+		;
+	for (p = b; *p; nr++, p++)
+		;
+	/* one for the NULL-terminator */
+	nr++;
+
+	new = kmalloc_array(nr, sizeof(void *), GFP_KERNEL);
+	if (!new)
+		return NULL;
+
+	/* nr -> last index; p points to NULL in b[] */
+	for (nr--; nr >= 0; nr--, p = p == b ? &a[nr] : p - 1)
+		new[nr] = *p;
+
+	return new;
+}
+EXPORT_SYMBOL_GPL(__memcat_p);
+
 #ifndef __HAVE_ARCH_STRSTR
 /**
  * strstr - Find the first substring in a %NUL terminated string
diff --git a/lib/test_memcat_p.c b/lib/test_memcat_p.c
new file mode 100644
index 000000000000..2b163a749ecb
--- /dev/null
+++ b/lib/test_memcat_p.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test cases for memcat_p() in lib/string.c
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+struct test_struct {
+	int		num;
+	unsigned int	magic;
+};
+
+#define MAGIC		0xf00ff00f
+/* Size of each of the NULL-terminated input arrays */
+#define INPUT_MAX	128
+/* Expected number of non-NULL elements in the output array */
+#define EXPECT		(INPUT_MAX * 2 - 2)
+
+static int __init test_memcat_p_init(void)
+{
+	struct test_struct **in0, **in1, **out, **p;
+	int err = -ENOMEM, i, r, total = 0;
+
+	in0 = kcalloc(INPUT_MAX, sizeof(*in0), GFP_KERNEL);
+	if (!in0)
+		return err;
+
+	in1 = kcalloc(INPUT_MAX, sizeof(*in1), GFP_KERNEL);
+	if (!in1)
+		goto err_free_in0;
+
+	for (i = 0, r = 1; i < INPUT_MAX - 1; i++) {
+		in0[i] = kmalloc(sizeof(**in0), GFP_KERNEL);
+		if (!in0[i])
+			goto err_free_elements;
+
+		in1[i] = kmalloc(sizeof(**in1), GFP_KERNEL);
+		if (!in1[i]) {
+			kfree(in0[i]);
+			goto err_free_elements;
+		}
+
+		/* lifted from test_sort.c */
+		r = (r * 725861) % 6599;
+		in0[i]->num = r;
+		in1[i]->num = -r;
+		in0[i]->magic = MAGIC;
+		in1[i]->magic = MAGIC;
+	}
+
+	in0[i] = in1[i] = NULL;
+
+	out = memcat_p(in0, in1);
+	if (!out)
+		goto err_free_all_elements;
+
+	err = -EINVAL;
+	for (i = 0, p = out; *p && (i < INPUT_MAX * 2 - 1); p++, i++) {
+		total += (*p)->num;
+
+		if ((*p)->magic != MAGIC) {
+			pr_err("test failed: wrong magic at %d: %u\n", i,
+			       (*p)->magic);
+			goto err_free_out;
+		}
+	}
+
+	if (total) {
+		pr_err("test failed: expected zero total, got %d\n", total);
+		goto err_free_out;
+	}
+
+	if (i != EXPECT) {
+		pr_err("test failed: expected output size %d, got %d\n",
+		       EXPECT, i);
+		goto err_free_out;
+	}
+
+	for (i = 0; i < INPUT_MAX - 1; i++)
+		if (out[i] != in0[i] || out[i + INPUT_MAX - 1] != in1[i]) {
+			pr_err("test failed: wrong element order at %d\n", i);
+			goto err_free_out;
+		}
+
+	err = 0;
+	pr_info("test passed\n");
+
+err_free_out:
+	kfree(out);
+err_free_all_elements:
+	i = INPUT_MAX;
+err_free_elements:
+	for (i--; i >= 0; i--) {
+		kfree(in1[i]);
+		kfree(in0[i]);
+	}
+
+	kfree(in1);
+err_free_in0:
+	kfree(in0);
+
+	return err;
+}
+
+static void __exit test_memcat_p_exit(void)
+{
+}
+
+module_init(test_memcat_p_init);
+module_exit(test_memcat_p_exit);
+
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From f458e832ba510f843807fc2c2906a8fb59554c9f Mon Sep 17 00:00:00 2001
From: Chaitanya T K <chaitanya.mgit@gmail.com>
Date: Sat, 6 Oct 2018 19:34:59 +0200
Subject: mac80211: minstrel: Enable STBC and LDPC for VHT Rates

If peer support reception of STBC and LDPC, enable them for better
performance.

Signed-off-by: Chaitanya TK <chaitanya.mgit@gmail.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h          |  1 +
 net/mac80211/rc80211_minstrel_ht.c | 23 +++++++++++++++--------
 2 files changed, 16 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index c4809ad8ab46..0ef67f837ae1 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1670,6 +1670,7 @@ struct ieee80211_mu_edca_param_set {
 #define IEEE80211_VHT_CAP_RXSTBC_3				0x00000300
 #define IEEE80211_VHT_CAP_RXSTBC_4				0x00000400
 #define IEEE80211_VHT_CAP_RXSTBC_MASK				0x00000700
+#define IEEE80211_VHT_CAP_RXSTBC_SHIFT				8
 #define IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE			0x00000800
 #define IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE			0x00001000
 #define IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT                  13
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 67ebdeaffbbc..16d1ac30978d 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1130,7 +1130,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
 	struct minstrel_ht_sta_priv *msp = priv_sta;
 	struct minstrel_ht_sta *mi = &msp->ht;
 	struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs;
-	u16 sta_cap = sta->ht_cap.cap;
+	u16 ht_cap = sta->ht_cap.cap;
 	struct ieee80211_sta_vht_cap *vht_cap = &sta->vht_cap;
 	struct sta_info *sinfo = container_of(sta, struct sta_info, sta);
 	int use_vht;
@@ -1138,6 +1138,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
 	int ack_dur;
 	int stbc;
 	int i;
+	bool ldpc;
 
 	/* fall back to the old minstrel for legacy stations */
 	if (!sta->ht_cap.ht_supported)
@@ -1175,16 +1176,22 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
 	}
 	mi->sample_tries = 4;
 
-	/* TODO tx_flags for vht - ATM the RC API is not fine-grained enough */
 	if (!use_vht) {
-		stbc = (sta_cap & IEEE80211_HT_CAP_RX_STBC) >>
+		stbc = (ht_cap & IEEE80211_HT_CAP_RX_STBC) >>
 			IEEE80211_HT_CAP_RX_STBC_SHIFT;
-		mi->tx_flags |= stbc << IEEE80211_TX_CTL_STBC_SHIFT;
 
-		if (sta_cap & IEEE80211_HT_CAP_LDPC_CODING)
-			mi->tx_flags |= IEEE80211_TX_CTL_LDPC;
+		ldpc = ht_cap & IEEE80211_HT_CAP_LDPC_CODING;
+	} else {
+		stbc = (vht_cap->cap & IEEE80211_VHT_CAP_RXSTBC_MASK) >>
+			IEEE80211_VHT_CAP_RXSTBC_SHIFT;
+
+		ldpc = vht_cap->cap & IEEE80211_VHT_CAP_RXLDPC;
 	}
 
+	mi->tx_flags |= stbc << IEEE80211_TX_CTL_STBC_SHIFT;
+	if (ldpc)
+		mi->tx_flags |= IEEE80211_TX_CTL_LDPC;
+
 	for (i = 0; i < ARRAY_SIZE(mi->groups); i++) {
 		u32 gflags = minstrel_mcs_groups[i].flags;
 		int bw, nss;
@@ -1197,10 +1204,10 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband,
 
 		if (gflags & IEEE80211_TX_RC_SHORT_GI) {
 			if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH) {
-				if (!(sta_cap & IEEE80211_HT_CAP_SGI_40))
+				if (!(ht_cap & IEEE80211_HT_CAP_SGI_40))
 					continue;
 			} else {
-				if (!(sta_cap & IEEE80211_HT_CAP_SGI_20))
+				if (!(ht_cap & IEEE80211_HT_CAP_SGI_20))
 					continue;
 			}
 		}
-- 
cgit v1.2.3


From 55ef8262f2b1b31762b045b14182db3cc8f607d1 Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Wed, 10 Oct 2018 19:09:28 +0200
Subject: spi: pxa2xx: Use an enum for type

That seems to be the correct type.

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.c   | 6 +++---
 include/linux/pxa2xx_ssp.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index d68f511a9ef1..8baa5b038f15 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1431,7 +1431,7 @@ pxa2xx_spi_init_pdata(struct platform_device *pdev)
 	struct resource *res;
 	const struct acpi_device_id *adev_id = NULL;
 	const struct pci_device_id *pcidev_id = NULL;
-	int type;
+	enum pxa_ssp_type type;
 
 	adev = ACPI_COMPANION(&pdev->dev);
 
@@ -1445,9 +1445,9 @@ pxa2xx_spi_init_pdata(struct platform_device *pdev)
 		return NULL;
 
 	if (adev_id)
-		type = (int)adev_id->driver_data;
+		type = (enum pxa_ssp_type)adev_id->driver_data;
 	else if (pcidev_id)
-		type = (int)pcidev_id->driver_data;
+		type = (enum pxa_ssp_type)pcidev_id->driver_data;
 	else
 		return NULL;
 
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 13b4244d44c1..262e1f318836 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -217,7 +217,7 @@ struct ssp_device {
 
 	const char	*label;
 	int		port_id;
-	int		type;
+	enum pxa_ssp_type type;
 	int		use_count;
 	int		irq;
 
-- 
cgit v1.2.3


From 87ae1d2d70772d661162de03e56c8d1cc5f12650 Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Wed, 10 Oct 2018 19:09:29 +0200
Subject: spi: pxa2xx: Add devicetree support

The MMP2 platform, that uses device tree, has this controller. Let's add
devicetree alongside platform & PCI.

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/spi/spi-pxa2xx.c   | 73 ++++++++++++++++++++++++++++------------------
 include/linux/pxa2xx_ssp.h |  1 +
 2 files changed, 45 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 8baa5b038f15..612cc49db28f 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -33,6 +33,7 @@
 #include <linux/clk.h>
 #include <linux/pm_runtime.h>
 #include <linux/acpi.h>
+#include <linux/of_device.h>
 
 #include "spi-pxa2xx.h"
 
@@ -1335,9 +1336,6 @@ static void cleanup(struct spi_device *spi)
 	kfree(chip);
 }
 
-#ifdef CONFIG_PCI
-#ifdef CONFIG_ACPI
-
 static const struct acpi_device_id pxa2xx_spi_acpi_match[] = {
 	{ "INT33C0", LPSS_LPT_SSP },
 	{ "INT33C1", LPSS_LPT_SSP },
@@ -1349,23 +1347,6 @@ static const struct acpi_device_id pxa2xx_spi_acpi_match[] = {
 };
 MODULE_DEVICE_TABLE(acpi, pxa2xx_spi_acpi_match);
 
-static int pxa2xx_spi_get_port_id(struct acpi_device *adev)
-{
-	unsigned int devid;
-	int port_id = -1;
-
-	if (adev && adev->pnp.unique_id &&
-	    !kstrtouint(adev->pnp.unique_id, 0, &devid))
-		port_id = devid;
-	return port_id;
-}
-#else /* !CONFIG_ACPI */
-static int pxa2xx_spi_get_port_id(struct acpi_device *adev)
-{
-	return -1;
-}
-#endif
-
 /*
  * PCI IDs of compound devices that integrate both host controller and private
  * integrated DMA engine. Please note these are not used in module
@@ -1412,6 +1393,37 @@ static const struct pci_device_id pxa2xx_spi_pci_compound_match[] = {
 	{ },
 };
 
+static const struct of_device_id pxa2xx_spi_of_match[] = {
+	{ .compatible = "marvell,mmp2-ssp", .data = (void *)MMP2_SSP },
+	{},
+};
+MODULE_DEVICE_TABLE(of, pxa2xx_spi_of_match);
+
+#ifdef CONFIG_ACPI
+
+static int pxa2xx_spi_get_port_id(struct acpi_device *adev)
+{
+	unsigned int devid;
+	int port_id = -1;
+
+	if (adev && adev->pnp.unique_id &&
+	    !kstrtouint(adev->pnp.unique_id, 0, &devid))
+		port_id = devid;
+	return port_id;
+}
+
+#else /* !CONFIG_ACPI */
+
+static int pxa2xx_spi_get_port_id(struct acpi_device *adev)
+{
+	return -1;
+}
+
+#endif /* CONFIG_ACPI */
+
+
+#ifdef CONFIG_PCI
+
 static bool pxa2xx_spi_idma_filter(struct dma_chan *chan, void *param)
 {
 	struct device *dev = param;
@@ -1422,6 +1434,8 @@ static bool pxa2xx_spi_idma_filter(struct dma_chan *chan, void *param)
 	return true;
 }
 
+#endif /* CONFIG_PCI */
+
 static struct pxa2xx_spi_master *
 pxa2xx_spi_init_pdata(struct platform_device *pdev)
 {
@@ -1431,11 +1445,15 @@ pxa2xx_spi_init_pdata(struct platform_device *pdev)
 	struct resource *res;
 	const struct acpi_device_id *adev_id = NULL;
 	const struct pci_device_id *pcidev_id = NULL;
+	const struct of_device_id *of_id = NULL;
 	enum pxa_ssp_type type;
 
 	adev = ACPI_COMPANION(&pdev->dev);
 
-	if (dev_is_pci(pdev->dev.parent))
+	if (pdev->dev.of_node)
+		of_id = of_match_device(pdev->dev.driver->of_match_table,
+					&pdev->dev);
+	else if (dev_is_pci(pdev->dev.parent))
 		pcidev_id = pci_match_id(pxa2xx_spi_pci_compound_match,
 					 to_pci_dev(pdev->dev.parent));
 	else if (adev)
@@ -1448,6 +1466,8 @@ pxa2xx_spi_init_pdata(struct platform_device *pdev)
 		type = (enum pxa_ssp_type)adev_id->driver_data;
 	else if (pcidev_id)
 		type = (enum pxa_ssp_type)pcidev_id->driver_data;
+	else if (of_id)
+		type = (enum pxa_ssp_type)of_id->data;
 	else
 		return NULL;
 
@@ -1466,11 +1486,13 @@ pxa2xx_spi_init_pdata(struct platform_device *pdev)
 	if (IS_ERR(ssp->mmio_base))
 		return NULL;
 
+#ifdef CONFIG_PCI
 	if (pcidev_id) {
 		pdata->tx_param = pdev->dev.parent;
 		pdata->rx_param = pdev->dev.parent;
 		pdata->dma_filter = pxa2xx_spi_idma_filter;
 	}
+#endif
 
 	ssp->clk = devm_clk_get(&pdev->dev, NULL);
 	ssp->irq = platform_get_irq(pdev, 0);
@@ -1484,14 +1506,6 @@ pxa2xx_spi_init_pdata(struct platform_device *pdev)
 	return pdata;
 }
 
-#else /* !CONFIG_PCI */
-static inline struct pxa2xx_spi_master *
-pxa2xx_spi_init_pdata(struct platform_device *pdev)
-{
-	return NULL;
-}
-#endif
-
 static int pxa2xx_spi_fw_translate_cs(struct spi_controller *master,
 				      unsigned int cs)
 {
@@ -1836,6 +1850,7 @@ static struct platform_driver driver = {
 		.name	= "pxa2xx-spi",
 		.pm	= &pxa2xx_spi_pm_ops,
 		.acpi_match_table = ACPI_PTR(pxa2xx_spi_acpi_match),
+		.of_match_table = of_match_ptr(pxa2xx_spi_of_match),
 	},
 	.probe = pxa2xx_spi_probe,
 	.remove = pxa2xx_spi_remove,
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 262e1f318836..979087e021f3 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -196,6 +196,7 @@ enum pxa_ssp_type {
 	PXA27x_SSP,
 	PXA3xx_SSP,
 	PXA168_SSP,
+	MMP2_SSP,
 	PXA910_SSP,
 	CE4100_SSP,
 	QUARK_X1000_SSP,
-- 
cgit v1.2.3


From 81b45683487a51b0f4d3b29d37f20d6d078544e4 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Sun, 26 Aug 2018 03:16:29 +0900
Subject: compiler.h: give up __compiletime_assert_fallback()

__compiletime_assert_fallback() is supposed to stop building earlier
by using the negative-array-size method in case the compiler does not
support "error" attribute, but has never worked like that.

You can simply try:

    BUILD_BUG_ON(1);

GCC immediately terminates the build, but Clang does not report
anything because Clang does not support the "error" attribute now.
It will later fail at link time, but __compiletime_assert_fallback()
is not working at least.

The root cause is commit 1d6a0d19c855 ("bug.h: prevent double evaluation
of `condition' in BUILD_BUG_ON").  Prior to that commit, BUILD_BUG_ON()
was checked by the negative-array-size method *and* the link-time trick.
Since that commit, the negative-array-size is not effective because
'__cond' is no longer constant.  As the comment in <linux/build_bug.h>
says, GCC (and Clang as well) only emits the error for obvious cases.

When '__cond' is a variable,

    ((void)sizeof(char[1 - 2 * __cond]))

... is not obvious for the compiler to know the array size is negative.

Reverting that commit would break BUILD_BUG() because negative-size-array
is evaluated before the code is optimized out.

Let's give up __compiletime_assert_fallback().  This commit does not
change the current behavior since it just rips off the useless code.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/compiler.h | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 681d866efb1e..87c776c3ce73 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -314,29 +314,14 @@ static inline void *offset_to_ptr(const int *off)
 #endif
 #ifndef __compiletime_error
 # define __compiletime_error(message)
-/*
- * Sparse complains of variable sized arrays due to the temporary variable in
- * __compiletime_assert. Unfortunately we can't just expand it out to make
- * sparse see a constant array size without breaking compiletime_assert on old
- * versions of GCC (e.g. 4.2.4), so hide the array from sparse altogether.
- */
-# ifndef __CHECKER__
-#  define __compiletime_error_fallback(condition) \
-	do { ((void)sizeof(char[1 - 2 * condition])); } while (0)
-# endif
-#endif
-#ifndef __compiletime_error_fallback
-# define __compiletime_error_fallback(condition) do { } while (0)
 #endif
 
 #ifdef __OPTIMIZE__
 # define __compiletime_assert(condition, msg, prefix, suffix)		\
 	do {								\
-		int __cond = !(condition);				\
 		extern void prefix ## suffix(void) __compiletime_error(msg); \
-		if (__cond)						\
+		if (!(condition))					\
 			prefix ## suffix();				\
-		__compiletime_error_fallback(__cond);			\
 	} while (0)
 #else
 # define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0)
-- 
cgit v1.2.3


From 953923c09fe83255ae11845db1c9eb576ba73df8 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Thu, 11 Oct 2018 11:06:29 -0400
Subject: dm: rename DM_TYPE_MQ_REQUEST_BASED to DM_TYPE_REQUEST_BASED

Now that request-based DM is only using blk-mq, there is no need to
differentiate between legacy "rq" and new "mq".  We're back to a single
request-based DM -- and there was much rejoicing!

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-mpath.c         | 14 +++++---------
 drivers/md/dm-table.c         |  7 +------
 drivers/md/dm.c               |  2 --
 include/linux/device-mapper.h |  5 ++---
 4 files changed, 8 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index a24ed3973e7c..d6a66921daf4 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -203,7 +203,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
 static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
 {
 	if (m->queue_mode == DM_TYPE_NONE) {
-		m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
+		m->queue_mode = DM_TYPE_REQUEST_BASED;
 	} else if (m->queue_mode == DM_TYPE_BIO_BASED) {
 		INIT_WORK(&m->process_queued_bios, process_queued_bios);
 		/*
@@ -658,7 +658,7 @@ static int multipath_map_bio(struct dm_target *ti, struct bio *bio)
 
 static void process_queued_io_list(struct multipath *m)
 {
-	if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
+	if (m->queue_mode == DM_TYPE_REQUEST_BASED)
 		dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
 	else if (m->queue_mode == DM_TYPE_BIO_BASED)
 		queue_work(kmultipathd, &m->process_queued_bios);
@@ -1079,10 +1079,9 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
 
 			if (!strcasecmp(queue_mode_name, "bio"))
 				m->queue_mode = DM_TYPE_BIO_BASED;
-			else if (!strcasecmp(queue_mode_name, "rq"))
+			else if (!strcasecmp(queue_mode_name, "rq") ||
+				 !strcasecmp(queue_mode_name, "mq"))
 				m->queue_mode = DM_TYPE_REQUEST_BASED;
-			else if (!strcasecmp(queue_mode_name, "mq"))
-				m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
 			else {
 				ti->error = "Unknown 'queue_mode' requested";
 				r = -EINVAL;
@@ -1716,9 +1715,6 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
 			case DM_TYPE_BIO_BASED:
 				DMEMIT("queue_mode bio ");
 				break;
-			case DM_TYPE_MQ_REQUEST_BASED:
-				DMEMIT("queue_mode mq ");
-				break;
 			default:
 				WARN_ON_ONCE(true);
 				break;
@@ -1962,7 +1958,7 @@ static int multipath_busy(struct dm_target *ti)
 
 	/* no paths available, for blk-mq: rely on IO mapping to delay requeue */
 	if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
-		return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED);
+		return (m->queue_mode != DM_TYPE_REQUEST_BASED);
 
 	/* Guess which priority_group will be used at next mapping time */
 	pg = READ_ONCE(m->current_pg);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 96e152c339a6..eeea32bb6a3e 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -871,8 +871,7 @@ static bool __table_type_bio_based(enum dm_queue_mode table_type)
 
 static bool __table_type_request_based(enum dm_queue_mode table_type)
 {
-	return (table_type == DM_TYPE_REQUEST_BASED ||
-		table_type == DM_TYPE_MQ_REQUEST_BASED);
+	return table_type == DM_TYPE_REQUEST_BASED;
 }
 
 void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
@@ -986,10 +985,6 @@ verify_bio_based:
 
 	BUG_ON(!request_based); /* No targets in this table */
 
-	/*
-	 * The only way to establish DM_TYPE_MQ_REQUEST_BASED is by
-	 * having a compatible target use dm_table_set_type.
-	 */
 	t->type = DM_TYPE_REQUEST_BASED;
 
 verify_rq_based:
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 0ce00c6f5f9a..bf36e2635ea7 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2213,7 +2213,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
 
 	switch (type) {
 	case DM_TYPE_REQUEST_BASED:
-	case DM_TYPE_MQ_REQUEST_BASED:
 		r = dm_mq_init_request_queue(md, t);
 		if (r) {
 			DMERR("Cannot initialize queue for request-based dm-mq mapped device");
@@ -2946,7 +2945,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu
 			goto out;
 		break;
 	case DM_TYPE_REQUEST_BASED:
-	case DM_TYPE_MQ_REQUEST_BASED:
 		pool_size = max(dm_get_reserved_rq_based_ios(), min_pool_size);
 		front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
 		/* per_io_data_size is used for blk-mq pdu at queue allocation */
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 6fb0808e87c8..8d937754aa0c 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -26,9 +26,8 @@ enum dm_queue_mode {
 	DM_TYPE_NONE		 = 0,
 	DM_TYPE_BIO_BASED	 = 1,
 	DM_TYPE_REQUEST_BASED	 = 2,
-	DM_TYPE_MQ_REQUEST_BASED = 3,
-	DM_TYPE_DAX_BIO_BASED	 = 4,
-	DM_TYPE_NVME_BIO_BASED	 = 5,
+	DM_TYPE_DAX_BIO_BASED	 = 3,
+	DM_TYPE_NVME_BIO_BASED	 = 4,
 };
 
 typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
-- 
cgit v1.2.3


From 3f2aa244ee1a0d17ed5b6c86564d2c1b24d1c96b Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Wed, 3 Oct 2018 13:21:07 +0100
Subject: TC: Set DMA masks for devices

Fix a TURBOchannel support regression with commit 205e1b7f51e4
("dma-mapping: warn when there is no coherent_dma_mask") that caused
coherent DMA allocations to produce a warning such as:

defxx: v1.11 2014/07/01  Lawrence V. Stefani and others
tc1: DEFTA at MMIO addr = 0x1e900000, IRQ = 20, Hardware addr = 08-00-2b-a3-a3-29
------------[ cut here ]------------
WARNING: CPU: 0 PID: 1 at ./include/linux/dma-mapping.h:516 dfx_dev_register+0x670/0x678
Modules linked in:
CPU: 0 PID: 1 Comm: swapper Not tainted 4.19.0-rc6 #2
Stack : ffffffff8009ffc0 fffffffffffffec0 0000000000000000 ffffffff80647650
        0000000000000000 0000000000000000 ffffffff806f5f80 ffffffffffffffff
        0000000000000000 0000000000000000 0000000000000001 ffffffff8065d4e8
        98000000031b6300 ffffffff80563478 ffffffff805685b0 ffffffffffffffff
        0000000000000000 ffffffff805d6720 0000000000000204 ffffffff80388df8
        0000000000000000 0000000000000009 ffffffff8053efd0 ffffffff806657d0
        0000000000000000 ffffffff803177f8 0000000000000000 ffffffff806d0000
        9800000003078000 980000000307b9e0 000000001e900000 ffffffff80067940
        0000000000000000 ffffffff805d6720 0000000000000204 ffffffff80388df8
        ffffffff805176c0 ffffffff8004dc78 0000000000000000 ffffffff80067940
        ...
Call Trace:
[<ffffffff8004dc78>] show_stack+0xa0/0x130
[<ffffffff80067940>] __warn+0x128/0x170
---[ end trace b1d1e094f67f3bb2 ]---

This is because the TURBOchannel bus driver fails to set the coherent
DMA mask for devices enumerated.

Set the regular and coherent DMA masks for TURBOchannel devices then,
observing that the bus protocol supports a 34-bit (16GiB) DMA address
space, by interpreting the value presented in the address cycle across
the 32 `ad' lines as a 32-bit word rather than byte address[1].  The
architectural size of the TURBOchannel DMA address space exceeds the
maximum amount of RAM any actual TURBOchannel system in existence may
have, hence both masks are the same.

This removes the warning shown above.

References:

[1] "TURBOchannel Hardware Specification", EK-369AA-OD-007B, Digital
    Equipment Corporation, January 1993, Section "DMA", pp. 1-15 -- 1-17

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/20835/
Fixes: 205e1b7f51e4 ("dma-mapping: warn when there is no coherent_dma_mask")
Cc: stable@vger.kernel.org # 4.16+
Cc: Ralf Baechle <ralf@linux-mips.org>
---
 drivers/tc/tc.c    | 8 +++++++-
 include/linux/tc.h | 1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/tc/tc.c b/drivers/tc/tc.c
index 3be9519654e5..cf3fad2cb871 100644
--- a/drivers/tc/tc.c
+++ b/drivers/tc/tc.c
@@ -2,7 +2,7 @@
  *	TURBOchannel bus services.
  *
  *	Copyright (c) Harald Koerfgen, 1998
- *	Copyright (c) 2001, 2003, 2005, 2006  Maciej W. Rozycki
+ *	Copyright (c) 2001, 2003, 2005, 2006, 2018  Maciej W. Rozycki
  *	Copyright (c) 2005  James Simmons
  *
  *	This file is subject to the terms and conditions of the GNU
@@ -10,6 +10,7 @@
  *	directory of this archive for more details.
  */
 #include <linux/compiler.h>
+#include <linux/dma-mapping.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
@@ -92,6 +93,11 @@ static void __init tc_bus_add_devices(struct tc_bus *tbus)
 		tdev->dev.bus = &tc_bus_type;
 		tdev->slot = slot;
 
+		/* TURBOchannel has 34-bit DMA addressing (16GiB space). */
+		tdev->dma_mask = DMA_BIT_MASK(34);
+		tdev->dev.dma_mask = &tdev->dma_mask;
+		tdev->dev.coherent_dma_mask = DMA_BIT_MASK(34);
+
 		for (i = 0; i < 8; i++) {
 			tdev->firmware[i] =
 				readb(module + offset + TC_FIRM_VER + 4 * i);
diff --git a/include/linux/tc.h b/include/linux/tc.h
index f92511e57cdb..a60639f37963 100644
--- a/include/linux/tc.h
+++ b/include/linux/tc.h
@@ -84,6 +84,7 @@ struct tc_dev {
 					   device. */
 	struct device	dev;		/* Generic device interface. */
 	struct resource	resource;	/* Address space of this device. */
+	u64		dma_mask;	/* DMA addressable range. */
 	char		vendor[9];
 	char		name[9];
 	char		firmware[9];
-- 
cgit v1.2.3


From 4822e902f9bdffaea2817471365e000966f0d1a1 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Thu, 11 Oct 2018 10:07:06 +0300
Subject: block: describe difference between flags IO_STAT and STATS

This adds reasonable comments, but they definitely needs better names.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index dee46c20701b..61207560e826 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -108,7 +108,7 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_QUIET		((__force req_flags_t)(1 << 11))
 /* elevator private data attached */
 #define RQF_ELVPRIV		((__force req_flags_t)(1 << 12))
-/* account I/O stat */
+/* account into disk and partition IO statistics */
 #define RQF_IO_STAT		((__force req_flags_t)(1 << 13))
 /* request came from our alloc pool */
 #define RQF_ALLOCED		((__force req_flags_t)(1 << 14))
@@ -116,7 +116,7 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_PM			((__force req_flags_t)(1 << 15))
 /* on IO scheduler merge hash */
 #define RQF_HASHED		((__force req_flags_t)(1 << 16))
-/* IO stats tracking on */
+/* track IO completion time */
 #define RQF_STATS		((__force req_flags_t)(1 << 17))
 /* Look at ->special_vec for the actual data payload instead of the
    bio chain. */
@@ -685,7 +685,7 @@ struct request_queue {
 #define QUEUE_FLAG_FAIL_IO	7	/* fake timeout */
 #define QUEUE_FLAG_NONROT	9	/* non-rotational device (SSD) */
 #define QUEUE_FLAG_VIRT        QUEUE_FLAG_NONROT /* paravirt device */
-#define QUEUE_FLAG_IO_STAT     10	/* do IO stats */
+#define QUEUE_FLAG_IO_STAT     10	/* do disk/partitions IO accounting */
 #define QUEUE_FLAG_DISCARD     11	/* supports DISCARD */
 #define QUEUE_FLAG_NOXMERGES   12	/* No extended merges */
 #define QUEUE_FLAG_ADD_RANDOM  13	/* Contributes to random pool */
@@ -699,7 +699,7 @@ struct request_queue {
 #define QUEUE_FLAG_FUA	       21	/* device supports FUA writes */
 #define QUEUE_FLAG_FLUSH_NQ    22	/* flush not queueuable */
 #define QUEUE_FLAG_DAX         23	/* device supports DAX */
-#define QUEUE_FLAG_STATS       24	/* track rq completion times */
+#define QUEUE_FLAG_STATS       24	/* track IO start and completion times */
 #define QUEUE_FLAG_POLL_STATS  25	/* collecting stats for hybrid polling */
 #define QUEUE_FLAG_REGISTERED  26	/* queue has been registered to a disk */
 #define QUEUE_FLAG_SCSI_PASSTHROUGH 27	/* queue supports SCSI commands */
-- 
cgit v1.2.3


From 5fd752b6b3a2233972ce1726df8bdb40886113a9 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linaro.org>
Date: Thu, 11 Oct 2018 12:07:14 +0800
Subject: leds: core: Introduce LED pattern trigger

This patch adds a new LED trigger that LED device can configure
to employ software or hardware pattern engine.

Consumers can write 'pattern' file to enable the software pattern
which alters the brightness for the specified duration with one
software timer.

Moreover consumers can write 'hw_pattern' file to enable the hardware
pattern for some LED controllers which can autonomously control
brightness over time, according to some preprogrammed hardware
patterns.

Signed-off-by: Raphael Teysseyre <rteysseyre@gmail.com>
Signed-off-by: Baolin Wang <baolin.wang@linaro.org>
Signed-off-by: Jacek Anaszewski <jacek.anaszewski@gmail.com>
---
 .../ABI/testing/sysfs-class-led-trigger-pattern    |  82 ++++
 drivers/leds/trigger/Kconfig                       |   7 +
 drivers/leds/trigger/Makefile                      |   1 +
 drivers/leds/trigger/ledtrig-pattern.c             | 411 +++++++++++++++++++++
 include/linux/leds.h                               |  15 +
 5 files changed, 516 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-class-led-trigger-pattern
 create mode 100644 drivers/leds/trigger/ledtrig-pattern.c

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-led-trigger-pattern b/Documentation/ABI/testing/sysfs-class-led-trigger-pattern
new file mode 100644
index 000000000000..fb3d1e03b881
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-led-trigger-pattern
@@ -0,0 +1,82 @@
+What:		/sys/class/leds/<led>/pattern
+Date:		September 2018
+KernelVersion:	4.20
+Description:
+		Specify a software pattern for the LED, that supports altering
+		the brightness for the specified duration with one software
+		timer. It can do gradual dimming and step change of brightness.
+
+		The pattern is given by a series of tuples, of brightness and
+		duration (ms). The LED is expected to traverse the series and
+		each brightness value for the specified duration. Duration of
+		0 means brightness should immediately change to new value, and
+		writing malformed pattern deactivates any active one.
+
+		1. For gradual dimming, the dimming interval now is set as 50
+		milliseconds. So the tuple with duration less than dimming
+		interval (50ms) is treated as a step change of brightness,
+		i.e. the subsequent brightness will be applied without adding
+		intervening dimming intervals.
+
+		The gradual dimming format of the software pattern values should be:
+		"brightness_1 duration_1 brightness_2 duration_2 brightness_3
+		duration_3 ...". For example:
+
+		echo 0 1000 255 2000 > pattern
+
+		It will make the LED go gradually from zero-intensity to max (255)
+		intensity in 1000 milliseconds, then back to zero intensity in 2000
+		milliseconds:
+
+		LED brightness
+		    ^
+		255-|       / \            / \            /
+		    |      /    \         /    \         /
+		    |     /       \      /       \      /
+		    |    /          \   /          \   /
+		  0-|   /             \/             \/
+		    +---0----1----2----3----4----5----6------------> time (s)
+
+		2. To make the LED go instantly from one brigntess value to another,
+		we should use use zero-time lengths (the brightness must be same as
+		the previous tuple's). So the format should be:
+		"brightness_1 duration_1 brightness_1 0 brightness_2 duration_2
+		brightness_2 0 ...". For example:
+
+		echo 0 1000 0 0 255 2000 255 0 > pattern
+
+		It will make the LED stay off for one second, then stay at max brightness
+		for two seconds:
+
+		LED brightness
+		    ^
+		255-|        +---------+    +---------+
+		    |        |         |    |         |
+		    |        |         |    |         |
+		    |        |         |    |         |
+		  0-|   -----+         +----+         +----
+		    +---0----1----2----3----4----5----6------------> time (s)
+
+What:		/sys/class/leds/<led>/hw_pattern
+Date:		September 2018
+KernelVersion:	4.20
+Description:
+		Specify a hardware pattern for the LED, for LED hardware that
+		supports autonomously controlling brightness over time, according
+		to some preprogrammed hardware patterns. It deactivates any active
+		software pattern.
+
+		Since different LED hardware can have different semantics of
+		hardware patterns, each driver is expected to provide its own
+		description for the hardware patterns in their ABI documentation
+		file.
+
+What:		/sys/class/leds/<led>/repeat
+Date:		September 2018
+KernelVersion:	4.20
+Description:
+		Specify a pattern repeat number. -1 means repeat indefinitely,
+		other negative numbers and number 0 are invalid.
+
+		This file will always return the originally written repeat
+		number.
diff --git a/drivers/leds/trigger/Kconfig b/drivers/leds/trigger/Kconfig
index 4018af769969..b76fc3cdc8f8 100644
--- a/drivers/leds/trigger/Kconfig
+++ b/drivers/leds/trigger/Kconfig
@@ -129,4 +129,11 @@ config LEDS_TRIGGER_NETDEV
 	  This allows LEDs to be controlled by network device activity.
 	  If unsure, say Y.
 
+config LEDS_TRIGGER_PATTERN
+	tristate "LED Pattern Trigger"
+	help
+	  This allows LEDs to be controlled by a software or hardware pattern
+	  which is a series of tuples, of brightness and duration (ms).
+	  If unsure, say N
+
 endif # LEDS_TRIGGERS
diff --git a/drivers/leds/trigger/Makefile b/drivers/leds/trigger/Makefile
index f3cfe1950538..9bcb64ee8123 100644
--- a/drivers/leds/trigger/Makefile
+++ b/drivers/leds/trigger/Makefile
@@ -13,3 +13,4 @@ obj-$(CONFIG_LEDS_TRIGGER_TRANSIENT)	+= ledtrig-transient.o
 obj-$(CONFIG_LEDS_TRIGGER_CAMERA)	+= ledtrig-camera.o
 obj-$(CONFIG_LEDS_TRIGGER_PANIC)	+= ledtrig-panic.o
 obj-$(CONFIG_LEDS_TRIGGER_NETDEV)	+= ledtrig-netdev.o
+obj-$(CONFIG_LEDS_TRIGGER_PATTERN)	+= ledtrig-pattern.o
diff --git a/drivers/leds/trigger/ledtrig-pattern.c b/drivers/leds/trigger/ledtrig-pattern.c
new file mode 100644
index 000000000000..ce7acd115dd8
--- /dev/null
+++ b/drivers/leds/trigger/ledtrig-pattern.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * LED pattern trigger
+ *
+ * Idea discussed with Pavel Machek. Raphael Teysseyre implemented
+ * the first version, Baolin Wang simplified and improved the approach.
+ */
+
+#include <linux/kernel.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+
+#define MAX_PATTERNS		1024
+/*
+ * When doing gradual dimming, the led brightness will be updated
+ * every 50 milliseconds.
+ */
+#define UPDATE_INTERVAL		50
+
+struct pattern_trig_data {
+	struct led_classdev *led_cdev;
+	struct led_pattern patterns[MAX_PATTERNS];
+	struct led_pattern *curr;
+	struct led_pattern *next;
+	struct mutex lock;
+	u32 npatterns;
+	int repeat;
+	int last_repeat;
+	int delta_t;
+	bool is_indefinite;
+	bool is_hw_pattern;
+	struct timer_list timer;
+};
+
+static void pattern_trig_update_patterns(struct pattern_trig_data *data)
+{
+	data->curr = data->next;
+	if (!data->is_indefinite && data->curr == data->patterns)
+		data->repeat--;
+
+	if (data->next == data->patterns + data->npatterns - 1)
+		data->next = data->patterns;
+	else
+		data->next++;
+
+	data->delta_t = 0;
+}
+
+static int pattern_trig_compute_brightness(struct pattern_trig_data *data)
+{
+	int step_brightness;
+
+	/*
+	 * If current tuple's duration is less than the dimming interval,
+	 * we should treat it as a step change of brightness instead of
+	 * doing gradual dimming.
+	 */
+	if (data->delta_t == 0 || data->curr->delta_t < UPDATE_INTERVAL)
+		return data->curr->brightness;
+
+	step_brightness = abs(data->next->brightness - data->curr->brightness);
+	step_brightness = data->delta_t * step_brightness / data->curr->delta_t;
+
+	if (data->next->brightness > data->curr->brightness)
+		return data->curr->brightness + step_brightness;
+	else
+		return data->curr->brightness - step_brightness;
+}
+
+static void pattern_trig_timer_function(struct timer_list *t)
+{
+	struct pattern_trig_data *data = from_timer(data, t, timer);
+
+	mutex_lock(&data->lock);
+
+	for (;;) {
+		if (!data->is_indefinite && !data->repeat)
+			break;
+
+		if (data->curr->brightness == data->next->brightness) {
+			/* Step change of brightness */
+			led_set_brightness(data->led_cdev,
+					   data->curr->brightness);
+			mod_timer(&data->timer,
+				  jiffies + msecs_to_jiffies(data->curr->delta_t));
+
+			/* Skip the tuple with zero duration */
+			pattern_trig_update_patterns(data);
+			/* Select next tuple */
+			pattern_trig_update_patterns(data);
+		} else {
+			/* Gradual dimming */
+
+			/*
+			 * If the accumulation time is larger than current
+			 * tuple's duration, we should go next one and re-check
+			 * if we repeated done.
+			 */
+			if (data->delta_t > data->curr->delta_t) {
+				pattern_trig_update_patterns(data);
+				continue;
+			}
+
+			led_set_brightness(data->led_cdev,
+					   pattern_trig_compute_brightness(data));
+			mod_timer(&data->timer,
+				  jiffies + msecs_to_jiffies(UPDATE_INTERVAL));
+
+			/* Accumulate the gradual dimming time */
+			data->delta_t += UPDATE_INTERVAL;
+		}
+
+		break;
+	}
+
+	mutex_unlock(&data->lock);
+}
+
+static int pattern_trig_start_pattern(struct led_classdev *led_cdev)
+{
+	struct pattern_trig_data *data = led_cdev->trigger_data;
+
+	if (!data->npatterns)
+		return 0;
+
+	if (data->is_hw_pattern) {
+		return led_cdev->pattern_set(led_cdev, data->patterns,
+					     data->npatterns, data->repeat);
+	}
+
+	/* At least 2 tuples for software pattern. */
+	if (data->npatterns < 2)
+		return -EINVAL;
+
+	data->delta_t = 0;
+	data->curr = data->patterns;
+	data->next = data->patterns + 1;
+	data->timer.expires = jiffies;
+	add_timer(&data->timer);
+
+	return 0;
+}
+
+static ssize_t repeat_show(struct device *dev, struct device_attribute *attr,
+			   char *buf)
+{
+	struct led_classdev *led_cdev = dev_get_drvdata(dev);
+	struct pattern_trig_data *data = led_cdev->trigger_data;
+	int repeat;
+
+	mutex_lock(&data->lock);
+
+	repeat = data->last_repeat;
+
+	mutex_unlock(&data->lock);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", repeat);
+}
+
+static ssize_t repeat_store(struct device *dev, struct device_attribute *attr,
+			    const char *buf, size_t count)
+{
+	struct led_classdev *led_cdev = dev_get_drvdata(dev);
+	struct pattern_trig_data *data = led_cdev->trigger_data;
+	int err, res;
+
+	err = kstrtos32(buf, 10, &res);
+	if (err)
+		return err;
+
+	/* Number 0 and negative numbers except -1 are invalid. */
+	if (res < -1 || res == 0)
+		return -EINVAL;
+
+	/*
+	 * Clear previous patterns' performence firstly, and remove the timer
+	 * without mutex lock to avoid dead lock.
+	 */
+	del_timer_sync(&data->timer);
+
+	mutex_lock(&data->lock);
+
+	if (data->is_hw_pattern)
+		led_cdev->pattern_clear(led_cdev);
+
+	data->last_repeat = data->repeat = res;
+	/* -1 means repeat indefinitely */
+	if (data->repeat == -1)
+		data->is_indefinite = true;
+	else
+		data->is_indefinite = false;
+
+	err = pattern_trig_start_pattern(led_cdev);
+
+	mutex_unlock(&data->lock);
+	return err < 0 ? err : count;
+}
+
+static DEVICE_ATTR_RW(repeat);
+
+static ssize_t pattern_trig_show_patterns(struct pattern_trig_data *data,
+					  char *buf, bool hw_pattern)
+{
+	ssize_t count = 0;
+	int i;
+
+	mutex_lock(&data->lock);
+
+	if (!data->npatterns || (data->is_hw_pattern ^ hw_pattern))
+		goto out;
+
+	for (i = 0; i < data->npatterns; i++) {
+		count += scnprintf(buf + count, PAGE_SIZE - count,
+				   "%d %u ",
+				   data->patterns[i].brightness,
+				   data->patterns[i].delta_t);
+	}
+
+	buf[count - 1] = '\n';
+
+out:
+	mutex_unlock(&data->lock);
+	return count;
+}
+
+static ssize_t pattern_trig_store_patterns(struct led_classdev *led_cdev,
+					   const char *buf, size_t count,
+					   bool hw_pattern)
+{
+	struct pattern_trig_data *data = led_cdev->trigger_data;
+	int ccount, cr, offset = 0, err = 0;
+
+	/*
+	 * Clear previous patterns' performence firstly, and remove the timer
+	 * without mutex lock to avoid dead lock.
+	 */
+	del_timer_sync(&data->timer);
+
+	mutex_lock(&data->lock);
+
+	if (data->is_hw_pattern)
+		led_cdev->pattern_clear(led_cdev);
+
+	data->is_hw_pattern = hw_pattern;
+	data->npatterns = 0;
+
+	while (offset < count - 1 && data->npatterns < MAX_PATTERNS) {
+		cr = 0;
+		ccount = sscanf(buf + offset, "%d %u %n",
+				&data->patterns[data->npatterns].brightness,
+				&data->patterns[data->npatterns].delta_t, &cr);
+		if (ccount != 2) {
+			data->npatterns = 0;
+			err = -EINVAL;
+			goto out;
+		}
+
+		offset += cr;
+		data->npatterns++;
+	}
+
+	err = pattern_trig_start_pattern(led_cdev);
+	if (err)
+		data->npatterns = 0;
+
+out:
+	mutex_unlock(&data->lock);
+	return err < 0 ? err : count;
+}
+
+static ssize_t pattern_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct led_classdev *led_cdev = dev_get_drvdata(dev);
+	struct pattern_trig_data *data = led_cdev->trigger_data;
+
+	return pattern_trig_show_patterns(data, buf, false);
+}
+
+static ssize_t pattern_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct led_classdev *led_cdev = dev_get_drvdata(dev);
+
+	return pattern_trig_store_patterns(led_cdev, buf, count, false);
+}
+
+static DEVICE_ATTR_RW(pattern);
+
+static ssize_t hw_pattern_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct led_classdev *led_cdev = dev_get_drvdata(dev);
+	struct pattern_trig_data *data = led_cdev->trigger_data;
+
+	return pattern_trig_show_patterns(data, buf, true);
+}
+
+static ssize_t hw_pattern_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct led_classdev *led_cdev = dev_get_drvdata(dev);
+
+	return pattern_trig_store_patterns(led_cdev, buf, count, true);
+}
+
+static DEVICE_ATTR_RW(hw_pattern);
+
+static umode_t pattern_trig_attrs_mode(struct kobject *kobj,
+				       struct attribute *attr, int index)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct led_classdev *led_cdev = dev_get_drvdata(dev);
+
+	if (attr == &dev_attr_repeat.attr || attr == &dev_attr_pattern.attr)
+		return attr->mode;
+	else if (attr == &dev_attr_hw_pattern.attr && led_cdev->pattern_set)
+		return attr->mode;
+
+	return 0;
+}
+
+static struct attribute *pattern_trig_attrs[] = {
+	&dev_attr_pattern.attr,
+	&dev_attr_hw_pattern.attr,
+	&dev_attr_repeat.attr,
+	NULL
+};
+
+static const struct attribute_group pattern_trig_group = {
+	.attrs = pattern_trig_attrs,
+	.is_visible = pattern_trig_attrs_mode,
+};
+
+static const struct attribute_group *pattern_trig_groups[] = {
+	&pattern_trig_group,
+	NULL,
+};
+
+static int pattern_trig_activate(struct led_classdev *led_cdev)
+{
+	struct pattern_trig_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	if (!!led_cdev->pattern_set ^ !!led_cdev->pattern_clear) {
+		dev_warn(led_cdev->dev,
+			 "Hardware pattern ops validation failed\n");
+		led_cdev->pattern_set = NULL;
+		led_cdev->pattern_clear = NULL;
+	}
+
+	data->is_indefinite = true;
+	data->last_repeat = -1;
+	mutex_init(&data->lock);
+	data->led_cdev = led_cdev;
+	led_set_trigger_data(led_cdev, data);
+	timer_setup(&data->timer, pattern_trig_timer_function, 0);
+	led_cdev->activated = true;
+
+	return 0;
+}
+
+static void pattern_trig_deactivate(struct led_classdev *led_cdev)
+{
+	struct pattern_trig_data *data = led_cdev->trigger_data;
+
+	if (!led_cdev->activated)
+		return;
+
+	if (led_cdev->pattern_clear)
+		led_cdev->pattern_clear(led_cdev);
+
+	del_timer_sync(&data->timer);
+
+	led_set_brightness(led_cdev, LED_OFF);
+	kfree(data);
+	led_cdev->activated = false;
+}
+
+static struct led_trigger pattern_led_trigger = {
+	.name = "pattern",
+	.activate = pattern_trig_activate,
+	.deactivate = pattern_trig_deactivate,
+	.groups = pattern_trig_groups,
+};
+
+static int __init pattern_trig_init(void)
+{
+	return led_trigger_register(&pattern_led_trigger);
+}
+
+static void __exit pattern_trig_exit(void)
+{
+	led_trigger_unregister(&pattern_led_trigger);
+}
+
+module_init(pattern_trig_init);
+module_exit(pattern_trig_exit);
+
+MODULE_AUTHOR("Raphael Teysseyre <rteysseyre@gmail.com");
+MODULE_AUTHOR("Baolin Wang <baolin.wang@linaro.org");
+MODULE_DESCRIPTION("LED Pattern trigger");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 834683d603f9..7393a316d9fa 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -22,6 +22,7 @@
 #include <linux/workqueue.h>
 
 struct device;
+struct led_pattern;
 /*
  * LED Core
  */
@@ -88,6 +89,10 @@ struct led_classdev {
 				     unsigned long *delay_on,
 				     unsigned long *delay_off);
 
+	int (*pattern_set)(struct led_classdev *led_cdev,
+			   struct led_pattern *pattern, u32 len, int repeat);
+	int (*pattern_clear)(struct led_classdev *led_cdev);
+
 	struct device		*dev;
 	const struct attribute_group	**groups;
 
@@ -472,4 +477,14 @@ static inline void led_classdev_notify_brightness_hw_changed(
 	struct led_classdev *led_cdev, enum led_brightness brightness) { }
 #endif
 
+/**
+ * struct led_pattern - pattern interval settings
+ * @delta_t: pattern interval delay, in milliseconds
+ * @brightness: pattern interval brightness
+ */
+struct led_pattern {
+	u32 delta_t;
+	int brightness;
+};
+
 #endif		/* __LINUX_LEDS_H_INCLUDED */
-- 
cgit v1.2.3


From ca2b497253ad01c80061a1f3ee9eb91b5d54a849 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 5 Oct 2018 13:24:36 +0100
Subject: arm64: perf: Reject stand-alone CHAIN events for PMUv3

It doesn't make sense for a perf event to be configured as a CHAIN event
in isolation, so extend the arm_pmu structure with a ->filter_match()
function to allow the backend PMU implementation to reject CHAIN events
early.

Cc: <stable@vger.kernel.org>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/kernel/perf_event.c | 7 +++++++
 drivers/perf/arm_pmu.c         | 8 +++++++-
 include/linux/perf/arm_pmu.h   | 1 +
 3 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 8e38d5267f22..e213f8e867f6 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -966,6 +966,12 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
 	return 0;
 }
 
+static int armv8pmu_filter_match(struct perf_event *event)
+{
+	unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT;
+	return evtype != ARMV8_PMUV3_PERFCTR_CHAIN;
+}
+
 static void armv8pmu_reset(void *info)
 {
 	struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
@@ -1114,6 +1120,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu)
 	cpu_pmu->stop			= armv8pmu_stop,
 	cpu_pmu->reset			= armv8pmu_reset,
 	cpu_pmu->set_event_filter	= armv8pmu_set_event_filter;
+	cpu_pmu->filter_match		= armv8pmu_filter_match;
 
 	return 0;
 }
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 7f01f6f60b87..d0b7dd8fb184 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -485,7 +485,13 @@ static int armpmu_filter_match(struct perf_event *event)
 {
 	struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
 	unsigned int cpu = smp_processor_id();
-	return cpumask_test_cpu(cpu, &armpmu->supported_cpus);
+	int ret;
+
+	ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus);
+	if (ret && armpmu->filter_match)
+		return armpmu->filter_match(event);
+
+	return ret;
 }
 
 static ssize_t armpmu_cpumask_show(struct device *dev,
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 10f92e1d8e7b..bf309ff6f244 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -99,6 +99,7 @@ struct arm_pmu {
 	void		(*stop)(struct arm_pmu *);
 	void		(*reset)(void *);
 	int		(*map_event)(struct perf_event *event);
+	int		(*filter_match)(struct perf_event *event);
 	int		num_events;
 	bool		secure_access; /* 32-bit ARM only */
 #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40
-- 
cgit v1.2.3


From 9c2120090586d7e509faa885988d4eb2441fabc2 Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Wed, 10 Oct 2018 19:09:27 +0200
Subject: PCI: Provide pci_match_id() with CONFIG_PCI=n

This spares drivers from #ifdef-ing on CONFIG_PCI if the driver can be
optionally built on machines without PCI bus.

Consistent with acpi_driver_match_device() and similar.

Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/linux/pci.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pci.h b/include/linux/pci.h
index e72ca8dd6241..d2f14eb23ea4 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1702,6 +1702,10 @@ static inline int pci_irqd_intx_xlate(struct irq_domain *d,
 				      unsigned long *out_hwirq,
 				      unsigned int *out_type)
 { return -EINVAL; }
+
+static inline const struct pci_device_id *pci_match_id(const struct pci_device_id *ids,
+							 struct pci_dev *dev)
+{ return NULL; }
 #endif /* CONFIG_PCI */
 
 /* Include architecture-dependent settings and functions */
-- 
cgit v1.2.3


From b0ce7b29bfcd090ddba476f45a75ec0a797b048a Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 12 Oct 2018 14:54:12 +0200
Subject: regulator/gpio: Allow nonexclusive GPIO access

This allows nonexclusive (simultaneous) access to a single
GPIO line for the fixed regulator enable line. This happens
when several regulators use the same GPIO for enabling and
disabling a regulator, and all need a handle on their GPIO
descriptor.

This solution with a special flag is not entirely elegant
and should ideally be replaced by something more careful as
this makes it possible for several consumers to
enable/disable the same GPIO line to the left and right
without any consistency. The current use inside the regulator
core should however be fine as it takes special care to
handle this.

For the state of the GPIO backend, this is still the
lesser evil compared to going back to global GPIO
numbers.

Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Jon Hunter <jonathanh@nvidia.com>
Fixes: efdfeb079cc3 ("regulator: fixed: Convert to use GPIO descriptor only")
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Jon Hunter <jonathanh@nvidia.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/gpio/gpiolib.c        | 19 +++++++++++++++++--
 drivers/regulator/fixed.c     | 13 +++++++++++++
 include/linux/gpio/consumer.h |  1 +
 3 files changed, 31 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index e8f8a1999393..8228306aba46 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -3908,8 +3908,23 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
 	 * the device name as label
 	 */
 	status = gpiod_request(desc, con_id ? con_id : devname);
-	if (status < 0)
-		return ERR_PTR(status);
+	if (status < 0) {
+		if (status == -EBUSY && flags & GPIOD_FLAGS_BIT_NONEXCLUSIVE) {
+			/*
+			 * This happens when there are several consumers for
+			 * the same GPIO line: we just return here without
+			 * further initialization. It is a bit if a hack.
+			 * This is necessary to support fixed regulators.
+			 *
+			 * FIXME: Make this more sane and safe.
+			 */
+			dev_info(dev, "nonexclusive access to GPIO for %s\n",
+				 con_id ? con_id : devname);
+			return desc;
+		} else {
+			return ERR_PTR(status);
+		}
+	}
 
 	status = gpiod_configure_flags(desc, con_id, lookupflags, flags);
 	if (status < 0) {
diff --git a/drivers/regulator/fixed.c b/drivers/regulator/fixed.c
index 7d639ad953b6..ccc29038f19a 100644
--- a/drivers/regulator/fixed.c
+++ b/drivers/regulator/fixed.c
@@ -170,6 +170,19 @@ static int reg_fixed_voltage_probe(struct platform_device *pdev)
 			gflags = GPIOD_OUT_LOW_OPEN_DRAIN;
 	}
 
+	/*
+	 * Some fixed regulators share the enable line between two
+	 * regulators which makes it necessary to get a handle on the
+	 * same descriptor for two different consumers. This will get
+	 * the GPIO descriptor, but only the first call will initialize
+	 * it so any flags such as inversion or open drain will only
+	 * be set up by the first caller and assumed identical on the
+	 * next caller.
+	 *
+	 * FIXME: find a better way to deal with this.
+	 */
+	gflags |= GPIOD_FLAGS_BIT_NONEXCLUSIVE;
+
 	cfg.ena_gpiod = devm_gpiod_get_optional(&pdev->dev, NULL, gflags);
 	if (IS_ERR(cfg.ena_gpiod))
 		return PTR_ERR(cfg.ena_gpiod);
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 21ddbe440030..33695a1d8b35 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -30,6 +30,7 @@ struct gpio_descs {
 #define GPIOD_FLAGS_BIT_DIR_OUT		BIT(1)
 #define GPIOD_FLAGS_BIT_DIR_VAL		BIT(2)
 #define GPIOD_FLAGS_BIT_OPEN_DRAIN	BIT(3)
+#define GPIOD_FLAGS_BIT_NONEXCLUSIVE	BIT(4)
 
 /**
  * Optional flags that can be passed to one of gpiod_* to configure direction
-- 
cgit v1.2.3


From 81732b26e05994552f347746eb11762e986079a0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 11 Sep 2018 23:28:07 -0400
Subject: usb-serial: begin switching to ->[sg]et_serial()

add such methods for usb_serial_driver, provide tty_operations
->[sg]et_serial() calling those.  For now the lack of methods
in driver means ENOIOCTLCMD from usb-serial ->[sg]et_serial(),
making tty_ioctl() fall back to calling ->ioctl().  Once all
drivers are converted, we'll be returning -ENOTTY instead,
completing the switchover.

Reviewed-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/usb/serial/usb-serial.c | 20 ++++++++++++++++++++
 include/linux/usb/serial.h      |  2 ++
 2 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index f7aaa7f079e1..0f96d82fc575 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -396,6 +396,24 @@ static void serial_unthrottle(struct tty_struct *tty)
 		port->serial->type->unthrottle(tty);
 }
 
+static int serial_get_serial(struct tty_struct *tty, struct serial_struct *ss)
+{
+	struct usb_serial_port *port = tty->driver_data;
+
+	if (port->serial->type->get_serial)
+		return port->serial->type->get_serial(tty, ss);
+	return -ENOIOCTLCMD;
+}
+
+static int serial_set_serial(struct tty_struct *tty, struct serial_struct *ss)
+{
+	struct usb_serial_port *port = tty->driver_data;
+
+	if (port->serial->type->set_serial)
+		return port->serial->type->set_serial(tty, ss);
+	return -ENOIOCTLCMD;
+}
+
 static int serial_ioctl(struct tty_struct *tty,
 					unsigned int cmd, unsigned long arg)
 {
@@ -1177,6 +1195,8 @@ static const struct tty_operations serial_ops = {
 	.tiocmget =		serial_tiocmget,
 	.tiocmset =		serial_tiocmset,
 	.get_icount =		serial_get_icount,
+	.set_serial =		serial_set_serial,
+	.get_serial =		serial_get_serial,
 	.cleanup =		serial_cleanup,
 	.install =		serial_install,
 	.proc_show =		serial_proc_show,
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 106551a5616e..1c19f77ed541 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -285,6 +285,8 @@ struct usb_serial_driver {
 	int  (*write_room)(struct tty_struct *tty);
 	int  (*ioctl)(struct tty_struct *tty,
 		      unsigned int cmd, unsigned long arg);
+	int  (*get_serial)(struct tty_struct *tty, struct serial_struct *ss);
+	int  (*set_serial)(struct tty_struct *tty, struct serial_struct *ss);
 	void (*set_termios)(struct tty_struct *tty,
 			struct usb_serial_port *port, struct ktermios *old);
 	void (*break_ctl)(struct tty_struct *tty, int break_state);
-- 
cgit v1.2.3


From f0193d3ea73b966b5dbfa272c8228d743b8856ef Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 13 Sep 2018 22:12:15 -0400
Subject: change semantics of ldisc ->compat_ioctl()

First of all, make it return int.  Returning long when native method
had never allowed that is ridiculous and inconvenient.

More importantly, change the caller; if ldisc ->compat_ioctl() is NULL
or returns -ENOIOCTLCMD, tty_compat_ioctl() will try to feed cmd and
compat_ptr(arg) to ldisc's native ->ioctl().

That simplifies ->compat_ioctl() instances quite a bit - they only
need to deal with ioctls that are neither generic tty ones (those
would get shunted off to tty_ioctl()) nor simple compat pointer ones.

Note that something like TCFLSH won't reach ->compat_ioctl(),
even if ldisc ->ioctl() does handle it - it will be recognized
earlier and passed to tty_ioctl() (and ultimately - ldisc ->ioctl()).

For many ldiscs it means that NULL ->compat_ioctl() does the
right thing.  Those where it won't serve (see e.g. n_r3964.c) are
also easily dealt with - we need to handle the numeric-argument
ioctls (calling the native instance) and, if such would exist,
the ioctls that need layout conversion, etc.

All in-tree ldiscs dealt with.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/bluetooth/hci_ldisc.c |  1 +
 drivers/input/serio/serport.c |  2 +-
 drivers/net/hamradio/6pack.c  | 21 ---------------------
 drivers/net/hamradio/mkiss.c  | 21 ---------------------
 drivers/net/slip/slip.c       | 25 -------------------------
 drivers/net/wan/x25_asy.c     | 19 -------------------
 drivers/tty/n_gsm.c           | 11 -----------
 drivers/tty/n_r3964.c         | 22 ++++++++++++++++++++++
 drivers/tty/tty_io.c          |  3 +++
 include/linux/tty_ldisc.h     | 10 ++++++++--
 net/nfc/nci/uart.c            |  1 +
 11 files changed, 36 insertions(+), 100 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index 963bb0309e25..ae0dd57a8e99 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -821,6 +821,7 @@ static int __init hci_uart_init(void)
 	hci_uart_ldisc.read		= hci_uart_tty_read;
 	hci_uart_ldisc.write		= hci_uart_tty_write;
 	hci_uart_ldisc.ioctl		= hci_uart_tty_ioctl;
+	hci_uart_ldisc.compat_ioctl	= hci_uart_tty_ioctl;
 	hci_uart_ldisc.poll		= hci_uart_tty_poll;
 	hci_uart_ldisc.receive_buf	= hci_uart_tty_receive;
 	hci_uart_ldisc.write_wakeup	= hci_uart_tty_wakeup;
diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c
index f8ead9f9c77e..5977b8a34ebe 100644
--- a/drivers/input/serio/serport.c
+++ b/drivers/input/serio/serport.c
@@ -226,7 +226,7 @@ static int serport_ldisc_ioctl(struct tty_struct *tty, struct file *file,
 
 #ifdef CONFIG_COMPAT
 #define COMPAT_SPIOCSTYPE	_IOW('q', 0x01, compat_ulong_t)
-static long serport_ldisc_compat_ioctl(struct tty_struct *tty,
+static int serport_ldisc_compat_ioctl(struct tty_struct *tty,
 				       struct file *file,
 				       unsigned int cmd, unsigned long arg)
 {
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index d79a69dd2146..17e6dcd2eb42 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -34,7 +34,6 @@
 #include <linux/ip.h>
 #include <linux/tcp.h>
 #include <linux/semaphore.h>
-#include <linux/compat.h>
 #include <linux/refcount.h>
 
 #define SIXPACK_VERSION    "Revision: 0.3.0"
@@ -752,23 +751,6 @@ static int sixpack_ioctl(struct tty_struct *tty, struct file *file,
 	return err;
 }
 
-#ifdef CONFIG_COMPAT
-static long sixpack_compat_ioctl(struct tty_struct * tty, struct file * file,
-				unsigned int cmd, unsigned long arg)
-{
-	switch (cmd) {
-	case SIOCGIFNAME:
-	case SIOCGIFENCAP:
-	case SIOCSIFENCAP:
-	case SIOCSIFHWADDR:
-		return sixpack_ioctl(tty, file, cmd,
-				(unsigned long)compat_ptr(arg));
-	}
-
-	return -ENOIOCTLCMD;
-}
-#endif
-
 static struct tty_ldisc_ops sp_ldisc = {
 	.owner		= THIS_MODULE,
 	.magic		= TTY_LDISC_MAGIC,
@@ -776,9 +758,6 @@ static struct tty_ldisc_ops sp_ldisc = {
 	.open		= sixpack_open,
 	.close		= sixpack_close,
 	.ioctl		= sixpack_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= sixpack_compat_ioctl,
-#endif
 	.receive_buf	= sixpack_receive_buf,
 	.write_wakeup	= sixpack_write_wakeup,
 };
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index 13e4c1eff353..802233d41b25 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -35,7 +35,6 @@
 #include <linux/skbuff.h>
 #include <linux/if_arp.h>
 #include <linux/jiffies.h>
-#include <linux/compat.h>
 
 #include <net/ax25.h>
 
@@ -875,23 +874,6 @@ static int mkiss_ioctl(struct tty_struct *tty, struct file *file,
 	return err;
 }
 
-#ifdef CONFIG_COMPAT
-static long mkiss_compat_ioctl(struct tty_struct *tty, struct file *file,
-	unsigned int cmd, unsigned long arg)
-{
-	switch (cmd) {
-	case SIOCGIFNAME:
-	case SIOCGIFENCAP:
-	case SIOCSIFENCAP:
-	case SIOCSIFHWADDR:
-		return mkiss_ioctl(tty, file, cmd,
-				   (unsigned long)compat_ptr(arg));
-	}
-
-	return -ENOIOCTLCMD;
-}
-#endif
-
 /*
  * Handle the 'receiver data ready' interrupt.
  * This function is called by the 'tty_io' module in the kernel when
@@ -966,9 +948,6 @@ static struct tty_ldisc_ops ax_ldisc = {
 	.open		= mkiss_open,
 	.close		= mkiss_close,
 	.ioctl		= mkiss_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= mkiss_compat_ioctl,
-#endif
 	.receive_buf	= mkiss_receive_buf,
 	.write_wakeup	= mkiss_write_wakeup
 };
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index b008266e91ea..9757f1fc104f 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -79,7 +79,6 @@
 #include <linux/rtnetlink.h>
 #include <linux/if_arp.h>
 #include <linux/if_slip.h>
-#include <linux/compat.h>
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -1167,27 +1166,6 @@ static int slip_ioctl(struct tty_struct *tty, struct file *file,
 	}
 }
 
-#ifdef CONFIG_COMPAT
-static long slip_compat_ioctl(struct tty_struct *tty, struct file *file,
-					unsigned int cmd, unsigned long arg)
-{
-	switch (cmd) {
-	case SIOCGIFNAME:
-	case SIOCGIFENCAP:
-	case SIOCSIFENCAP:
-	case SIOCSIFHWADDR:
-	case SIOCSKEEPALIVE:
-	case SIOCGKEEPALIVE:
-	case SIOCSOUTFILL:
-	case SIOCGOUTFILL:
-		return slip_ioctl(tty, file, cmd,
-				  (unsigned long)compat_ptr(arg));
-	}
-
-	return -ENOIOCTLCMD;
-}
-#endif
-
 /* VSV changes start here */
 #ifdef CONFIG_SLIP_SMART
 /* function do_ioctl called from net/core/dev.c
@@ -1280,9 +1258,6 @@ static struct tty_ldisc_ops sl_ldisc = {
 	.close	 	= slip_close,
 	.hangup	 	= slip_hangup,
 	.ioctl		= slip_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= slip_compat_ioctl,
-#endif
 	.receive_buf	= slip_receive_buf,
 	.write_wakeup	= slip_write_wakeup,
 };
diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c
index 74c06a5f586f..1098263ab862 100644
--- a/drivers/net/wan/x25_asy.c
+++ b/drivers/net/wan/x25_asy.c
@@ -33,7 +33,6 @@
 #include <linux/lapb.h>
 #include <linux/init.h>
 #include <linux/rtnetlink.h>
-#include <linux/compat.h>
 #include <linux/slab.h>
 #include <net/x25device.h>
 #include "x25_asy.h"
@@ -703,21 +702,6 @@ static int x25_asy_ioctl(struct tty_struct *tty, struct file *file,
 	}
 }
 
-#ifdef CONFIG_COMPAT
-static long x25_asy_compat_ioctl(struct tty_struct *tty, struct file *file,
-			 unsigned int cmd,  unsigned long arg)
-{
-	switch (cmd) {
-	case SIOCGIFNAME:
-	case SIOCSIFHWADDR:
-		return x25_asy_ioctl(tty, file, cmd,
-				     (unsigned long)compat_ptr(arg));
-	}
-
-	return -ENOIOCTLCMD;
-}
-#endif
-
 static int x25_asy_open_dev(struct net_device *dev)
 {
 	struct x25_asy *sl = netdev_priv(dev);
@@ -769,9 +753,6 @@ static struct tty_ldisc_ops x25_ldisc = {
 	.open		= x25_asy_open_tty,
 	.close		= x25_asy_close_tty,
 	.ioctl		= x25_asy_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= x25_asy_compat_ioctl,
-#endif
 	.receive_buf	= x25_asy_receive_buf,
 	.write_wakeup	= x25_asy_write_wakeup,
 };
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index 86b7e20ffd7f..6f7da9a9d76f 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -2614,14 +2614,6 @@ static int gsmld_ioctl(struct tty_struct *tty, struct file *file,
 	}
 }
 
-#ifdef CONFIG_COMPAT
-static long gsmld_compat_ioctl(struct tty_struct *tty, struct file *file,
-		       unsigned int cmd, unsigned long arg)
-{
-	return gsmld_ioctl(tty, file, cmd, arg);
-}
-#endif
-
 /*
  *	Network interface
  *
@@ -2833,9 +2825,6 @@ static struct tty_ldisc_ops tty_ldisc_packet = {
 	.flush_buffer    = gsmld_flush_buffer,
 	.read            = gsmld_read,
 	.write           = gsmld_write,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl    = gsmld_compat_ioctl,
-#endif
 	.ioctl           = gsmld_ioctl,
 	.poll            = gsmld_poll,
 	.receive_buf     = gsmld_receive_buf,
diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c
index dbf1ab36758e..749a608c40b0 100644
--- a/drivers/tty/n_r3964.c
+++ b/drivers/tty/n_r3964.c
@@ -134,6 +134,10 @@ static ssize_t r3964_write(struct tty_struct *tty, struct file *file,
 		const unsigned char *buf, size_t nr);
 static int r3964_ioctl(struct tty_struct *tty, struct file *file,
 		unsigned int cmd, unsigned long arg);
+#ifdef CONFIG_COMPAT
+static int r3964_compat_ioctl(struct tty_struct *tty, struct file *file,
+		unsigned int cmd, unsigned long arg);
+#endif
 static void r3964_set_termios(struct tty_struct *tty, struct ktermios *old);
 static __poll_t r3964_poll(struct tty_struct *tty, struct file *file,
 		struct poll_table_struct *wait);
@@ -149,6 +153,9 @@ static struct tty_ldisc_ops tty_ldisc_N_R3964 = {
 	.read = r3964_read,
 	.write = r3964_write,
 	.ioctl = r3964_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = r3964_compat_ioctl,
+#endif
 	.set_termios = r3964_set_termios,
 	.poll = r3964_poll,
 	.receive_buf = r3964_receive_buf,
@@ -1210,6 +1217,21 @@ static int r3964_ioctl(struct tty_struct *tty, struct file *file,
 	}
 }
 
+#ifdef CONFIG_COMPAT
+static int r3964_compat_ioctl(struct tty_struct *tty, struct file *file,
+		unsigned int cmd, unsigned long arg)
+{
+	switch (cmd) {
+	case R3964_ENABLE_SIGNALS:
+	case R3964_SETPRIORITY:
+	case R3964_USE_BCC:
+		return r3964_ioctl(tty, file, cmd, arg);
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+#endif
+
 static void r3964_set_termios(struct tty_struct *tty, struct ktermios *old)
 {
 	TRACE_L("set_termios");
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 1a55c3e20299..cd8df45f09d9 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -2824,6 +2824,9 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd,
 		return hung_up_tty_compat_ioctl(file, cmd, arg);
 	if (ld->ops->compat_ioctl)
 		retval = ld->ops->compat_ioctl(tty, file, cmd, arg);
+	if (retval == -ENOIOCTLCMD && ld->ops->ioctl)
+		retval = ld->ops->ioctl(tty, file,
+				(unsigned long)compat_ptr(cmd), arg);
 	tty_ldisc_deref(ld);
 
 	return retval;
diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h
index 840894ca3fc0..b1e6043e9917 100644
--- a/include/linux/tty_ldisc.h
+++ b/include/linux/tty_ldisc.h
@@ -54,11 +54,17 @@
  *	low-level driver can "grab" an ioctl request before the line
  *	discpline has a chance to see it.
  *
- * long	(*compat_ioctl)(struct tty_struct * tty, struct file * file,
+ * int	(*compat_ioctl)(struct tty_struct * tty, struct file * file,
  *		        unsigned int cmd, unsigned long arg);
  *
  *	Process ioctl calls from 32-bit process on 64-bit system
  *
+ *	NOTE: only ioctls that are neither "pointer to compatible
+ *	structure" nor tty-generic.  Something private that takes
+ *	an integer or a pointer to wordsize-sensitive structure
+ *	belongs here, but most of ldiscs will happily leave
+ *	it NULL.
+ *
  * void	(*set_termios)(struct tty_struct *tty, struct ktermios * old);
  *
  *	This function notifies the line discpline that a change has
@@ -184,7 +190,7 @@ struct tty_ldisc_ops {
 			 const unsigned char *buf, size_t nr);
 	int	(*ioctl)(struct tty_struct *tty, struct file *file,
 			 unsigned int cmd, unsigned long arg);
-	long	(*compat_ioctl)(struct tty_struct *tty, struct file *file,
+	int	(*compat_ioctl)(struct tty_struct *tty, struct file *file,
 				unsigned int cmd, unsigned long arg);
 	void	(*set_termios)(struct tty_struct *tty, struct ktermios *old);
 	__poll_t (*poll)(struct tty_struct *, struct file *,
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index a66f102c6c01..d1fa0f22c10c 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -465,6 +465,7 @@ static struct tty_ldisc_ops nci_uart_ldisc = {
 	.receive_buf	= nci_uart_tty_receive,
 	.write_wakeup	= nci_uart_tty_wakeup,
 	.ioctl		= nci_uart_tty_ioctl,
+	.compat_ioctl	= nci_uart_tty_ioctl,
 };
 
 static int __init nci_uart_init(void)
-- 
cgit v1.2.3


From cc53aabcc283c36274d3f3ce9adc4b40c21d4838 Mon Sep 17 00:00:00 2001
From: Govind Singh <govinds@codeaurora.org>
Date: Thu, 11 Oct 2018 13:16:01 +0300
Subject: firmware: qcom: scm: Add WLAN VMID for Qualcomm SCM interface

Add WLAN related VMID's to support wlan driver to set up
the remote's permissions call via TrustZone.

Signed-off-by: Govind Singh <govinds@codeaurora.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Acked-by: Niklas Cassel <niklas.cassel@linaro.org>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 include/linux/qcom_scm.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h
index 5d65521260b3..06996ad4f2bc 100644
--- a/include/linux/qcom_scm.h
+++ b/include/linux/qcom_scm.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2015, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2010-2015, 2018, The Linux Foundation. All rights reserved.
  * Copyright (C) 2015 Linaro Ltd.
  *
  * This program is free software; you can redistribute it and/or modify
@@ -33,6 +33,8 @@ struct qcom_scm_vmperm {
 
 #define QCOM_SCM_VMID_HLOS       0x3
 #define QCOM_SCM_VMID_MSS_MSA    0xF
+#define QCOM_SCM_VMID_WLAN       0x18
+#define QCOM_SCM_VMID_WLAN_CE    0x19
 #define QCOM_SCM_PERM_READ       0x4
 #define QCOM_SCM_PERM_WRITE      0x2
 #define QCOM_SCM_PERM_EXEC       0x1
-- 
cgit v1.2.3


From c6aed238b7a9b15a5c90a0c31f1d36577b5d2cbe Mon Sep 17 00:00:00 2001
From: Loic Pallardy <loic.pallardy@st.com>
Date: Fri, 27 Jul 2018 15:14:47 +0200
Subject: remoteproc: modify vring allocation to rely on centralized carveout
 allocator

Current version of rproc_alloc_vring function supports only dynamic vring
allocation.

This patch allows to allocate vrings based on memory region declatation.
Vrings are now manage like memory carveouts, to communize memory management
code in rproc_alloc_registered_carveouts().

Allocated buffer is retrieved in rp_find_vq() thanks to
rproc_find_carveout_by_name() functions for.

This patch sets vrings names to vdev"x"vring"y" with x vdev index in
resource table and y vring index in vdev. This will be updated when
name will be associated to vdev in firmware resource table.

Signed-off-by: Loic Pallardy <loic.pallardy@st.com>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c     | 61 +++++++++++++++++---------------
 drivers/remoteproc/remoteproc_internal.h |  2 ++
 drivers/remoteproc/remoteproc_virtio.c   | 14 +++++++-
 include/linux/remoteproc.h               |  6 ++--
 4 files changed, 51 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 5abcd27a29f3..f77a42f6a8aa 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -53,6 +53,11 @@ typedef int (*rproc_handle_resources_t)(struct rproc *rproc,
 typedef int (*rproc_handle_resource_t)(struct rproc *rproc,
 				 void *, int offset, int avail);
 
+static int rproc_alloc_carveout(struct rproc *rproc,
+				struct rproc_mem_entry *mem);
+static int rproc_release_carveout(struct rproc *rproc,
+				  struct rproc_mem_entry *mem);
+
 /* Unique indices for remoteproc devices */
 static DEFINE_IDA(rproc_dev_index);
 
@@ -312,21 +317,33 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i)
 	struct device *dev = &rproc->dev;
 	struct rproc_vring *rvring = &rvdev->vring[i];
 	struct fw_rsc_vdev *rsc;
-	dma_addr_t dma;
-	void *va;
 	int ret, size, notifyid;
+	struct rproc_mem_entry *mem;
 
 	/* actual size of vring (in bytes) */
 	size = PAGE_ALIGN(vring_size(rvring->len, rvring->align));
 
-	/*
-	 * Allocate non-cacheable memory for the vring. In the future
-	 * this call will also configure the IOMMU for us
-	 */
-	va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
-	if (!va) {
-		dev_err(dev->parent, "dma_alloc_coherent failed\n");
-		return -EINVAL;
+	rsc = (void *)rproc->table_ptr + rvdev->rsc_offset;
+
+	/* Search for pre-registered carveout */
+	mem = rproc_find_carveout_by_name(rproc, "vdev%dvring%d", rvdev->index,
+					  i);
+	if (mem) {
+		if (rproc_check_carveout_da(rproc, mem, rsc->vring[i].da, size))
+			return -ENOMEM;
+	} else {
+		/* Register carveout in in list */
+		mem = rproc_mem_entry_init(dev, 0, 0, size, rsc->vring[i].da,
+					   rproc_alloc_carveout,
+					   rproc_release_carveout,
+					   "vdev%dvring%d",
+					   rvdev->index, i);
+		if (!mem) {
+			dev_err(dev, "Can't allocate memory entry structure\n");
+			return -ENOMEM;
+		}
+
+		rproc_add_carveout(rproc, mem);
 	}
 
 	/*
@@ -337,7 +354,6 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i)
 	ret = idr_alloc(&rproc->notifyids, rvring, 0, 0, GFP_KERNEL);
 	if (ret < 0) {
 		dev_err(dev, "idr_alloc failed: %d\n", ret);
-		dma_free_coherent(dev->parent, size, va, dma);
 		return ret;
 	}
 	notifyid = ret;
@@ -346,21 +362,9 @@ int rproc_alloc_vring(struct rproc_vdev *rvdev, int i)
 	if (notifyid > rproc->max_notifyid)
 		rproc->max_notifyid = notifyid;
 
-	dev_dbg(dev, "vring%d: va %pK dma %pad size 0x%x idr %d\n",
-		i, va, &dma, size, notifyid);
-
-	rvring->va = va;
-	rvring->dma = dma;
 	rvring->notifyid = notifyid;
 
-	/*
-	 * Let the rproc know the notifyid and da of this vring.
-	 * Not all platforms use dma_alloc_coherent to automatically
-	 * set up the iommu. In this case the device address (da) will
-	 * hold the physical address and not the device address.
-	 */
-	rsc = (void *)rproc->table_ptr + rvdev->rsc_offset;
-	rsc->vring[i].da = dma;
+	/* Let the rproc know the notifyid of this vring.*/
 	rsc->vring[i].notifyid = notifyid;
 	return 0;
 }
@@ -392,12 +396,10 @@ rproc_parse_vring(struct rproc_vdev *rvdev, struct fw_rsc_vdev *rsc, int i)
 
 void rproc_free_vring(struct rproc_vring *rvring)
 {
-	int size = PAGE_ALIGN(vring_size(rvring->len, rvring->align));
 	struct rproc *rproc = rvring->rvdev->rproc;
 	int idx = rvring->rvdev->vring - rvring;
 	struct fw_rsc_vdev *rsc;
 
-	dma_free_coherent(rproc->dev.parent, size, rvring->va, rvring->dma);
 	idr_remove(&rproc->notifyids, rvring->notifyid);
 
 	/* reset resource entry info */
@@ -484,6 +486,7 @@ static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc,
 
 	rvdev->id = rsc->id;
 	rvdev->rproc = rproc;
+	rvdev->index = rproc->nb_vdev++;
 
 	/* parse the vrings */
 	for (i = 0; i < rsc->num_of_vrings; i++) {
@@ -528,9 +531,6 @@ void rproc_vdev_release(struct kref *ref)
 
 	for (id = 0; id < ARRAY_SIZE(rvdev->vring); id++) {
 		rvring = &rvdev->vring[id];
-		if (!rvring->va)
-			continue;
-
 		rproc_free_vring(rvring);
 	}
 
@@ -1323,6 +1323,9 @@ static int rproc_fw_boot(struct rproc *rproc, const struct firmware *fw)
 	/* reset max_notifyid */
 	rproc->max_notifyid = -1;
 
+	/* reset handled vdev */
+	rproc->nb_vdev = 0;
+
 	/* handle fw resources which are required to boot rproc */
 	ret = rproc_handle_resources(rproc, rproc_loading_handlers);
 	if (ret) {
diff --git a/drivers/remoteproc/remoteproc_internal.h b/drivers/remoteproc/remoteproc_internal.h
index 7570beb035b5..f6cad243d7ca 100644
--- a/drivers/remoteproc/remoteproc_internal.h
+++ b/drivers/remoteproc/remoteproc_internal.h
@@ -60,6 +60,8 @@ int rproc_elf_load_segments(struct rproc *rproc, const struct firmware *fw);
 int rproc_elf_load_rsc_table(struct rproc *rproc, const struct firmware *fw);
 struct resource_table *rproc_elf_find_loaded_rsc_table(struct rproc *rproc,
 						       const struct firmware *fw);
+struct rproc_mem_entry *
+rproc_find_carveout_by_name(struct rproc *rproc, const char *name, ...);
 
 static inline
 int rproc_fw_sanity_check(struct rproc *rproc, const struct firmware *fw)
diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c
index bbecd44df7e8..de21f620b882 100644
--- a/drivers/remoteproc/remoteproc_virtio.c
+++ b/drivers/remoteproc/remoteproc_virtio.c
@@ -76,7 +76,9 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
 	struct rproc_vdev *rvdev = vdev_to_rvdev(vdev);
 	struct rproc *rproc = vdev_to_rproc(vdev);
 	struct device *dev = &rproc->dev;
+	struct rproc_mem_entry *mem;
 	struct rproc_vring *rvring;
+	struct fw_rsc_vdev *rsc;
 	struct virtqueue *vq;
 	void *addr;
 	int len, size;
@@ -88,8 +90,14 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
 	if (!name)
 		return NULL;
 
+	/* Search allocated memory region by name */
+	mem = rproc_find_carveout_by_name(rproc, "vdev%dvring%d", rvdev->index,
+					  id);
+	if (!mem || !mem->va)
+		return ERR_PTR(-ENOMEM);
+
 	rvring = &rvdev->vring[id];
-	addr = rvring->va;
+	addr = mem->va;
 	len = rvring->len;
 
 	/* zero vring */
@@ -114,6 +122,10 @@ static struct virtqueue *rp_find_vq(struct virtio_device *vdev,
 	rvring->vq = vq;
 	vq->priv = rvring;
 
+	/* Update vring in resource table */
+	rsc = (void *)rproc->table_ptr + rvdev->rsc_offset;
+	rsc->vring[id].da = mem->da;
+
 	return vq;
 }
 
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index d4cabe8da507..8bb0cf0416f1 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -454,6 +454,7 @@ struct rproc_dump_segment {
  * @has_iommu: flag to indicate if remote processor is behind an MMU
  * @auto_boot: flag to indicate if remote processor should be auto-started
  * @dump_segments: list of segments in the firmware
+ * @nb_vdev: number of vdev currently handled by rproc
  */
 struct rproc {
 	struct list_head node;
@@ -486,6 +487,7 @@ struct rproc {
 	bool has_iommu;
 	bool auto_boot;
 	struct list_head dump_segments;
+	int nb_vdev;
 };
 
 /**
@@ -513,7 +515,6 @@ struct rproc_subdev {
 /**
  * struct rproc_vring - remoteproc vring state
  * @va:	virtual address
- * @dma: dma address
  * @len: length, in bytes
  * @da: device address
  * @align: vring alignment
@@ -523,7 +524,6 @@ struct rproc_subdev {
  */
 struct rproc_vring {
 	void *va;
-	dma_addr_t dma;
 	int len;
 	u32 da;
 	u32 align;
@@ -542,6 +542,7 @@ struct rproc_vring {
  * @vdev: the virio device
  * @vring: the vrings for this vdev
  * @rsc_offset: offset of the vdev's resource entry
+ * @index: vdev position versus other vdev declared in resource table
  */
 struct rproc_vdev {
 	struct kref refcount;
@@ -554,6 +555,7 @@ struct rproc_vdev {
 	struct virtio_device vdev;
 	struct rproc_vring vring[RVDEV_NUM_VRINGS];
 	u32 rsc_offset;
+	u32 index;
 };
 
 struct rproc *rproc_get_by_phandle(phandle phandle);
-- 
cgit v1.2.3


From 722fb61e2ed39473297157839ec7230b77fd6940 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Wed, 10 Oct 2018 12:51:32 +0900
Subject: mmc: tmio: remove TMIO_MMC_HAVE_HIGH_REG flag

TMIO_MMC_HAVE_HIGH_REG is confusing due to its counter-intuitive name.

All the TMIO MMC variants (TMIO MMC, Renesas SDHI, UniPhier SD) actually
have high registers. It is just that each of them implements its own
registers there. The original IP from Panasonic only defines registers
0x00-0xff in the bus_shift=1 review. The register area above them is
platform-dependent.

In fact, TMIO_MMC_HAVE_HIGH_REG is set only by tmio-mmc.c and used to
test the accessibility of CTL_SDIO_REGS. Because it is specific to
the TMIO MFD variant, the right thing to do is to move such registers
to tmio_mmc.c and delete the TMIO_MMC_HAVE_HIGH_REG flag.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Tested-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/tmio_mmc.c | 7 +++++--
 drivers/mmc/host/tmio_mmc.h | 3 ---
 include/linux/mfd/tmio.h    | 7 -------
 3 files changed, 5 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
index 651e325238e6..93e83ad25976 100644
--- a/drivers/mmc/host/tmio_mmc.c
+++ b/drivers/mmc/host/tmio_mmc.c
@@ -21,6 +21,11 @@
 
 #include "tmio_mmc.h"
 
+/* Registers specific to this variant */
+#define CTL_SDIO_REGS		0x100
+#define CTL_CLK_AND_WAIT_CTL	0x138
+#define CTL_RESET_SDIO		0x1e0
+
 static void tmio_mmc_clk_start(struct tmio_mmc_host *host)
 {
 	sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, CLK_CTL_SCLKEN |
@@ -153,8 +158,6 @@ static int tmio_mmc_probe(struct platform_device *pdev)
 		goto cell_disable;
 	}
 
-	pdata->flags |= TMIO_MMC_HAVE_HIGH_REG;
-
 	host = tmio_mmc_host_alloc(pdev, pdata);
 	if (IS_ERR(host)) {
 		ret = PTR_ERR(host);
diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
index 1a23a3d14bd6..a8ad67eea340 100644
--- a/drivers/mmc/host/tmio_mmc.h
+++ b/drivers/mmc/host/tmio_mmc.h
@@ -43,9 +43,6 @@
 #define CTL_RESET_SD 0xe0
 #define CTL_VERSION 0xe2
 #define CTL_SDIF_MODE 0xe6
-#define CTL_SDIO_REGS 0x100
-#define CTL_CLK_AND_WAIT_CTL 0x138
-#define CTL_RESET_SDIO 0x1e0
 
 /* Definitions for values the CTL_STOP_INTERNAL_ACTION register can take */
 #define TMIO_STOP_STP		BIT(0)
diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h
index 77866214ab51..1e70060c92ce 100644
--- a/include/linux/mfd/tmio.h
+++ b/include/linux/mfd/tmio.h
@@ -61,13 +61,6 @@
  */
 #define TMIO_MMC_USE_GPIO_CD		BIT(5)
 
-/*
- * Some controllers doesn't have over 0x100 register.
- * it is used to checking accessibility of
- * CTL_SD_CARD_CLK_CTL / CTL_CLK_AND_WAIT_CTL
- */
-#define TMIO_MMC_HAVE_HIGH_REG		BIT(6)
-
 /*
  * Some controllers have CMD12 automatically
  * issue/non-issue register
-- 
cgit v1.2.3


From 18127429a854e7607b859484880b8e26cee9ddab Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Mon, 15 Oct 2018 15:43:06 +0200
Subject: bitops: protect variables in set_mask_bits() macro

Unprotected naming of local variables within the set_mask_bits() can easily
lead to using the wrong scope.

Noticed this when "set_mask_bits(&foo->bar, 0, mask)" behaved as no-op.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: 00a1a053ebe5 ("ext4: atomically set inode->i_flags in ext4_set_inode_flags()")
Cc: Theodore Ts'o <tytso@mit.edu>
---
 include/linux/bitops.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 7ddb1349394d..d18ee0e63c32 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -236,17 +236,17 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr,
 #ifdef __KERNEL__
 
 #ifndef set_mask_bits
-#define set_mask_bits(ptr, _mask, _bits)	\
+#define set_mask_bits(ptr, mask, bits)	\
 ({								\
-	const typeof(*ptr) mask = (_mask), bits = (_bits);	\
-	typeof(*ptr) old, new;					\
+	const typeof(*(ptr)) mask__ = (mask), bits__ = (bits);	\
+	typeof(*(ptr)) old__, new__;				\
 								\
 	do {							\
-		old = READ_ONCE(*ptr);			\
-		new = (old & ~mask) | bits;			\
-	} while (cmpxchg(ptr, old, new) != old);		\
+		old__ = READ_ONCE(*(ptr));			\
+		new__ = (old__ & ~mask__) | bits__;		\
+	} while (cmpxchg(ptr, old__, new__) != old__);		\
 								\
-	new;							\
+	new__;							\
 })
 #endif
 
-- 
cgit v1.2.3


From edfa87281f4fa1b78a21f6db999935a2faa2f6b8 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Mon, 15 Oct 2018 15:43:06 +0200
Subject: bitops: protect variables in bit_clear_unless() macro

Unprotected naming of local variables within bit_clear_unless() can easily
lead to using the wrong scope.

Noticed this by code review after having hit this issue in set_mask_bits()

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: 85ad1d13ee9b ("md: set MD_CHANGE_PENDING in a atomic region")
Cc: Guoqing Jiang <gqjiang@suse.com>
---
 include/linux/bitops.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index d18ee0e63c32..705f7c442691 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -251,18 +251,18 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr,
 #endif
 
 #ifndef bit_clear_unless
-#define bit_clear_unless(ptr, _clear, _test)	\
+#define bit_clear_unless(ptr, clear, test)	\
 ({								\
-	const typeof(*ptr) clear = (_clear), test = (_test);	\
-	typeof(*ptr) old, new;					\
+	const typeof(*(ptr)) clear__ = (clear), test__ = (test);\
+	typeof(*(ptr)) old__, new__;				\
 								\
 	do {							\
-		old = READ_ONCE(*ptr);			\
-		new = old & ~clear;				\
-	} while (!(old & test) &&				\
-		 cmpxchg(ptr, old, new) != old);		\
+		old__ = READ_ONCE(*(ptr));			\
+		new__ = old__ & ~clear__;			\
+	} while (!(old__ & test__) &&				\
+		 cmpxchg(ptr, old__, new__) != old__);		\
 								\
-	!(old & test);						\
+	!(old__ & test__);					\
 })
 #endif
 
-- 
cgit v1.2.3


From 604326b41a6fb9b4a78b6179335decee0365cd8c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 13 Oct 2018 02:45:58 +0200
Subject: bpf, sockmap: convert to generic sk_msg interface

Add a generic sk_msg layer, and convert current sockmap and later
kTLS over to make use of it. While sk_buff handles network packet
representation from netdevice up to socket, sk_msg handles data
representation from application to socket layer.

This means that sk_msg framework spans across ULP users in the
kernel, and enables features such as introspection or filtering
of data with the help of BPF programs that operate on this data
structure.

Latter becomes in particular useful for kTLS where data encryption
is deferred into the kernel, and as such enabling the kernel to
perform L7 introspection and policy based on BPF for TLS connections
where the record is being encrypted after BPF has run and came to
a verdict. In order to get there, first step is to transform open
coding of scatter-gather list handling into a common core framework
that subsystems can use.

The code itself has been split and refactored into three bigger
pieces: i) the generic sk_msg API which deals with managing the
scatter gather ring, providing helpers for walking and mangling,
transferring application data from user space into it, and preparing
it for BPF pre/post-processing, ii) the plain sock map itself
where sockets can be attached to or detached from; these bits
are independent of i) which can now be used also without sock
map, and iii) the integration with plain TCP as one protocol
to be used for processing L7 application data (later this could
e.g. also be extended to other protocols like UDP). The semantics
are the same with the old sock map code and therefore no change
of user facing behavior or APIs. While pursuing this work it
also helped finding a number of bugs in the old sockmap code
that we've fixed already in earlier commits. The test_sockmap
kselftest suite passes through fine as well.

Joint work with John.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h       |   33 +-
 include/linux/bpf_types.h |    2 +-
 include/linux/filter.h    |   21 -
 include/linux/skmsg.h     |  371 +++++++
 include/net/tcp.h         |   27 +
 kernel/bpf/Makefile       |    5 -
 kernel/bpf/core.c         |    2 -
 kernel/bpf/sockmap.c      | 2610 ---------------------------------------------
 kernel/bpf/syscall.c      |    6 +-
 net/Kconfig               |   11 +
 net/core/Makefile         |    2 +
 net/core/filter.c         |  270 ++---
 net/core/skmsg.c          |  763 +++++++++++++
 net/core/sock_map.c       | 1002 +++++++++++++++++
 net/ipv4/Makefile         |    1 +
 net/ipv4/tcp_bpf.c        |  655 ++++++++++++
 net/strparser/Kconfig     |    4 +-
 17 files changed, 2925 insertions(+), 2860 deletions(-)
 create mode 100644 include/linux/skmsg.h
 delete mode 100644 kernel/bpf/sockmap.c
 create mode 100644 net/core/skmsg.c
 create mode 100644 net/core/sock_map.c
 create mode 100644 net/ipv4/tcp_bpf.c

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9b558713447f..e60fff48288b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -737,33 +737,18 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
 }
 #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
 
-#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET)
-struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
-struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key);
-int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
-int sockmap_get_from_fd(const union bpf_attr *attr, int type,
-			struct bpf_prog *prog);
+#if defined(CONFIG_BPF_STREAM_PARSER)
+int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, u32 which);
+int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
 #else
-static inline struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
-{
-	return NULL;
-}
-
-static inline struct sock  *__sock_hash_lookup_elem(struct bpf_map *map,
-						    void *key)
-{
-	return NULL;
-}
-
-static inline int sock_map_prog(struct bpf_map *map,
-				struct bpf_prog *prog,
-				u32 type)
+static inline int sock_map_prog_update(struct bpf_map *map,
+				       struct bpf_prog *prog, u32 which)
 {
 	return -EOPNOTSUPP;
 }
 
-static inline int sockmap_get_from_fd(const union bpf_attr *attr, int type,
-				      struct bpf_prog *prog)
+static inline int sock_map_get_from_fd(const union bpf_attr *attr,
+				       struct bpf_prog *prog)
 {
 	return -EINVAL;
 }
@@ -839,6 +824,10 @@ extern const struct bpf_func_proto bpf_get_stack_proto;
 extern const struct bpf_func_proto bpf_sock_map_update_proto;
 extern const struct bpf_func_proto bpf_sock_hash_update_proto;
 extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto;
+extern const struct bpf_func_proto bpf_msg_redirect_hash_proto;
+extern const struct bpf_func_proto bpf_msg_redirect_map_proto;
+extern const struct bpf_func_proto bpf_sk_redirect_hash_proto;
+extern const struct bpf_func_proto bpf_sk_redirect_map_proto;
 
 extern const struct bpf_func_proto bpf_get_local_storage_proto;
 
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 5432f4c9f50e..fa48343a5ea1 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -57,7 +57,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
 #ifdef CONFIG_NET
 BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
-#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_INET)
+#if defined(CONFIG_BPF_STREAM_PARSER)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops)
 #endif
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 6791a0ac0139..5771874bc01e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -520,24 +520,6 @@ struct bpf_skb_data_end {
 	void *data_end;
 };
 
-struct sk_msg_buff {
-	void *data;
-	void *data_end;
-	__u32 apply_bytes;
-	__u32 cork_bytes;
-	int sg_copybreak;
-	int sg_start;
-	int sg_curr;
-	int sg_end;
-	struct scatterlist sg_data[MAX_SKB_FRAGS];
-	bool sg_copy[MAX_SKB_FRAGS];
-	__u32 flags;
-	struct sock *sk_redir;
-	struct sock *sk;
-	struct sk_buff *skb;
-	struct list_head list;
-};
-
 struct bpf_redirect_info {
 	u32 ifindex;
 	u32 flags;
@@ -833,9 +815,6 @@ void xdp_do_flush_map(void);
 
 void bpf_warn_invalid_xdp_action(u32 act);
 
-struct sock *do_sk_redirect_map(struct sk_buff *skb);
-struct sock *do_msg_redirect_map(struct sk_msg_buff *md);
-
 #ifdef CONFIG_INET
 struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
 				  struct bpf_prog *prog, struct sk_buff *skb,
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
new file mode 100644
index 000000000000..95678103c4a0
--- /dev/null
+++ b/include/linux/skmsg.h
@@ -0,0 +1,371 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
+
+#ifndef _LINUX_SKMSG_H
+#define _LINUX_SKMSG_H
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/scatterlist.h>
+#include <linux/skbuff.h>
+
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/strparser.h>
+
+#define MAX_MSG_FRAGS			MAX_SKB_FRAGS
+
+enum __sk_action {
+	__SK_DROP = 0,
+	__SK_PASS,
+	__SK_REDIRECT,
+	__SK_NONE,
+};
+
+struct sk_msg_sg {
+	u32				start;
+	u32				curr;
+	u32				end;
+	u32				size;
+	u32				copybreak;
+	bool				copy[MAX_MSG_FRAGS];
+	struct scatterlist		data[MAX_MSG_FRAGS];
+};
+
+struct sk_msg {
+	struct sk_msg_sg		sg;
+	void				*data;
+	void				*data_end;
+	u32				apply_bytes;
+	u32				cork_bytes;
+	u32				flags;
+	struct sk_buff			*skb;
+	struct sock			*sk_redir;
+	struct sock			*sk;
+	struct list_head		list;
+};
+
+struct sk_psock_progs {
+	struct bpf_prog			*msg_parser;
+	struct bpf_prog			*skb_parser;
+	struct bpf_prog			*skb_verdict;
+};
+
+enum sk_psock_state_bits {
+	SK_PSOCK_TX_ENABLED,
+};
+
+struct sk_psock_link {
+	struct list_head		list;
+	struct bpf_map			*map;
+	void				*link_raw;
+};
+
+struct sk_psock_parser {
+	struct strparser		strp;
+	bool				enabled;
+	void (*saved_data_ready)(struct sock *sk);
+};
+
+struct sk_psock_work_state {
+	struct sk_buff			*skb;
+	u32				len;
+	u32				off;
+};
+
+struct sk_psock {
+	struct sock			*sk;
+	struct sock			*sk_redir;
+	u32				apply_bytes;
+	u32				cork_bytes;
+	u32				eval;
+	struct sk_msg			*cork;
+	struct sk_psock_progs		progs;
+	struct sk_psock_parser		parser;
+	struct sk_buff_head		ingress_skb;
+	struct list_head		ingress_msg;
+	unsigned long			state;
+	struct list_head		link;
+	spinlock_t			link_lock;
+	refcount_t			refcnt;
+	void (*saved_unhash)(struct sock *sk);
+	void (*saved_close)(struct sock *sk, long timeout);
+	void (*saved_write_space)(struct sock *sk);
+	struct proto			*sk_proto;
+	struct sk_psock_work_state	work_state;
+	struct work_struct		work;
+	union {
+		struct rcu_head		rcu;
+		struct work_struct	gc;
+	};
+};
+
+int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
+		 int elem_first_coalesce);
+void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len);
+int sk_msg_free(struct sock *sk, struct sk_msg *msg);
+int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg);
+void sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes);
+void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg,
+				  u32 bytes);
+
+void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes);
+
+int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
+			      struct sk_msg *msg, u32 bytes);
+int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
+			     struct sk_msg *msg, u32 bytes);
+
+static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
+{
+	WARN_ON(i == msg->sg.end && bytes);
+}
+
+static inline void sk_msg_apply_bytes(struct sk_psock *psock, u32 bytes)
+{
+	if (psock->apply_bytes) {
+		if (psock->apply_bytes < bytes)
+			psock->apply_bytes = 0;
+		else
+			psock->apply_bytes -= bytes;
+	}
+}
+
+#define sk_msg_iter_var_prev(var)			\
+	do {						\
+		if (var == 0)				\
+			var = MAX_MSG_FRAGS - 1;	\
+		else					\
+			var--;				\
+	} while (0)
+
+#define sk_msg_iter_var_next(var)			\
+	do {						\
+		var++;					\
+		if (var == MAX_MSG_FRAGS)		\
+			var = 0;			\
+	} while (0)
+
+#define sk_msg_iter_prev(msg, which)			\
+	sk_msg_iter_var_prev(msg->sg.which)
+
+#define sk_msg_iter_next(msg, which)			\
+	sk_msg_iter_var_next(msg->sg.which)
+
+static inline void sk_msg_clear_meta(struct sk_msg *msg)
+{
+	memset(&msg->sg, 0, offsetofend(struct sk_msg_sg, copy));
+}
+
+static inline void sk_msg_init(struct sk_msg *msg)
+{
+	memset(msg, 0, sizeof(*msg));
+	sg_init_marker(msg->sg.data, ARRAY_SIZE(msg->sg.data));
+}
+
+static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src,
+			       int which, u32 size)
+{
+	dst->sg.data[which] = src->sg.data[which];
+	dst->sg.data[which].length  = size;
+	src->sg.data[which].length -= size;
+	src->sg.data[which].offset += size;
+}
+
+static inline u32 sk_msg_elem_used(const struct sk_msg *msg)
+{
+	return msg->sg.end >= msg->sg.start ?
+		msg->sg.end - msg->sg.start :
+		msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start);
+}
+
+static inline bool sk_msg_full(const struct sk_msg *msg)
+{
+	return (msg->sg.end == msg->sg.start) && msg->sg.size;
+}
+
+static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which)
+{
+	return &msg->sg.data[which];
+}
+
+static inline struct page *sk_msg_page(struct sk_msg *msg, int which)
+{
+	return sg_page(sk_msg_elem(msg, which));
+}
+
+static inline bool sk_msg_to_ingress(const struct sk_msg *msg)
+{
+	return msg->flags & BPF_F_INGRESS;
+}
+
+static inline void sk_msg_compute_data_pointers(struct sk_msg *msg)
+{
+	struct scatterlist *sge = sk_msg_elem(msg, msg->sg.start);
+
+	if (msg->sg.copy[msg->sg.start]) {
+		msg->data = NULL;
+		msg->data_end = NULL;
+	} else {
+		msg->data = sg_virt(sge);
+		msg->data_end = msg->data + sge->length;
+	}
+}
+
+static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page,
+				   u32 len, u32 offset)
+{
+	struct scatterlist *sge;
+
+	get_page(page);
+	sge = sk_msg_elem(msg, msg->sg.end);
+	sg_set_page(sge, page, len, offset);
+	sg_unmark_end(sge);
+
+	msg->sg.copy[msg->sg.end] = true;
+	msg->sg.size += len;
+	sk_msg_iter_next(msg, end);
+}
+
+static inline struct sk_psock *sk_psock(const struct sock *sk)
+{
+	return rcu_dereference_sk_user_data(sk);
+}
+
+static inline bool sk_has_psock(struct sock *sk)
+{
+	return sk_psock(sk) != NULL && sk->sk_prot->recvmsg == tcp_bpf_recvmsg;
+}
+
+static inline void sk_psock_queue_msg(struct sk_psock *psock,
+				      struct sk_msg *msg)
+{
+	list_add_tail(&msg->list, &psock->ingress_msg);
+}
+
+static inline void sk_psock_report_error(struct sk_psock *psock, int err)
+{
+	struct sock *sk = psock->sk;
+
+	sk->sk_err = err;
+	sk->sk_error_report(sk);
+}
+
+struct sk_psock *sk_psock_init(struct sock *sk, int node);
+
+int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
+void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
+void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
+
+int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
+			 struct sk_msg *msg);
+
+static inline struct sk_psock_link *sk_psock_init_link(void)
+{
+	return kzalloc(sizeof(struct sk_psock_link),
+		       GFP_ATOMIC | __GFP_NOWARN);
+}
+
+static inline void sk_psock_free_link(struct sk_psock_link *link)
+{
+	kfree(link);
+}
+
+struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);
+#if defined(CONFIG_BPF_STREAM_PARSER)
+void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link);
+#else
+static inline void sk_psock_unlink(struct sock *sk,
+				   struct sk_psock_link *link)
+{
+}
+#endif
+
+void __sk_psock_purge_ingress_msg(struct sk_psock *psock);
+
+static inline void sk_psock_cork_free(struct sk_psock *psock)
+{
+	if (psock->cork) {
+		sk_msg_free(psock->sk, psock->cork);
+		kfree(psock->cork);
+		psock->cork = NULL;
+	}
+}
+
+static inline void sk_psock_update_proto(struct sock *sk,
+					 struct sk_psock *psock,
+					 struct proto *ops)
+{
+	psock->saved_unhash = sk->sk_prot->unhash;
+	psock->saved_close = sk->sk_prot->close;
+	psock->saved_write_space = sk->sk_write_space;
+
+	psock->sk_proto = sk->sk_prot;
+	sk->sk_prot = ops;
+}
+
+static inline void sk_psock_restore_proto(struct sock *sk,
+					  struct sk_psock *psock)
+{
+	if (psock->sk_proto) {
+		sk->sk_prot = psock->sk_proto;
+		psock->sk_proto = NULL;
+	}
+}
+
+static inline void sk_psock_set_state(struct sk_psock *psock,
+				      enum sk_psock_state_bits bit)
+{
+	set_bit(bit, &psock->state);
+}
+
+static inline void sk_psock_clear_state(struct sk_psock *psock,
+					enum sk_psock_state_bits bit)
+{
+	clear_bit(bit, &psock->state);
+}
+
+static inline bool sk_psock_test_state(const struct sk_psock *psock,
+				       enum sk_psock_state_bits bit)
+{
+	return test_bit(bit, &psock->state);
+}
+
+static inline struct sk_psock *sk_psock_get(struct sock *sk)
+{
+	struct sk_psock *psock;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (psock && !refcount_inc_not_zero(&psock->refcnt))
+		psock = NULL;
+	rcu_read_unlock();
+	return psock;
+}
+
+void sk_psock_stop(struct sock *sk, struct sk_psock *psock);
+void sk_psock_destroy(struct rcu_head *rcu);
+void sk_psock_drop(struct sock *sk, struct sk_psock *psock);
+
+static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
+{
+	if (refcount_dec_and_test(&psock->refcnt))
+		sk_psock_drop(sk, psock);
+}
+
+static inline void psock_set_prog(struct bpf_prog **pprog,
+				  struct bpf_prog *prog)
+{
+	prog = xchg(pprog, prog);
+	if (prog)
+		bpf_prog_put(prog);
+}
+
+static inline void psock_progs_drop(struct sk_psock_progs *progs)
+{
+	psock_set_prog(&progs->msg_parser, NULL);
+	psock_set_prog(&progs->skb_parser, NULL);
+	psock_set_prog(&progs->skb_verdict, NULL);
+}
+
+#endif /* _LINUX_SKMSG_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8f5cef67fd35..3600ae0f25c3 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -858,6 +858,21 @@ static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
 	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
 }
 
+static inline bool tcp_skb_bpf_ingress(const struct sk_buff *skb)
+{
+	return TCP_SKB_CB(skb)->bpf.flags & BPF_F_INGRESS;
+}
+
+static inline struct sock *tcp_skb_bpf_redirect_fetch(struct sk_buff *skb)
+{
+	return TCP_SKB_CB(skb)->bpf.sk_redir;
+}
+
+static inline void tcp_skb_bpf_redirect_clear(struct sk_buff *skb)
+{
+	TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
+}
+
 #if IS_ENABLED(CONFIG_IPV6)
 /* This is the variant of inet6_iif() that must be used by TCP,
  * as TCP moves IP6CB into a different location in skb->cb[]
@@ -2064,6 +2079,18 @@ void tcp_cleanup_ulp(struct sock *sk);
 	__MODULE_INFO(alias, alias_userspace, name);		\
 	__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
 
+struct sk_msg;
+struct sk_psock;
+
+int tcp_bpf_init(struct sock *sk);
+void tcp_bpf_reinit(struct sock *sk);
+int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
+			  int flags);
+int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+		    int nonblock, int flags, int *addr_len);
+int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
+		      struct msghdr *msg, int len);
+
 /* Call BPF_SOCK_OPS program that returns an int. If the return value
  * is < 0, then the BPF op failed (for example if the loaded BPF
  * program does not support the chosen operation or there is no BPF
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 0488b8258321..ff8262626b8f 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -13,11 +13,6 @@ ifeq ($(CONFIG_XDP_SOCKETS),y)
 obj-$(CONFIG_BPF_SYSCALL) += xskmap.o
 endif
 obj-$(CONFIG_BPF_SYSCALL) += offload.o
-ifeq ($(CONFIG_STREAM_PARSER),y)
-ifeq ($(CONFIG_INET),y)
-obj-$(CONFIG_BPF_SYSCALL) += sockmap.o
-endif
-endif
 endif
 ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 3f5bf1af0826..defcf4df6d91 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1792,8 +1792,6 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
 const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
 const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
 const struct bpf_func_proto bpf_get_current_comm_proto __weak;
-const struct bpf_func_proto bpf_sock_map_update_proto __weak;
-const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
 const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
 const struct bpf_func_proto bpf_get_local_storage_proto __weak;
 
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
deleted file mode 100644
index de6f7a65c72b..000000000000
--- a/kernel/bpf/sockmap.c
+++ /dev/null
@@ -1,2610 +0,0 @@
-/* Copyright (c) 2017 Covalent IO, Inc. http://covalent.io
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-
-/* A BPF sock_map is used to store sock objects. This is primarly used
- * for doing socket redirect with BPF helper routines.
- *
- * A sock map may have BPF programs attached to it, currently a program
- * used to parse packets and a program to provide a verdict and redirect
- * decision on the packet are supported. Any programs attached to a sock
- * map are inherited by sock objects when they are added to the map. If
- * no BPF programs are attached the sock object may only be used for sock
- * redirect.
- *
- * A sock object may be in multiple maps, but can only inherit a single
- * parse or verdict program. If adding a sock object to a map would result
- * in having multiple parsing programs the update will return an EBUSY error.
- *
- * For reference this program is similar to devmap used in XDP context
- * reviewing these together may be useful. For an example please review
- * ./samples/bpf/sockmap/.
- */
-#include <linux/bpf.h>
-#include <net/sock.h>
-#include <linux/filter.h>
-#include <linux/errno.h>
-#include <linux/file.h>
-#include <linux/kernel.h>
-#include <linux/net.h>
-#include <linux/skbuff.h>
-#include <linux/workqueue.h>
-#include <linux/list.h>
-#include <linux/mm.h>
-#include <net/strparser.h>
-#include <net/tcp.h>
-#include <linux/ptr_ring.h>
-#include <net/inet_common.h>
-#include <linux/sched/signal.h>
-
-#define SOCK_CREATE_FLAG_MASK \
-	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
-
-struct bpf_sock_progs {
-	struct bpf_prog *bpf_tx_msg;
-	struct bpf_prog *bpf_parse;
-	struct bpf_prog *bpf_verdict;
-};
-
-struct bpf_stab {
-	struct bpf_map map;
-	struct sock **sock_map;
-	struct bpf_sock_progs progs;
-	raw_spinlock_t lock;
-};
-
-struct bucket {
-	struct hlist_head head;
-	raw_spinlock_t lock;
-};
-
-struct bpf_htab {
-	struct bpf_map map;
-	struct bucket *buckets;
-	atomic_t count;
-	u32 n_buckets;
-	u32 elem_size;
-	struct bpf_sock_progs progs;
-	struct rcu_head rcu;
-};
-
-struct htab_elem {
-	struct rcu_head rcu;
-	struct hlist_node hash_node;
-	u32 hash;
-	struct sock *sk;
-	char key[0];
-};
-
-enum smap_psock_state {
-	SMAP_TX_RUNNING,
-};
-
-struct smap_psock_map_entry {
-	struct list_head list;
-	struct bpf_map *map;
-	struct sock **entry;
-	struct htab_elem __rcu *hash_link;
-};
-
-struct smap_psock {
-	struct rcu_head	rcu;
-	refcount_t refcnt;
-
-	/* datapath variables */
-	struct sk_buff_head rxqueue;
-	bool strp_enabled;
-
-	/* datapath error path cache across tx work invocations */
-	int save_rem;
-	int save_off;
-	struct sk_buff *save_skb;
-
-	/* datapath variables for tx_msg ULP */
-	struct sock *sk_redir;
-	int apply_bytes;
-	int cork_bytes;
-	int sg_size;
-	int eval;
-	struct sk_msg_buff *cork;
-	struct list_head ingress;
-
-	struct strparser strp;
-	struct bpf_prog *bpf_tx_msg;
-	struct bpf_prog *bpf_parse;
-	struct bpf_prog *bpf_verdict;
-	struct list_head maps;
-	spinlock_t maps_lock;
-
-	/* Back reference used when sock callback trigger sockmap operations */
-	struct sock *sock;
-	unsigned long state;
-
-	struct work_struct tx_work;
-	struct work_struct gc_work;
-
-	struct proto *sk_proto;
-	void (*save_unhash)(struct sock *sk);
-	void (*save_close)(struct sock *sk, long timeout);
-	void (*save_data_ready)(struct sock *sk);
-	void (*save_write_space)(struct sock *sk);
-};
-
-static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
-static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
-			   int nonblock, int flags, int *addr_len);
-static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
-static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
-			    int offset, size_t size, int flags);
-static void bpf_tcp_unhash(struct sock *sk);
-static void bpf_tcp_close(struct sock *sk, long timeout);
-
-static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
-{
-	return rcu_dereference_sk_user_data(sk);
-}
-
-static bool bpf_tcp_stream_read(const struct sock *sk)
-{
-	struct smap_psock *psock;
-	bool empty = true;
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock))
-		goto out;
-	empty = list_empty(&psock->ingress);
-out:
-	rcu_read_unlock();
-	return !empty;
-}
-
-enum {
-	SOCKMAP_IPV4,
-	SOCKMAP_IPV6,
-	SOCKMAP_NUM_PROTS,
-};
-
-enum {
-	SOCKMAP_BASE,
-	SOCKMAP_TX,
-	SOCKMAP_NUM_CONFIGS,
-};
-
-static struct proto *saved_tcpv6_prot __read_mostly;
-static DEFINE_SPINLOCK(tcpv6_prot_lock);
-static struct proto bpf_tcp_prots[SOCKMAP_NUM_PROTS][SOCKMAP_NUM_CONFIGS];
-
-static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
-			 struct proto *base)
-{
-	prot[SOCKMAP_BASE]			= *base;
-	prot[SOCKMAP_BASE].unhash		= bpf_tcp_unhash;
-	prot[SOCKMAP_BASE].close		= bpf_tcp_close;
-	prot[SOCKMAP_BASE].recvmsg		= bpf_tcp_recvmsg;
-	prot[SOCKMAP_BASE].stream_memory_read	= bpf_tcp_stream_read;
-
-	prot[SOCKMAP_TX]			= prot[SOCKMAP_BASE];
-	prot[SOCKMAP_TX].sendmsg		= bpf_tcp_sendmsg;
-	prot[SOCKMAP_TX].sendpage		= bpf_tcp_sendpage;
-}
-
-static void update_sk_prot(struct sock *sk, struct smap_psock *psock)
-{
-	int family = sk->sk_family == AF_INET6 ? SOCKMAP_IPV6 : SOCKMAP_IPV4;
-	int conf = psock->bpf_tx_msg ? SOCKMAP_TX : SOCKMAP_BASE;
-
-	sk->sk_prot = &bpf_tcp_prots[family][conf];
-}
-
-static int bpf_tcp_init(struct sock *sk)
-{
-	struct smap_psock *psock;
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock)) {
-		rcu_read_unlock();
-		return -EINVAL;
-	}
-
-	if (unlikely(psock->sk_proto)) {
-		rcu_read_unlock();
-		return -EBUSY;
-	}
-
-	psock->save_unhash = sk->sk_prot->unhash;
-	psock->save_close = sk->sk_prot->close;
-	psock->sk_proto = sk->sk_prot;
-
-	/* Build IPv6 sockmap whenever the address of tcpv6_prot changes */
-	if (sk->sk_family == AF_INET6 &&
-	    unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
-		spin_lock_bh(&tcpv6_prot_lock);
-		if (likely(sk->sk_prot != saved_tcpv6_prot)) {
-			build_protos(bpf_tcp_prots[SOCKMAP_IPV6], sk->sk_prot);
-			smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
-		}
-		spin_unlock_bh(&tcpv6_prot_lock);
-	}
-	update_sk_prot(sk, psock);
-	rcu_read_unlock();
-	return 0;
-}
-
-static int __init bpf_sock_init(void)
-{
-	build_protos(bpf_tcp_prots[SOCKMAP_IPV4], &tcp_prot);
-	return 0;
-}
-core_initcall(bpf_sock_init);
-
-static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
-static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge);
-
-static void bpf_tcp_release(struct sock *sk)
-{
-	struct smap_psock *psock;
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock))
-		goto out;
-
-	if (psock->cork) {
-		free_start_sg(psock->sock, psock->cork, true);
-		kfree(psock->cork);
-		psock->cork = NULL;
-	}
-
-	if (psock->sk_proto) {
-		sk->sk_prot = psock->sk_proto;
-		psock->sk_proto = NULL;
-	}
-out:
-	rcu_read_unlock();
-}
-
-static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
-					 u32 hash, void *key, u32 key_size)
-{
-	struct htab_elem *l;
-
-	hlist_for_each_entry_rcu(l, head, hash_node) {
-		if (l->hash == hash && !memcmp(&l->key, key, key_size))
-			return l;
-	}
-
-	return NULL;
-}
-
-static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
-{
-	return &htab->buckets[hash & (htab->n_buckets - 1)];
-}
-
-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
-{
-	return &__select_bucket(htab, hash)->head;
-}
-
-static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
-{
-	atomic_dec(&htab->count);
-	kfree_rcu(l, rcu);
-}
-
-static struct smap_psock_map_entry *psock_map_pop(struct sock *sk,
-						  struct smap_psock *psock)
-{
-	struct smap_psock_map_entry *e;
-
-	spin_lock_bh(&psock->maps_lock);
-	e = list_first_entry_or_null(&psock->maps,
-				     struct smap_psock_map_entry,
-				     list);
-	if (e)
-		list_del(&e->list);
-	spin_unlock_bh(&psock->maps_lock);
-	return e;
-}
-
-static void bpf_tcp_remove(struct sock *sk, struct smap_psock *psock)
-{
-	struct smap_psock_map_entry *e;
-	struct sk_msg_buff *md, *mtmp;
-	struct sock *osk;
-
-	if (psock->cork) {
-		free_start_sg(psock->sock, psock->cork, true);
-		kfree(psock->cork);
-		psock->cork = NULL;
-	}
-
-	list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
-		list_del(&md->list);
-		free_start_sg(psock->sock, md, true);
-		kfree(md);
-	}
-
-	e = psock_map_pop(sk, psock);
-	while (e) {
-		if (e->entry) {
-			struct bpf_stab *stab = container_of(e->map, struct bpf_stab, map);
-
-			raw_spin_lock_bh(&stab->lock);
-			osk = *e->entry;
-			if (osk == sk) {
-				*e->entry = NULL;
-				smap_release_sock(psock, sk);
-			}
-			raw_spin_unlock_bh(&stab->lock);
-		} else {
-			struct htab_elem *link = rcu_dereference(e->hash_link);
-			struct bpf_htab *htab = container_of(e->map, struct bpf_htab, map);
-			struct hlist_head *head;
-			struct htab_elem *l;
-			struct bucket *b;
-
-			b = __select_bucket(htab, link->hash);
-			head = &b->head;
-			raw_spin_lock_bh(&b->lock);
-			l = lookup_elem_raw(head,
-					    link->hash, link->key,
-					    htab->map.key_size);
-			/* If another thread deleted this object skip deletion.
-			 * The refcnt on psock may or may not be zero.
-			 */
-			if (l && l == link) {
-				hlist_del_rcu(&link->hash_node);
-				smap_release_sock(psock, link->sk);
-				free_htab_elem(htab, link);
-			}
-			raw_spin_unlock_bh(&b->lock);
-		}
-		kfree(e);
-		e = psock_map_pop(sk, psock);
-	}
-}
-
-static void bpf_tcp_unhash(struct sock *sk)
-{
-	void (*unhash_fun)(struct sock *sk);
-	struct smap_psock *psock;
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock)) {
-		rcu_read_unlock();
-		if (sk->sk_prot->unhash)
-			sk->sk_prot->unhash(sk);
-		return;
-	}
-	unhash_fun = psock->save_unhash;
-	bpf_tcp_remove(sk, psock);
-	rcu_read_unlock();
-	unhash_fun(sk);
-}
-
-static void bpf_tcp_close(struct sock *sk, long timeout)
-{
-	void (*close_fun)(struct sock *sk, long timeout);
-	struct smap_psock *psock;
-
-	lock_sock(sk);
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock)) {
-		rcu_read_unlock();
-		release_sock(sk);
-		return sk->sk_prot->close(sk, timeout);
-	}
-	close_fun = psock->save_close;
-	bpf_tcp_remove(sk, psock);
-	rcu_read_unlock();
-	release_sock(sk);
-	close_fun(sk, timeout);
-}
-
-enum __sk_action {
-	__SK_DROP = 0,
-	__SK_PASS,
-	__SK_REDIRECT,
-	__SK_NONE,
-};
-
-static int memcopy_from_iter(struct sock *sk,
-			     struct sk_msg_buff *md,
-			     struct iov_iter *from, int bytes)
-{
-	struct scatterlist *sg = md->sg_data;
-	int i = md->sg_curr, rc = -ENOSPC;
-
-	do {
-		int copy;
-		char *to;
-
-		if (md->sg_copybreak >= sg[i].length) {
-			md->sg_copybreak = 0;
-
-			if (++i == MAX_SKB_FRAGS)
-				i = 0;
-
-			if (i == md->sg_end)
-				break;
-		}
-
-		copy = sg[i].length - md->sg_copybreak;
-		to = sg_virt(&sg[i]) + md->sg_copybreak;
-		md->sg_copybreak += copy;
-
-		if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY)
-			rc = copy_from_iter_nocache(to, copy, from);
-		else
-			rc = copy_from_iter(to, copy, from);
-
-		if (rc != copy) {
-			rc = -EFAULT;
-			goto out;
-		}
-
-		bytes -= copy;
-		if (!bytes)
-			break;
-
-		md->sg_copybreak = 0;
-		if (++i == MAX_SKB_FRAGS)
-			i = 0;
-	} while (i != md->sg_end);
-out:
-	md->sg_curr = i;
-	return rc;
-}
-
-static int bpf_tcp_push(struct sock *sk, int apply_bytes,
-			struct sk_msg_buff *md,
-			int flags, bool uncharge)
-{
-	bool apply = apply_bytes;
-	struct scatterlist *sg;
-	int offset, ret = 0;
-	struct page *p;
-	size_t size;
-
-	while (1) {
-		sg = md->sg_data + md->sg_start;
-		size = (apply && apply_bytes < sg->length) ?
-			apply_bytes : sg->length;
-		offset = sg->offset;
-
-		tcp_rate_check_app_limited(sk);
-		p = sg_page(sg);
-retry:
-		ret = do_tcp_sendpages(sk, p, offset, size, flags);
-		if (ret != size) {
-			if (ret > 0) {
-				if (apply)
-					apply_bytes -= ret;
-
-				sg->offset += ret;
-				sg->length -= ret;
-				size -= ret;
-				offset += ret;
-				if (uncharge)
-					sk_mem_uncharge(sk, ret);
-				goto retry;
-			}
-
-			return ret;
-		}
-
-		if (apply)
-			apply_bytes -= ret;
-		sg->offset += ret;
-		sg->length -= ret;
-		if (uncharge)
-			sk_mem_uncharge(sk, ret);
-
-		if (!sg->length) {
-			put_page(p);
-			md->sg_start++;
-			if (md->sg_start == MAX_SKB_FRAGS)
-				md->sg_start = 0;
-			sg_init_table(sg, 1);
-
-			if (md->sg_start == md->sg_end)
-				break;
-		}
-
-		if (apply && !apply_bytes)
-			break;
-	}
-	return 0;
-}
-
-static inline void bpf_compute_data_pointers_sg(struct sk_msg_buff *md)
-{
-	struct scatterlist *sg = md->sg_data + md->sg_start;
-
-	if (md->sg_copy[md->sg_start]) {
-		md->data = md->data_end = 0;
-	} else {
-		md->data = sg_virt(sg);
-		md->data_end = md->data + sg->length;
-	}
-}
-
-static void return_mem_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)
-{
-	struct scatterlist *sg = md->sg_data;
-	int i = md->sg_start;
-
-	do {
-		int uncharge = (bytes < sg[i].length) ? bytes : sg[i].length;
-
-		sk_mem_uncharge(sk, uncharge);
-		bytes -= uncharge;
-		if (!bytes)
-			break;
-		i++;
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
-	} while (i != md->sg_end);
-}
-
-static void free_bytes_sg(struct sock *sk, int bytes,
-			  struct sk_msg_buff *md, bool charge)
-{
-	struct scatterlist *sg = md->sg_data;
-	int i = md->sg_start, free;
-
-	while (bytes && sg[i].length) {
-		free = sg[i].length;
-		if (bytes < free) {
-			sg[i].length -= bytes;
-			sg[i].offset += bytes;
-			if (charge)
-				sk_mem_uncharge(sk, bytes);
-			break;
-		}
-
-		if (charge)
-			sk_mem_uncharge(sk, sg[i].length);
-		put_page(sg_page(&sg[i]));
-		bytes -= sg[i].length;
-		sg[i].length = 0;
-		sg[i].page_link = 0;
-		sg[i].offset = 0;
-		i++;
-
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
-	}
-	md->sg_start = i;
-}
-
-static int free_sg(struct sock *sk, int start,
-		   struct sk_msg_buff *md, bool charge)
-{
-	struct scatterlist *sg = md->sg_data;
-	int i = start, free = 0;
-
-	while (sg[i].length) {
-		free += sg[i].length;
-		if (charge)
-			sk_mem_uncharge(sk, sg[i].length);
-		if (!md->skb)
-			put_page(sg_page(&sg[i]));
-		sg[i].length = 0;
-		sg[i].page_link = 0;
-		sg[i].offset = 0;
-		i++;
-
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
-	}
-	consume_skb(md->skb);
-
-	return free;
-}
-
-static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge)
-{
-	int free = free_sg(sk, md->sg_start, md, charge);
-
-	md->sg_start = md->sg_end;
-	return free;
-}
-
-static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md)
-{
-	return free_sg(sk, md->sg_curr, md, true);
-}
-
-static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md)
-{
-	return ((_rc == SK_PASS) ?
-	       (md->sk_redir ? __SK_REDIRECT : __SK_PASS) :
-	       __SK_DROP);
-}
-
-static unsigned int smap_do_tx_msg(struct sock *sk,
-				   struct smap_psock *psock,
-				   struct sk_msg_buff *md)
-{
-	struct bpf_prog *prog;
-	unsigned int rc, _rc;
-
-	preempt_disable();
-	rcu_read_lock();
-
-	/* If the policy was removed mid-send then default to 'accept' */
-	prog = READ_ONCE(psock->bpf_tx_msg);
-	if (unlikely(!prog)) {
-		_rc = SK_PASS;
-		goto verdict;
-	}
-
-	bpf_compute_data_pointers_sg(md);
-	md->sk = sk;
-	rc = (*prog->bpf_func)(md, prog->insnsi);
-	psock->apply_bytes = md->apply_bytes;
-
-	/* Moving return codes from UAPI namespace into internal namespace */
-	_rc = bpf_map_msg_verdict(rc, md);
-
-	/* The psock has a refcount on the sock but not on the map and because
-	 * we need to drop rcu read lock here its possible the map could be
-	 * removed between here and when we need it to execute the sock
-	 * redirect. So do the map lookup now for future use.
-	 */
-	if (_rc == __SK_REDIRECT) {
-		if (psock->sk_redir)
-			sock_put(psock->sk_redir);
-		psock->sk_redir = do_msg_redirect_map(md);
-		if (!psock->sk_redir) {
-			_rc = __SK_DROP;
-			goto verdict;
-		}
-		sock_hold(psock->sk_redir);
-	}
-verdict:
-	rcu_read_unlock();
-	preempt_enable();
-
-	return _rc;
-}
-
-static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
-			   struct smap_psock *psock,
-			   struct sk_msg_buff *md, int flags)
-{
-	bool apply = apply_bytes;
-	size_t size, copied = 0;
-	struct sk_msg_buff *r;
-	int err = 0, i;
-
-	r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_KERNEL);
-	if (unlikely(!r))
-		return -ENOMEM;
-
-	lock_sock(sk);
-	r->sg_start = md->sg_start;
-	i = md->sg_start;
-
-	do {
-		size = (apply && apply_bytes < md->sg_data[i].length) ?
-			apply_bytes : md->sg_data[i].length;
-
-		if (!sk_wmem_schedule(sk, size)) {
-			if (!copied)
-				err = -ENOMEM;
-			break;
-		}
-
-		sk_mem_charge(sk, size);
-		r->sg_data[i] = md->sg_data[i];
-		r->sg_data[i].length = size;
-		md->sg_data[i].length -= size;
-		md->sg_data[i].offset += size;
-		copied += size;
-
-		if (md->sg_data[i].length) {
-			get_page(sg_page(&r->sg_data[i]));
-			r->sg_end = (i + 1) == MAX_SKB_FRAGS ? 0 : i + 1;
-		} else {
-			i++;
-			if (i == MAX_SKB_FRAGS)
-				i = 0;
-			r->sg_end = i;
-		}
-
-		if (apply) {
-			apply_bytes -= size;
-			if (!apply_bytes)
-				break;
-		}
-	} while (i != md->sg_end);
-
-	md->sg_start = i;
-
-	if (!err) {
-		list_add_tail(&r->list, &psock->ingress);
-		sk->sk_data_ready(sk);
-	} else {
-		free_start_sg(sk, r, true);
-		kfree(r);
-	}
-
-	release_sock(sk);
-	return err;
-}
-
-static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
-				       struct sk_msg_buff *md,
-				       int flags)
-{
-	bool ingress = !!(md->flags & BPF_F_INGRESS);
-	struct smap_psock *psock;
-	int err = 0;
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock))
-		goto out_rcu;
-
-	if (!refcount_inc_not_zero(&psock->refcnt))
-		goto out_rcu;
-
-	rcu_read_unlock();
-
-	if (ingress) {
-		err = bpf_tcp_ingress(sk, send, psock, md, flags);
-	} else {
-		lock_sock(sk);
-		err = bpf_tcp_push(sk, send, md, flags, false);
-		release_sock(sk);
-	}
-	smap_release_sock(psock, sk);
-	return err;
-out_rcu:
-	rcu_read_unlock();
-	return 0;
-}
-
-static inline void bpf_md_init(struct smap_psock *psock)
-{
-	if (!psock->apply_bytes) {
-		psock->eval =  __SK_NONE;
-		if (psock->sk_redir) {
-			sock_put(psock->sk_redir);
-			psock->sk_redir = NULL;
-		}
-	}
-}
-
-static void apply_bytes_dec(struct smap_psock *psock, int i)
-{
-	if (psock->apply_bytes) {
-		if (psock->apply_bytes < i)
-			psock->apply_bytes = 0;
-		else
-			psock->apply_bytes -= i;
-	}
-}
-
-static int bpf_exec_tx_verdict(struct smap_psock *psock,
-			       struct sk_msg_buff *m,
-			       struct sock *sk,
-			       int *copied, int flags)
-{
-	bool cork = false, enospc = (m->sg_start == m->sg_end);
-	struct sock *redir;
-	int err = 0;
-	int send;
-
-more_data:
-	if (psock->eval == __SK_NONE)
-		psock->eval = smap_do_tx_msg(sk, psock, m);
-
-	if (m->cork_bytes &&
-	    m->cork_bytes > psock->sg_size && !enospc) {
-		psock->cork_bytes = m->cork_bytes - psock->sg_size;
-		if (!psock->cork) {
-			psock->cork = kcalloc(1,
-					sizeof(struct sk_msg_buff),
-					GFP_ATOMIC | __GFP_NOWARN);
-
-			if (!psock->cork) {
-				err = -ENOMEM;
-				goto out_err;
-			}
-		}
-		memcpy(psock->cork, m, sizeof(*m));
-		goto out_err;
-	}
-
-	send = psock->sg_size;
-	if (psock->apply_bytes && psock->apply_bytes < send)
-		send = psock->apply_bytes;
-
-	switch (psock->eval) {
-	case __SK_PASS:
-		err = bpf_tcp_push(sk, send, m, flags, true);
-		if (unlikely(err)) {
-			*copied -= free_start_sg(sk, m, true);
-			break;
-		}
-
-		apply_bytes_dec(psock, send);
-		psock->sg_size -= send;
-		break;
-	case __SK_REDIRECT:
-		redir = psock->sk_redir;
-		apply_bytes_dec(psock, send);
-
-		if (psock->cork) {
-			cork = true;
-			psock->cork = NULL;
-		}
-
-		return_mem_sg(sk, send, m);
-		release_sock(sk);
-
-		err = bpf_tcp_sendmsg_do_redirect(redir, send, m, flags);
-		lock_sock(sk);
-
-		if (unlikely(err < 0)) {
-			int free = free_start_sg(sk, m, false);
-
-			psock->sg_size = 0;
-			if (!cork)
-				*copied -= free;
-		} else {
-			psock->sg_size -= send;
-		}
-
-		if (cork) {
-			free_start_sg(sk, m, true);
-			psock->sg_size = 0;
-			kfree(m);
-			m = NULL;
-			err = 0;
-		}
-		break;
-	case __SK_DROP:
-	default:
-		free_bytes_sg(sk, send, m, true);
-		apply_bytes_dec(psock, send);
-		*copied -= send;
-		psock->sg_size -= send;
-		err = -EACCES;
-		break;
-	}
-
-	if (likely(!err)) {
-		bpf_md_init(psock);
-		if (m &&
-		    m->sg_data[m->sg_start].page_link &&
-		    m->sg_data[m->sg_start].length)
-			goto more_data;
-	}
-
-out_err:
-	return err;
-}
-
-static int bpf_wait_data(struct sock *sk,
-			 struct smap_psock *psk, int flags,
-			 long timeo, int *err)
-{
-	int rc;
-
-	DEFINE_WAIT_FUNC(wait, woken_wake_function);
-
-	add_wait_queue(sk_sleep(sk), &wait);
-	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-	rc = sk_wait_event(sk, &timeo,
-			   !list_empty(&psk->ingress) ||
-			   !skb_queue_empty(&sk->sk_receive_queue),
-			   &wait);
-	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-	remove_wait_queue(sk_sleep(sk), &wait);
-
-	return rc;
-}
-
-static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
-			   int nonblock, int flags, int *addr_len)
-{
-	struct iov_iter *iter = &msg->msg_iter;
-	struct smap_psock *psock;
-	int copied = 0;
-
-	if (unlikely(flags & MSG_ERRQUEUE))
-		return inet_recv_error(sk, msg, len, addr_len);
-	if (!skb_queue_empty(&sk->sk_receive_queue))
-		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock))
-		goto out;
-
-	if (unlikely(!refcount_inc_not_zero(&psock->refcnt)))
-		goto out;
-	rcu_read_unlock();
-
-	lock_sock(sk);
-bytes_ready:
-	while (copied != len) {
-		struct scatterlist *sg;
-		struct sk_msg_buff *md;
-		int i;
-
-		md = list_first_entry_or_null(&psock->ingress,
-					      struct sk_msg_buff, list);
-		if (unlikely(!md))
-			break;
-		i = md->sg_start;
-		do {
-			struct page *page;
-			int n, copy;
-
-			sg = &md->sg_data[i];
-			copy = sg->length;
-			page = sg_page(sg);
-
-			if (copied + copy > len)
-				copy = len - copied;
-
-			n = copy_page_to_iter(page, sg->offset, copy, iter);
-			if (n != copy) {
-				md->sg_start = i;
-				release_sock(sk);
-				smap_release_sock(psock, sk);
-				return -EFAULT;
-			}
-
-			copied += copy;
-			sg->offset += copy;
-			sg->length -= copy;
-			sk_mem_uncharge(sk, copy);
-
-			if (!sg->length) {
-				i++;
-				if (i == MAX_SKB_FRAGS)
-					i = 0;
-				if (!md->skb)
-					put_page(page);
-			}
-			if (copied == len)
-				break;
-		} while (i != md->sg_end);
-		md->sg_start = i;
-
-		if (!sg->length && md->sg_start == md->sg_end) {
-			list_del(&md->list);
-			consume_skb(md->skb);
-			kfree(md);
-		}
-	}
-
-	if (!copied) {
-		long timeo;
-		int data;
-		int err = 0;
-
-		timeo = sock_rcvtimeo(sk, nonblock);
-		data = bpf_wait_data(sk, psock, flags, timeo, &err);
-
-		if (data) {
-			if (!skb_queue_empty(&sk->sk_receive_queue)) {
-				release_sock(sk);
-				smap_release_sock(psock, sk);
-				copied = tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
-				return copied;
-			}
-			goto bytes_ready;
-		}
-
-		if (err)
-			copied = err;
-	}
-
-	release_sock(sk);
-	smap_release_sock(psock, sk);
-	return copied;
-out:
-	rcu_read_unlock();
-	return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
-}
-
-
-static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
-{
-	int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
-	struct sk_msg_buff md = {0};
-	unsigned int sg_copy = 0;
-	struct smap_psock *psock;
-	int copied = 0, err = 0;
-	struct scatterlist *sg;
-	long timeo;
-
-	/* Its possible a sock event or user removed the psock _but_ the ops
-	 * have not been reprogrammed yet so we get here. In this case fallback
-	 * to tcp_sendmsg. Note this only works because we _only_ ever allow
-	 * a single ULP there is no hierarchy here.
-	 */
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock)) {
-		rcu_read_unlock();
-		return tcp_sendmsg(sk, msg, size);
-	}
-
-	/* Increment the psock refcnt to ensure its not released while sending a
-	 * message. Required because sk lookup and bpf programs are used in
-	 * separate rcu critical sections. Its OK if we lose the map entry
-	 * but we can't lose the sock reference.
-	 */
-	if (!refcount_inc_not_zero(&psock->refcnt)) {
-		rcu_read_unlock();
-		return tcp_sendmsg(sk, msg, size);
-	}
-
-	sg = md.sg_data;
-	sg_init_marker(sg, MAX_SKB_FRAGS);
-	rcu_read_unlock();
-
-	lock_sock(sk);
-	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
-
-	while (msg_data_left(msg)) {
-		struct sk_msg_buff *m = NULL;
-		bool enospc = false;
-		int copy;
-
-		if (sk->sk_err) {
-			err = -sk->sk_err;
-			goto out_err;
-		}
-
-		copy = msg_data_left(msg);
-		if (!sk_stream_memory_free(sk))
-			goto wait_for_sndbuf;
-
-		m = psock->cork_bytes ? psock->cork : &md;
-		m->sg_curr = m->sg_copybreak ? m->sg_curr : m->sg_end;
-		err = sk_alloc_sg(sk, copy, m->sg_data,
-				  m->sg_start, &m->sg_end, &sg_copy,
-				  m->sg_end - 1);
-		if (err) {
-			if (err != -ENOSPC)
-				goto wait_for_memory;
-			enospc = true;
-			copy = sg_copy;
-		}
-
-		err = memcopy_from_iter(sk, m, &msg->msg_iter, copy);
-		if (err < 0) {
-			free_curr_sg(sk, m);
-			goto out_err;
-		}
-
-		psock->sg_size += copy;
-		copied += copy;
-		sg_copy = 0;
-
-		/* When bytes are being corked skip running BPF program and
-		 * applying verdict unless there is no more buffer space. In
-		 * the ENOSPC case simply run BPF prorgram with currently
-		 * accumulated data. We don't have much choice at this point
-		 * we could try extending the page frags or chaining complex
-		 * frags but even in these cases _eventually_ we will hit an
-		 * OOM scenario. More complex recovery schemes may be
-		 * implemented in the future, but BPF programs must handle
-		 * the case where apply_cork requests are not honored. The
-		 * canonical method to verify this is to check data length.
-		 */
-		if (psock->cork_bytes) {
-			if (copy > psock->cork_bytes)
-				psock->cork_bytes = 0;
-			else
-				psock->cork_bytes -= copy;
-
-			if (psock->cork_bytes && !enospc)
-				goto out_cork;
-
-			/* All cork bytes accounted for re-run filter */
-			psock->eval = __SK_NONE;
-			psock->cork_bytes = 0;
-		}
-
-		err = bpf_exec_tx_verdict(psock, m, sk, &copied, flags);
-		if (unlikely(err < 0))
-			goto out_err;
-		continue;
-wait_for_sndbuf:
-		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-wait_for_memory:
-		err = sk_stream_wait_memory(sk, &timeo);
-		if (err) {
-			if (m && m != psock->cork)
-				free_start_sg(sk, m, true);
-			goto out_err;
-		}
-	}
-out_err:
-	if (err < 0)
-		err = sk_stream_error(sk, msg->msg_flags, err);
-out_cork:
-	release_sock(sk);
-	smap_release_sock(psock, sk);
-	return copied ? copied : err;
-}
-
-static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
-			    int offset, size_t size, int flags)
-{
-	struct sk_msg_buff md = {0}, *m = NULL;
-	int err = 0, copied = 0;
-	struct smap_psock *psock;
-	struct scatterlist *sg;
-	bool enospc = false;
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (unlikely(!psock))
-		goto accept;
-
-	if (!refcount_inc_not_zero(&psock->refcnt))
-		goto accept;
-	rcu_read_unlock();
-
-	lock_sock(sk);
-
-	if (psock->cork_bytes) {
-		m = psock->cork;
-		sg = &m->sg_data[m->sg_end];
-	} else {
-		m = &md;
-		sg = m->sg_data;
-		sg_init_marker(sg, MAX_SKB_FRAGS);
-	}
-
-	/* Catch case where ring is full and sendpage is stalled. */
-	if (unlikely(m->sg_end == m->sg_start &&
-	    m->sg_data[m->sg_end].length))
-		goto out_err;
-
-	psock->sg_size += size;
-	sg_set_page(sg, page, size, offset);
-	get_page(page);
-	m->sg_copy[m->sg_end] = true;
-	sk_mem_charge(sk, size);
-	m->sg_end++;
-	copied = size;
-
-	if (m->sg_end == MAX_SKB_FRAGS)
-		m->sg_end = 0;
-
-	if (m->sg_end == m->sg_start)
-		enospc = true;
-
-	if (psock->cork_bytes) {
-		if (size > psock->cork_bytes)
-			psock->cork_bytes = 0;
-		else
-			psock->cork_bytes -= size;
-
-		if (psock->cork_bytes && !enospc)
-			goto out_err;
-
-		/* All cork bytes accounted for re-run filter */
-		psock->eval = __SK_NONE;
-		psock->cork_bytes = 0;
-	}
-
-	err = bpf_exec_tx_verdict(psock, m, sk, &copied, flags);
-out_err:
-	release_sock(sk);
-	smap_release_sock(psock, sk);
-	return copied ? copied : err;
-accept:
-	rcu_read_unlock();
-	return tcp_sendpage(sk, page, offset, size, flags);
-}
-
-static void bpf_tcp_msg_add(struct smap_psock *psock,
-			    struct sock *sk,
-			    struct bpf_prog *tx_msg)
-{
-	struct bpf_prog *orig_tx_msg;
-
-	orig_tx_msg = xchg(&psock->bpf_tx_msg, tx_msg);
-	if (orig_tx_msg)
-		bpf_prog_put(orig_tx_msg);
-}
-
-static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
-{
-	struct bpf_prog *prog = READ_ONCE(psock->bpf_verdict);
-	int rc;
-
-	if (unlikely(!prog))
-		return __SK_DROP;
-
-	skb_orphan(skb);
-	/* We need to ensure that BPF metadata for maps is also cleared
-	 * when we orphan the skb so that we don't have the possibility
-	 * to reference a stale map.
-	 */
-	TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
-	skb->sk = psock->sock;
-	bpf_compute_data_end_sk_skb(skb);
-	preempt_disable();
-	rc = (*prog->bpf_func)(skb, prog->insnsi);
-	preempt_enable();
-	skb->sk = NULL;
-
-	/* Moving return codes from UAPI namespace into internal namespace */
-	return rc == SK_PASS ?
-		(TCP_SKB_CB(skb)->bpf.sk_redir ? __SK_REDIRECT : __SK_PASS) :
-		__SK_DROP;
-}
-
-static int smap_do_ingress(struct smap_psock *psock, struct sk_buff *skb)
-{
-	struct sock *sk = psock->sock;
-	int copied = 0, num_sg;
-	struct sk_msg_buff *r;
-
-	r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_ATOMIC);
-	if (unlikely(!r))
-		return -EAGAIN;
-
-	if (!sk_rmem_schedule(sk, skb, skb->len)) {
-		kfree(r);
-		return -EAGAIN;
-	}
-
-	sg_init_table(r->sg_data, MAX_SKB_FRAGS);
-	num_sg = skb_to_sgvec(skb, r->sg_data, 0, skb->len);
-	if (unlikely(num_sg < 0)) {
-		kfree(r);
-		return num_sg;
-	}
-	sk_mem_charge(sk, skb->len);
-	copied = skb->len;
-	r->sg_start = 0;
-	r->sg_end = num_sg == MAX_SKB_FRAGS ? 0 : num_sg;
-	r->skb = skb;
-	list_add_tail(&r->list, &psock->ingress);
-	sk->sk_data_ready(sk);
-	return copied;
-}
-
-static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
-{
-	struct smap_psock *peer;
-	struct sock *sk;
-	__u32 in;
-	int rc;
-
-	rc = smap_verdict_func(psock, skb);
-	switch (rc) {
-	case __SK_REDIRECT:
-		sk = do_sk_redirect_map(skb);
-		if (!sk) {
-			kfree_skb(skb);
-			break;
-		}
-
-		peer = smap_psock_sk(sk);
-		in = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
-
-		if (unlikely(!peer || sock_flag(sk, SOCK_DEAD) ||
-			     !test_bit(SMAP_TX_RUNNING, &peer->state))) {
-			kfree_skb(skb);
-			break;
-		}
-
-		if (!in && sock_writeable(sk)) {
-			skb_set_owner_w(skb, sk);
-			skb_queue_tail(&peer->rxqueue, skb);
-			schedule_work(&peer->tx_work);
-			break;
-		} else if (in &&
-			   atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
-			skb_queue_tail(&peer->rxqueue, skb);
-			schedule_work(&peer->tx_work);
-			break;
-		}
-	/* Fall through and free skb otherwise */
-	case __SK_DROP:
-	default:
-		kfree_skb(skb);
-	}
-}
-
-static void smap_report_sk_error(struct smap_psock *psock, int err)
-{
-	struct sock *sk = psock->sock;
-
-	sk->sk_err = err;
-	sk->sk_error_report(sk);
-}
-
-static void smap_read_sock_strparser(struct strparser *strp,
-				     struct sk_buff *skb)
-{
-	struct smap_psock *psock;
-
-	rcu_read_lock();
-	psock = container_of(strp, struct smap_psock, strp);
-	smap_do_verdict(psock, skb);
-	rcu_read_unlock();
-}
-
-/* Called with lock held on socket */
-static void smap_data_ready(struct sock *sk)
-{
-	struct smap_psock *psock;
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (likely(psock)) {
-		write_lock_bh(&sk->sk_callback_lock);
-		strp_data_ready(&psock->strp);
-		write_unlock_bh(&sk->sk_callback_lock);
-	}
-	rcu_read_unlock();
-}
-
-static void smap_tx_work(struct work_struct *w)
-{
-	struct smap_psock *psock;
-	struct sk_buff *skb;
-	int rem, off, n;
-
-	psock = container_of(w, struct smap_psock, tx_work);
-
-	/* lock sock to avoid losing sk_socket at some point during loop */
-	lock_sock(psock->sock);
-	if (psock->save_skb) {
-		skb = psock->save_skb;
-		rem = psock->save_rem;
-		off = psock->save_off;
-		psock->save_skb = NULL;
-		goto start;
-	}
-
-	while ((skb = skb_dequeue(&psock->rxqueue))) {
-		__u32 flags;
-
-		rem = skb->len;
-		off = 0;
-start:
-		flags = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
-		do {
-			if (likely(psock->sock->sk_socket)) {
-				if (flags)
-					n = smap_do_ingress(psock, skb);
-				else
-					n = skb_send_sock_locked(psock->sock,
-								 skb, off, rem);
-			} else {
-				n = -EINVAL;
-			}
-
-			if (n <= 0) {
-				if (n == -EAGAIN) {
-					/* Retry when space is available */
-					psock->save_skb = skb;
-					psock->save_rem = rem;
-					psock->save_off = off;
-					goto out;
-				}
-				/* Hard errors break pipe and stop xmit */
-				smap_report_sk_error(psock, n ? -n : EPIPE);
-				clear_bit(SMAP_TX_RUNNING, &psock->state);
-				kfree_skb(skb);
-				goto out;
-			}
-			rem -= n;
-			off += n;
-		} while (rem);
-
-		if (!flags)
-			kfree_skb(skb);
-	}
-out:
-	release_sock(psock->sock);
-}
-
-static void smap_write_space(struct sock *sk)
-{
-	struct smap_psock *psock;
-	void (*write_space)(struct sock *sk);
-
-	rcu_read_lock();
-	psock = smap_psock_sk(sk);
-	if (likely(psock && test_bit(SMAP_TX_RUNNING, &psock->state)))
-		schedule_work(&psock->tx_work);
-	write_space = psock->save_write_space;
-	rcu_read_unlock();
-	write_space(sk);
-}
-
-static void smap_stop_sock(struct smap_psock *psock, struct sock *sk)
-{
-	if (!psock->strp_enabled)
-		return;
-	sk->sk_data_ready = psock->save_data_ready;
-	sk->sk_write_space = psock->save_write_space;
-	psock->save_data_ready = NULL;
-	psock->save_write_space = NULL;
-	strp_stop(&psock->strp);
-	psock->strp_enabled = false;
-}
-
-static void smap_destroy_psock(struct rcu_head *rcu)
-{
-	struct smap_psock *psock = container_of(rcu,
-						  struct smap_psock, rcu);
-
-	/* Now that a grace period has passed there is no longer
-	 * any reference to this sock in the sockmap so we can
-	 * destroy the psock, strparser, and bpf programs. But,
-	 * because we use workqueue sync operations we can not
-	 * do it in rcu context
-	 */
-	schedule_work(&psock->gc_work);
-}
-
-static bool psock_is_smap_sk(struct sock *sk)
-{
-	return inet_csk(sk)->icsk_ulp_ops == &bpf_tcp_ulp_ops;
-}
-
-static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
-{
-	if (refcount_dec_and_test(&psock->refcnt)) {
-		if (psock_is_smap_sk(sock))
-			bpf_tcp_release(sock);
-		write_lock_bh(&sock->sk_callback_lock);
-		smap_stop_sock(psock, sock);
-		write_unlock_bh(&sock->sk_callback_lock);
-		clear_bit(SMAP_TX_RUNNING, &psock->state);
-		rcu_assign_sk_user_data(sock, NULL);
-		call_rcu_sched(&psock->rcu, smap_destroy_psock);
-	}
-}
-
-static int smap_parse_func_strparser(struct strparser *strp,
-				       struct sk_buff *skb)
-{
-	struct smap_psock *psock;
-	struct bpf_prog *prog;
-	int rc;
-
-	rcu_read_lock();
-	psock = container_of(strp, struct smap_psock, strp);
-	prog = READ_ONCE(psock->bpf_parse);
-
-	if (unlikely(!prog)) {
-		rcu_read_unlock();
-		return skb->len;
-	}
-
-	/* Attach socket for bpf program to use if needed we can do this
-	 * because strparser clones the skb before handing it to a upper
-	 * layer, meaning skb_orphan has been called. We NULL sk on the
-	 * way out to ensure we don't trigger a BUG_ON in skb/sk operations
-	 * later and because we are not charging the memory of this skb to
-	 * any socket yet.
-	 */
-	skb->sk = psock->sock;
-	bpf_compute_data_end_sk_skb(skb);
-	rc = (*prog->bpf_func)(skb, prog->insnsi);
-	skb->sk = NULL;
-	rcu_read_unlock();
-	return rc;
-}
-
-static int smap_read_sock_done(struct strparser *strp, int err)
-{
-	return err;
-}
-
-static int smap_init_sock(struct smap_psock *psock,
-			  struct sock *sk)
-{
-	static const struct strp_callbacks cb = {
-		.rcv_msg = smap_read_sock_strparser,
-		.parse_msg = smap_parse_func_strparser,
-		.read_sock_done = smap_read_sock_done,
-	};
-
-	return strp_init(&psock->strp, sk, &cb);
-}
-
-static void smap_init_progs(struct smap_psock *psock,
-			    struct bpf_prog *verdict,
-			    struct bpf_prog *parse)
-{
-	struct bpf_prog *orig_parse, *orig_verdict;
-
-	orig_parse = xchg(&psock->bpf_parse, parse);
-	orig_verdict = xchg(&psock->bpf_verdict, verdict);
-
-	if (orig_verdict)
-		bpf_prog_put(orig_verdict);
-	if (orig_parse)
-		bpf_prog_put(orig_parse);
-}
-
-static void smap_start_sock(struct smap_psock *psock, struct sock *sk)
-{
-	if (sk->sk_data_ready == smap_data_ready)
-		return;
-	psock->save_data_ready = sk->sk_data_ready;
-	psock->save_write_space = sk->sk_write_space;
-	sk->sk_data_ready = smap_data_ready;
-	sk->sk_write_space = smap_write_space;
-	psock->strp_enabled = true;
-}
-
-static void sock_map_remove_complete(struct bpf_stab *stab)
-{
-	bpf_map_area_free(stab->sock_map);
-	kfree(stab);
-}
-
-static void smap_gc_work(struct work_struct *w)
-{
-	struct smap_psock_map_entry *e, *tmp;
-	struct sk_msg_buff *md, *mtmp;
-	struct smap_psock *psock;
-
-	psock = container_of(w, struct smap_psock, gc_work);
-
-	/* no callback lock needed because we already detached sockmap ops */
-	if (psock->strp_enabled)
-		strp_done(&psock->strp);
-
-	cancel_work_sync(&psock->tx_work);
-	__skb_queue_purge(&psock->rxqueue);
-
-	/* At this point all strparser and xmit work must be complete */
-	if (psock->bpf_parse)
-		bpf_prog_put(psock->bpf_parse);
-	if (psock->bpf_verdict)
-		bpf_prog_put(psock->bpf_verdict);
-	if (psock->bpf_tx_msg)
-		bpf_prog_put(psock->bpf_tx_msg);
-
-	if (psock->cork) {
-		free_start_sg(psock->sock, psock->cork, true);
-		kfree(psock->cork);
-	}
-
-	list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
-		list_del(&md->list);
-		free_start_sg(psock->sock, md, true);
-		kfree(md);
-	}
-
-	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
-		list_del(&e->list);
-		kfree(e);
-	}
-
-	if (psock->sk_redir)
-		sock_put(psock->sk_redir);
-
-	sock_put(psock->sock);
-	kfree(psock);
-}
-
-static struct smap_psock *smap_init_psock(struct sock *sock, int node)
-{
-	struct smap_psock *psock;
-
-	psock = kzalloc_node(sizeof(struct smap_psock),
-			     GFP_ATOMIC | __GFP_NOWARN,
-			     node);
-	if (!psock)
-		return ERR_PTR(-ENOMEM);
-
-	psock->eval =  __SK_NONE;
-	psock->sock = sock;
-	skb_queue_head_init(&psock->rxqueue);
-	INIT_WORK(&psock->tx_work, smap_tx_work);
-	INIT_WORK(&psock->gc_work, smap_gc_work);
-	INIT_LIST_HEAD(&psock->maps);
-	INIT_LIST_HEAD(&psock->ingress);
-	refcount_set(&psock->refcnt, 1);
-	spin_lock_init(&psock->maps_lock);
-
-	rcu_assign_sk_user_data(sock, psock);
-	sock_hold(sock);
-	return psock;
-}
-
-static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
-{
-	struct bpf_stab *stab;
-	u64 cost;
-	int err;
-
-	if (!capable(CAP_NET_ADMIN))
-		return ERR_PTR(-EPERM);
-
-	/* check sanity of attributes */
-	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    attr->value_size != 4 || attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
-		return ERR_PTR(-EINVAL);
-
-	stab = kzalloc(sizeof(*stab), GFP_USER);
-	if (!stab)
-		return ERR_PTR(-ENOMEM);
-
-	bpf_map_init_from_attr(&stab->map, attr);
-	raw_spin_lock_init(&stab->lock);
-
-	/* make sure page count doesn't overflow */
-	cost = (u64) stab->map.max_entries * sizeof(struct sock *);
-	err = -EINVAL;
-	if (cost >= U32_MAX - PAGE_SIZE)
-		goto free_stab;
-
-	stab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-
-	/* if map size is larger than memlock limit, reject it early */
-	err = bpf_map_precharge_memlock(stab->map.pages);
-	if (err)
-		goto free_stab;
-
-	err = -ENOMEM;
-	stab->sock_map = bpf_map_area_alloc(stab->map.max_entries *
-					    sizeof(struct sock *),
-					    stab->map.numa_node);
-	if (!stab->sock_map)
-		goto free_stab;
-
-	return &stab->map;
-free_stab:
-	kfree(stab);
-	return ERR_PTR(err);
-}
-
-static void smap_list_map_remove(struct smap_psock *psock,
-				 struct sock **entry)
-{
-	struct smap_psock_map_entry *e, *tmp;
-
-	spin_lock_bh(&psock->maps_lock);
-	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
-		if (e->entry == entry) {
-			list_del(&e->list);
-			kfree(e);
-		}
-	}
-	spin_unlock_bh(&psock->maps_lock);
-}
-
-static void smap_list_hash_remove(struct smap_psock *psock,
-				  struct htab_elem *hash_link)
-{
-	struct smap_psock_map_entry *e, *tmp;
-
-	spin_lock_bh(&psock->maps_lock);
-	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
-		struct htab_elem *c = rcu_dereference(e->hash_link);
-
-		if (c == hash_link) {
-			list_del(&e->list);
-			kfree(e);
-		}
-	}
-	spin_unlock_bh(&psock->maps_lock);
-}
-
-static void sock_map_free(struct bpf_map *map)
-{
-	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
-	int i;
-
-	synchronize_rcu();
-
-	/* At this point no update, lookup or delete operations can happen.
-	 * However, be aware we can still get a socket state event updates,
-	 * and data ready callabacks that reference the psock from sk_user_data
-	 * Also psock worker threads are still in-flight. So smap_release_sock
-	 * will only free the psock after cancel_sync on the worker threads
-	 * and a grace period expire to ensure psock is really safe to remove.
-	 */
-	rcu_read_lock();
-	raw_spin_lock_bh(&stab->lock);
-	for (i = 0; i < stab->map.max_entries; i++) {
-		struct smap_psock *psock;
-		struct sock *sock;
-
-		sock = stab->sock_map[i];
-		if (!sock)
-			continue;
-		stab->sock_map[i] = NULL;
-		psock = smap_psock_sk(sock);
-		/* This check handles a racing sock event that can get the
-		 * sk_callback_lock before this case but after xchg happens
-		 * causing the refcnt to hit zero and sock user data (psock)
-		 * to be null and queued for garbage collection.
-		 */
-		if (likely(psock)) {
-			smap_list_map_remove(psock, &stab->sock_map[i]);
-			smap_release_sock(psock, sock);
-		}
-	}
-	raw_spin_unlock_bh(&stab->lock);
-	rcu_read_unlock();
-
-	sock_map_remove_complete(stab);
-}
-
-static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
-{
-	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
-	u32 i = key ? *(u32 *)key : U32_MAX;
-	u32 *next = (u32 *)next_key;
-
-	if (i >= stab->map.max_entries) {
-		*next = 0;
-		return 0;
-	}
-
-	if (i == stab->map.max_entries - 1)
-		return -ENOENT;
-
-	*next = i + 1;
-	return 0;
-}
-
-struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
-{
-	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
-
-	if (key >= map->max_entries)
-		return NULL;
-
-	return READ_ONCE(stab->sock_map[key]);
-}
-
-static int sock_map_delete_elem(struct bpf_map *map, void *key)
-{
-	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
-	struct smap_psock *psock;
-	int k = *(u32 *)key;
-	struct sock *sock;
-
-	if (k >= map->max_entries)
-		return -EINVAL;
-
-	raw_spin_lock_bh(&stab->lock);
-	sock = stab->sock_map[k];
-	stab->sock_map[k] = NULL;
-	raw_spin_unlock_bh(&stab->lock);
-	if (!sock)
-		return -EINVAL;
-
-	psock = smap_psock_sk(sock);
-	if (!psock)
-		return 0;
-	if (psock->bpf_parse) {
-		write_lock_bh(&sock->sk_callback_lock);
-		smap_stop_sock(psock, sock);
-		write_unlock_bh(&sock->sk_callback_lock);
-	}
-	smap_list_map_remove(psock, &stab->sock_map[k]);
-	smap_release_sock(psock, sock);
-	return 0;
-}
-
-/* Locking notes: Concurrent updates, deletes, and lookups are allowed and are
- * done inside rcu critical sections. This ensures on updates that the psock
- * will not be released via smap_release_sock() until concurrent updates/deletes
- * complete. All operations operate on sock_map using cmpxchg and xchg
- * operations to ensure we do not get stale references. Any reads into the
- * map must be done with READ_ONCE() because of this.
- *
- * A psock is destroyed via call_rcu and after any worker threads are cancelled
- * and syncd so we are certain all references from the update/lookup/delete
- * operations as well as references in the data path are no longer in use.
- *
- * Psocks may exist in multiple maps, but only a single set of parse/verdict
- * programs may be inherited from the maps it belongs to. A reference count
- * is kept with the total number of references to the psock from all maps. The
- * psock will not be released until this reaches zero. The psock and sock
- * user data data use the sk_callback_lock to protect critical data structures
- * from concurrent access. This allows us to avoid two updates from modifying
- * the user data in sock and the lock is required anyways for modifying
- * callbacks, we simply increase its scope slightly.
- *
- * Rules to follow,
- *  - psock must always be read inside RCU critical section
- *  - sk_user_data must only be modified inside sk_callback_lock and read
- *    inside RCU critical section.
- *  - psock->maps list must only be read & modified inside sk_callback_lock
- *  - sock_map must use READ_ONCE and (cmp)xchg operations
- *  - BPF verdict/parse programs must use READ_ONCE and xchg operations
- */
-
-static int __sock_map_ctx_update_elem(struct bpf_map *map,
-				      struct bpf_sock_progs *progs,
-				      struct sock *sock,
-				      void *key)
-{
-	struct bpf_prog *verdict, *parse, *tx_msg;
-	struct smap_psock *psock;
-	bool new = false;
-	int err = 0;
-
-	/* 1. If sock map has BPF programs those will be inherited by the
-	 * sock being added. If the sock is already attached to BPF programs
-	 * this results in an error.
-	 */
-	verdict = READ_ONCE(progs->bpf_verdict);
-	parse = READ_ONCE(progs->bpf_parse);
-	tx_msg = READ_ONCE(progs->bpf_tx_msg);
-
-	if (parse && verdict) {
-		/* bpf prog refcnt may be zero if a concurrent attach operation
-		 * removes the program after the above READ_ONCE() but before
-		 * we increment the refcnt. If this is the case abort with an
-		 * error.
-		 */
-		verdict = bpf_prog_inc_not_zero(verdict);
-		if (IS_ERR(verdict))
-			return PTR_ERR(verdict);
-
-		parse = bpf_prog_inc_not_zero(parse);
-		if (IS_ERR(parse)) {
-			bpf_prog_put(verdict);
-			return PTR_ERR(parse);
-		}
-	}
-
-	if (tx_msg) {
-		tx_msg = bpf_prog_inc_not_zero(tx_msg);
-		if (IS_ERR(tx_msg)) {
-			if (parse && verdict) {
-				bpf_prog_put(parse);
-				bpf_prog_put(verdict);
-			}
-			return PTR_ERR(tx_msg);
-		}
-	}
-
-	psock = smap_psock_sk(sock);
-
-	/* 2. Do not allow inheriting programs if psock exists and has
-	 * already inherited programs. This would create confusion on
-	 * which parser/verdict program is running. If no psock exists
-	 * create one. Inside sk_callback_lock to ensure concurrent create
-	 * doesn't update user data.
-	 */
-	if (psock) {
-		if (!psock_is_smap_sk(sock)) {
-			err = -EBUSY;
-			goto out_progs;
-		}
-		if (READ_ONCE(psock->bpf_parse) && parse) {
-			err = -EBUSY;
-			goto out_progs;
-		}
-		if (READ_ONCE(psock->bpf_tx_msg) && tx_msg) {
-			err = -EBUSY;
-			goto out_progs;
-		}
-		if (!refcount_inc_not_zero(&psock->refcnt)) {
-			err = -EAGAIN;
-			goto out_progs;
-		}
-	} else {
-		psock = smap_init_psock(sock, map->numa_node);
-		if (IS_ERR(psock)) {
-			err = PTR_ERR(psock);
-			goto out_progs;
-		}
-
-		set_bit(SMAP_TX_RUNNING, &psock->state);
-		new = true;
-	}
-
-	/* 3. At this point we have a reference to a valid psock that is
-	 * running. Attach any BPF programs needed.
-	 */
-	if (tx_msg)
-		bpf_tcp_msg_add(psock, sock, tx_msg);
-	if (new) {
-		err = bpf_tcp_init(sock);
-		if (err)
-			goto out_free;
-	}
-
-	if (parse && verdict && !psock->strp_enabled) {
-		err = smap_init_sock(psock, sock);
-		if (err)
-			goto out_free;
-		smap_init_progs(psock, verdict, parse);
-		write_lock_bh(&sock->sk_callback_lock);
-		smap_start_sock(psock, sock);
-		write_unlock_bh(&sock->sk_callback_lock);
-	}
-
-	return err;
-out_free:
-	smap_release_sock(psock, sock);
-out_progs:
-	if (parse && verdict) {
-		bpf_prog_put(parse);
-		bpf_prog_put(verdict);
-	}
-	if (tx_msg)
-		bpf_prog_put(tx_msg);
-	return err;
-}
-
-static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
-				    struct bpf_map *map,
-				    void *key, u64 flags)
-{
-	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
-	struct bpf_sock_progs *progs = &stab->progs;
-	struct sock *osock, *sock = skops->sk;
-	struct smap_psock_map_entry *e;
-	struct smap_psock *psock;
-	u32 i = *(u32 *)key;
-	int err;
-
-	if (unlikely(flags > BPF_EXIST))
-		return -EINVAL;
-	if (unlikely(i >= stab->map.max_entries))
-		return -E2BIG;
-
-	e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
-	if (!e)
-		return -ENOMEM;
-
-	err = __sock_map_ctx_update_elem(map, progs, sock, key);
-	if (err)
-		goto out;
-
-	/* psock guaranteed to be present. */
-	psock = smap_psock_sk(sock);
-	raw_spin_lock_bh(&stab->lock);
-	osock = stab->sock_map[i];
-	if (osock && flags == BPF_NOEXIST) {
-		err = -EEXIST;
-		goto out_unlock;
-	}
-	if (!osock && flags == BPF_EXIST) {
-		err = -ENOENT;
-		goto out_unlock;
-	}
-
-	e->entry = &stab->sock_map[i];
-	e->map = map;
-	spin_lock_bh(&psock->maps_lock);
-	list_add_tail(&e->list, &psock->maps);
-	spin_unlock_bh(&psock->maps_lock);
-
-	stab->sock_map[i] = sock;
-	if (osock) {
-		psock = smap_psock_sk(osock);
-		smap_list_map_remove(psock, &stab->sock_map[i]);
-		smap_release_sock(psock, osock);
-	}
-	raw_spin_unlock_bh(&stab->lock);
-	return 0;
-out_unlock:
-	smap_release_sock(psock, sock);
-	raw_spin_unlock_bh(&stab->lock);
-out:
-	kfree(e);
-	return err;
-}
-
-int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
-{
-	struct bpf_sock_progs *progs;
-	struct bpf_prog *orig;
-
-	if (map->map_type == BPF_MAP_TYPE_SOCKMAP) {
-		struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
-
-		progs = &stab->progs;
-	} else if (map->map_type == BPF_MAP_TYPE_SOCKHASH) {
-		struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-
-		progs = &htab->progs;
-	} else {
-		return -EINVAL;
-	}
-
-	switch (type) {
-	case BPF_SK_MSG_VERDICT:
-		orig = xchg(&progs->bpf_tx_msg, prog);
-		break;
-	case BPF_SK_SKB_STREAM_PARSER:
-		orig = xchg(&progs->bpf_parse, prog);
-		break;
-	case BPF_SK_SKB_STREAM_VERDICT:
-		orig = xchg(&progs->bpf_verdict, prog);
-		break;
-	default:
-		return -EOPNOTSUPP;
-	}
-
-	if (orig)
-		bpf_prog_put(orig);
-
-	return 0;
-}
-
-int sockmap_get_from_fd(const union bpf_attr *attr, int type,
-			struct bpf_prog *prog)
-{
-	int ufd = attr->target_fd;
-	struct bpf_map *map;
-	struct fd f;
-	int err;
-
-	f = fdget(ufd);
-	map = __bpf_map_get(f);
-	if (IS_ERR(map))
-		return PTR_ERR(map);
-
-	err = sock_map_prog(map, prog, attr->attach_type);
-	fdput(f);
-	return err;
-}
-
-static void *sock_map_lookup(struct bpf_map *map, void *key)
-{
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-static int sock_map_update_elem(struct bpf_map *map,
-				void *key, void *value, u64 flags)
-{
-	struct bpf_sock_ops_kern skops;
-	u32 fd = *(u32 *)value;
-	struct socket *socket;
-	int err;
-
-	socket = sockfd_lookup(fd, &err);
-	if (!socket)
-		return err;
-
-	skops.sk = socket->sk;
-	if (!skops.sk) {
-		fput(socket->file);
-		return -EINVAL;
-	}
-
-	/* ULPs are currently supported only for TCP sockets in ESTABLISHED
-	 * state.
-	 */
-	if (skops.sk->sk_type != SOCK_STREAM ||
-	    skops.sk->sk_protocol != IPPROTO_TCP ||
-	    skops.sk->sk_state != TCP_ESTABLISHED) {
-		fput(socket->file);
-		return -EOPNOTSUPP;
-	}
-
-	lock_sock(skops.sk);
-	preempt_disable();
-	rcu_read_lock();
-	err = sock_map_ctx_update_elem(&skops, map, key, flags);
-	rcu_read_unlock();
-	preempt_enable();
-	release_sock(skops.sk);
-	fput(socket->file);
-	return err;
-}
-
-static void sock_map_release(struct bpf_map *map)
-{
-	struct bpf_sock_progs *progs;
-	struct bpf_prog *orig;
-
-	if (map->map_type == BPF_MAP_TYPE_SOCKMAP) {
-		struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
-
-		progs = &stab->progs;
-	} else {
-		struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-
-		progs = &htab->progs;
-	}
-
-	orig = xchg(&progs->bpf_parse, NULL);
-	if (orig)
-		bpf_prog_put(orig);
-	orig = xchg(&progs->bpf_verdict, NULL);
-	if (orig)
-		bpf_prog_put(orig);
-
-	orig = xchg(&progs->bpf_tx_msg, NULL);
-	if (orig)
-		bpf_prog_put(orig);
-}
-
-static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
-{
-	struct bpf_htab *htab;
-	int i, err;
-	u64 cost;
-
-	if (!capable(CAP_NET_ADMIN))
-		return ERR_PTR(-EPERM);
-
-	/* check sanity of attributes */
-	if (attr->max_entries == 0 ||
-	    attr->key_size == 0 ||
-	    attr->value_size != 4 ||
-	    attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
-		return ERR_PTR(-EINVAL);
-
-	if (attr->key_size > MAX_BPF_STACK)
-		/* eBPF programs initialize keys on stack, so they cannot be
-		 * larger than max stack size
-		 */
-		return ERR_PTR(-E2BIG);
-
-	htab = kzalloc(sizeof(*htab), GFP_USER);
-	if (!htab)
-		return ERR_PTR(-ENOMEM);
-
-	bpf_map_init_from_attr(&htab->map, attr);
-
-	htab->n_buckets = roundup_pow_of_two(htab->map.max_entries);
-	htab->elem_size = sizeof(struct htab_elem) +
-			  round_up(htab->map.key_size, 8);
-	err = -EINVAL;
-	if (htab->n_buckets == 0 ||
-	    htab->n_buckets > U32_MAX / sizeof(struct bucket))
-		goto free_htab;
-
-	cost = (u64) htab->n_buckets * sizeof(struct bucket) +
-	       (u64) htab->elem_size * htab->map.max_entries;
-
-	if (cost >= U32_MAX - PAGE_SIZE)
-		goto free_htab;
-
-	htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
-	err = bpf_map_precharge_memlock(htab->map.pages);
-	if (err)
-		goto free_htab;
-
-	err = -ENOMEM;
-	htab->buckets = bpf_map_area_alloc(
-				htab->n_buckets * sizeof(struct bucket),
-				htab->map.numa_node);
-	if (!htab->buckets)
-		goto free_htab;
-
-	for (i = 0; i < htab->n_buckets; i++) {
-		INIT_HLIST_HEAD(&htab->buckets[i].head);
-		raw_spin_lock_init(&htab->buckets[i].lock);
-	}
-
-	return &htab->map;
-free_htab:
-	kfree(htab);
-	return ERR_PTR(err);
-}
-
-static void __bpf_htab_free(struct rcu_head *rcu)
-{
-	struct bpf_htab *htab;
-
-	htab = container_of(rcu, struct bpf_htab, rcu);
-	bpf_map_area_free(htab->buckets);
-	kfree(htab);
-}
-
-static void sock_hash_free(struct bpf_map *map)
-{
-	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	int i;
-
-	synchronize_rcu();
-
-	/* At this point no update, lookup or delete operations can happen.
-	 * However, be aware we can still get a socket state event updates,
-	 * and data ready callabacks that reference the psock from sk_user_data
-	 * Also psock worker threads are still in-flight. So smap_release_sock
-	 * will only free the psock after cancel_sync on the worker threads
-	 * and a grace period expire to ensure psock is really safe to remove.
-	 */
-	rcu_read_lock();
-	for (i = 0; i < htab->n_buckets; i++) {
-		struct bucket *b = __select_bucket(htab, i);
-		struct hlist_head *head;
-		struct hlist_node *n;
-		struct htab_elem *l;
-
-		raw_spin_lock_bh(&b->lock);
-		head = &b->head;
-		hlist_for_each_entry_safe(l, n, head, hash_node) {
-			struct sock *sock = l->sk;
-			struct smap_psock *psock;
-
-			hlist_del_rcu(&l->hash_node);
-			psock = smap_psock_sk(sock);
-			/* This check handles a racing sock event that can get
-			 * the sk_callback_lock before this case but after xchg
-			 * causing the refcnt to hit zero and sock user data
-			 * (psock) to be null and queued for garbage collection.
-			 */
-			if (likely(psock)) {
-				smap_list_hash_remove(psock, l);
-				smap_release_sock(psock, sock);
-			}
-			free_htab_elem(htab, l);
-		}
-		raw_spin_unlock_bh(&b->lock);
-	}
-	rcu_read_unlock();
-	call_rcu(&htab->rcu, __bpf_htab_free);
-}
-
-static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
-					      void *key, u32 key_size, u32 hash,
-					      struct sock *sk,
-					      struct htab_elem *old_elem)
-{
-	struct htab_elem *l_new;
-
-	if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
-		if (!old_elem) {
-			atomic_dec(&htab->count);
-			return ERR_PTR(-E2BIG);
-		}
-	}
-	l_new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
-			     htab->map.numa_node);
-	if (!l_new) {
-		atomic_dec(&htab->count);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	memcpy(l_new->key, key, key_size);
-	l_new->sk = sk;
-	l_new->hash = hash;
-	return l_new;
-}
-
-static inline u32 htab_map_hash(const void *key, u32 key_len)
-{
-	return jhash(key, key_len, 0);
-}
-
-static int sock_hash_get_next_key(struct bpf_map *map,
-				  void *key, void *next_key)
-{
-	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	struct htab_elem *l, *next_l;
-	struct hlist_head *h;
-	u32 hash, key_size;
-	int i = 0;
-
-	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	key_size = map->key_size;
-	if (!key)
-		goto find_first_elem;
-	hash = htab_map_hash(key, key_size);
-	h = select_bucket(htab, hash);
-
-	l = lookup_elem_raw(h, hash, key, key_size);
-	if (!l)
-		goto find_first_elem;
-	next_l = hlist_entry_safe(
-		     rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
-		     struct htab_elem, hash_node);
-	if (next_l) {
-		memcpy(next_key, next_l->key, key_size);
-		return 0;
-	}
-
-	/* no more elements in this hash list, go to the next bucket */
-	i = hash & (htab->n_buckets - 1);
-	i++;
-
-find_first_elem:
-	/* iterate over buckets */
-	for (; i < htab->n_buckets; i++) {
-		h = select_bucket(htab, i);
-
-		/* pick first element in the bucket */
-		next_l = hlist_entry_safe(
-				rcu_dereference_raw(hlist_first_rcu(h)),
-				struct htab_elem, hash_node);
-		if (next_l) {
-			/* if it's not empty, just return it */
-			memcpy(next_key, next_l->key, key_size);
-			return 0;
-		}
-	}
-
-	/* iterated over all buckets and all elements */
-	return -ENOENT;
-}
-
-static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
-				     struct bpf_map *map,
-				     void *key, u64 map_flags)
-{
-	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	struct bpf_sock_progs *progs = &htab->progs;
-	struct htab_elem *l_new = NULL, *l_old;
-	struct smap_psock_map_entry *e = NULL;
-	struct hlist_head *head;
-	struct smap_psock *psock;
-	u32 key_size, hash;
-	struct sock *sock;
-	struct bucket *b;
-	int err;
-
-	sock = skops->sk;
-
-	if (sock->sk_type != SOCK_STREAM ||
-	    sock->sk_protocol != IPPROTO_TCP)
-		return -EOPNOTSUPP;
-
-	if (unlikely(map_flags > BPF_EXIST))
-		return -EINVAL;
-
-	e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
-	if (!e)
-		return -ENOMEM;
-
-	WARN_ON_ONCE(!rcu_read_lock_held());
-	key_size = map->key_size;
-	hash = htab_map_hash(key, key_size);
-	b = __select_bucket(htab, hash);
-	head = &b->head;
-
-	err = __sock_map_ctx_update_elem(map, progs, sock, key);
-	if (err)
-		goto err;
-
-	/* psock is valid here because otherwise above *ctx_update_elem would
-	 * have thrown an error. It is safe to skip error check.
-	 */
-	psock = smap_psock_sk(sock);
-	raw_spin_lock_bh(&b->lock);
-	l_old = lookup_elem_raw(head, hash, key, key_size);
-	if (l_old && map_flags == BPF_NOEXIST) {
-		err = -EEXIST;
-		goto bucket_err;
-	}
-	if (!l_old && map_flags == BPF_EXIST) {
-		err = -ENOENT;
-		goto bucket_err;
-	}
-
-	l_new = alloc_sock_hash_elem(htab, key, key_size, hash, sock, l_old);
-	if (IS_ERR(l_new)) {
-		err = PTR_ERR(l_new);
-		goto bucket_err;
-	}
-
-	rcu_assign_pointer(e->hash_link, l_new);
-	e->map = map;
-	spin_lock_bh(&psock->maps_lock);
-	list_add_tail(&e->list, &psock->maps);
-	spin_unlock_bh(&psock->maps_lock);
-
-	/* add new element to the head of the list, so that
-	 * concurrent search will find it before old elem
-	 */
-	hlist_add_head_rcu(&l_new->hash_node, head);
-	if (l_old) {
-		psock = smap_psock_sk(l_old->sk);
-
-		hlist_del_rcu(&l_old->hash_node);
-		smap_list_hash_remove(psock, l_old);
-		smap_release_sock(psock, l_old->sk);
-		free_htab_elem(htab, l_old);
-	}
-	raw_spin_unlock_bh(&b->lock);
-	return 0;
-bucket_err:
-	smap_release_sock(psock, sock);
-	raw_spin_unlock_bh(&b->lock);
-err:
-	kfree(e);
-	return err;
-}
-
-static int sock_hash_update_elem(struct bpf_map *map,
-				void *key, void *value, u64 flags)
-{
-	struct bpf_sock_ops_kern skops;
-	u32 fd = *(u32 *)value;
-	struct socket *socket;
-	int err;
-
-	socket = sockfd_lookup(fd, &err);
-	if (!socket)
-		return err;
-
-	skops.sk = socket->sk;
-	if (!skops.sk) {
-		fput(socket->file);
-		return -EINVAL;
-	}
-
-	/* ULPs are currently supported only for TCP sockets in ESTABLISHED
-	 * state.
-	 */
-	if (skops.sk->sk_type != SOCK_STREAM ||
-	    skops.sk->sk_protocol != IPPROTO_TCP ||
-	    skops.sk->sk_state != TCP_ESTABLISHED) {
-		fput(socket->file);
-		return -EOPNOTSUPP;
-	}
-
-	lock_sock(skops.sk);
-	preempt_disable();
-	rcu_read_lock();
-	err = sock_hash_ctx_update_elem(&skops, map, key, flags);
-	rcu_read_unlock();
-	preempt_enable();
-	release_sock(skops.sk);
-	fput(socket->file);
-	return err;
-}
-
-static int sock_hash_delete_elem(struct bpf_map *map, void *key)
-{
-	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	struct hlist_head *head;
-	struct bucket *b;
-	struct htab_elem *l;
-	u32 hash, key_size;
-	int ret = -ENOENT;
-
-	key_size = map->key_size;
-	hash = htab_map_hash(key, key_size);
-	b = __select_bucket(htab, hash);
-	head = &b->head;
-
-	raw_spin_lock_bh(&b->lock);
-	l = lookup_elem_raw(head, hash, key, key_size);
-	if (l) {
-		struct sock *sock = l->sk;
-		struct smap_psock *psock;
-
-		hlist_del_rcu(&l->hash_node);
-		psock = smap_psock_sk(sock);
-		/* This check handles a racing sock event that can get the
-		 * sk_callback_lock before this case but after xchg happens
-		 * causing the refcnt to hit zero and sock user data (psock)
-		 * to be null and queued for garbage collection.
-		 */
-		if (likely(psock)) {
-			smap_list_hash_remove(psock, l);
-			smap_release_sock(psock, sock);
-		}
-		free_htab_elem(htab, l);
-		ret = 0;
-	}
-	raw_spin_unlock_bh(&b->lock);
-	return ret;
-}
-
-struct sock  *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
-{
-	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
-	struct hlist_head *head;
-	struct htab_elem *l;
-	u32 key_size, hash;
-	struct bucket *b;
-	struct sock *sk;
-
-	key_size = map->key_size;
-	hash = htab_map_hash(key, key_size);
-	b = __select_bucket(htab, hash);
-	head = &b->head;
-
-	l = lookup_elem_raw(head, hash, key, key_size);
-	sk = l ? l->sk : NULL;
-	return sk;
-}
-
-const struct bpf_map_ops sock_map_ops = {
-	.map_alloc = sock_map_alloc,
-	.map_free = sock_map_free,
-	.map_lookup_elem = sock_map_lookup,
-	.map_get_next_key = sock_map_get_next_key,
-	.map_update_elem = sock_map_update_elem,
-	.map_delete_elem = sock_map_delete_elem,
-	.map_release_uref = sock_map_release,
-	.map_check_btf = map_check_no_btf,
-};
-
-const struct bpf_map_ops sock_hash_ops = {
-	.map_alloc = sock_hash_alloc,
-	.map_free = sock_hash_free,
-	.map_lookup_elem = sock_map_lookup,
-	.map_get_next_key = sock_hash_get_next_key,
-	.map_update_elem = sock_hash_update_elem,
-	.map_delete_elem = sock_hash_delete_elem,
-	.map_release_uref = sock_map_release,
-	.map_check_btf = map_check_no_btf,
-};
-
-static bool bpf_is_valid_sock_op(struct bpf_sock_ops_kern *ops)
-{
-	return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB ||
-	       ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB;
-}
-BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,
-	   struct bpf_map *, map, void *, key, u64, flags)
-{
-	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	/* ULPs are currently supported only for TCP sockets in ESTABLISHED
-	 * state. This checks that the sock ops triggering the update is
-	 * one indicating we are (or will be soon) in an ESTABLISHED state.
-	 */
-	if (!bpf_is_valid_sock_op(bpf_sock))
-		return -EOPNOTSUPP;
-	return sock_map_ctx_update_elem(bpf_sock, map, key, flags);
-}
-
-const struct bpf_func_proto bpf_sock_map_update_proto = {
-	.func		= bpf_sock_map_update,
-	.gpl_only	= false,
-	.pkt_access	= true,
-	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type	= ARG_CONST_MAP_PTR,
-	.arg3_type	= ARG_PTR_TO_MAP_KEY,
-	.arg4_type	= ARG_ANYTHING,
-};
-
-BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, bpf_sock,
-	   struct bpf_map *, map, void *, key, u64, flags)
-{
-	WARN_ON_ONCE(!rcu_read_lock_held());
-
-	if (!bpf_is_valid_sock_op(bpf_sock))
-		return -EOPNOTSUPP;
-	return sock_hash_ctx_update_elem(bpf_sock, map, key, flags);
-}
-
-const struct bpf_func_proto bpf_sock_hash_update_proto = {
-	.func		= bpf_sock_hash_update,
-	.gpl_only	= false,
-	.pkt_access	= true,
-	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type	= ARG_CONST_MAP_PTR,
-	.arg3_type	= ARG_PTR_TO_MAP_KEY,
-	.arg4_type	= ARG_ANYTHING,
-};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 53968f82b919..f4ecd6ed2252 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1664,7 +1664,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	switch (ptype) {
 	case BPF_PROG_TYPE_SK_SKB:
 	case BPF_PROG_TYPE_SK_MSG:
-		ret = sockmap_get_from_fd(attr, ptype, prog);
+		ret = sock_map_get_from_fd(attr, prog);
 		break;
 	case BPF_PROG_TYPE_LIRC_MODE2:
 		ret = lirc_prog_attach(attr, prog);
@@ -1718,10 +1718,10 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
 		break;
 	case BPF_SK_MSG_VERDICT:
-		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, NULL);
+		return sock_map_get_from_fd(attr, NULL);
 	case BPF_SK_SKB_STREAM_PARSER:
 	case BPF_SK_SKB_STREAM_VERDICT:
-		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
+		return sock_map_get_from_fd(attr, NULL);
 	case BPF_LIRC_MODE2:
 		return lirc_prog_detach(attr);
 	case BPF_FLOW_DISSECTOR:
diff --git a/net/Kconfig b/net/Kconfig
index 228dfa382eec..f235edb593ba 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -300,8 +300,11 @@ config BPF_JIT
 
 config BPF_STREAM_PARSER
 	bool "enable BPF STREAM_PARSER"
+	depends on INET
 	depends on BPF_SYSCALL
+	depends on CGROUP_BPF
 	select STREAM_PARSER
+	select NET_SOCK_MSG
 	---help---
 	 Enabling this allows a stream parser to be used with
 	 BPF_MAP_TYPE_SOCKMAP.
@@ -413,6 +416,14 @@ config GRO_CELLS
 config SOCK_VALIDATE_XMIT
 	bool
 
+config NET_SOCK_MSG
+	bool
+	default n
+	help
+	  The NET_SOCK_MSG provides a framework for plain sockets (e.g. TCP) or
+	  ULPs (upper layer modules, e.g. TLS) to process L7 application data
+	  with the help of BPF programs.
+
 config NET_DEVLINK
 	tristate "Network physical/parent device Netlink interface"
 	help
diff --git a/net/core/Makefile b/net/core/Makefile
index 80175e6a2eb8..fccd31e0e7f7 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -16,6 +16,7 @@ obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
 obj-y += net-sysfs.o
 obj-$(CONFIG_PAGE_POOL) += page_pool.o
 obj-$(CONFIG_PROC_FS) += net-procfs.o
+obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
 obj-$(CONFIG_FIB_RULES) += fib_rules.o
@@ -27,6 +28,7 @@ obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
 obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
 obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
 obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
+obj-$(CONFIG_BPF_STREAM_PARSER) += sock_map.o
 obj-$(CONFIG_DST_CACHE) += dst_cache.o
 obj-$(CONFIG_HWBM) += hwbm.o
 obj-$(CONFIG_NET_DEVLINK) += devlink.o
diff --git a/net/core/filter.c b/net/core/filter.c
index b844761b5d4c..0f5260b04bfe 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -38,6 +38,7 @@
 #include <net/protocol.h>
 #include <net/netlink.h>
 #include <linux/skbuff.h>
+#include <linux/skmsg.h>
 #include <net/sock.h>
 #include <net/flow_dissector.h>
 #include <linux/errno.h>
@@ -2142,123 +2143,7 @@ static const struct bpf_func_proto bpf_redirect_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
-	   struct bpf_map *, map, void *, key, u64, flags)
-{
-	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-
-	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
-		return SK_DROP;
-
-	tcb->bpf.flags = flags;
-	tcb->bpf.sk_redir = __sock_hash_lookup_elem(map, key);
-	if (!tcb->bpf.sk_redir)
-		return SK_DROP;
-
-	return SK_PASS;
-}
-
-static const struct bpf_func_proto bpf_sk_redirect_hash_proto = {
-	.func           = bpf_sk_redirect_hash,
-	.gpl_only       = false,
-	.ret_type       = RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type      = ARG_CONST_MAP_PTR,
-	.arg3_type      = ARG_PTR_TO_MAP_KEY,
-	.arg4_type      = ARG_ANYTHING,
-};
-
-BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
-	   struct bpf_map *, map, u32, key, u64, flags)
-{
-	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-
-	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
-		return SK_DROP;
-
-	tcb->bpf.flags = flags;
-	tcb->bpf.sk_redir = __sock_map_lookup_elem(map, key);
-	if (!tcb->bpf.sk_redir)
-		return SK_DROP;
-
-	return SK_PASS;
-}
-
-struct sock *do_sk_redirect_map(struct sk_buff *skb)
-{
-	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
-
-	return tcb->bpf.sk_redir;
-}
-
-static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
-	.func           = bpf_sk_redirect_map,
-	.gpl_only       = false,
-	.ret_type       = RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type      = ARG_CONST_MAP_PTR,
-	.arg3_type      = ARG_ANYTHING,
-	.arg4_type      = ARG_ANYTHING,
-};
-
-BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg_buff *, msg,
-	   struct bpf_map *, map, void *, key, u64, flags)
-{
-	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
-		return SK_DROP;
-
-	msg->flags = flags;
-	msg->sk_redir = __sock_hash_lookup_elem(map, key);
-	if (!msg->sk_redir)
-		return SK_DROP;
-
-	return SK_PASS;
-}
-
-static const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
-	.func           = bpf_msg_redirect_hash,
-	.gpl_only       = false,
-	.ret_type       = RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type      = ARG_CONST_MAP_PTR,
-	.arg3_type      = ARG_PTR_TO_MAP_KEY,
-	.arg4_type      = ARG_ANYTHING,
-};
-
-BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
-	   struct bpf_map *, map, u32, key, u64, flags)
-{
-	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags & ~(BPF_F_INGRESS)))
-		return SK_DROP;
-
-	msg->flags = flags;
-	msg->sk_redir = __sock_map_lookup_elem(map, key);
-	if (!msg->sk_redir)
-		return SK_DROP;
-
-	return SK_PASS;
-}
-
-struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
-{
-	return msg->sk_redir;
-}
-
-static const struct bpf_func_proto bpf_msg_redirect_map_proto = {
-	.func           = bpf_msg_redirect_map,
-	.gpl_only       = false,
-	.ret_type       = RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type      = ARG_CONST_MAP_PTR,
-	.arg3_type      = ARG_ANYTHING,
-	.arg4_type      = ARG_ANYTHING,
-};
-
-BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg_buff *, msg, u32, bytes)
+BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
 {
 	msg->apply_bytes = bytes;
 	return 0;
@@ -2272,7 +2157,7 @@ static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes)
+BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
 {
 	msg->cork_bytes = bytes;
 	return 0;
@@ -2286,45 +2171,37 @@ static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
-#define sk_msg_iter_var(var)			\
-	do {					\
-		var++;				\
-		if (var == MAX_SKB_FRAGS)	\
-			var = 0;		\
-	} while (0)
-
-BPF_CALL_4(bpf_msg_pull_data,
-	   struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
+BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
+	   u32, end, u64, flags)
 {
-	unsigned int len = 0, offset = 0, copy = 0, poffset = 0;
-	int bytes = end - start, bytes_sg_total;
-	struct scatterlist *sg = msg->sg_data;
-	int first_sg, last_sg, i, shift;
-	unsigned char *p, *to, *from;
+	u32 len = 0, offset = 0, copy = 0, poffset = 0, bytes = end - start;
+	u32 first_sge, last_sge, i, shift, bytes_sg_total;
+	struct scatterlist *sge;
+	u8 *raw, *to, *from;
 	struct page *page;
 
 	if (unlikely(flags || end <= start))
 		return -EINVAL;
 
 	/* First find the starting scatterlist element */
-	i = msg->sg_start;
+	i = msg->sg.start;
 	do {
-		len = sg[i].length;
+		len = sk_msg_elem(msg, i)->length;
 		if (start < offset + len)
 			break;
 		offset += len;
-		sk_msg_iter_var(i);
-	} while (i != msg->sg_end);
+		sk_msg_iter_var_next(i);
+	} while (i != msg->sg.end);
 
 	if (unlikely(start >= offset + len))
 		return -EINVAL;
 
-	first_sg = i;
+	first_sge = i;
 	/* The start may point into the sg element so we need to also
 	 * account for the headroom.
 	 */
 	bytes_sg_total = start - offset + bytes;
-	if (!msg->sg_copy[i] && bytes_sg_total <= len)
+	if (!msg->sg.copy[i] && bytes_sg_total <= len)
 		goto out;
 
 	/* At this point we need to linearize multiple scatterlist
@@ -2338,12 +2215,12 @@ BPF_CALL_4(bpf_msg_pull_data,
 	 * will copy the entire sg entry.
 	 */
 	do {
-		copy += sg[i].length;
-		sk_msg_iter_var(i);
+		copy += sk_msg_elem(msg, i)->length;
+		sk_msg_iter_var_next(i);
 		if (bytes_sg_total <= copy)
 			break;
-	} while (i != msg->sg_end);
-	last_sg = i;
+	} while (i != msg->sg.end);
+	last_sge = i;
 
 	if (unlikely(bytes_sg_total > copy))
 		return -EINVAL;
@@ -2352,63 +2229,61 @@ BPF_CALL_4(bpf_msg_pull_data,
 			   get_order(copy));
 	if (unlikely(!page))
 		return -ENOMEM;
-	p = page_address(page);
 
-	i = first_sg;
+	raw = page_address(page);
+	i = first_sge;
 	do {
-		from = sg_virt(&sg[i]);
-		len = sg[i].length;
-		to = p + poffset;
+		sge = sk_msg_elem(msg, i);
+		from = sg_virt(sge);
+		len = sge->length;
+		to = raw + poffset;
 
 		memcpy(to, from, len);
 		poffset += len;
-		sg[i].length = 0;
-		put_page(sg_page(&sg[i]));
+		sge->length = 0;
+		put_page(sg_page(sge));
 
-		sk_msg_iter_var(i);
-	} while (i != last_sg);
+		sk_msg_iter_var_next(i);
+	} while (i != last_sge);
 
-	sg[first_sg].length = copy;
-	sg_set_page(&sg[first_sg], page, copy, 0);
+	sg_set_page(&msg->sg.data[first_sge], page, copy, 0);
 
 	/* To repair sg ring we need to shift entries. If we only
 	 * had a single entry though we can just replace it and
 	 * be done. Otherwise walk the ring and shift the entries.
 	 */
-	WARN_ON_ONCE(last_sg == first_sg);
-	shift = last_sg > first_sg ?
-		last_sg - first_sg - 1 :
-		MAX_SKB_FRAGS - first_sg + last_sg - 1;
+	WARN_ON_ONCE(last_sge == first_sge);
+	shift = last_sge > first_sge ?
+		last_sge - first_sge - 1 :
+		MAX_SKB_FRAGS - first_sge + last_sge - 1;
 	if (!shift)
 		goto out;
 
-	i = first_sg;
-	sk_msg_iter_var(i);
+	i = first_sge;
+	sk_msg_iter_var_next(i);
 	do {
-		int move_from;
+		u32 move_from;
 
-		if (i + shift >= MAX_SKB_FRAGS)
-			move_from = i + shift - MAX_SKB_FRAGS;
+		if (i + shift >= MAX_MSG_FRAGS)
+			move_from = i + shift - MAX_MSG_FRAGS;
 		else
 			move_from = i + shift;
-
-		if (move_from == msg->sg_end)
+		if (move_from == msg->sg.end)
 			break;
 
-		sg[i] = sg[move_from];
-		sg[move_from].length = 0;
-		sg[move_from].page_link = 0;
-		sg[move_from].offset = 0;
-
-		sk_msg_iter_var(i);
+		msg->sg.data[i] = msg->sg.data[move_from];
+		msg->sg.data[move_from].length = 0;
+		msg->sg.data[move_from].page_link = 0;
+		msg->sg.data[move_from].offset = 0;
+		sk_msg_iter_var_next(i);
 	} while (1);
-	msg->sg_end -= shift;
-	if (msg->sg_end < 0)
-		msg->sg_end += MAX_SKB_FRAGS;
+
+	msg->sg.end = msg->sg.end - shift > msg->sg.end ?
+		      msg->sg.end - shift + MAX_MSG_FRAGS :
+		      msg->sg.end - shift;
 out:
-	msg->data = sg_virt(&sg[first_sg]) + start - offset;
+	msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset;
 	msg->data_end = msg->data + bytes;
-
 	return 0;
 }
 
@@ -5203,6 +5078,9 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+const struct bpf_func_proto bpf_sock_map_update_proto __weak;
+const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
+
 static const struct bpf_func_proto *
 sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -5226,6 +5104,9 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+const struct bpf_func_proto bpf_msg_redirect_map_proto __weak;
+const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak;
+
 static const struct bpf_func_proto *
 sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -5247,6 +5128,9 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
+const struct bpf_func_proto bpf_sk_redirect_map_proto __weak;
+const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak;
+
 static const struct bpf_func_proto *
 sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -7001,22 +6885,22 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 
 	switch (si->off) {
 	case offsetof(struct sk_msg_md, data):
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data),
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, data));
+				      offsetof(struct sk_msg, data));
 		break;
 	case offsetof(struct sk_msg_md, data_end):
-		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_buff, data_end),
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data_end),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, data_end));
+				      offsetof(struct sk_msg, data_end));
 		break;
 	case offsetof(struct sk_msg_md, family):
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-					      struct sk_msg_buff, sk),
+					      struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common, skc_family));
 		break;
@@ -7025,9 +6909,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-						struct sk_msg_buff, sk),
+						struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common, skc_daddr));
 		break;
@@ -7037,9 +6921,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 					  skc_rcv_saddr) != 4);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-					      struct sk_msg_buff, sk),
+					      struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common,
 					       skc_rcv_saddr));
@@ -7054,9 +6938,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 		off = si->off;
 		off -= offsetof(struct sk_msg_md, remote_ip6[0]);
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-						struct sk_msg_buff, sk),
+						struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common,
 					       skc_v6_daddr.s6_addr32[0]) +
@@ -7075,9 +6959,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 		off = si->off;
 		off -= offsetof(struct sk_msg_md, local_ip6[0]);
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-						struct sk_msg_buff, sk),
+						struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common,
 					       skc_v6_rcv_saddr.s6_addr32[0]) +
@@ -7091,9 +6975,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-						struct sk_msg_buff, sk),
+						struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common, skc_dport));
 #ifndef __BIG_ENDIAN_BITFIELD
@@ -7105,9 +6989,9 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 		BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
 
 		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
-						struct sk_msg_buff, sk),
+						struct sk_msg, sk),
 				      si->dst_reg, si->src_reg,
-				      offsetof(struct sk_msg_buff, sk));
+				      offsetof(struct sk_msg, sk));
 		*insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
 				      offsetof(struct sock_common, skc_num));
 		break;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
new file mode 100644
index 000000000000..ae2b281c9c57
--- /dev/null
+++ b/net/core/skmsg.c
@@ -0,0 +1,763 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
+
+#include <linux/skmsg.h>
+#include <linux/skbuff.h>
+#include <linux/scatterlist.h>
+
+#include <net/sock.h>
+#include <net/tcp.h>
+
+static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
+{
+	if (msg->sg.end > msg->sg.start &&
+	    elem_first_coalesce < msg->sg.end)
+		return true;
+
+	if (msg->sg.end < msg->sg.start &&
+	    (elem_first_coalesce > msg->sg.start ||
+	     elem_first_coalesce < msg->sg.end))
+		return true;
+
+	return false;
+}
+
+int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
+		 int elem_first_coalesce)
+{
+	struct page_frag *pfrag = sk_page_frag(sk);
+	int ret = 0;
+
+	len -= msg->sg.size;
+	while (len > 0) {
+		struct scatterlist *sge;
+		u32 orig_offset;
+		int use, i;
+
+		if (!sk_page_frag_refill(sk, pfrag))
+			return -ENOMEM;
+
+		orig_offset = pfrag->offset;
+		use = min_t(int, len, pfrag->size - orig_offset);
+		if (!sk_wmem_schedule(sk, use))
+			return -ENOMEM;
+
+		i = msg->sg.end;
+		sk_msg_iter_var_prev(i);
+		sge = &msg->sg.data[i];
+
+		if (sk_msg_try_coalesce_ok(msg, elem_first_coalesce) &&
+		    sg_page(sge) == pfrag->page &&
+		    sge->offset + sge->length == orig_offset) {
+			sge->length += use;
+		} else {
+			if (sk_msg_full(msg)) {
+				ret = -ENOSPC;
+				break;
+			}
+
+			sge = &msg->sg.data[msg->sg.end];
+			sg_unmark_end(sge);
+			sg_set_page(sge, pfrag->page, use, orig_offset);
+			get_page(pfrag->page);
+			sk_msg_iter_next(msg, end);
+		}
+
+		sk_mem_charge(sk, use);
+		msg->sg.size += use;
+		pfrag->offset += use;
+		len -= use;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sk_msg_alloc);
+
+void sk_msg_return_zero(struct sock *sk, struct sk_msg *msg, int bytes)
+{
+	int i = msg->sg.start;
+
+	do {
+		struct scatterlist *sge = sk_msg_elem(msg, i);
+
+		if (bytes < sge->length) {
+			sge->length -= bytes;
+			sge->offset += bytes;
+			sk_mem_uncharge(sk, bytes);
+			break;
+		}
+
+		sk_mem_uncharge(sk, sge->length);
+		bytes -= sge->length;
+		sge->length = 0;
+		sge->offset = 0;
+		sk_msg_iter_var_next(i);
+	} while (bytes && i != msg->sg.end);
+	msg->sg.start = i;
+}
+EXPORT_SYMBOL_GPL(sk_msg_return_zero);
+
+void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes)
+{
+	int i = msg->sg.start;
+
+	do {
+		struct scatterlist *sge = &msg->sg.data[i];
+		int uncharge = (bytes < sge->length) ? bytes : sge->length;
+
+		sk_mem_uncharge(sk, uncharge);
+		bytes -= uncharge;
+		sk_msg_iter_var_next(i);
+	} while (i != msg->sg.end);
+}
+EXPORT_SYMBOL_GPL(sk_msg_return);
+
+static int sk_msg_free_elem(struct sock *sk, struct sk_msg *msg, u32 i,
+			    bool charge)
+{
+	struct scatterlist *sge = sk_msg_elem(msg, i);
+	u32 len = sge->length;
+
+	if (charge)
+		sk_mem_uncharge(sk, len);
+	if (!msg->skb)
+		put_page(sg_page(sge));
+	memset(sge, 0, sizeof(*sge));
+	return len;
+}
+
+static int __sk_msg_free(struct sock *sk, struct sk_msg *msg, u32 i,
+			 bool charge)
+{
+	struct scatterlist *sge = sk_msg_elem(msg, i);
+	int freed = 0;
+
+	while (msg->sg.size) {
+		msg->sg.size -= sge->length;
+		freed += sk_msg_free_elem(sk, msg, i, charge);
+		sk_msg_iter_var_next(i);
+		sk_msg_check_to_free(msg, i, msg->sg.size);
+		sge = sk_msg_elem(msg, i);
+	}
+	if (msg->skb)
+		consume_skb(msg->skb);
+	sk_msg_init(msg);
+	return freed;
+}
+
+int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg)
+{
+	return __sk_msg_free(sk, msg, msg->sg.start, false);
+}
+EXPORT_SYMBOL_GPL(sk_msg_free_nocharge);
+
+int sk_msg_free(struct sock *sk, struct sk_msg *msg)
+{
+	return __sk_msg_free(sk, msg, msg->sg.start, true);
+}
+EXPORT_SYMBOL_GPL(sk_msg_free);
+
+static void __sk_msg_free_partial(struct sock *sk, struct sk_msg *msg,
+				  u32 bytes, bool charge)
+{
+	struct scatterlist *sge;
+	u32 i = msg->sg.start;
+
+	while (bytes) {
+		sge = sk_msg_elem(msg, i);
+		if (!sge->length)
+			break;
+		if (bytes < sge->length) {
+			if (charge)
+				sk_mem_uncharge(sk, bytes);
+			sge->length -= bytes;
+			sge->offset += bytes;
+			msg->sg.size -= bytes;
+			break;
+		}
+
+		msg->sg.size -= sge->length;
+		bytes -= sge->length;
+		sk_msg_free_elem(sk, msg, i, charge);
+		sk_msg_iter_var_next(i);
+		sk_msg_check_to_free(msg, i, bytes);
+	}
+	msg->sg.start = i;
+}
+
+void sk_msg_free_partial(struct sock *sk, struct sk_msg *msg, u32 bytes)
+{
+	__sk_msg_free_partial(sk, msg, bytes, true);
+}
+EXPORT_SYMBOL_GPL(sk_msg_free_partial);
+
+void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg,
+				  u32 bytes)
+{
+	__sk_msg_free_partial(sk, msg, bytes, false);
+}
+
+void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len)
+{
+	int trim = msg->sg.size - len;
+	u32 i = msg->sg.end;
+
+	if (trim <= 0) {
+		WARN_ON(trim < 0);
+		return;
+	}
+
+	sk_msg_iter_var_prev(i);
+	msg->sg.size = len;
+	while (msg->sg.data[i].length &&
+	       trim >= msg->sg.data[i].length) {
+		trim -= msg->sg.data[i].length;
+		sk_msg_free_elem(sk, msg, i, true);
+		sk_msg_iter_var_prev(i);
+		if (!trim)
+			goto out;
+	}
+
+	msg->sg.data[i].length -= trim;
+	sk_mem_uncharge(sk, trim);
+out:
+	/* If we trim data before curr pointer update copybreak and current
+	 * so that any future copy operations start at new copy location.
+	 * However trimed data that has not yet been used in a copy op
+	 * does not require an update.
+	 */
+	if (msg->sg.curr >= i) {
+		msg->sg.curr = i;
+		msg->sg.copybreak = msg->sg.data[i].length;
+	}
+	sk_msg_iter_var_next(i);
+	msg->sg.end = i;
+}
+EXPORT_SYMBOL_GPL(sk_msg_trim);
+
+int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
+			      struct sk_msg *msg, u32 bytes)
+{
+	int i, maxpages, ret = 0, num_elems = sk_msg_elem_used(msg);
+	const int to_max_pages = MAX_MSG_FRAGS;
+	struct page *pages[MAX_MSG_FRAGS];
+	ssize_t orig, copied, use, offset;
+
+	orig = msg->sg.size;
+	while (bytes > 0) {
+		i = 0;
+		maxpages = to_max_pages - num_elems;
+		if (maxpages == 0) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		copied = iov_iter_get_pages(from, pages, bytes, maxpages,
+					    &offset);
+		if (copied <= 0) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		iov_iter_advance(from, copied);
+		bytes -= copied;
+		msg->sg.size += copied;
+
+		while (copied) {
+			use = min_t(int, copied, PAGE_SIZE - offset);
+			sg_set_page(&msg->sg.data[msg->sg.end],
+				    pages[i], use, offset);
+			sg_unmark_end(&msg->sg.data[msg->sg.end]);
+			sk_mem_charge(sk, use);
+
+			offset = 0;
+			copied -= use;
+			sk_msg_iter_next(msg, end);
+			num_elems++;
+			i++;
+		}
+		/* When zerocopy is mixed with sk_msg_*copy* operations we
+		 * may have a copybreak set in this case clear and prefer
+		 * zerocopy remainder when possible.
+		 */
+		msg->sg.copybreak = 0;
+		msg->sg.curr = msg->sg.end;
+	}
+out:
+	/* Revert iov_iter updates, msg will need to use 'trim' later if it
+	 * also needs to be cleared.
+	 */
+	if (ret)
+		iov_iter_revert(from, msg->sg.size - orig);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sk_msg_zerocopy_from_iter);
+
+int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
+			     struct sk_msg *msg, u32 bytes)
+{
+	int ret = -ENOSPC, i = msg->sg.curr;
+	struct scatterlist *sge;
+	u32 copy, buf_size;
+	void *to;
+
+	do {
+		sge = sk_msg_elem(msg, i);
+		/* This is possible if a trim operation shrunk the buffer */
+		if (msg->sg.copybreak >= sge->length) {
+			msg->sg.copybreak = 0;
+			sk_msg_iter_var_next(i);
+			if (i == msg->sg.end)
+				break;
+			sge = sk_msg_elem(msg, i);
+		}
+
+		buf_size = sge->length - msg->sg.copybreak;
+		copy = (buf_size > bytes) ? bytes : buf_size;
+		to = sg_virt(sge) + msg->sg.copybreak;
+		msg->sg.copybreak += copy;
+		if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY)
+			ret = copy_from_iter_nocache(to, copy, from);
+		else
+			ret = copy_from_iter(to, copy, from);
+		if (ret != copy) {
+			ret = -EFAULT;
+			goto out;
+		}
+		bytes -= copy;
+		if (!bytes)
+			break;
+		msg->sg.copybreak = 0;
+		sk_msg_iter_var_next(i);
+	} while (i != msg->sg.end);
+out:
+	msg->sg.curr = i;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
+
+static int sk_psock_skb_ingress(struct sk_psock *psock, struct sk_buff *skb)
+{
+	struct sock *sk = psock->sk;
+	int copied = 0, num_sge;
+	struct sk_msg *msg;
+
+	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
+	if (unlikely(!msg))
+		return -EAGAIN;
+	if (!sk_rmem_schedule(sk, skb, skb->len)) {
+		kfree(msg);
+		return -EAGAIN;
+	}
+
+	sk_msg_init(msg);
+	num_sge = skb_to_sgvec(skb, msg->sg.data, 0, skb->len);
+	if (unlikely(num_sge < 0)) {
+		kfree(msg);
+		return num_sge;
+	}
+
+	sk_mem_charge(sk, skb->len);
+	copied = skb->len;
+	msg->sg.start = 0;
+	msg->sg.end = num_sge == MAX_MSG_FRAGS ? 0 : num_sge;
+	msg->skb = skb;
+
+	sk_psock_queue_msg(psock, msg);
+	sk->sk_data_ready(sk);
+	return copied;
+}
+
+static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
+			       u32 off, u32 len, bool ingress)
+{
+	if (ingress)
+		return sk_psock_skb_ingress(psock, skb);
+	else
+		return skb_send_sock_locked(psock->sk, skb, off, len);
+}
+
+static void sk_psock_backlog(struct work_struct *work)
+{
+	struct sk_psock *psock = container_of(work, struct sk_psock, work);
+	struct sk_psock_work_state *state = &psock->work_state;
+	struct sk_buff *skb;
+	bool ingress;
+	u32 len, off;
+	int ret;
+
+	/* Lock sock to avoid losing sk_socket during loop. */
+	lock_sock(psock->sk);
+	if (state->skb) {
+		skb = state->skb;
+		len = state->len;
+		off = state->off;
+		state->skb = NULL;
+		goto start;
+	}
+
+	while ((skb = skb_dequeue(&psock->ingress_skb))) {
+		len = skb->len;
+		off = 0;
+start:
+		ingress = tcp_skb_bpf_ingress(skb);
+		do {
+			ret = -EIO;
+			if (likely(psock->sk->sk_socket))
+				ret = sk_psock_handle_skb(psock, skb, off,
+							  len, ingress);
+			if (ret <= 0) {
+				if (ret == -EAGAIN) {
+					state->skb = skb;
+					state->len = len;
+					state->off = off;
+					goto end;
+				}
+				/* Hard errors break pipe and stop xmit. */
+				sk_psock_report_error(psock, ret ? -ret : EPIPE);
+				sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
+				kfree_skb(skb);
+				goto end;
+			}
+			off += ret;
+			len -= ret;
+		} while (len);
+
+		if (!ingress)
+			kfree_skb(skb);
+	}
+end:
+	release_sock(psock->sk);
+}
+
+struct sk_psock *sk_psock_init(struct sock *sk, int node)
+{
+	struct sk_psock *psock = kzalloc_node(sizeof(*psock),
+					      GFP_ATOMIC | __GFP_NOWARN,
+					      node);
+	if (!psock)
+		return NULL;
+
+	psock->sk = sk;
+	psock->eval =  __SK_NONE;
+
+	INIT_LIST_HEAD(&psock->link);
+	spin_lock_init(&psock->link_lock);
+
+	INIT_WORK(&psock->work, sk_psock_backlog);
+	INIT_LIST_HEAD(&psock->ingress_msg);
+	skb_queue_head_init(&psock->ingress_skb);
+
+	sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
+	refcount_set(&psock->refcnt, 1);
+
+	rcu_assign_sk_user_data(sk, psock);
+	sock_hold(sk);
+
+	return psock;
+}
+EXPORT_SYMBOL_GPL(sk_psock_init);
+
+struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock)
+{
+	struct sk_psock_link *link;
+
+	spin_lock_bh(&psock->link_lock);
+	link = list_first_entry_or_null(&psock->link, struct sk_psock_link,
+					list);
+	if (link)
+		list_del(&link->list);
+	spin_unlock_bh(&psock->link_lock);
+	return link;
+}
+
+void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
+{
+	struct sk_msg *msg, *tmp;
+
+	list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) {
+		list_del(&msg->list);
+		sk_msg_free(psock->sk, msg);
+		kfree(msg);
+	}
+}
+
+static void sk_psock_zap_ingress(struct sk_psock *psock)
+{
+	__skb_queue_purge(&psock->ingress_skb);
+	__sk_psock_purge_ingress_msg(psock);
+}
+
+static void sk_psock_link_destroy(struct sk_psock *psock)
+{
+	struct sk_psock_link *link, *tmp;
+
+	list_for_each_entry_safe(link, tmp, &psock->link, list) {
+		list_del(&link->list);
+		sk_psock_free_link(link);
+	}
+}
+
+static void sk_psock_destroy_deferred(struct work_struct *gc)
+{
+	struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
+
+	/* No sk_callback_lock since already detached. */
+	if (psock->parser.enabled)
+		strp_done(&psock->parser.strp);
+
+	cancel_work_sync(&psock->work);
+
+	psock_progs_drop(&psock->progs);
+
+	sk_psock_link_destroy(psock);
+	sk_psock_cork_free(psock);
+	sk_psock_zap_ingress(psock);
+
+	if (psock->sk_redir)
+		sock_put(psock->sk_redir);
+	sock_put(psock->sk);
+	kfree(psock);
+}
+
+void sk_psock_destroy(struct rcu_head *rcu)
+{
+	struct sk_psock *psock = container_of(rcu, struct sk_psock, rcu);
+
+	INIT_WORK(&psock->gc, sk_psock_destroy_deferred);
+	schedule_work(&psock->gc);
+}
+EXPORT_SYMBOL_GPL(sk_psock_destroy);
+
+void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
+{
+	rcu_assign_sk_user_data(sk, NULL);
+	sk_psock_cork_free(psock);
+	sk_psock_restore_proto(sk, psock);
+
+	write_lock_bh(&sk->sk_callback_lock);
+	if (psock->progs.skb_parser)
+		sk_psock_stop_strp(sk, psock);
+	write_unlock_bh(&sk->sk_callback_lock);
+	sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
+
+	call_rcu_sched(&psock->rcu, sk_psock_destroy);
+}
+EXPORT_SYMBOL_GPL(sk_psock_drop);
+
+static int sk_psock_map_verd(int verdict, bool redir)
+{
+	switch (verdict) {
+	case SK_PASS:
+		return redir ? __SK_REDIRECT : __SK_PASS;
+	case SK_DROP:
+	default:
+		break;
+	}
+
+	return __SK_DROP;
+}
+
+int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
+			 struct sk_msg *msg)
+{
+	struct bpf_prog *prog;
+	int ret;
+
+	preempt_disable();
+	rcu_read_lock();
+	prog = READ_ONCE(psock->progs.msg_parser);
+	if (unlikely(!prog)) {
+		ret = __SK_PASS;
+		goto out;
+	}
+
+	sk_msg_compute_data_pointers(msg);
+	msg->sk = sk;
+	ret = BPF_PROG_RUN(prog, msg);
+	ret = sk_psock_map_verd(ret, msg->sk_redir);
+	psock->apply_bytes = msg->apply_bytes;
+	if (ret == __SK_REDIRECT) {
+		if (psock->sk_redir)
+			sock_put(psock->sk_redir);
+		psock->sk_redir = msg->sk_redir;
+		if (!psock->sk_redir) {
+			ret = __SK_DROP;
+			goto out;
+		}
+		sock_hold(psock->sk_redir);
+	}
+out:
+	rcu_read_unlock();
+	preempt_enable();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
+
+static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
+			    struct sk_buff *skb)
+{
+	int ret;
+
+	skb->sk = psock->sk;
+	bpf_compute_data_end_sk_skb(skb);
+	preempt_disable();
+	ret = BPF_PROG_RUN(prog, skb);
+	preempt_enable();
+	/* strparser clones the skb before handing it to a upper layer,
+	 * meaning skb_orphan has been called. We NULL sk on the way out
+	 * to ensure we don't trigger a BUG_ON() in skb/sk operations
+	 * later and because we are not charging the memory of this skb
+	 * to any socket yet.
+	 */
+	skb->sk = NULL;
+	return ret;
+}
+
+static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
+{
+	struct sk_psock_parser *parser;
+
+	parser = container_of(strp, struct sk_psock_parser, strp);
+	return container_of(parser, struct sk_psock, parser);
+}
+
+static void sk_psock_verdict_apply(struct sk_psock *psock,
+				   struct sk_buff *skb, int verdict)
+{
+	struct sk_psock *psock_other;
+	struct sock *sk_other;
+	bool ingress;
+
+	switch (verdict) {
+	case __SK_REDIRECT:
+		sk_other = tcp_skb_bpf_redirect_fetch(skb);
+		if (unlikely(!sk_other))
+			goto out_free;
+		psock_other = sk_psock(sk_other);
+		if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
+		    !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED))
+			goto out_free;
+		ingress = tcp_skb_bpf_ingress(skb);
+		if ((!ingress && sock_writeable(sk_other)) ||
+		    (ingress &&
+		     atomic_read(&sk_other->sk_rmem_alloc) <=
+		     sk_other->sk_rcvbuf)) {
+			if (!ingress)
+				skb_set_owner_w(skb, sk_other);
+			skb_queue_tail(&psock_other->ingress_skb, skb);
+			schedule_work(&psock_other->work);
+			break;
+		}
+		/* fall-through */
+	case __SK_DROP:
+		/* fall-through */
+	default:
+out_free:
+		kfree_skb(skb);
+	}
+}
+
+static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
+{
+	struct sk_psock *psock = sk_psock_from_strp(strp);
+	struct bpf_prog *prog;
+	int ret = __SK_DROP;
+
+	rcu_read_lock();
+	prog = READ_ONCE(psock->progs.skb_verdict);
+	if (likely(prog)) {
+		skb_orphan(skb);
+		tcp_skb_bpf_redirect_clear(skb);
+		ret = sk_psock_bpf_run(psock, prog, skb);
+		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+	}
+	rcu_read_unlock();
+	sk_psock_verdict_apply(psock, skb, ret);
+}
+
+static int sk_psock_strp_read_done(struct strparser *strp, int err)
+{
+	return err;
+}
+
+static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
+{
+	struct sk_psock *psock = sk_psock_from_strp(strp);
+	struct bpf_prog *prog;
+	int ret = skb->len;
+
+	rcu_read_lock();
+	prog = READ_ONCE(psock->progs.skb_parser);
+	if (likely(prog))
+		ret = sk_psock_bpf_run(psock, prog, skb);
+	rcu_read_unlock();
+	return ret;
+}
+
+/* Called with socket lock held. */
+static void sk_psock_data_ready(struct sock *sk)
+{
+	struct sk_psock *psock;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (likely(psock)) {
+		write_lock_bh(&sk->sk_callback_lock);
+		strp_data_ready(&psock->parser.strp);
+		write_unlock_bh(&sk->sk_callback_lock);
+	}
+	rcu_read_unlock();
+}
+
+static void sk_psock_write_space(struct sock *sk)
+{
+	struct sk_psock *psock;
+	void (*write_space)(struct sock *sk);
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (likely(psock && sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)))
+		schedule_work(&psock->work);
+	write_space = psock->saved_write_space;
+	rcu_read_unlock();
+	write_space(sk);
+}
+
+int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
+{
+	static const struct strp_callbacks cb = {
+		.rcv_msg	= sk_psock_strp_read,
+		.read_sock_done	= sk_psock_strp_read_done,
+		.parse_msg	= sk_psock_strp_parse,
+	};
+
+	psock->parser.enabled = false;
+	return strp_init(&psock->parser.strp, sk, &cb);
+}
+
+void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
+{
+	struct sk_psock_parser *parser = &psock->parser;
+
+	if (parser->enabled)
+		return;
+
+	parser->saved_data_ready = sk->sk_data_ready;
+	sk->sk_data_ready = sk_psock_data_ready;
+	sk->sk_write_space = sk_psock_write_space;
+	parser->enabled = true;
+}
+
+void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
+{
+	struct sk_psock_parser *parser = &psock->parser;
+
+	if (!parser->enabled)
+		return;
+
+	sk->sk_data_ready = parser->saved_data_ready;
+	parser->saved_data_ready = NULL;
+	strp_stop(&parser->strp);
+	parser->enabled = false;
+}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
new file mode 100644
index 000000000000..3c0e44cb811a
--- /dev/null
+++ b/net/core/sock_map.c
@@ -0,0 +1,1002 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/errno.h>
+#include <linux/file.h>
+#include <linux/net.h>
+#include <linux/workqueue.h>
+#include <linux/skmsg.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+
+struct bpf_stab {
+	struct bpf_map map;
+	struct sock **sks;
+	struct sk_psock_progs progs;
+	raw_spinlock_t lock;
+};
+
+#define SOCK_CREATE_FLAG_MASK				\
+	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+
+static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_stab *stab;
+	u64 cost;
+	int err;
+
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+	if (attr->max_entries == 0 ||
+	    attr->key_size    != 4 ||
+	    attr->value_size  != 4 ||
+	    attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
+		return ERR_PTR(-EINVAL);
+
+	stab = kzalloc(sizeof(*stab), GFP_USER);
+	if (!stab)
+		return ERR_PTR(-ENOMEM);
+
+	bpf_map_init_from_attr(&stab->map, attr);
+	raw_spin_lock_init(&stab->lock);
+
+	/* Make sure page count doesn't overflow. */
+	cost = (u64) stab->map.max_entries * sizeof(struct sock *);
+	if (cost >= U32_MAX - PAGE_SIZE) {
+		err = -EINVAL;
+		goto free_stab;
+	}
+
+	stab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+	err = bpf_map_precharge_memlock(stab->map.pages);
+	if (err)
+		goto free_stab;
+
+	stab->sks = bpf_map_area_alloc(stab->map.max_entries *
+				       sizeof(struct sock *),
+				       stab->map.numa_node);
+	if (stab->sks)
+		return &stab->map;
+	err = -ENOMEM;
+free_stab:
+	kfree(stab);
+	return ERR_PTR(err);
+}
+
+int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	u32 ufd = attr->target_fd;
+	struct bpf_map *map;
+	struct fd f;
+	int ret;
+
+	f = fdget(ufd);
+	map = __bpf_map_get(f);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+	ret = sock_map_prog_update(map, prog, attr->attach_type);
+	fdput(f);
+	return ret;
+}
+
+static void sock_map_sk_acquire(struct sock *sk)
+	__acquires(&sk->sk_lock.slock)
+{
+	lock_sock(sk);
+	preempt_disable();
+	rcu_read_lock();
+}
+
+static void sock_map_sk_release(struct sock *sk)
+	__releases(&sk->sk_lock.slock)
+{
+	rcu_read_unlock();
+	preempt_enable();
+	release_sock(sk);
+}
+
+static void sock_map_add_link(struct sk_psock *psock,
+			      struct sk_psock_link *link,
+			      struct bpf_map *map, void *link_raw)
+{
+	link->link_raw = link_raw;
+	link->map = map;
+	spin_lock_bh(&psock->link_lock);
+	list_add_tail(&link->list, &psock->link);
+	spin_unlock_bh(&psock->link_lock);
+}
+
+static void sock_map_del_link(struct sock *sk,
+			      struct sk_psock *psock, void *link_raw)
+{
+	struct sk_psock_link *link, *tmp;
+	bool strp_stop = false;
+
+	spin_lock_bh(&psock->link_lock);
+	list_for_each_entry_safe(link, tmp, &psock->link, list) {
+		if (link->link_raw == link_raw) {
+			struct bpf_map *map = link->map;
+			struct bpf_stab *stab = container_of(map, struct bpf_stab,
+							     map);
+			if (psock->parser.enabled && stab->progs.skb_parser)
+				strp_stop = true;
+			list_del(&link->list);
+			sk_psock_free_link(link);
+		}
+	}
+	spin_unlock_bh(&psock->link_lock);
+	if (strp_stop) {
+		write_lock_bh(&sk->sk_callback_lock);
+		sk_psock_stop_strp(sk, psock);
+		write_unlock_bh(&sk->sk_callback_lock);
+	}
+}
+
+static void sock_map_unref(struct sock *sk, void *link_raw)
+{
+	struct sk_psock *psock = sk_psock(sk);
+
+	if (likely(psock)) {
+		sock_map_del_link(sk, psock, link_raw);
+		sk_psock_put(sk, psock);
+	}
+}
+
+static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
+			 struct sock *sk)
+{
+	struct bpf_prog *msg_parser, *skb_parser, *skb_verdict;
+	bool skb_progs, sk_psock_is_new = false;
+	struct sk_psock *psock;
+	int ret;
+
+	skb_verdict = READ_ONCE(progs->skb_verdict);
+	skb_parser = READ_ONCE(progs->skb_parser);
+	skb_progs = skb_parser && skb_verdict;
+	if (skb_progs) {
+		skb_verdict = bpf_prog_inc_not_zero(skb_verdict);
+		if (IS_ERR(skb_verdict))
+			return PTR_ERR(skb_verdict);
+		skb_parser = bpf_prog_inc_not_zero(skb_parser);
+		if (IS_ERR(skb_parser)) {
+			bpf_prog_put(skb_verdict);
+			return PTR_ERR(skb_parser);
+		}
+	}
+
+	msg_parser = READ_ONCE(progs->msg_parser);
+	if (msg_parser) {
+		msg_parser = bpf_prog_inc_not_zero(msg_parser);
+		if (IS_ERR(msg_parser)) {
+			ret = PTR_ERR(msg_parser);
+			goto out;
+		}
+	}
+
+	psock = sk_psock_get(sk);
+	if (psock) {
+		if (!sk_has_psock(sk)) {
+			ret = -EBUSY;
+			goto out_progs;
+		}
+		if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
+		    (skb_progs  && READ_ONCE(psock->progs.skb_parser))) {
+			sk_psock_put(sk, psock);
+			ret = -EBUSY;
+			goto out_progs;
+		}
+	} else {
+		psock = sk_psock_init(sk, map->numa_node);
+		if (!psock) {
+			ret = -ENOMEM;
+			goto out_progs;
+		}
+		sk_psock_is_new = true;
+	}
+
+	if (msg_parser)
+		psock_set_prog(&psock->progs.msg_parser, msg_parser);
+	if (sk_psock_is_new) {
+		ret = tcp_bpf_init(sk);
+		if (ret < 0)
+			goto out_drop;
+	} else {
+		tcp_bpf_reinit(sk);
+	}
+
+	write_lock_bh(&sk->sk_callback_lock);
+	if (skb_progs && !psock->parser.enabled) {
+		ret = sk_psock_init_strp(sk, psock);
+		if (ret) {
+			write_unlock_bh(&sk->sk_callback_lock);
+			goto out_drop;
+		}
+		psock_set_prog(&psock->progs.skb_verdict, skb_verdict);
+		psock_set_prog(&psock->progs.skb_parser, skb_parser);
+		sk_psock_start_strp(sk, psock);
+	}
+	write_unlock_bh(&sk->sk_callback_lock);
+	return 0;
+out_drop:
+	sk_psock_put(sk, psock);
+out_progs:
+	if (msg_parser)
+		bpf_prog_put(msg_parser);
+out:
+	if (skb_progs) {
+		bpf_prog_put(skb_verdict);
+		bpf_prog_put(skb_parser);
+	}
+	return ret;
+}
+
+static void sock_map_free(struct bpf_map *map)
+{
+	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+	int i;
+
+	synchronize_rcu();
+	rcu_read_lock();
+	raw_spin_lock_bh(&stab->lock);
+	for (i = 0; i < stab->map.max_entries; i++) {
+		struct sock **psk = &stab->sks[i];
+		struct sock *sk;
+
+		sk = xchg(psk, NULL);
+		if (sk)
+			sock_map_unref(sk, psk);
+	}
+	raw_spin_unlock_bh(&stab->lock);
+	rcu_read_unlock();
+
+	bpf_map_area_free(stab->sks);
+	kfree(stab);
+}
+
+static void sock_map_release_progs(struct bpf_map *map)
+{
+	psock_progs_drop(&container_of(map, struct bpf_stab, map)->progs);
+}
+
+static struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
+{
+	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (unlikely(key >= map->max_entries))
+		return NULL;
+	return READ_ONCE(stab->sks[key]);
+}
+
+static void *sock_map_lookup(struct bpf_map *map, void *key)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
+			     struct sock **psk)
+{
+	struct sock *sk;
+
+	raw_spin_lock_bh(&stab->lock);
+	sk = *psk;
+	if (!sk_test || sk_test == sk)
+		*psk = NULL;
+	raw_spin_unlock_bh(&stab->lock);
+	if (unlikely(!sk))
+		return -EINVAL;
+	sock_map_unref(sk, psk);
+	return 0;
+}
+
+static void sock_map_delete_from_link(struct bpf_map *map, struct sock *sk,
+				      void *link_raw)
+{
+	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+
+	__sock_map_delete(stab, sk, link_raw);
+}
+
+static int sock_map_delete_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+	u32 i = *(u32 *)key;
+	struct sock **psk;
+
+	if (unlikely(i >= map->max_entries))
+		return -EINVAL;
+
+	psk = &stab->sks[i];
+	return __sock_map_delete(stab, NULL, psk);
+}
+
+static int sock_map_get_next_key(struct bpf_map *map, void *key, void *next)
+{
+	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+	u32 i = key ? *(u32 *)key : U32_MAX;
+	u32 *key_next = next;
+
+	if (i == stab->map.max_entries - 1)
+		return -ENOENT;
+	if (i >= stab->map.max_entries)
+		*key_next = 0;
+	else
+		*key_next = i + 1;
+	return 0;
+}
+
+static int sock_map_update_common(struct bpf_map *map, u32 idx,
+				  struct sock *sk, u64 flags)
+{
+	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
+	struct sk_psock_link *link;
+	struct sk_psock *psock;
+	struct sock *osk;
+	int ret;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	if (unlikely(flags > BPF_EXIST))
+		return -EINVAL;
+	if (unlikely(idx >= map->max_entries))
+		return -E2BIG;
+
+	link = sk_psock_init_link();
+	if (!link)
+		return -ENOMEM;
+
+	ret = sock_map_link(map, &stab->progs, sk);
+	if (ret < 0)
+		goto out_free;
+
+	psock = sk_psock(sk);
+	WARN_ON_ONCE(!psock);
+
+	raw_spin_lock_bh(&stab->lock);
+	osk = stab->sks[idx];
+	if (osk && flags == BPF_NOEXIST) {
+		ret = -EEXIST;
+		goto out_unlock;
+	} else if (!osk && flags == BPF_EXIST) {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+
+	sock_map_add_link(psock, link, map, &stab->sks[idx]);
+	stab->sks[idx] = sk;
+	if (osk)
+		sock_map_unref(osk, &stab->sks[idx]);
+	raw_spin_unlock_bh(&stab->lock);
+	return 0;
+out_unlock:
+	raw_spin_unlock_bh(&stab->lock);
+	if (psock)
+		sk_psock_put(sk, psock);
+out_free:
+	sk_psock_free_link(link);
+	return ret;
+}
+
+static bool sock_map_op_okay(const struct bpf_sock_ops_kern *ops)
+{
+	return ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB ||
+	       ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB;
+}
+
+static bool sock_map_sk_is_suitable(const struct sock *sk)
+{
+	return sk->sk_type == SOCK_STREAM &&
+	       sk->sk_protocol == IPPROTO_TCP;
+}
+
+static int sock_map_update_elem(struct bpf_map *map, void *key,
+				void *value, u64 flags)
+{
+	u32 ufd = *(u32 *)value;
+	u32 idx = *(u32 *)key;
+	struct socket *sock;
+	struct sock *sk;
+	int ret;
+
+	sock = sockfd_lookup(ufd, &ret);
+	if (!sock)
+		return ret;
+	sk = sock->sk;
+	if (!sk) {
+		ret = -EINVAL;
+		goto out;
+	}
+	if (!sock_map_sk_is_suitable(sk) ||
+	    sk->sk_state != TCP_ESTABLISHED) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	sock_map_sk_acquire(sk);
+	ret = sock_map_update_common(map, idx, sk, flags);
+	sock_map_sk_release(sk);
+out:
+	fput(sock->file);
+	return ret;
+}
+
+BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, sops,
+	   struct bpf_map *, map, void *, key, u64, flags)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (likely(sock_map_sk_is_suitable(sops->sk) &&
+		   sock_map_op_okay(sops)))
+		return sock_map_update_common(map, *(u32 *)key, sops->sk,
+					      flags);
+	return -EOPNOTSUPP;
+}
+
+const struct bpf_func_proto bpf_sock_map_update_proto = {
+	.func		= bpf_sock_map_update,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_PTR_TO_MAP_KEY,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
+	   struct bpf_map *, map, u32, key, u64, flags)
+{
+	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+
+	if (unlikely(flags & ~(BPF_F_INGRESS)))
+		return SK_DROP;
+	tcb->bpf.flags = flags;
+	tcb->bpf.sk_redir = __sock_map_lookup_elem(map, key);
+	if (!tcb->bpf.sk_redir)
+		return SK_DROP;
+	return SK_PASS;
+}
+
+const struct bpf_func_proto bpf_sk_redirect_map_proto = {
+	.func           = bpf_sk_redirect_map,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_ANYTHING,
+	.arg4_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg *, msg,
+	   struct bpf_map *, map, u32, key, u64, flags)
+{
+	if (unlikely(flags & ~(BPF_F_INGRESS)))
+		return SK_DROP;
+	msg->flags = flags;
+	msg->sk_redir = __sock_map_lookup_elem(map, key);
+	if (!msg->sk_redir)
+		return SK_DROP;
+	return SK_PASS;
+}
+
+const struct bpf_func_proto bpf_msg_redirect_map_proto = {
+	.func           = bpf_msg_redirect_map,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_ANYTHING,
+	.arg4_type      = ARG_ANYTHING,
+};
+
+const struct bpf_map_ops sock_map_ops = {
+	.map_alloc		= sock_map_alloc,
+	.map_free		= sock_map_free,
+	.map_get_next_key	= sock_map_get_next_key,
+	.map_update_elem	= sock_map_update_elem,
+	.map_delete_elem	= sock_map_delete_elem,
+	.map_lookup_elem	= sock_map_lookup,
+	.map_release_uref	= sock_map_release_progs,
+	.map_check_btf		= map_check_no_btf,
+};
+
+struct bpf_htab_elem {
+	struct rcu_head rcu;
+	u32 hash;
+	struct sock *sk;
+	struct hlist_node node;
+	u8 key[0];
+};
+
+struct bpf_htab_bucket {
+	struct hlist_head head;
+	raw_spinlock_t lock;
+};
+
+struct bpf_htab {
+	struct bpf_map map;
+	struct bpf_htab_bucket *buckets;
+	u32 buckets_num;
+	u32 elem_size;
+	struct sk_psock_progs progs;
+	atomic_t count;
+};
+
+static inline u32 sock_hash_bucket_hash(const void *key, u32 len)
+{
+	return jhash(key, len, 0);
+}
+
+static struct bpf_htab_bucket *sock_hash_select_bucket(struct bpf_htab *htab,
+						       u32 hash)
+{
+	return &htab->buckets[hash & (htab->buckets_num - 1)];
+}
+
+static struct bpf_htab_elem *
+sock_hash_lookup_elem_raw(struct hlist_head *head, u32 hash, void *key,
+			  u32 key_size)
+{
+	struct bpf_htab_elem *elem;
+
+	hlist_for_each_entry_rcu(elem, head, node) {
+		if (elem->hash == hash &&
+		    !memcmp(&elem->key, key, key_size))
+			return elem;
+	}
+
+	return NULL;
+}
+
+static struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	u32 key_size = map->key_size, hash;
+	struct bpf_htab_bucket *bucket;
+	struct bpf_htab_elem *elem;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	hash = sock_hash_bucket_hash(key, key_size);
+	bucket = sock_hash_select_bucket(htab, hash);
+	elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
+
+	return elem ? elem->sk : NULL;
+}
+
+static void sock_hash_free_elem(struct bpf_htab *htab,
+				struct bpf_htab_elem *elem)
+{
+	atomic_dec(&htab->count);
+	kfree_rcu(elem, rcu);
+}
+
+static void sock_hash_delete_from_link(struct bpf_map *map, struct sock *sk,
+				       void *link_raw)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct bpf_htab_elem *elem_probe, *elem = link_raw;
+	struct bpf_htab_bucket *bucket;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	bucket = sock_hash_select_bucket(htab, elem->hash);
+
+	/* elem may be deleted in parallel from the map, but access here
+	 * is okay since it's going away only after RCU grace period.
+	 * However, we need to check whether it's still present.
+	 */
+	raw_spin_lock_bh(&bucket->lock);
+	elem_probe = sock_hash_lookup_elem_raw(&bucket->head, elem->hash,
+					       elem->key, map->key_size);
+	if (elem_probe && elem_probe == elem) {
+		hlist_del_rcu(&elem->node);
+		sock_map_unref(elem->sk, elem);
+		sock_hash_free_elem(htab, elem);
+	}
+	raw_spin_unlock_bh(&bucket->lock);
+}
+
+static int sock_hash_delete_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	u32 hash, key_size = map->key_size;
+	struct bpf_htab_bucket *bucket;
+	struct bpf_htab_elem *elem;
+	int ret = -ENOENT;
+
+	hash = sock_hash_bucket_hash(key, key_size);
+	bucket = sock_hash_select_bucket(htab, hash);
+
+	raw_spin_lock_bh(&bucket->lock);
+	elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
+	if (elem) {
+		hlist_del_rcu(&elem->node);
+		sock_map_unref(elem->sk, elem);
+		sock_hash_free_elem(htab, elem);
+		ret = 0;
+	}
+	raw_spin_unlock_bh(&bucket->lock);
+	return ret;
+}
+
+static struct bpf_htab_elem *sock_hash_alloc_elem(struct bpf_htab *htab,
+						  void *key, u32 key_size,
+						  u32 hash, struct sock *sk,
+						  struct bpf_htab_elem *old)
+{
+	struct bpf_htab_elem *new;
+
+	if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
+		if (!old) {
+			atomic_dec(&htab->count);
+			return ERR_PTR(-E2BIG);
+		}
+	}
+
+	new = kmalloc_node(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN,
+			   htab->map.numa_node);
+	if (!new) {
+		atomic_dec(&htab->count);
+		return ERR_PTR(-ENOMEM);
+	}
+	memcpy(new->key, key, key_size);
+	new->sk = sk;
+	new->hash = hash;
+	return new;
+}
+
+static int sock_hash_update_common(struct bpf_map *map, void *key,
+				   struct sock *sk, u64 flags)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	u32 key_size = map->key_size, hash;
+	struct bpf_htab_elem *elem, *elem_new;
+	struct bpf_htab_bucket *bucket;
+	struct sk_psock_link *link;
+	struct sk_psock *psock;
+	int ret;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	if (unlikely(flags > BPF_EXIST))
+		return -EINVAL;
+
+	link = sk_psock_init_link();
+	if (!link)
+		return -ENOMEM;
+
+	ret = sock_map_link(map, &htab->progs, sk);
+	if (ret < 0)
+		goto out_free;
+
+	psock = sk_psock(sk);
+	WARN_ON_ONCE(!psock);
+
+	hash = sock_hash_bucket_hash(key, key_size);
+	bucket = sock_hash_select_bucket(htab, hash);
+
+	raw_spin_lock_bh(&bucket->lock);
+	elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
+	if (elem && flags == BPF_NOEXIST) {
+		ret = -EEXIST;
+		goto out_unlock;
+	} else if (!elem && flags == BPF_EXIST) {
+		ret = -ENOENT;
+		goto out_unlock;
+	}
+
+	elem_new = sock_hash_alloc_elem(htab, key, key_size, hash, sk, elem);
+	if (IS_ERR(elem_new)) {
+		ret = PTR_ERR(elem_new);
+		goto out_unlock;
+	}
+
+	sock_map_add_link(psock, link, map, elem_new);
+	/* Add new element to the head of the list, so that
+	 * concurrent search will find it before old elem.
+	 */
+	hlist_add_head_rcu(&elem_new->node, &bucket->head);
+	if (elem) {
+		hlist_del_rcu(&elem->node);
+		sock_map_unref(elem->sk, elem);
+		sock_hash_free_elem(htab, elem);
+	}
+	raw_spin_unlock_bh(&bucket->lock);
+	return 0;
+out_unlock:
+	raw_spin_unlock_bh(&bucket->lock);
+	sk_psock_put(sk, psock);
+out_free:
+	sk_psock_free_link(link);
+	return ret;
+}
+
+static int sock_hash_update_elem(struct bpf_map *map, void *key,
+				 void *value, u64 flags)
+{
+	u32 ufd = *(u32 *)value;
+	struct socket *sock;
+	struct sock *sk;
+	int ret;
+
+	sock = sockfd_lookup(ufd, &ret);
+	if (!sock)
+		return ret;
+	sk = sock->sk;
+	if (!sk) {
+		ret = -EINVAL;
+		goto out;
+	}
+	if (!sock_map_sk_is_suitable(sk) ||
+	    sk->sk_state != TCP_ESTABLISHED) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	sock_map_sk_acquire(sk);
+	ret = sock_hash_update_common(map, key, sk, flags);
+	sock_map_sk_release(sk);
+out:
+	fput(sock->file);
+	return ret;
+}
+
+static int sock_hash_get_next_key(struct bpf_map *map, void *key,
+				  void *key_next)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct bpf_htab_elem *elem, *elem_next;
+	u32 hash, key_size = map->key_size;
+	struct hlist_head *head;
+	int i = 0;
+
+	if (!key)
+		goto find_first_elem;
+	hash = sock_hash_bucket_hash(key, key_size);
+	head = &sock_hash_select_bucket(htab, hash)->head;
+	elem = sock_hash_lookup_elem_raw(head, hash, key, key_size);
+	if (!elem)
+		goto find_first_elem;
+
+	elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&elem->node)),
+				     struct bpf_htab_elem, node);
+	if (elem_next) {
+		memcpy(key_next, elem_next->key, key_size);
+		return 0;
+	}
+
+	i = hash & (htab->buckets_num - 1);
+	i++;
+find_first_elem:
+	for (; i < htab->buckets_num; i++) {
+		head = &sock_hash_select_bucket(htab, i)->head;
+		elem_next = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
+					     struct bpf_htab_elem, node);
+		if (elem_next) {
+			memcpy(key_next, elem_next->key, key_size);
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+
+static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
+{
+	struct bpf_htab *htab;
+	int i, err;
+	u64 cost;
+
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+	if (attr->max_entries == 0 ||
+	    attr->key_size    == 0 ||
+	    attr->value_size  != 4 ||
+	    attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
+		return ERR_PTR(-EINVAL);
+	if (attr->key_size > MAX_BPF_STACK)
+		return ERR_PTR(-E2BIG);
+
+	htab = kzalloc(sizeof(*htab), GFP_USER);
+	if (!htab)
+		return ERR_PTR(-ENOMEM);
+
+	bpf_map_init_from_attr(&htab->map, attr);
+
+	htab->buckets_num = roundup_pow_of_two(htab->map.max_entries);
+	htab->elem_size = sizeof(struct bpf_htab_elem) +
+			  round_up(htab->map.key_size, 8);
+	if (htab->buckets_num == 0 ||
+	    htab->buckets_num > U32_MAX / sizeof(struct bpf_htab_bucket)) {
+		err = -EINVAL;
+		goto free_htab;
+	}
+
+	cost = (u64) htab->buckets_num * sizeof(struct bpf_htab_bucket) +
+	       (u64) htab->elem_size * htab->map.max_entries;
+	if (cost >= U32_MAX - PAGE_SIZE) {
+		err = -EINVAL;
+		goto free_htab;
+	}
+
+	htab->buckets = bpf_map_area_alloc(htab->buckets_num *
+					   sizeof(struct bpf_htab_bucket),
+					   htab->map.numa_node);
+	if (!htab->buckets) {
+		err = -ENOMEM;
+		goto free_htab;
+	}
+
+	for (i = 0; i < htab->buckets_num; i++) {
+		INIT_HLIST_HEAD(&htab->buckets[i].head);
+		raw_spin_lock_init(&htab->buckets[i].lock);
+	}
+
+	return &htab->map;
+free_htab:
+	kfree(htab);
+	return ERR_PTR(err);
+}
+
+static void sock_hash_free(struct bpf_map *map)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct bpf_htab_bucket *bucket;
+	struct bpf_htab_elem *elem;
+	struct hlist_node *node;
+	int i;
+
+	synchronize_rcu();
+	rcu_read_lock();
+	for (i = 0; i < htab->buckets_num; i++) {
+		bucket = sock_hash_select_bucket(htab, i);
+		raw_spin_lock_bh(&bucket->lock);
+		hlist_for_each_entry_safe(elem, node, &bucket->head, node) {
+			hlist_del_rcu(&elem->node);
+			sock_map_unref(elem->sk, elem);
+		}
+		raw_spin_unlock_bh(&bucket->lock);
+	}
+	rcu_read_unlock();
+
+	bpf_map_area_free(htab->buckets);
+	kfree(htab);
+}
+
+static void sock_hash_release_progs(struct bpf_map *map)
+{
+	psock_progs_drop(&container_of(map, struct bpf_htab, map)->progs);
+}
+
+BPF_CALL_4(bpf_sock_hash_update, struct bpf_sock_ops_kern *, sops,
+	   struct bpf_map *, map, void *, key, u64, flags)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (likely(sock_map_sk_is_suitable(sops->sk) &&
+		   sock_map_op_okay(sops)))
+		return sock_hash_update_common(map, key, sops->sk, flags);
+	return -EOPNOTSUPP;
+}
+
+const struct bpf_func_proto bpf_sock_hash_update_proto = {
+	.func		= bpf_sock_hash_update,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_CONST_MAP_PTR,
+	.arg3_type	= ARG_PTR_TO_MAP_KEY,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_sk_redirect_hash, struct sk_buff *, skb,
+	   struct bpf_map *, map, void *, key, u64, flags)
+{
+	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
+
+	if (unlikely(flags & ~(BPF_F_INGRESS)))
+		return SK_DROP;
+	tcb->bpf.flags = flags;
+	tcb->bpf.sk_redir = __sock_hash_lookup_elem(map, key);
+	if (!tcb->bpf.sk_redir)
+		return SK_DROP;
+	return SK_PASS;
+}
+
+const struct bpf_func_proto bpf_sk_redirect_hash_proto = {
+	.func           = bpf_sk_redirect_hash,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_PTR_TO_MAP_KEY,
+	.arg4_type      = ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_msg_redirect_hash, struct sk_msg *, msg,
+	   struct bpf_map *, map, void *, key, u64, flags)
+{
+	if (unlikely(flags & ~(BPF_F_INGRESS)))
+		return SK_DROP;
+	msg->flags = flags;
+	msg->sk_redir = __sock_hash_lookup_elem(map, key);
+	if (!msg->sk_redir)
+		return SK_DROP;
+	return SK_PASS;
+}
+
+const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
+	.func           = bpf_msg_redirect_hash,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_PTR_TO_MAP_KEY,
+	.arg4_type      = ARG_ANYTHING,
+};
+
+const struct bpf_map_ops sock_hash_ops = {
+	.map_alloc		= sock_hash_alloc,
+	.map_free		= sock_hash_free,
+	.map_get_next_key	= sock_hash_get_next_key,
+	.map_update_elem	= sock_hash_update_elem,
+	.map_delete_elem	= sock_hash_delete_elem,
+	.map_lookup_elem	= sock_map_lookup,
+	.map_release_uref	= sock_hash_release_progs,
+	.map_check_btf		= map_check_no_btf,
+};
+
+static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
+{
+	switch (map->map_type) {
+	case BPF_MAP_TYPE_SOCKMAP:
+		return &container_of(map, struct bpf_stab, map)->progs;
+	case BPF_MAP_TYPE_SOCKHASH:
+		return &container_of(map, struct bpf_htab, map)->progs;
+	default:
+		break;
+	}
+
+	return NULL;
+}
+
+int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
+			 u32 which)
+{
+	struct sk_psock_progs *progs = sock_map_progs(map);
+
+	if (!progs)
+		return -EOPNOTSUPP;
+
+	switch (which) {
+	case BPF_SK_MSG_VERDICT:
+		psock_set_prog(&progs->msg_parser, prog);
+		break;
+	case BPF_SK_SKB_STREAM_PARSER:
+		psock_set_prog(&progs->skb_parser, prog);
+		break;
+	case BPF_SK_SKB_STREAM_VERDICT:
+		psock_set_prog(&progs->skb_verdict, prog);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+void sk_psock_unlink(struct sock *sk, struct sk_psock_link *link)
+{
+	switch (link->map->map_type) {
+	case BPF_MAP_TYPE_SOCKMAP:
+		return sock_map_delete_from_link(link->map, sk,
+						 link->link_raw);
+	case BPF_MAP_TYPE_SOCKHASH:
+		return sock_hash_delete_from_link(link->map, sk,
+						  link->link_raw);
+	default:
+		break;
+	}
+}
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 7446b98661d8..58629314eae9 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
 obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
 obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
+obj-$(CONFIG_NET_SOCK_MSG) += tcp_bpf.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
new file mode 100644
index 000000000000..80debb0daf37
--- /dev/null
+++ b/net/ipv4/tcp_bpf.c
@@ -0,0 +1,655 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2017 - 2018 Covalent IO, Inc. http://covalent.io */
+
+#include <linux/skmsg.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/init.h>
+#include <linux/wait.h>
+
+#include <net/inet_common.h>
+
+static bool tcp_bpf_stream_read(const struct sock *sk)
+{
+	struct sk_psock *psock;
+	bool empty = true;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (likely(psock))
+		empty = list_empty(&psock->ingress_msg);
+	rcu_read_unlock();
+	return !empty;
+}
+
+static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
+			     int flags, long timeo, int *err)
+{
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+	int ret;
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+	ret = sk_wait_event(sk, &timeo,
+			    !list_empty(&psock->ingress_msg) ||
+			    !skb_queue_empty(&sk->sk_receive_queue), &wait);
+	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
+	remove_wait_queue(sk_sleep(sk), &wait);
+	return ret;
+}
+
+int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
+		      struct msghdr *msg, int len)
+{
+	struct iov_iter *iter = &msg->msg_iter;
+	int i, ret, copied = 0;
+
+	while (copied != len) {
+		struct scatterlist *sge;
+		struct sk_msg *msg_rx;
+
+		msg_rx = list_first_entry_or_null(&psock->ingress_msg,
+						  struct sk_msg, list);
+		if (unlikely(!msg_rx))
+			break;
+
+		i = msg_rx->sg.start;
+		do {
+			struct page *page;
+			int copy;
+
+			sge = sk_msg_elem(msg_rx, i);
+			copy = sge->length;
+			page = sg_page(sge);
+			if (copied + copy > len)
+				copy = len - copied;
+			ret = copy_page_to_iter(page, sge->offset, copy, iter);
+			if (ret != copy) {
+				msg_rx->sg.start = i;
+				return -EFAULT;
+			}
+
+			copied += copy;
+			sge->offset += copy;
+			sge->length -= copy;
+			sk_mem_uncharge(sk, copy);
+			if (!sge->length) {
+				i++;
+				if (i == MAX_SKB_FRAGS)
+					i = 0;
+				if (!msg_rx->skb)
+					put_page(page);
+			}
+
+			if (copied == len)
+				break;
+		} while (i != msg_rx->sg.end);
+
+		msg_rx->sg.start = i;
+		if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
+			list_del(&msg_rx->list);
+			if (msg_rx->skb)
+				consume_skb(msg_rx->skb);
+			kfree(msg_rx);
+		}
+	}
+
+	return copied;
+}
+EXPORT_SYMBOL_GPL(__tcp_bpf_recvmsg);
+
+int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+		    int nonblock, int flags, int *addr_len)
+{
+	struct sk_psock *psock;
+	int copied, ret;
+
+	if (unlikely(flags & MSG_ERRQUEUE))
+		return inet_recv_error(sk, msg, len, addr_len);
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+
+	psock = sk_psock_get(sk);
+	if (unlikely(!psock))
+		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+	lock_sock(sk);
+msg_bytes_ready:
+	copied = __tcp_bpf_recvmsg(sk, psock, msg, len);
+	if (!copied) {
+		int data, err = 0;
+		long timeo;
+
+		timeo = sock_rcvtimeo(sk, nonblock);
+		data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err);
+		if (data) {
+			if (skb_queue_empty(&sk->sk_receive_queue))
+				goto msg_bytes_ready;
+			release_sock(sk);
+			sk_psock_put(sk, psock);
+			return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+		}
+		if (err) {
+			ret = err;
+			goto out;
+		}
+	}
+	ret = copied;
+out:
+	release_sock(sk);
+	sk_psock_put(sk, psock);
+	return ret;
+}
+
+static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
+			   struct sk_msg *msg, u32 apply_bytes, int flags)
+{
+	bool apply = apply_bytes;
+	struct scatterlist *sge;
+	u32 size, copied = 0;
+	struct sk_msg *tmp;
+	int i, ret = 0;
+
+	tmp = kzalloc(sizeof(*tmp), __GFP_NOWARN | GFP_KERNEL);
+	if (unlikely(!tmp))
+		return -ENOMEM;
+
+	lock_sock(sk);
+	tmp->sg.start = msg->sg.start;
+	i = msg->sg.start;
+	do {
+		sge = sk_msg_elem(msg, i);
+		size = (apply && apply_bytes < sge->length) ?
+			apply_bytes : sge->length;
+		if (!sk_wmem_schedule(sk, size)) {
+			if (!copied)
+				ret = -ENOMEM;
+			break;
+		}
+
+		sk_mem_charge(sk, size);
+		sk_msg_xfer(tmp, msg, i, size);
+		copied += size;
+		if (sge->length)
+			get_page(sk_msg_page(tmp, i));
+		sk_msg_iter_var_next(i);
+		tmp->sg.end = i;
+		if (apply) {
+			apply_bytes -= size;
+			if (!apply_bytes)
+				break;
+		}
+	} while (i != msg->sg.end);
+
+	if (!ret) {
+		msg->sg.start = i;
+		msg->sg.size -= apply_bytes;
+		sk_psock_queue_msg(psock, tmp);
+		sk->sk_data_ready(sk);
+	} else {
+		sk_msg_free(sk, tmp);
+		kfree(tmp);
+	}
+
+	release_sock(sk);
+	return ret;
+}
+
+static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes,
+			int flags, bool uncharge)
+{
+	bool apply = apply_bytes;
+	struct scatterlist *sge;
+	struct page *page;
+	int size, ret = 0;
+	u32 off;
+
+	while (1) {
+		sge = sk_msg_elem(msg, msg->sg.start);
+		size = (apply && apply_bytes < sge->length) ?
+			apply_bytes : sge->length;
+		off  = sge->offset;
+		page = sg_page(sge);
+
+		tcp_rate_check_app_limited(sk);
+retry:
+		ret = do_tcp_sendpages(sk, page, off, size, flags);
+		if (ret <= 0)
+			return ret;
+		if (apply)
+			apply_bytes -= ret;
+		msg->sg.size -= ret;
+		sge->offset += ret;
+		sge->length -= ret;
+		if (uncharge)
+			sk_mem_uncharge(sk, ret);
+		if (ret != size) {
+			size -= ret;
+			off  += ret;
+			goto retry;
+		}
+		if (!sge->length) {
+			put_page(page);
+			sk_msg_iter_next(msg, start);
+			sg_init_table(sge, 1);
+			if (msg->sg.start == msg->sg.end)
+				break;
+		}
+		if (apply && !apply_bytes)
+			break;
+	}
+
+	return 0;
+}
+
+static int tcp_bpf_push_locked(struct sock *sk, struct sk_msg *msg,
+			       u32 apply_bytes, int flags, bool uncharge)
+{
+	int ret;
+
+	lock_sock(sk);
+	ret = tcp_bpf_push(sk, msg, apply_bytes, flags, uncharge);
+	release_sock(sk);
+	return ret;
+}
+
+int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
+			  u32 bytes, int flags)
+{
+	bool ingress = sk_msg_to_ingress(msg);
+	struct sk_psock *psock = sk_psock_get(sk);
+	int ret;
+
+	if (unlikely(!psock)) {
+		sk_msg_free(sk, msg);
+		return 0;
+	}
+	ret = ingress ? bpf_tcp_ingress(sk, psock, msg, bytes, flags) :
+			tcp_bpf_push_locked(sk, msg, bytes, flags, false);
+	sk_psock_put(sk, psock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
+
+static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
+				struct sk_msg *msg, int *copied, int flags)
+{
+	bool cork = false, enospc = msg->sg.start == msg->sg.end;
+	struct sock *sk_redir;
+	u32 tosend;
+	int ret;
+
+more_data:
+	if (psock->eval == __SK_NONE)
+		psock->eval = sk_psock_msg_verdict(sk, psock, msg);
+
+	if (msg->cork_bytes &&
+	    msg->cork_bytes > msg->sg.size && !enospc) {
+		psock->cork_bytes = msg->cork_bytes - msg->sg.size;
+		if (!psock->cork) {
+			psock->cork = kzalloc(sizeof(*psock->cork),
+					      GFP_ATOMIC | __GFP_NOWARN);
+			if (!psock->cork)
+				return -ENOMEM;
+		}
+		memcpy(psock->cork, msg, sizeof(*msg));
+		return 0;
+	}
+
+	tosend = msg->sg.size;
+	if (psock->apply_bytes && psock->apply_bytes < tosend)
+		tosend = psock->apply_bytes;
+
+	switch (psock->eval) {
+	case __SK_PASS:
+		ret = tcp_bpf_push(sk, msg, tosend, flags, true);
+		if (unlikely(ret)) {
+			*copied -= sk_msg_free(sk, msg);
+			break;
+		}
+		sk_msg_apply_bytes(psock, tosend);
+		break;
+	case __SK_REDIRECT:
+		sk_redir = psock->sk_redir;
+		sk_msg_apply_bytes(psock, tosend);
+		if (psock->cork) {
+			cork = true;
+			psock->cork = NULL;
+		}
+		sk_msg_return(sk, msg, tosend);
+		release_sock(sk);
+		ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
+		lock_sock(sk);
+		if (unlikely(ret < 0)) {
+			int free = sk_msg_free_nocharge(sk, msg);
+
+			if (!cork)
+				*copied -= free;
+		}
+		if (cork) {
+			sk_msg_free(sk, msg);
+			kfree(msg);
+			msg = NULL;
+			ret = 0;
+		}
+		break;
+	case __SK_DROP:
+	default:
+		sk_msg_free_partial(sk, msg, tosend);
+		sk_msg_apply_bytes(psock, tosend);
+		*copied -= tosend;
+		return -EACCES;
+	}
+
+	if (likely(!ret)) {
+		if (!psock->apply_bytes) {
+			psock->eval =  __SK_NONE;
+			if (psock->sk_redir) {
+				sock_put(psock->sk_redir);
+				psock->sk_redir = NULL;
+			}
+		}
+		if (msg &&
+		    msg->sg.data[msg->sg.start].page_link &&
+		    msg->sg.data[msg->sg.start].length)
+			goto more_data;
+	}
+	return ret;
+}
+
+static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+	struct sk_msg tmp, *msg_tx = NULL;
+	int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
+	int copied = 0, err = 0;
+	struct sk_psock *psock;
+	long timeo;
+
+	psock = sk_psock_get(sk);
+	if (unlikely(!psock))
+		return tcp_sendmsg(sk, msg, size);
+
+	lock_sock(sk);
+	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+	while (msg_data_left(msg)) {
+		bool enospc = false;
+		u32 copy, osize;
+
+		if (sk->sk_err) {
+			err = -sk->sk_err;
+			goto out_err;
+		}
+
+		copy = msg_data_left(msg);
+		if (!sk_stream_memory_free(sk))
+			goto wait_for_sndbuf;
+		if (psock->cork) {
+			msg_tx = psock->cork;
+		} else {
+			msg_tx = &tmp;
+			sk_msg_init(msg_tx);
+		}
+
+		osize = msg_tx->sg.size;
+		err = sk_msg_alloc(sk, msg_tx, msg_tx->sg.size + copy, msg_tx->sg.end - 1);
+		if (err) {
+			if (err != -ENOSPC)
+				goto wait_for_memory;
+			enospc = true;
+			copy = msg_tx->sg.size - osize;
+		}
+
+		err = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, msg_tx,
+					       copy);
+		if (err < 0) {
+			sk_msg_trim(sk, msg_tx, osize);
+			goto out_err;
+		}
+
+		copied += copy;
+		if (psock->cork_bytes) {
+			if (size > psock->cork_bytes)
+				psock->cork_bytes = 0;
+			else
+				psock->cork_bytes -= size;
+			if (psock->cork_bytes && !enospc)
+				goto out_err;
+			/* All cork bytes are accounted, rerun the prog. */
+			psock->eval = __SK_NONE;
+			psock->cork_bytes = 0;
+		}
+
+		err = tcp_bpf_send_verdict(sk, psock, msg_tx, &copied, flags);
+		if (unlikely(err < 0))
+			goto out_err;
+		continue;
+wait_for_sndbuf:
+		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+wait_for_memory:
+		err = sk_stream_wait_memory(sk, &timeo);
+		if (err) {
+			if (msg_tx && msg_tx != psock->cork)
+				sk_msg_free(sk, msg_tx);
+			goto out_err;
+		}
+	}
+out_err:
+	if (err < 0)
+		err = sk_stream_error(sk, msg->msg_flags, err);
+	release_sock(sk);
+	sk_psock_put(sk, psock);
+	return copied ? copied : err;
+}
+
+static int tcp_bpf_sendpage(struct sock *sk, struct page *page, int offset,
+			    size_t size, int flags)
+{
+	struct sk_msg tmp, *msg = NULL;
+	int err = 0, copied = 0;
+	struct sk_psock *psock;
+	bool enospc = false;
+
+	psock = sk_psock_get(sk);
+	if (unlikely(!psock))
+		return tcp_sendpage(sk, page, offset, size, flags);
+
+	lock_sock(sk);
+	if (psock->cork) {
+		msg = psock->cork;
+	} else {
+		msg = &tmp;
+		sk_msg_init(msg);
+	}
+
+	/* Catch case where ring is full and sendpage is stalled. */
+	if (unlikely(sk_msg_full(msg)))
+		goto out_err;
+
+	sk_msg_page_add(msg, page, size, offset);
+	sk_mem_charge(sk, size);
+	copied = size;
+	if (sk_msg_full(msg))
+		enospc = true;
+	if (psock->cork_bytes) {
+		if (size > psock->cork_bytes)
+			psock->cork_bytes = 0;
+		else
+			psock->cork_bytes -= size;
+		if (psock->cork_bytes && !enospc)
+			goto out_err;
+		/* All cork bytes are accounted, rerun the prog. */
+		psock->eval = __SK_NONE;
+		psock->cork_bytes = 0;
+	}
+
+	err = tcp_bpf_send_verdict(sk, psock, msg, &copied, flags);
+out_err:
+	release_sock(sk);
+	sk_psock_put(sk, psock);
+	return copied ? copied : err;
+}
+
+static void tcp_bpf_remove(struct sock *sk, struct sk_psock *psock)
+{
+	struct sk_psock_link *link;
+
+	sk_psock_cork_free(psock);
+	__sk_psock_purge_ingress_msg(psock);
+	while ((link = sk_psock_link_pop(psock))) {
+		sk_psock_unlink(sk, link);
+		sk_psock_free_link(link);
+	}
+}
+
+static void tcp_bpf_unhash(struct sock *sk)
+{
+	void (*saved_unhash)(struct sock *sk);
+	struct sk_psock *psock;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (unlikely(!psock)) {
+		rcu_read_unlock();
+		if (sk->sk_prot->unhash)
+			sk->sk_prot->unhash(sk);
+		return;
+	}
+
+	saved_unhash = psock->saved_unhash;
+	tcp_bpf_remove(sk, psock);
+	rcu_read_unlock();
+	saved_unhash(sk);
+}
+
+static void tcp_bpf_close(struct sock *sk, long timeout)
+{
+	void (*saved_close)(struct sock *sk, long timeout);
+	struct sk_psock *psock;
+
+	lock_sock(sk);
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (unlikely(!psock)) {
+		rcu_read_unlock();
+		release_sock(sk);
+		return sk->sk_prot->close(sk, timeout);
+	}
+
+	saved_close = psock->saved_close;
+	tcp_bpf_remove(sk, psock);
+	rcu_read_unlock();
+	release_sock(sk);
+	saved_close(sk, timeout);
+}
+
+enum {
+	TCP_BPF_IPV4,
+	TCP_BPF_IPV6,
+	TCP_BPF_NUM_PROTS,
+};
+
+enum {
+	TCP_BPF_BASE,
+	TCP_BPF_TX,
+	TCP_BPF_NUM_CFGS,
+};
+
+static struct proto *tcpv6_prot_saved __read_mostly;
+static DEFINE_SPINLOCK(tcpv6_prot_lock);
+static struct proto tcp_bpf_prots[TCP_BPF_NUM_PROTS][TCP_BPF_NUM_CFGS];
+
+static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS],
+				   struct proto *base)
+{
+	prot[TCP_BPF_BASE]			= *base;
+	prot[TCP_BPF_BASE].unhash		= tcp_bpf_unhash;
+	prot[TCP_BPF_BASE].close		= tcp_bpf_close;
+	prot[TCP_BPF_BASE].recvmsg		= tcp_bpf_recvmsg;
+	prot[TCP_BPF_BASE].stream_memory_read	= tcp_bpf_stream_read;
+
+	prot[TCP_BPF_TX]			= prot[TCP_BPF_BASE];
+	prot[TCP_BPF_TX].sendmsg		= tcp_bpf_sendmsg;
+	prot[TCP_BPF_TX].sendpage		= tcp_bpf_sendpage;
+}
+
+static void tcp_bpf_check_v6_needs_rebuild(struct sock *sk, struct proto *ops)
+{
+	if (sk->sk_family == AF_INET6 &&
+	    unlikely(ops != smp_load_acquire(&tcpv6_prot_saved))) {
+		spin_lock_bh(&tcpv6_prot_lock);
+		if (likely(ops != tcpv6_prot_saved)) {
+			tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV6], ops);
+			smp_store_release(&tcpv6_prot_saved, ops);
+		}
+		spin_unlock_bh(&tcpv6_prot_lock);
+	}
+}
+
+static int __init tcp_bpf_v4_build_proto(void)
+{
+	tcp_bpf_rebuild_protos(tcp_bpf_prots[TCP_BPF_IPV4], &tcp_prot);
+	return 0;
+}
+core_initcall(tcp_bpf_v4_build_proto);
+
+static void tcp_bpf_update_sk_prot(struct sock *sk, struct sk_psock *psock)
+{
+	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
+	int config = psock->progs.msg_parser   ? TCP_BPF_TX   : TCP_BPF_BASE;
+
+	sk_psock_update_proto(sk, psock, &tcp_bpf_prots[family][config]);
+}
+
+static void tcp_bpf_reinit_sk_prot(struct sock *sk, struct sk_psock *psock)
+{
+	int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
+	int config = psock->progs.msg_parser   ? TCP_BPF_TX   : TCP_BPF_BASE;
+
+	/* Reinit occurs when program types change e.g. TCP_BPF_TX is removed
+	 * or added requiring sk_prot hook updates. We keep original saved
+	 * hooks in this case.
+	 */
+	sk->sk_prot = &tcp_bpf_prots[family][config];
+}
+
+static int tcp_bpf_assert_proto_ops(struct proto *ops)
+{
+	/* In order to avoid retpoline, we make assumptions when we call
+	 * into ops if e.g. a psock is not present. Make sure they are
+	 * indeed valid assumptions.
+	 */
+	return ops->recvmsg  == tcp_recvmsg &&
+	       ops->sendmsg  == tcp_sendmsg &&
+	       ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP;
+}
+
+void tcp_bpf_reinit(struct sock *sk)
+{
+	struct sk_psock *psock;
+
+	sock_owned_by_me(sk);
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	tcp_bpf_reinit_sk_prot(sk, psock);
+	rcu_read_unlock();
+}
+
+int tcp_bpf_init(struct sock *sk)
+{
+	struct proto *ops = READ_ONCE(sk->sk_prot);
+	struct sk_psock *psock;
+
+	sock_owned_by_me(sk);
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (unlikely(!psock || psock->sk_proto ||
+		     tcp_bpf_assert_proto_ops(ops))) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+	tcp_bpf_check_v6_needs_rebuild(sk, ops);
+	tcp_bpf_update_sk_prot(sk, psock);
+	rcu_read_unlock();
+	return 0;
+}
diff --git a/net/strparser/Kconfig b/net/strparser/Kconfig
index 6cff3f6d0c3a..94da19a2a220 100644
--- a/net/strparser/Kconfig
+++ b/net/strparser/Kconfig
@@ -1,4 +1,2 @@
-
 config STREAM_PARSER
-	tristate
-	default n
+	def_bool n
-- 
cgit v1.2.3


From d829e9c4112b52f4f00195900fd4c685f61365ab Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 13 Oct 2018 02:45:59 +0200
Subject: tls: convert to generic sk_msg interface

Convert kTLS over to make use of sk_msg interface for plaintext and
encrypted scattergather data, so it reuses all the sk_msg helpers
and data structure which later on in a second step enables to glue
this to BPF.

This also allows to remove quite a bit of open coded helpers which
are covered by the sk_msg API. Recent changes in kTLs 80ece6a03aaf
("tls: Remove redundant vars from tls record structure") and
4e6d47206c32 ("tls: Add support for inplace records encryption")
changed the data path handling a bit; while we've kept the latter
optimization intact, we had to undo the former change to better
fit the sk_msg model, hence the sg_aead_in and sg_aead_out have
been brought back and are linked into the sk_msg sgs. Now the kTLS
record contains a msg_plaintext and msg_encrypted sk_msg each.

In the original code, the zerocopy_from_iter() has been used out
of TX but also RX path. For the strparser skb-based RX path,
we've left the zerocopy_from_iter() in decrypt_internal() mostly
untouched, meaning it has been moved into tls_setup_from_iter()
with charging logic removed (as not used from RX). Given RX path
is not based on sk_msg objects, we haven't pursued setting up a
dummy sk_msg to call into sk_msg_zerocopy_from_iter(), but it
could be an option to prusue in a later step.

Joint work with John.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/skmsg.h |   2 +
 include/net/sock.h    |   4 -
 include/net/tls.h     |  18 +-
 net/core/skmsg.c      |  39 ++++
 net/core/sock.c       |  61 ------
 net/tls/Kconfig       |   1 +
 net/tls/tls_device.c  |   2 +-
 net/tls/tls_sw.c      | 511 ++++++++++++++++++--------------------------------
 8 files changed, 236 insertions(+), 402 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 95678103c4a0..4e84b3c2eff8 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -102,6 +102,8 @@ struct sk_psock {
 
 int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
 		 int elem_first_coalesce);
+int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
+		 u32 off, u32 len);
 void sk_msg_trim(struct sock *sk, struct sk_msg *msg, int len);
 int sk_msg_free(struct sock *sk, struct sk_msg *msg);
 int sk_msg_free_nocharge(struct sock *sk, struct sk_msg *msg);
diff --git a/include/net/sock.h b/include/net/sock.h
index 751549ac0a84..7470c45d182d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2214,10 +2214,6 @@ static inline struct page_frag *sk_page_frag(struct sock *sk)
 
 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
 
-int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
-		int sg_start, int *sg_curr, unsigned int *sg_size,
-		int first_coalesce);
-
 /*
  *	Default write policy as shown to user space via poll/select/SIGIO
  */
diff --git a/include/net/tls.h b/include/net/tls.h
index 5e853835597e..3d22d8a59be7 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -39,6 +39,8 @@
 #include <linux/crypto.h>
 #include <linux/socket.h>
 #include <linux/tcp.h>
+#include <linux/skmsg.h>
+
 #include <net/tcp.h>
 #include <net/strparser.h>
 #include <crypto/aead.h>
@@ -103,15 +105,13 @@ struct tls_rec {
 	int tx_flags;
 	int inplace_crypto;
 
-	/* AAD | sg_plaintext_data | sg_tag */
-	struct scatterlist sg_plaintext_data[MAX_SKB_FRAGS + 1];
-	/* AAD | sg_encrypted_data (data contain overhead for hdr&iv&tag) */
-	struct scatterlist sg_encrypted_data[MAX_SKB_FRAGS + 1];
+	struct sk_msg msg_plaintext;
+	struct sk_msg msg_encrypted;
 
-	unsigned int sg_plaintext_size;
-	unsigned int sg_encrypted_size;
-	int sg_plaintext_num_elem;
-	int sg_encrypted_num_elem;
+	/* AAD | msg_plaintext.sg.data | sg_tag */
+	struct scatterlist sg_aead_in[2];
+	/* AAD | msg_encrypted.sg.data (data contains overhead for hdr & iv & tag) */
+	struct scatterlist sg_aead_out[2];
 
 	char aad_space[TLS_AAD_SPACE_SIZE];
 	struct aead_request aead_req;
@@ -223,8 +223,8 @@ struct tls_context {
 
 	unsigned long flags;
 	bool in_tcp_sendpages;
+	bool pending_open_record_frags;
 
-	u16 pending_open_record_frags;
 	int (*push_pending_record)(struct sock *sk, int flags);
 
 	void (*sk_write_space)(struct sock *sk);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index ae2b281c9c57..56a99d0c9aa0 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -73,6 +73,45 @@ int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
 }
 EXPORT_SYMBOL_GPL(sk_msg_alloc);
 
+int sk_msg_clone(struct sock *sk, struct sk_msg *dst, struct sk_msg *src,
+		 u32 off, u32 len)
+{
+	int i = src->sg.start;
+	struct scatterlist *sge = sk_msg_elem(src, i);
+	u32 sge_len, sge_off;
+
+	if (sk_msg_full(dst))
+		return -ENOSPC;
+
+	while (off) {
+		if (sge->length > off)
+			break;
+		off -= sge->length;
+		sk_msg_iter_var_next(i);
+		if (i == src->sg.end && off)
+			return -ENOSPC;
+		sge = sk_msg_elem(src, i);
+	}
+
+	while (len) {
+		sge_len = sge->length - off;
+		sge_off = sge->offset + off;
+		if (sge_len > len)
+			sge_len = len;
+		off = 0;
+		len -= sge_len;
+		sk_msg_page_add(dst, sg_page(sge), sge_len, sge_off);
+		sk_mem_charge(sk, sge_len);
+		sk_msg_iter_var_next(i);
+		if (i == src->sg.end && len)
+			return -ENOSPC;
+		sge = sk_msg_elem(src, i);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sk_msg_clone);
+
 void sk_msg_return_zero(struct sock *sk, struct sk_msg *msg, int bytes)
 {
 	int i = msg->sg.start;
diff --git a/net/core/sock.c b/net/core/sock.c
index 7e8796a6a089..52e4f1c16b1e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2238,67 +2238,6 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
 }
 EXPORT_SYMBOL(sk_page_frag_refill);
 
-int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
-		int sg_start, int *sg_curr_index, unsigned int *sg_curr_size,
-		int first_coalesce)
-{
-	int sg_curr = *sg_curr_index, use = 0, rc = 0;
-	unsigned int size = *sg_curr_size;
-	struct page_frag *pfrag;
-	struct scatterlist *sge;
-
-	len -= size;
-	pfrag = sk_page_frag(sk);
-
-	while (len > 0) {
-		unsigned int orig_offset;
-
-		if (!sk_page_frag_refill(sk, pfrag)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		use = min_t(int, len, pfrag->size - pfrag->offset);
-
-		if (!sk_wmem_schedule(sk, use)) {
-			rc = -ENOMEM;
-			goto out;
-		}
-
-		sk_mem_charge(sk, use);
-		size += use;
-		orig_offset = pfrag->offset;
-		pfrag->offset += use;
-
-		sge = sg + sg_curr - 1;
-		if (sg_curr > first_coalesce && sg_page(sge) == pfrag->page &&
-		    sge->offset + sge->length == orig_offset) {
-			sge->length += use;
-		} else {
-			sge = sg + sg_curr;
-			sg_unmark_end(sge);
-			sg_set_page(sge, pfrag->page, use, orig_offset);
-			get_page(pfrag->page);
-			sg_curr++;
-
-			if (sg_curr == MAX_SKB_FRAGS)
-				sg_curr = 0;
-
-			if (sg_curr == sg_start) {
-				rc = -ENOSPC;
-				break;
-			}
-		}
-
-		len -= use;
-	}
-out:
-	*sg_curr_size = size;
-	*sg_curr_index = sg_curr;
-	return rc;
-}
-EXPORT_SYMBOL(sk_alloc_sg);
-
 static void __lock_sock(struct sock *sk)
 	__releases(&sk->sk_lock.slock)
 	__acquires(&sk->sk_lock.slock)
diff --git a/net/tls/Kconfig b/net/tls/Kconfig
index 73f05ece53d0..99c1a19c17b1 100644
--- a/net/tls/Kconfig
+++ b/net/tls/Kconfig
@@ -8,6 +8,7 @@ config TLS
 	select CRYPTO_AES
 	select CRYPTO_GCM
 	select STREAM_PARSER
+	select NET_SOCK_MSG
 	default n
 	---help---
 	Enable kernel support for TLS protocol. This allows symmetric
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 961b07d4d41c..276edbc04f38 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -421,7 +421,7 @@ last_record:
 			tls_push_record_flags = flags;
 			if (more) {
 				tls_ctx->pending_open_record_frags =
-						record->num_frags;
+						!!record->num_frags;
 				break;
 			}
 
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index aa9fdce272b6..5043b0be1448 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -213,153 +213,49 @@ static int tls_do_decryption(struct sock *sk,
 	return ret;
 }
 
-static void trim_sg(struct sock *sk, struct scatterlist *sg,
-		    int *sg_num_elem, unsigned int *sg_size, int target_size)
-{
-	int i = *sg_num_elem - 1;
-	int trim = *sg_size - target_size;
-
-	if (trim <= 0) {
-		WARN_ON(trim < 0);
-		return;
-	}
-
-	*sg_size = target_size;
-	while (trim >= sg[i].length) {
-		trim -= sg[i].length;
-		sk_mem_uncharge(sk, sg[i].length);
-		put_page(sg_page(&sg[i]));
-		i--;
-
-		if (i < 0)
-			goto out;
-	}
-
-	sg[i].length -= trim;
-	sk_mem_uncharge(sk, trim);
-
-out:
-	*sg_num_elem = i + 1;
-}
-
-static void trim_both_sgl(struct sock *sk, int target_size)
+static void tls_trim_both_msgs(struct sock *sk, int target_size)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	struct tls_rec *rec = ctx->open_rec;
 
-	trim_sg(sk, &rec->sg_plaintext_data[1],
-		&rec->sg_plaintext_num_elem,
-		&rec->sg_plaintext_size,
-		target_size);
-
+	sk_msg_trim(sk, &rec->msg_plaintext, target_size);
 	if (target_size > 0)
 		target_size += tls_ctx->tx.overhead_size;
-
-	trim_sg(sk, &rec->sg_encrypted_data[1],
-		&rec->sg_encrypted_num_elem,
-		&rec->sg_encrypted_size,
-		target_size);
+	sk_msg_trim(sk, &rec->msg_encrypted, target_size);
 }
 
-static int alloc_encrypted_sg(struct sock *sk, int len)
+static int tls_alloc_encrypted_msg(struct sock *sk, int len)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	struct tls_rec *rec = ctx->open_rec;
-	int rc = 0;
-
-	rc = sk_alloc_sg(sk, len,
-			 &rec->sg_encrypted_data[1], 0,
-			 &rec->sg_encrypted_num_elem,
-			 &rec->sg_encrypted_size, 0);
-
-	if (rc == -ENOSPC)
-		rec->sg_encrypted_num_elem =
-			ARRAY_SIZE(rec->sg_encrypted_data) - 1;
+	struct sk_msg *msg_en = &rec->msg_encrypted;
 
-	return rc;
+	return sk_msg_alloc(sk, msg_en, len, 0);
 }
 
-static int move_to_plaintext_sg(struct sock *sk, int required_size)
+static int tls_clone_plaintext_msg(struct sock *sk, int required)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	struct tls_rec *rec = ctx->open_rec;
-	struct scatterlist *plain_sg = &rec->sg_plaintext_data[1];
-	struct scatterlist *enc_sg = &rec->sg_encrypted_data[1];
-	int enc_sg_idx = 0;
+	struct sk_msg *msg_pl = &rec->msg_plaintext;
+	struct sk_msg *msg_en = &rec->msg_encrypted;
 	int skip, len;
 
-	if (rec->sg_plaintext_num_elem == MAX_SKB_FRAGS)
-		return -ENOSPC;
-
-	/* We add page references worth len bytes from enc_sg at the
-	 * end of plain_sg. It is guaranteed that sg_encrypted_data
+	/* We add page references worth len bytes from encrypted sg
+	 * at the end of plaintext sg. It is guaranteed that msg_en
 	 * has enough required room (ensured by caller).
 	 */
-	len = required_size - rec->sg_plaintext_size;
+	len = required - msg_pl->sg.size;
 
-	/* Skip initial bytes in sg_encrypted_data to be able
-	 * to use same offset of both plain and encrypted data.
+	/* Skip initial bytes in msg_en's data to be able to use
+	 * same offset of both plain and encrypted data.
 	 */
-	skip = tls_ctx->tx.prepend_size + rec->sg_plaintext_size;
-
-	while (enc_sg_idx < rec->sg_encrypted_num_elem) {
-		if (enc_sg[enc_sg_idx].length > skip)
-			break;
-
-		skip -= enc_sg[enc_sg_idx].length;
-		enc_sg_idx++;
-	}
+	skip = tls_ctx->tx.prepend_size + msg_pl->sg.size;
 
-	/* unmark the end of plain_sg*/
-	sg_unmark_end(plain_sg + rec->sg_plaintext_num_elem - 1);
-
-	while (len) {
-		struct page *page = sg_page(&enc_sg[enc_sg_idx]);
-		int bytes = enc_sg[enc_sg_idx].length - skip;
-		int offset = enc_sg[enc_sg_idx].offset + skip;
-
-		if (bytes > len)
-			bytes = len;
-		else
-			enc_sg_idx++;
-
-		/* Skipping is required only one time */
-		skip = 0;
-
-		/* Increment page reference */
-		get_page(page);
-
-		sg_set_page(&plain_sg[rec->sg_plaintext_num_elem], page,
-			    bytes, offset);
-
-		sk_mem_charge(sk, bytes);
-
-		len -= bytes;
-		rec->sg_plaintext_size += bytes;
-
-		rec->sg_plaintext_num_elem++;
-
-		if (rec->sg_plaintext_num_elem == MAX_SKB_FRAGS)
-			return -ENOSPC;
-	}
-
-	return 0;
-}
-
-static void free_sg(struct sock *sk, struct scatterlist *sg,
-		    int *sg_num_elem, unsigned int *sg_size)
-{
-	int i, n = *sg_num_elem;
-
-	for (i = 0; i < n; ++i) {
-		sk_mem_uncharge(sk, sg[i].length);
-		put_page(sg_page(&sg[i]));
-	}
-	*sg_num_elem = 0;
-	*sg_size = 0;
+	return sk_msg_clone(sk, msg_pl, msg_en, skip, len);
 }
 
 static void tls_free_open_rec(struct sock *sk)
@@ -372,14 +268,8 @@ static void tls_free_open_rec(struct sock *sk)
 	if (!rec)
 		return;
 
-	free_sg(sk, &rec->sg_encrypted_data[1],
-		&rec->sg_encrypted_num_elem,
-		&rec->sg_encrypted_size);
-
-	free_sg(sk, &rec->sg_plaintext_data[1],
-		&rec->sg_plaintext_num_elem,
-		&rec->sg_plaintext_size);
-
+	sk_msg_free(sk, &rec->msg_encrypted);
+	sk_msg_free(sk, &rec->msg_plaintext);
 	kfree(rec);
 }
 
@@ -388,6 +278,7 @@ int tls_tx_records(struct sock *sk, int flags)
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	struct tls_rec *rec, *tmp;
+	struct sk_msg *msg_en;
 	int tx_flags, rc = 0;
 
 	if (tls_is_partially_sent_record(tls_ctx)) {
@@ -407,9 +298,7 @@ int tls_tx_records(struct sock *sk, int flags)
 		 * Remove the head of tx_list
 		 */
 		list_del(&rec->list);
-		free_sg(sk, &rec->sg_plaintext_data[1],
-			&rec->sg_plaintext_num_elem, &rec->sg_plaintext_size);
-
+		sk_msg_free(sk, &rec->msg_plaintext);
 		kfree(rec);
 	}
 
@@ -421,17 +310,15 @@ int tls_tx_records(struct sock *sk, int flags)
 			else
 				tx_flags = flags;
 
+			msg_en = &rec->msg_encrypted;
 			rc = tls_push_sg(sk, tls_ctx,
-					 &rec->sg_encrypted_data[1],
+					 &msg_en->sg.data[msg_en->sg.curr],
 					 0, tx_flags);
 			if (rc)
 				goto tx_err;
 
 			list_del(&rec->list);
-			free_sg(sk, &rec->sg_plaintext_data[1],
-				&rec->sg_plaintext_num_elem,
-				&rec->sg_plaintext_size);
-
+			sk_msg_free(sk, &rec->msg_plaintext);
 			kfree(rec);
 		} else {
 			break;
@@ -451,15 +338,18 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
 	struct sock *sk = req->data;
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct scatterlist *sge;
+	struct sk_msg *msg_en;
 	struct tls_rec *rec;
 	bool ready = false;
 	int pending;
 
 	rec = container_of(aead_req, struct tls_rec, aead_req);
+	msg_en = &rec->msg_encrypted;
 
-	rec->sg_encrypted_data[1].offset -= tls_ctx->tx.prepend_size;
-	rec->sg_encrypted_data[1].length += tls_ctx->tx.prepend_size;
-
+	sge = sk_msg_elem(msg_en, msg_en->sg.curr);
+	sge->offset -= tls_ctx->tx.prepend_size;
+	sge->length += tls_ctx->tx.prepend_size;
 
 	/* Check if error is previously set on socket */
 	if (err || sk->sk_err) {
@@ -497,31 +387,29 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
 
 	/* Schedule the transmission */
 	if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
-		schedule_delayed_work(&ctx->tx_work.work, 2);
+		schedule_delayed_work(&ctx->tx_work.work, 1);
 }
 
 static int tls_do_encryption(struct sock *sk,
 			     struct tls_context *tls_ctx,
 			     struct tls_sw_context_tx *ctx,
 			     struct aead_request *aead_req,
-			     size_t data_len)
+			     size_t data_len, u32 start)
 {
 	struct tls_rec *rec = ctx->open_rec;
-	struct scatterlist *plain_sg = rec->sg_plaintext_data;
-	struct scatterlist *enc_sg = rec->sg_encrypted_data;
+	struct sk_msg *msg_en = &rec->msg_encrypted;
+	struct scatterlist *sge = sk_msg_elem(msg_en, start);
 	int rc;
 
-	/* Skip the first index as it contains AAD data */
-	rec->sg_encrypted_data[1].offset += tls_ctx->tx.prepend_size;
-	rec->sg_encrypted_data[1].length -= tls_ctx->tx.prepend_size;
+	sge->offset += tls_ctx->tx.prepend_size;
+	sge->length -= tls_ctx->tx.prepend_size;
 
-	/* If it is inplace crypto, then pass same SG list as both src, dst */
-	if (rec->inplace_crypto)
-		plain_sg = enc_sg;
+	msg_en->sg.curr = start;
 
 	aead_request_set_tfm(aead_req, ctx->aead_send);
 	aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
-	aead_request_set_crypt(aead_req, plain_sg, enc_sg,
+	aead_request_set_crypt(aead_req, rec->sg_aead_in,
+			       rec->sg_aead_out,
 			       data_len, tls_ctx->tx.iv);
 
 	aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
@@ -534,8 +422,8 @@ static int tls_do_encryption(struct sock *sk,
 	rc = crypto_aead_encrypt(aead_req);
 	if (!rc || rc != -EINPROGRESS) {
 		atomic_dec(&ctx->encrypt_pending);
-		rec->sg_encrypted_data[1].offset -= tls_ctx->tx.prepend_size;
-		rec->sg_encrypted_data[1].length += tls_ctx->tx.prepend_size;
+		sge->offset -= tls_ctx->tx.prepend_size;
+		sge->length += tls_ctx->tx.prepend_size;
 	}
 
 	if (!rc) {
@@ -557,35 +445,50 @@ static int tls_push_record(struct sock *sk, int flags,
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	struct tls_rec *rec = ctx->open_rec;
+	struct sk_msg *msg_pl, *msg_en;
 	struct aead_request *req;
 	int rc;
+	u32 i;
 
 	if (!rec)
 		return 0;
 
+	msg_pl = &rec->msg_plaintext;
+	msg_en = &rec->msg_encrypted;
+
 	rec->tx_flags = flags;
 	req = &rec->aead_req;
 
-	sg_mark_end(rec->sg_plaintext_data + rec->sg_plaintext_num_elem);
-	sg_mark_end(rec->sg_encrypted_data + rec->sg_encrypted_num_elem);
+	i = msg_pl->sg.end;
+	sk_msg_iter_var_prev(i);
+	sg_mark_end(sk_msg_elem(msg_pl, i));
 
-	tls_make_aad(rec->aad_space, rec->sg_plaintext_size,
+	i = msg_pl->sg.start;
+	sg_chain(rec->sg_aead_in, 2, rec->inplace_crypto ?
+		 &msg_en->sg.data[i] : &msg_pl->sg.data[i]);
+
+	i = msg_en->sg.end;
+	sk_msg_iter_var_prev(i);
+	sg_mark_end(sk_msg_elem(msg_en, i));
+
+	i = msg_en->sg.start;
+	sg_chain(rec->sg_aead_out, 2, &msg_en->sg.data[i]);
+
+	tls_make_aad(rec->aad_space, msg_pl->sg.size,
 		     tls_ctx->tx.rec_seq, tls_ctx->tx.rec_seq_size,
 		     record_type);
 
 	tls_fill_prepend(tls_ctx,
-			 page_address(sg_page(&rec->sg_encrypted_data[1])) +
-			 rec->sg_encrypted_data[1].offset,
-			 rec->sg_plaintext_size, record_type);
+			 page_address(sg_page(&msg_en->sg.data[i])) +
+			 msg_en->sg.data[i].offset, msg_pl->sg.size,
+			 record_type);
 
-	tls_ctx->pending_open_record_frags = 0;
-
-	rc = tls_do_encryption(sk, tls_ctx, ctx, req, rec->sg_plaintext_size);
-	if (rc == -EINPROGRESS)
-		return -EINPROGRESS;
+	tls_ctx->pending_open_record_frags = false;
 
+	rc = tls_do_encryption(sk, tls_ctx, ctx, req, msg_pl->sg.size, i);
 	if (rc < 0) {
-		tls_err_abort(sk, EBADMSG);
+		if (rc != -EINPROGRESS)
+			tls_err_abort(sk, EBADMSG);
 		return rc;
 	}
 
@@ -597,104 +500,11 @@ static int tls_sw_push_pending_record(struct sock *sk, int flags)
 	return tls_push_record(sk, flags, TLS_RECORD_TYPE_DATA);
 }
 
-static int zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
-			      int length, int *pages_used,
-			      unsigned int *size_used,
-			      struct scatterlist *to, int to_max_pages,
-			      bool charge)
-{
-	struct page *pages[MAX_SKB_FRAGS];
-
-	size_t offset;
-	ssize_t copied, use;
-	int i = 0;
-	unsigned int size = *size_used;
-	int num_elem = *pages_used;
-	int rc = 0;
-	int maxpages;
-
-	while (length > 0) {
-		i = 0;
-		maxpages = to_max_pages - num_elem;
-		if (maxpages == 0) {
-			rc = -EFAULT;
-			goto out;
-		}
-		copied = iov_iter_get_pages(from, pages,
-					    length,
-					    maxpages, &offset);
-		if (copied <= 0) {
-			rc = -EFAULT;
-			goto out;
-		}
-
-		iov_iter_advance(from, copied);
-
-		length -= copied;
-		size += copied;
-		while (copied) {
-			use = min_t(int, copied, PAGE_SIZE - offset);
-
-			sg_set_page(&to[num_elem],
-				    pages[i], use, offset);
-			sg_unmark_end(&to[num_elem]);
-			if (charge)
-				sk_mem_charge(sk, use);
-
-			offset = 0;
-			copied -= use;
-
-			++i;
-			++num_elem;
-		}
-	}
-
-	/* Mark the end in the last sg entry if newly added */
-	if (num_elem > *pages_used)
-		sg_mark_end(&to[num_elem - 1]);
-out:
-	if (rc)
-		iov_iter_revert(from, size - *size_used);
-	*size_used = size;
-	*pages_used = num_elem;
-
-	return rc;
-}
-
-static int memcopy_from_iter(struct sock *sk, struct iov_iter *from,
-			     int bytes)
-{
-	struct tls_context *tls_ctx = tls_get_ctx(sk);
-	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
-	struct tls_rec *rec = ctx->open_rec;
-	struct scatterlist *sg = &rec->sg_plaintext_data[1];
-	int copy, i, rc = 0;
-
-	for (i = tls_ctx->pending_open_record_frags;
-	     i < rec->sg_plaintext_num_elem; ++i) {
-		copy = sg[i].length;
-		if (copy_from_iter(
-				page_address(sg_page(&sg[i])) + sg[i].offset,
-				copy, from) != copy) {
-			rc = -EFAULT;
-			goto out;
-		}
-		bytes -= copy;
-
-		++tls_ctx->pending_open_record_frags;
-
-		if (!bytes)
-			break;
-	}
-
-out:
-	return rc;
-}
-
 static struct tls_rec *get_rec(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct sk_msg *msg_pl, *msg_en;
 	struct tls_rec *rec;
 	int mem_size;
 
@@ -708,15 +518,21 @@ static struct tls_rec *get_rec(struct sock *sk)
 	if (!rec)
 		return NULL;
 
-	sg_init_table(&rec->sg_plaintext_data[0],
-		      ARRAY_SIZE(rec->sg_plaintext_data));
-	sg_init_table(&rec->sg_encrypted_data[0],
-		      ARRAY_SIZE(rec->sg_encrypted_data));
+	msg_pl = &rec->msg_plaintext;
+	msg_en = &rec->msg_encrypted;
+
+	sk_msg_init(msg_pl);
+	sk_msg_init(msg_en);
 
-	sg_set_buf(&rec->sg_plaintext_data[0], rec->aad_space,
+	sg_init_table(rec->sg_aead_in, 2);
+	sg_set_buf(&rec->sg_aead_in[0], rec->aad_space,
 		   sizeof(rec->aad_space));
-	sg_set_buf(&rec->sg_encrypted_data[0], rec->aad_space,
+	sg_unmark_end(&rec->sg_aead_in[1]);
+
+	sg_init_table(rec->sg_aead_out, 2);
+	sg_set_buf(&rec->sg_aead_out[0], rec->aad_space,
 		   sizeof(rec->aad_space));
+	sg_unmark_end(&rec->sg_aead_out[1]);
 
 	ctx->open_rec = rec;
 	rec->inplace_crypto = 1;
@@ -735,6 +551,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 	bool is_kvec = msg->msg_iter.type & ITER_KVEC;
 	bool eor = !(msg->msg_flags & MSG_MORE);
 	size_t try_to_copy, copied = 0;
+	struct sk_msg *msg_pl, *msg_en;
 	struct tls_rec *rec;
 	int required_size;
 	int num_async = 0;
@@ -778,23 +595,26 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 			goto send_end;
 		}
 
-		orig_size = rec->sg_plaintext_size;
+		msg_pl = &rec->msg_plaintext;
+		msg_en = &rec->msg_encrypted;
+
+		orig_size = msg_pl->sg.size;
 		full_record = false;
 		try_to_copy = msg_data_left(msg);
-		record_room = TLS_MAX_PAYLOAD_SIZE - rec->sg_plaintext_size;
+		record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size;
 		if (try_to_copy >= record_room) {
 			try_to_copy = record_room;
 			full_record = true;
 		}
 
-		required_size = rec->sg_plaintext_size + try_to_copy +
+		required_size = msg_pl->sg.size + try_to_copy +
 				tls_ctx->tx.overhead_size;
 
 		if (!sk_stream_memory_free(sk))
 			goto wait_for_sndbuf;
 
 alloc_encrypted:
-		ret = alloc_encrypted_sg(sk, required_size);
+		ret = tls_alloc_encrypted_msg(sk, required_size);
 		if (ret) {
 			if (ret != -ENOSPC)
 				goto wait_for_memory;
@@ -803,17 +623,13 @@ alloc_encrypted:
 			 * actually allocated. The difference is due
 			 * to max sg elements limit
 			 */
-			try_to_copy -= required_size - rec->sg_encrypted_size;
+			try_to_copy -= required_size - msg_en->sg.size;
 			full_record = true;
 		}
 
 		if (!is_kvec && (full_record || eor) && !async_capable) {
-			ret = zerocopy_from_iter(sk, &msg->msg_iter,
-				try_to_copy, &rec->sg_plaintext_num_elem,
-				&rec->sg_plaintext_size,
-				&rec->sg_plaintext_data[1],
-				ARRAY_SIZE(rec->sg_plaintext_data) - 1,
-				true);
+			ret = sk_msg_zerocopy_from_iter(sk, &msg->msg_iter,
+							msg_pl, try_to_copy);
 			if (ret)
 				goto fallback_to_reg_send;
 
@@ -831,15 +647,12 @@ alloc_encrypted:
 			continue;
 
 fallback_to_reg_send:
-			trim_sg(sk, &rec->sg_plaintext_data[1],
-				&rec->sg_plaintext_num_elem,
-				&rec->sg_plaintext_size,
-				orig_size);
+			sk_msg_trim(sk, msg_pl, orig_size);
 		}
 
-		required_size = rec->sg_plaintext_size + try_to_copy;
+		required_size = msg_pl->sg.size + try_to_copy;
 
-		ret = move_to_plaintext_sg(sk, required_size);
+		ret = tls_clone_plaintext_msg(sk, required_size);
 		if (ret) {
 			if (ret != -ENOSPC)
 				goto send_end;
@@ -848,20 +661,21 @@ fallback_to_reg_send:
 			 * actually allocated. The difference is due
 			 * to max sg elements limit
 			 */
-			try_to_copy -= required_size - rec->sg_plaintext_size;
+			try_to_copy -= required_size - msg_pl->sg.size;
 			full_record = true;
-
-			trim_sg(sk, &rec->sg_encrypted_data[1],
-				&rec->sg_encrypted_num_elem,
-				&rec->sg_encrypted_size,
-				rec->sg_plaintext_size +
-				tls_ctx->tx.overhead_size);
+			sk_msg_trim(sk, msg_en, msg_pl->sg.size +
+				    tls_ctx->tx.overhead_size);
 		}
 
-		ret = memcopy_from_iter(sk, &msg->msg_iter, try_to_copy);
-		if (ret)
+		ret = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, msg_pl,
+					       try_to_copy);
+		if (ret < 0)
 			goto trim_sgl;
 
+		/* Open records defined only if successfully copied, otherwise
+		 * we would trim the sg but not reset the open record frags.
+		 */
+		tls_ctx->pending_open_record_frags = true;
 		copied += try_to_copy;
 		if (full_record || eor) {
 			ret = tls_push_record(sk, msg->msg_flags, record_type);
@@ -881,11 +695,11 @@ wait_for_memory:
 		ret = sk_stream_wait_memory(sk, &timeo);
 		if (ret) {
 trim_sgl:
-			trim_both_sgl(sk, orig_size);
+			tls_trim_both_msgs(sk, orig_size);
 			goto send_end;
 		}
 
-		if (rec->sg_encrypted_size < required_size)
+		if (msg_en->sg.size < required_size)
 			goto alloc_encrypted;
 	}
 
@@ -929,7 +743,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	unsigned char record_type = TLS_RECORD_TYPE_DATA;
 	size_t orig_size = size;
-	struct scatterlist *sg;
+	struct sk_msg *msg_pl;
 	struct tls_rec *rec;
 	int num_async = 0;
 	bool full_record;
@@ -970,20 +784,23 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 			goto sendpage_end;
 		}
 
+		msg_pl = &rec->msg_plaintext;
+
 		full_record = false;
-		record_room = TLS_MAX_PAYLOAD_SIZE - rec->sg_plaintext_size;
+		record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size;
 		copy = size;
 		if (copy >= record_room) {
 			copy = record_room;
 			full_record = true;
 		}
-		required_size = rec->sg_plaintext_size + copy +
-			      tls_ctx->tx.overhead_size;
+
+		required_size = msg_pl->sg.size + copy +
+				tls_ctx->tx.overhead_size;
 
 		if (!sk_stream_memory_free(sk))
 			goto wait_for_sndbuf;
 alloc_payload:
-		ret = alloc_encrypted_sg(sk, required_size);
+		ret = tls_alloc_encrypted_msg(sk, required_size);
 		if (ret) {
 			if (ret != -ENOSPC)
 				goto wait_for_memory;
@@ -992,26 +809,18 @@ alloc_payload:
 			 * actually allocated. The difference is due
 			 * to max sg elements limit
 			 */
-			copy -= required_size - rec->sg_plaintext_size;
+			copy -= required_size - msg_pl->sg.size;
 			full_record = true;
 		}
 
-		get_page(page);
-		sg = &rec->sg_plaintext_data[1] + rec->sg_plaintext_num_elem;
-		sg_set_page(sg, page, copy, offset);
-		sg_unmark_end(sg);
-
-		rec->sg_plaintext_num_elem++;
-
+		sk_msg_page_add(msg_pl, page, copy, offset);
 		sk_mem_charge(sk, copy);
+
 		offset += copy;
 		size -= copy;
-		rec->sg_plaintext_size += copy;
-		tls_ctx->pending_open_record_frags = rec->sg_plaintext_num_elem;
 
-		if (full_record || eor ||
-		    rec->sg_plaintext_num_elem ==
-		    ARRAY_SIZE(rec->sg_plaintext_data) - 1) {
+		tls_ctx->pending_open_record_frags = true;
+		if (full_record || eor || sk_msg_full(msg_pl)) {
 			rec->inplace_crypto = 0;
 			ret = tls_push_record(sk, flags, record_type);
 			if (ret) {
@@ -1027,7 +836,7 @@ wait_for_sndbuf:
 wait_for_memory:
 		ret = sk_stream_wait_memory(sk, &timeo);
 		if (ret) {
-			trim_both_sgl(sk, rec->sg_plaintext_size);
+			tls_trim_both_msgs(sk, msg_pl->sg.size);
 			goto sendpage_end;
 		}
 
@@ -1092,6 +901,64 @@ static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
 	return skb;
 }
 
+static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from,
+			       int length, int *pages_used,
+			       unsigned int *size_used,
+			       struct scatterlist *to,
+			       int to_max_pages)
+{
+	int rc = 0, i = 0, num_elem = *pages_used, maxpages;
+	struct page *pages[MAX_SKB_FRAGS];
+	unsigned int size = *size_used;
+	ssize_t copied, use;
+	size_t offset;
+
+	while (length > 0) {
+		i = 0;
+		maxpages = to_max_pages - num_elem;
+		if (maxpages == 0) {
+			rc = -EFAULT;
+			goto out;
+		}
+		copied = iov_iter_get_pages(from, pages,
+					    length,
+					    maxpages, &offset);
+		if (copied <= 0) {
+			rc = -EFAULT;
+			goto out;
+		}
+
+		iov_iter_advance(from, copied);
+
+		length -= copied;
+		size += copied;
+		while (copied) {
+			use = min_t(int, copied, PAGE_SIZE - offset);
+
+			sg_set_page(&to[num_elem],
+				    pages[i], use, offset);
+			sg_unmark_end(&to[num_elem]);
+			/* We do not uncharge memory from this API */
+
+			offset = 0;
+			copied -= use;
+
+			i++;
+			num_elem++;
+		}
+	}
+	/* Mark the end in the last sg entry if newly added */
+	if (num_elem > *pages_used)
+		sg_mark_end(&to[num_elem - 1]);
+out:
+	if (rc)
+		iov_iter_revert(from, size - *size_used);
+	*size_used = size;
+	*pages_used = num_elem;
+
+	return rc;
+}
+
 /* This function decrypts the input skb into either out_iov or in out_sg
  * or in skb buffers itself. The input parameter 'zc' indicates if
  * zero-copy mode needs to be tried or not. With zero-copy mode, either
@@ -1189,9 +1056,9 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
 			sg_set_buf(&sgout[0], aad, TLS_AAD_SPACE_SIZE);
 
 			*chunk = 0;
-			err = zerocopy_from_iter(sk, out_iov, data_len, &pages,
-						 chunk, &sgout[1],
-						 (n_sgout - 1), false);
+			err = tls_setup_from_iter(sk, out_iov, data_len,
+						  &pages, chunk, &sgout[1],
+						  (n_sgout - 1));
 			if (err < 0)
 				goto fallback_to_reg_recv;
 		} else if (out_sg) {
@@ -1619,25 +1486,15 @@ void tls_sw_free_resources_tx(struct sock *sk)
 
 		rec = list_first_entry(&ctx->tx_list,
 				       struct tls_rec, list);
-
-		free_sg(sk, &rec->sg_plaintext_data[1],
-			&rec->sg_plaintext_num_elem,
-			&rec->sg_plaintext_size);
-
 		list_del(&rec->list);
+		sk_msg_free(sk, &rec->msg_plaintext);
 		kfree(rec);
 	}
 
 	list_for_each_entry_safe(rec, tmp, &ctx->tx_list, list) {
-		free_sg(sk, &rec->sg_encrypted_data[1],
-			&rec->sg_encrypted_num_elem,
-			&rec->sg_encrypted_size);
-
-		free_sg(sk, &rec->sg_plaintext_data[1],
-			&rec->sg_plaintext_num_elem,
-			&rec->sg_plaintext_size);
-
 		list_del(&rec->list);
+		sk_msg_free(sk, &rec->msg_encrypted);
+		sk_msg_free(sk, &rec->msg_plaintext);
 		kfree(rec);
 	}
 
-- 
cgit v1.2.3


From d3b18ad31f93d0b6bae105c679018a1ba7daa9ca Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sat, 13 Oct 2018 02:46:01 +0200
Subject: tls: add bpf support to sk_msg handling

This work adds BPF sk_msg verdict program support to kTLS
allowing BPF and kTLS to be combined together. Previously kTLS
and sk_msg verdict programs were mutually exclusive in the
ULP layer which created challenges for the orchestrator when
trying to apply TCP based policy, for example. To resolve this,
leveraging the work from previous patches that consolidates
the use of sk_msg, we can finally enable BPF sk_msg verdict
programs so they continue to run after the kTLS socket is
created. No change in behavior when kTLS is not used in
combination with BPF, the kselftest suite for kTLS also runs
successfully.

Joint work with Daniel.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/skmsg.h |  41 ++++-
 net/tls/tls_sw.c      | 439 ++++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 414 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 4e84b3c2eff8..0b919f0bc6d6 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -29,7 +29,11 @@ struct sk_msg_sg {
 	u32				size;
 	u32				copybreak;
 	bool				copy[MAX_MSG_FRAGS];
-	struct scatterlist		data[MAX_MSG_FRAGS];
+	/* The extra element is used for chaining the front and sections when
+	 * the list becomes partitioned (e.g. end < start). The crypto APIs
+	 * require the chaining.
+	 */
+	struct scatterlist		data[MAX_MSG_FRAGS + 1];
 };
 
 struct sk_msg {
@@ -112,6 +116,7 @@ void sk_msg_free_partial_nocharge(struct sock *sk, struct sk_msg *msg,
 				  u32 bytes);
 
 void sk_msg_return(struct sock *sk, struct sk_msg *msg, int bytes);
+void sk_msg_return_zero(struct sock *sk, struct sk_msg *msg, int bytes);
 
 int sk_msg_zerocopy_from_iter(struct sock *sk, struct iov_iter *from,
 			      struct sk_msg *msg, u32 bytes);
@@ -161,8 +166,9 @@ static inline void sk_msg_clear_meta(struct sk_msg *msg)
 
 static inline void sk_msg_init(struct sk_msg *msg)
 {
+	BUILD_BUG_ON(ARRAY_SIZE(msg->sg.data) - 1 != MAX_MSG_FRAGS);
 	memset(msg, 0, sizeof(*msg));
-	sg_init_marker(msg->sg.data, ARRAY_SIZE(msg->sg.data));
+	sg_init_marker(msg->sg.data, MAX_MSG_FRAGS);
 }
 
 static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src,
@@ -174,6 +180,12 @@ static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src,
 	src->sg.data[which].offset += size;
 }
 
+static inline void sk_msg_xfer_full(struct sk_msg *dst, struct sk_msg *src)
+{
+	memcpy(dst, src, sizeof(*src));
+	sk_msg_init(src);
+}
+
 static inline u32 sk_msg_elem_used(const struct sk_msg *msg)
 {
 	return msg->sg.end >= msg->sg.start ?
@@ -229,6 +241,26 @@ static inline void sk_msg_page_add(struct sk_msg *msg, struct page *page,
 	sk_msg_iter_next(msg, end);
 }
 
+static inline void sk_msg_sg_copy(struct sk_msg *msg, u32 i, bool copy_state)
+{
+	do {
+		msg->sg.copy[i] = copy_state;
+		sk_msg_iter_var_next(i);
+		if (i == msg->sg.end)
+			break;
+	} while (1);
+}
+
+static inline void sk_msg_sg_copy_set(struct sk_msg *msg, u32 start)
+{
+	sk_msg_sg_copy(msg, start, true);
+}
+
+static inline void sk_msg_sg_copy_clear(struct sk_msg *msg, u32 start)
+{
+	sk_msg_sg_copy(msg, start, false);
+}
+
 static inline struct sk_psock *sk_psock(const struct sock *sk)
 {
 	return rcu_dereference_sk_user_data(sk);
@@ -245,6 +277,11 @@ static inline void sk_psock_queue_msg(struct sk_psock *psock,
 	list_add_tail(&msg->list, &psock->ingress_msg);
 }
 
+static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
+{
+	return psock ? list_empty(&psock->ingress_msg) : true;
+}
+
 static inline void sk_psock_report_error(struct sk_psock *psock, int err)
 {
 	struct sock *sk = psock->sk;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 3b75e0dd51a2..a525fc4c2a4b 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -4,6 +4,7 @@
  * Copyright (c) 2016-2017, Lance Chao <lancerchao@fb.com>. All rights reserved.
  * Copyright (c) 2016, Fridolin Pokorny <fridolin.pokorny@gmail.com>. All rights reserved.
  * Copyright (c) 2016, Nikos Mavrogiannopoulos <nmav@gnutls.org>. All rights reserved.
+ * Copyright (c) 2018, Covalent IO, Inc. http://covalent.io
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -258,21 +259,58 @@ static int tls_clone_plaintext_msg(struct sock *sk, int required)
 	return sk_msg_clone(sk, msg_pl, msg_en, skip, len);
 }
 
-static void tls_free_open_rec(struct sock *sk)
+static struct tls_rec *tls_get_rec(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
-	struct tls_rec *rec = ctx->open_rec;
+	struct sk_msg *msg_pl, *msg_en;
+	struct tls_rec *rec;
+	int mem_size;
 
-	/* Return if there is no open record */
+	mem_size = sizeof(struct tls_rec) + crypto_aead_reqsize(ctx->aead_send);
+
+	rec = kzalloc(mem_size, sk->sk_allocation);
 	if (!rec)
-		return;
+		return NULL;
 
+	msg_pl = &rec->msg_plaintext;
+	msg_en = &rec->msg_encrypted;
+
+	sk_msg_init(msg_pl);
+	sk_msg_init(msg_en);
+
+	sg_init_table(rec->sg_aead_in, 2);
+	sg_set_buf(&rec->sg_aead_in[0], rec->aad_space,
+		   sizeof(rec->aad_space));
+	sg_unmark_end(&rec->sg_aead_in[1]);
+
+	sg_init_table(rec->sg_aead_out, 2);
+	sg_set_buf(&rec->sg_aead_out[0], rec->aad_space,
+		   sizeof(rec->aad_space));
+	sg_unmark_end(&rec->sg_aead_out[1]);
+
+	return rec;
+}
+
+static void tls_free_rec(struct sock *sk, struct tls_rec *rec)
+{
 	sk_msg_free(sk, &rec->msg_encrypted);
 	sk_msg_free(sk, &rec->msg_plaintext);
 	kfree(rec);
 }
 
+static void tls_free_open_rec(struct sock *sk)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct tls_rec *rec = ctx->open_rec;
+
+	if (rec) {
+		tls_free_rec(sk, rec);
+		ctx->open_rec = NULL;
+	}
+}
+
 int tls_tx_records(struct sock *sk, int flags)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
@@ -439,16 +477,135 @@ static int tls_do_encryption(struct sock *sk,
 	return rc;
 }
 
+static int tls_split_open_record(struct sock *sk, struct tls_rec *from,
+				 struct tls_rec **to, struct sk_msg *msg_opl,
+				 struct sk_msg *msg_oen, u32 split_point,
+				 u32 tx_overhead_size, u32 *orig_end)
+{
+	u32 i, j, bytes = 0, apply = msg_opl->apply_bytes;
+	struct scatterlist *sge, *osge, *nsge;
+	u32 orig_size = msg_opl->sg.size;
+	struct scatterlist tmp = { };
+	struct sk_msg *msg_npl;
+	struct tls_rec *new;
+	int ret;
+
+	new = tls_get_rec(sk);
+	if (!new)
+		return -ENOMEM;
+	ret = sk_msg_alloc(sk, &new->msg_encrypted, msg_opl->sg.size +
+			   tx_overhead_size, 0);
+	if (ret < 0) {
+		tls_free_rec(sk, new);
+		return ret;
+	}
+
+	*orig_end = msg_opl->sg.end;
+	i = msg_opl->sg.start;
+	sge = sk_msg_elem(msg_opl, i);
+	while (apply && sge->length) {
+		if (sge->length > apply) {
+			u32 len = sge->length - apply;
+
+			get_page(sg_page(sge));
+			sg_set_page(&tmp, sg_page(sge), len,
+				    sge->offset + apply);
+			sge->length = apply;
+			bytes += apply;
+			apply = 0;
+		} else {
+			apply -= sge->length;
+			bytes += sge->length;
+		}
+
+		sk_msg_iter_var_next(i);
+		if (i == msg_opl->sg.end)
+			break;
+		sge = sk_msg_elem(msg_opl, i);
+	}
+
+	msg_opl->sg.end = i;
+	msg_opl->sg.curr = i;
+	msg_opl->sg.copybreak = 0;
+	msg_opl->apply_bytes = 0;
+	msg_opl->sg.size = bytes;
+
+	msg_npl = &new->msg_plaintext;
+	msg_npl->apply_bytes = apply;
+	msg_npl->sg.size = orig_size - bytes;
+
+	j = msg_npl->sg.start;
+	nsge = sk_msg_elem(msg_npl, j);
+	if (tmp.length) {
+		memcpy(nsge, &tmp, sizeof(*nsge));
+		sk_msg_iter_var_next(j);
+		nsge = sk_msg_elem(msg_npl, j);
+	}
+
+	osge = sk_msg_elem(msg_opl, i);
+	while (osge->length) {
+		memcpy(nsge, osge, sizeof(*nsge));
+		sg_unmark_end(nsge);
+		sk_msg_iter_var_next(i);
+		sk_msg_iter_var_next(j);
+		if (i == *orig_end)
+			break;
+		osge = sk_msg_elem(msg_opl, i);
+		nsge = sk_msg_elem(msg_npl, j);
+	}
+
+	msg_npl->sg.end = j;
+	msg_npl->sg.curr = j;
+	msg_npl->sg.copybreak = 0;
+
+	*to = new;
+	return 0;
+}
+
+static void tls_merge_open_record(struct sock *sk, struct tls_rec *to,
+				  struct tls_rec *from, u32 orig_end)
+{
+	struct sk_msg *msg_npl = &from->msg_plaintext;
+	struct sk_msg *msg_opl = &to->msg_plaintext;
+	struct scatterlist *osge, *nsge;
+	u32 i, j;
+
+	i = msg_opl->sg.end;
+	sk_msg_iter_var_prev(i);
+	j = msg_npl->sg.start;
+
+	osge = sk_msg_elem(msg_opl, i);
+	nsge = sk_msg_elem(msg_npl, j);
+
+	if (sg_page(osge) == sg_page(nsge) &&
+	    osge->offset + osge->length == nsge->offset) {
+		osge->length += nsge->length;
+		put_page(sg_page(nsge));
+	}
+
+	msg_opl->sg.end = orig_end;
+	msg_opl->sg.curr = orig_end;
+	msg_opl->sg.copybreak = 0;
+	msg_opl->apply_bytes = msg_opl->sg.size + msg_npl->sg.size;
+	msg_opl->sg.size += msg_npl->sg.size;
+
+	sk_msg_free(sk, &to->msg_encrypted);
+	sk_msg_xfer_full(&to->msg_encrypted, &from->msg_encrypted);
+
+	kfree(from);
+}
+
 static int tls_push_record(struct sock *sk, int flags,
 			   unsigned char record_type)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
-	struct tls_rec *rec = ctx->open_rec;
+	struct tls_rec *rec = ctx->open_rec, *tmp = NULL;
+	u32 i, split_point, uninitialized_var(orig_end);
 	struct sk_msg *msg_pl, *msg_en;
 	struct aead_request *req;
+	bool split;
 	int rc;
-	u32 i;
 
 	if (!rec)
 		return 0;
@@ -456,6 +613,18 @@ static int tls_push_record(struct sock *sk, int flags,
 	msg_pl = &rec->msg_plaintext;
 	msg_en = &rec->msg_encrypted;
 
+	split_point = msg_pl->apply_bytes;
+	split = split_point && split_point < msg_pl->sg.size;
+	if (split) {
+		rc = tls_split_open_record(sk, rec, &tmp, msg_pl, msg_en,
+					   split_point, tls_ctx->tx.overhead_size,
+					   &orig_end);
+		if (rc < 0)
+			return rc;
+		sk_msg_trim(sk, msg_en, msg_pl->sg.size +
+			    tls_ctx->tx.overhead_size);
+	}
+
 	rec->tx_flags = flags;
 	req = &rec->aead_req;
 
@@ -487,57 +656,139 @@ static int tls_push_record(struct sock *sk, int flags,
 
 	rc = tls_do_encryption(sk, tls_ctx, ctx, req, msg_pl->sg.size, i);
 	if (rc < 0) {
-		if (rc != -EINPROGRESS)
+		if (rc != -EINPROGRESS) {
 			tls_err_abort(sk, EBADMSG);
+			if (split) {
+				tls_ctx->pending_open_record_frags = true;
+				tls_merge_open_record(sk, rec, tmp, orig_end);
+			}
+		}
 		return rc;
+	} else if (split) {
+		msg_pl = &tmp->msg_plaintext;
+		msg_en = &tmp->msg_encrypted;
+		sk_msg_trim(sk, msg_en, msg_pl->sg.size +
+			    tls_ctx->tx.overhead_size);
+		tls_ctx->pending_open_record_frags = true;
+		ctx->open_rec = tmp;
 	}
 
 	return tls_tx_records(sk, flags);
 }
 
-static int tls_sw_push_pending_record(struct sock *sk, int flags)
-{
-	return tls_push_record(sk, flags, TLS_RECORD_TYPE_DATA);
-}
-
-static struct tls_rec *get_rec(struct sock *sk)
+static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
+			       bool full_record, u8 record_type,
+			       size_t *copied, int flags)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
-	struct sk_msg *msg_pl, *msg_en;
+	struct sk_msg msg_redir = { };
+	struct sk_psock *psock;
+	struct sock *sk_redir;
 	struct tls_rec *rec;
-	int mem_size;
+	int err = 0, send;
+	bool enospc;
+
+	psock = sk_psock_get(sk);
+	if (!psock)
+		return tls_push_record(sk, flags, record_type);
+more_data:
+	enospc = sk_msg_full(msg);
+	if (psock->eval == __SK_NONE)
+		psock->eval = sk_psock_msg_verdict(sk, psock, msg);
+	if (msg->cork_bytes && msg->cork_bytes > msg->sg.size &&
+	    !enospc && !full_record) {
+		err = -ENOSPC;
+		goto out_err;
+	}
+	msg->cork_bytes = 0;
+	send = msg->sg.size;
+	if (msg->apply_bytes && msg->apply_bytes < send)
+		send = msg->apply_bytes;
+
+	switch (psock->eval) {
+	case __SK_PASS:
+		err = tls_push_record(sk, flags, record_type);
+		if (err < 0) {
+			*copied -= sk_msg_free(sk, msg);
+			tls_free_open_rec(sk);
+			goto out_err;
+		}
+		break;
+	case __SK_REDIRECT:
+		sk_redir = psock->sk_redir;
+		memcpy(&msg_redir, msg, sizeof(*msg));
+		if (msg->apply_bytes < send)
+			msg->apply_bytes = 0;
+		else
+			msg->apply_bytes -= send;
+		sk_msg_return_zero(sk, msg, send);
+		msg->sg.size -= send;
+		release_sock(sk);
+		err = tcp_bpf_sendmsg_redir(sk_redir, &msg_redir, send, flags);
+		lock_sock(sk);
+		if (err < 0) {
+			*copied -= sk_msg_free_nocharge(sk, &msg_redir);
+			msg->sg.size = 0;
+		}
+		if (msg->sg.size == 0)
+			tls_free_open_rec(sk);
+		break;
+	case __SK_DROP:
+	default:
+		sk_msg_free_partial(sk, msg, send);
+		if (msg->apply_bytes < send)
+			msg->apply_bytes = 0;
+		else
+			msg->apply_bytes -= send;
+		if (msg->sg.size == 0)
+			tls_free_open_rec(sk);
+		*copied -= send;
+		err = -EACCES;
+	}
 
-	/* Return if we already have an open record */
-	if (ctx->open_rec)
-		return ctx->open_rec;
+	if (likely(!err)) {
+		bool reset_eval = !ctx->open_rec;
 
-	mem_size = sizeof(struct tls_rec) + crypto_aead_reqsize(ctx->aead_send);
+		rec = ctx->open_rec;
+		if (rec) {
+			msg = &rec->msg_plaintext;
+			if (!msg->apply_bytes)
+				reset_eval = true;
+		}
+		if (reset_eval) {
+			psock->eval = __SK_NONE;
+			if (psock->sk_redir) {
+				sock_put(psock->sk_redir);
+				psock->sk_redir = NULL;
+			}
+		}
+		if (rec)
+			goto more_data;
+	}
+ out_err:
+	sk_psock_put(sk, psock);
+	return err;
+}
+
+static int tls_sw_push_pending_record(struct sock *sk, int flags)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+	struct tls_rec *rec = ctx->open_rec;
+	struct sk_msg *msg_pl;
+	size_t copied;
 
-	rec = kzalloc(mem_size, sk->sk_allocation);
 	if (!rec)
-		return NULL;
+		return 0;
 
 	msg_pl = &rec->msg_plaintext;
-	msg_en = &rec->msg_encrypted;
-
-	sk_msg_init(msg_pl);
-	sk_msg_init(msg_en);
-
-	sg_init_table(rec->sg_aead_in, 2);
-	sg_set_buf(&rec->sg_aead_in[0], rec->aad_space,
-		   sizeof(rec->aad_space));
-	sg_unmark_end(&rec->sg_aead_in[1]);
-
-	sg_init_table(rec->sg_aead_out, 2);
-	sg_set_buf(&rec->sg_aead_out[0], rec->aad_space,
-		   sizeof(rec->aad_space));
-	sg_unmark_end(&rec->sg_aead_out[1]);
-
-	ctx->open_rec = rec;
-	rec->inplace_crypto = 1;
+	copied = msg_pl->sg.size;
+	if (!copied)
+		return 0;
 
-	return rec;
+	return bpf_exec_tx_verdict(msg_pl, sk, true, TLS_RECORD_TYPE_DATA,
+				   &copied, flags);
 }
 
 int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
@@ -589,7 +840,10 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 			goto send_end;
 		}
 
-		rec = get_rec(sk);
+		if (ctx->open_rec)
+			rec = ctx->open_rec;
+		else
+			rec = ctx->open_rec = tls_get_rec(sk);
 		if (!rec) {
 			ret = -ENOMEM;
 			goto send_end;
@@ -628,6 +882,8 @@ alloc_encrypted:
 		}
 
 		if (!is_kvec && (full_record || eor) && !async_capable) {
+			u32 first = msg_pl->sg.end;
+
 			ret = sk_msg_zerocopy_from_iter(sk, &msg->msg_iter,
 							msg_pl, try_to_copy);
 			if (ret)
@@ -637,15 +893,27 @@ alloc_encrypted:
 
 			num_zc++;
 			copied += try_to_copy;
-			ret = tls_push_record(sk, msg->msg_flags, record_type);
+
+			sk_msg_sg_copy_set(msg_pl, first);
+			ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
+						  record_type, &copied,
+						  msg->msg_flags);
 			if (ret) {
 				if (ret == -EINPROGRESS)
 					num_async++;
+				else if (ret == -ENOMEM)
+					goto wait_for_memory;
+				else if (ret == -ENOSPC)
+					goto rollback_iter;
 				else if (ret != -EAGAIN)
 					goto send_end;
 			}
 			continue;
-
+rollback_iter:
+			copied -= try_to_copy;
+			sk_msg_sg_copy_clear(msg_pl, first);
+			iov_iter_revert(&msg->msg_iter,
+					msg_pl->sg.size - orig_size);
 fallback_to_reg_send:
 			sk_msg_trim(sk, msg_pl, orig_size);
 		}
@@ -678,12 +946,19 @@ fallback_to_reg_send:
 		tls_ctx->pending_open_record_frags = true;
 		copied += try_to_copy;
 		if (full_record || eor) {
-			ret = tls_push_record(sk, msg->msg_flags, record_type);
+			ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
+						  record_type, &copied,
+						  msg->msg_flags);
 			if (ret) {
 				if (ret == -EINPROGRESS)
 					num_async++;
-				else if (ret != -EAGAIN)
+				else if (ret == -ENOMEM)
+					goto wait_for_memory;
+				else if (ret != -EAGAIN) {
+					if (ret == -ENOSPC)
+						ret = 0;
 					goto send_end;
+				}
 			}
 		}
 
@@ -742,10 +1017,10 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	unsigned char record_type = TLS_RECORD_TYPE_DATA;
-	size_t orig_size = size;
 	struct sk_msg *msg_pl;
 	struct tls_rec *rec;
 	int num_async = 0;
+	size_t copied = 0;
 	bool full_record;
 	int record_room;
 	int ret = 0;
@@ -778,7 +1053,10 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 			goto sendpage_end;
 		}
 
-		rec = get_rec(sk);
+		if (ctx->open_rec)
+			rec = ctx->open_rec;
+		else
+			rec = ctx->open_rec = tls_get_rec(sk);
 		if (!rec) {
 			ret = -ENOMEM;
 			goto sendpage_end;
@@ -788,6 +1066,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 
 		full_record = false;
 		record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size;
+		copied = 0;
 		copy = size;
 		if (copy >= record_room) {
 			copy = record_room;
@@ -818,16 +1097,23 @@ alloc_payload:
 
 		offset += copy;
 		size -= copy;
+		copied += copy;
 
 		tls_ctx->pending_open_record_frags = true;
 		if (full_record || eor || sk_msg_full(msg_pl)) {
 			rec->inplace_crypto = 0;
-			ret = tls_push_record(sk, flags, record_type);
+			ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
+						  record_type, &copied, flags);
 			if (ret) {
 				if (ret == -EINPROGRESS)
 					num_async++;
-				else if (ret != -EAGAIN)
+				else if (ret == -ENOMEM)
+					goto wait_for_memory;
+				else if (ret != -EAGAIN) {
+					if (ret == -ENOSPC)
+						ret = 0;
 					goto sendpage_end;
+				}
 			}
 		}
 		continue;
@@ -851,24 +1137,20 @@ wait_for_memory:
 		}
 	}
 sendpage_end:
-	if (orig_size > size)
-		ret = orig_size - size;
-	else
-		ret = sk_stream_error(sk, flags, ret);
-
+	ret = sk_stream_error(sk, flags, ret);
 	release_sock(sk);
-	return ret;
+	return copied ? copied : ret;
 }
 
-static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
-				     long timeo, int *err)
+static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
+				     int flags, long timeo, int *err)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 	struct sk_buff *skb;
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
-	while (!(skb = ctx->recv_pkt)) {
+	while (!(skb = ctx->recv_pkt) && sk_psock_queue_empty(psock)) {
 		if (sk->sk_err) {
 			*err = sock_error(sk);
 			return NULL;
@@ -887,7 +1169,10 @@ static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
 
 		add_wait_queue(sk_sleep(sk), &wait);
 		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
-		sk_wait_event(sk, &timeo, ctx->recv_pkt != skb, &wait);
+		sk_wait_event(sk, &timeo,
+			      ctx->recv_pkt != skb ||
+			      !sk_psock_queue_empty(psock),
+			      &wait);
 		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 		remove_wait_queue(sk_sleep(sk), &wait);
 
@@ -1164,6 +1449,7 @@ int tls_sw_recvmsg(struct sock *sk,
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	struct sk_psock *psock;
 	unsigned char control;
 	struct strp_msg *rxm;
 	struct sk_buff *skb;
@@ -1179,6 +1465,7 @@ int tls_sw_recvmsg(struct sock *sk,
 	if (unlikely(flags & MSG_ERRQUEUE))
 		return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
 
+	psock = sk_psock_get(sk);
 	lock_sock(sk);
 
 	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
@@ -1188,9 +1475,19 @@ int tls_sw_recvmsg(struct sock *sk,
 		bool async = false;
 		int chunk = 0;
 
-		skb = tls_wait_data(sk, flags, timeo, &err);
-		if (!skb)
+		skb = tls_wait_data(sk, psock, flags, timeo, &err);
+		if (!skb) {
+			if (psock) {
+				int ret = __tcp_bpf_recvmsg(sk, psock, msg, len);
+
+				if (ret > 0) {
+					copied += ret;
+					len -= ret;
+					continue;
+				}
+			}
 			goto recv_end;
+		}
 
 		rxm = strp_msg(skb);
 
@@ -1296,6 +1593,8 @@ recv_end:
 	}
 
 	release_sock(sk);
+	if (psock)
+		sk_psock_put(sk, psock);
 	return copied ? : err;
 }
 
@@ -1318,7 +1617,7 @@ ssize_t tls_sw_splice_read(struct socket *sock,  loff_t *ppos,
 
 	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
-	skb = tls_wait_data(sk, flags, timeo, &err);
+	skb = tls_wait_data(sk, NULL, flags, timeo, &err);
 	if (!skb)
 		goto splice_read_end;
 
@@ -1356,11 +1655,16 @@ bool tls_sw_stream_read(const struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	bool ingress_empty = true;
+	struct sk_psock *psock;
 
-	if (ctx->recv_pkt)
-		return true;
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (psock)
+		ingress_empty = list_empty(&psock->ingress_msg);
+	rcu_read_unlock();
 
-	return false;
+	return !ingress_empty || ctx->recv_pkt;
 }
 
 static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
@@ -1439,8 +1743,15 @@ static void tls_data_ready(struct sock *sk)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+	struct sk_psock *psock;
 
 	strp_data_ready(&ctx->strp);
+
+	psock = sk_psock_get(sk);
+	if (psock && !list_empty(&psock->ingress_msg)) {
+		ctx->saved_data_ready(sk);
+		sk_psock_put(sk, psock);
+	}
 }
 
 void tls_sw_free_resources_tx(struct sock *sk)
-- 
cgit v1.2.3


From 9316a9ed6895c4ad2f0cde171d486f80c55d8283 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 15 Oct 2018 08:40:37 -0600
Subject: blk-mq: provide helper for setting up an SQ queue and tag set

This pattern is repeated throughout all the blk-mq conversions.
Provide a basic helper to get it done.

Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 33 +++++++++++++++++++++++++++++++++
 include/linux/blk-mq.h |  4 ++++
 2 files changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index c2ecd64a2403..dcf10e39995a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2507,6 +2507,39 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 }
 EXPORT_SYMBOL(blk_mq_init_queue);
 
+/*
+ * Helper for setting up a queue with mq ops, given queue depth, and
+ * the passed in mq ops flags.
+ */
+struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
+					   const struct blk_mq_ops *ops,
+					   unsigned int queue_depth,
+					   unsigned int set_flags)
+{
+	struct request_queue *q;
+	int ret;
+
+	memset(set, 0, sizeof(*set));
+	set->ops = ops;
+	set->nr_hw_queues = 1;
+	set->queue_depth = queue_depth;
+	set->numa_node = NUMA_NO_NODE;
+	set->flags = set_flags;
+
+	ret = blk_mq_alloc_tag_set(set);
+	if (ret)
+		return ERR_PTR(ret);
+
+	q = blk_mq_init_queue(set);
+	if (IS_ERR(q)) {
+		blk_mq_free_tag_set(set);
+		return q;
+	}
+
+	return q;
+}
+EXPORT_SYMBOL(blk_mq_init_sq_queue);
+
 static int blk_mq_hw_ctx_size(struct blk_mq_tag_set *tag_set)
 {
 	int hw_ctx_size = sizeof(struct blk_mq_hw_ctx);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 1da59c16f637..2286dc12c6bc 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -203,6 +203,10 @@ enum {
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
 struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 						  struct request_queue *q);
+struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
+						const struct blk_mq_ops *ops,
+						unsigned int queue_depth,
+						unsigned int set_flags);
 int blk_mq_register_dev(struct device *, struct request_queue *);
 void blk_mq_unregister_dev(struct device *, struct request_queue *);
 
-- 
cgit v1.2.3


From 25ab0bc334b43bbbe4eabc255006ce42a9424da2 Mon Sep 17 00:00:00 2001
From: Nicholas Bellinger <nab@linux-iscsi.org>
Date: Wed, 10 Oct 2018 03:23:09 +0000
Subject: scsi: sched/wait: Add wait_event_lock_irq_timeout for
 TASK_UNINTERRUPTIBLE usage

Short of reverting commit 00d909a10710 ("scsi: target: Make the session
shutdown code also wait for commands that are being aborted") for v4.19,
target-core needs a wait_event_t macro can be executed using
TASK_UNINTERRUPTIBLE to function correctly with existing fabric drivers that
expect to run with signals pending during session shutdown and active se_cmd
I/O quiesce.

The most notable is iscsi-target/iser-target, while ibmvscsi_tgt invokes
session shutdown logic from userspace via configfs attribute that could also
potentially have signals pending.

So go ahead and introduce wait_event_lock_irq_timeout() to achieve this, and
update + rename __wait_event_lock_irq_timeout() to make it accept 'state' as a
parameter.

Fixes: 00d909a10710 ("scsi: target: Make the session shutdown code also wait for commands that are being aborted")
Cc: <stable@vger.kernel.org> # v4.19+
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: Mike Christie <mchristi@redhat.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Bryant G. Ly <bryantly@linux.vnet.ibm.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Nicholas Bellinger <nab@linux-iscsi.org>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Reviewed-by: Bryant G. Ly <bly@catalogicsoftware.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/linux/wait.h | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index d9f131ecf708..ed7c122cb31f 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -1052,10 +1052,9 @@ do {										\
 	__ret;									\
 })
 
-#define __wait_event_interruptible_lock_irq_timeout(wq_head, condition,		\
-						    lock, timeout)		\
+#define __wait_event_lock_irq_timeout(wq_head, condition, lock, timeout, state)	\
 	___wait_event(wq_head, ___wait_cond_timeout(condition),			\
-		      TASK_INTERRUPTIBLE, 0, timeout,				\
+		      state, 0, timeout,					\
 		      spin_unlock_irq(&lock);					\
 		      __ret = schedule_timeout(__ret);				\
 		      spin_lock_irq(&lock));
@@ -1089,8 +1088,19 @@ do {										\
 ({										\
 	long __ret = timeout;							\
 	if (!___wait_cond_timeout(condition))					\
-		__ret = __wait_event_interruptible_lock_irq_timeout(		\
-					wq_head, condition, lock, timeout);	\
+		__ret = __wait_event_lock_irq_timeout(				\
+					wq_head, condition, lock, timeout,	\
+					TASK_INTERRUPTIBLE);			\
+	__ret;									\
+})
+
+#define wait_event_lock_irq_timeout(wq_head, condition, lock, timeout)		\
+({										\
+	long __ret = timeout;							\
+	if (!___wait_cond_timeout(condition))					\
+		__ret = __wait_event_lock_irq_timeout(				\
+					wq_head, condition, lock, timeout,	\
+					TASK_UNINTERRUPTIBLE);			\
 	__ret;									\
 })
 
-- 
cgit v1.2.3


From 9f9a742db40f95f4dc20fc7293de4ea6ddb24e47 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@linux-mips.org>
Date: Tue, 9 Oct 2018 23:57:49 +0100
Subject: FDDI: defza: Support capturing outgoing SMT traffic

DEC FDDIcontroller 700 (DEFZA) uses a Tx/Rx queue pair to communicate
SMT frames with adapter's firmware.  Any SMT frame received from the RMC
via the Rx queue is queued back by the driver to the SMT Rx queue for
the firmware to process.  Similarly the firmware uses the SMT Tx queue
to supply the driver with SMT frames which are queued back to the Tx
queue for the RMC to send to the ring.

When a network tap is attached to an FDDI interface handled by `defza'
any incoming SMT frames captured are queued to our usual processing of
network data received, which in turn delivers them to any listening
taps.

However the outgoing SMT frames produced by the firmware bypass our
network protocol stack and are therefore not delivered to taps.  This in
turn means that taps are missing a part of network traffic sent by the
adapter, which may make it more difficult to track down network problems
or do general traffic analysis.

Call `dev_queue_xmit_nit' then in the SMT Tx path, having checked that
a network tap is attached, with a newly-created `dev_nit_active' helper
wrapping the usual condition used in the transmit path.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/fddi/defza.c  | 33 +++++++++++++++++++++++++++++++--
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 13 ++++++++++++-
 3 files changed, 44 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c
index 7d01b70f7ed8..3b7f10a5f06a 100644
--- a/drivers/net/fddi/defza.c
+++ b/drivers/net/fddi/defza.c
@@ -797,11 +797,40 @@ static void fza_tx_smt(struct net_device *dev)
 		smt_tx_ptr = fp->mmio + readl_u(&fp->ring_smt_tx[i].buffer);
 		len = readl_u(&fp->ring_smt_tx[i].rmc) & FZA_RING_PBC_MASK;
 
-		/* Queue the frame to the RMC transmit ring. */
-		if (!netif_queue_stopped(dev))
+		if (!netif_queue_stopped(dev)) {
+			if (dev_nit_active(dev)) {
+				struct sk_buff *skb;
+
+				/* Length must be a multiple of 4 as only word
+				 * reads are permitted!
+				 */
+				skb = fza_alloc_skb_irq(dev, (len + 3) & ~3);
+				if (!skb)
+					goto err_no_skb;	/* Drop. */
+
+				skb_data_ptr = (struct fza_buffer_tx *)
+					       skb->data;
+
+				fza_reads(smt_tx_ptr, skb_data_ptr,
+					  (len + 3) & ~3);
+				skb->dev = dev;
+				skb_reserve(skb, 3);	/* Skip over PRH. */
+				skb_put(skb, len - 3);
+				skb_reset_network_header(skb);
+
+				dev_queue_xmit_nit(skb, dev);
+
+				dev_kfree_skb_irq(skb);
+
+err_no_skb:
+				;
+			}
+
+			/* Queue the frame to the RMC transmit ring. */
 			fza_do_xmit((union fza_buffer_txp)
 				    { .mmio_ptr = smt_tx_ptr },
 				    len, dev, 1);
+		}
 
 		writel_o(FZA_RING_OWN_FZA, &fp->ring_smt_tx[i].own);
 		fp->ring_smt_tx_index =
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 22e4ef7bb701..dc1d9ed33b31 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3645,6 +3645,7 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev,
 	return 0;
 }
 
+bool dev_nit_active(struct net_device *dev);
 void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
 
 extern int		netdev_budget;
diff --git a/net/core/dev.c b/net/core/dev.c
index a4d39b87b4e5..8497feea8fb5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1976,6 +1976,17 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
 	return false;
 }
 
+/**
+ * dev_nit_active - return true if any network interface taps are in use
+ *
+ * @dev: network device to check for the presence of taps
+ */
+bool dev_nit_active(struct net_device *dev)
+{
+	return !list_empty(&ptype_all) || !list_empty(&dev->ptype_all);
+}
+EXPORT_SYMBOL_GPL(dev_nit_active);
+
 /*
  *	Support routine. Sends outgoing frames to any network
  *	taps currently in use.
@@ -3233,7 +3244,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
 	unsigned int len;
 	int rc;
 
-	if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
+	if (dev_nit_active(dev))
 		dev_queue_xmit_nit(skb, dev);
 
 	len = skb->len;
-- 
cgit v1.2.3


From 5f6188a8003d080e3753b8f14f4a5a2325ae1ff6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 15 Oct 2018 09:37:52 -0700
Subject: tcp: do not change tcp_wstamp_ns in tcp_mstamp_refresh

In EDT design, I made the mistake of using tcp_wstamp_ns
to store the last tcp_clock_ns() sample and to store the
pacing virtual timer.

This causes major regressions at high speed flows.

Introduce tcp_clock_cache to store last tcp_clock_ns().
This is needed because some arches have slow high-resolution
kernel time service.

tcp_wstamp_ns is only updated when a packet is sent.

Note that we can remove tcp_mstamp in the future since
tcp_mstamp is essentially tcp_clock_cache/1000, so the
apparent socket size increase is temporary.

Fixes: 9799ccb0e984 ("tcp: add tcp_wstamp_ns socket field")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   | 1 +
 net/ipv4/tcp_output.c | 9 ++++++---
 net/ipv4/tcp_timer.c  | 2 +-
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 848f5b25e178..8ed77bb4ed86 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -249,6 +249,7 @@ struct tcp_sock {
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
 	u64	tcp_wstamp_ns;	/* departure time for next sent data packet */
+	u64	tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */
 
 /* RTT measurement */
 	u64	tcp_mstamp;	/* most recent packet received/sent */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 059b67af28b1..f14df66a0c85 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -52,9 +52,8 @@ void tcp_mstamp_refresh(struct tcp_sock *tp)
 {
 	u64 val = tcp_clock_ns();
 
-	/* departure time for next data packet */
-	if (val > tp->tcp_wstamp_ns)
-		tp->tcp_wstamp_ns = val;
+	if (val > tp->tcp_clock_cache)
+		tp->tcp_clock_cache = val;
 
 	val = div_u64(val, NSEC_PER_USEC);
 	if (val > tp->tcp_mstamp)
@@ -1050,6 +1049,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
 		if (unlikely(!skb))
 			return -ENOBUFS;
 	}
+
+	/* TODO: might take care of jitter here */
+	tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
+
 	skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
 
 	inet = inet_sk(sk);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 61023d50cd60..676020663ce8 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -360,7 +360,7 @@ static void tcp_probe_timer(struct sock *sk)
 	 */
 	start_ts = tcp_skb_timestamp(skb);
 	if (!start_ts)
-		skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
+		skb->skb_mstamp_ns = tp->tcp_clock_cache;
 	else if (icsk->icsk_user_timeout &&
 		 (s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout)
 		goto abort;
-- 
cgit v1.2.3


From 22e6c58b8c2843337ec4e8464b1ce6e869ca5bf4 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 15 Oct 2018 18:56:41 -0700
Subject: netlink: Add answer_flags to netlink_callback

With dump filtering we need a way to ensure the NLM_F_DUMP_FILTERED
flag is set on a message back to the user if the data returned is
influenced by some input attributes. Normally this can be done as
messages are added to the skb, but if the filter results in no data
being returned, the user could be confused as to why.

This patch adds answer_flags to the netlink_callback allowing dump
handlers to set the NLM_F_DUMP_FILTERED at a minimum in the
NLMSG_DONE message ensuring the flag gets back to the user.

The netlink_callback space is initialized to 0 via a memset in
__netlink_dump_start, so init of the new answer_flags is covered.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  | 1 +
 net/netlink/af_netlink.c | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 72580f1a72a2..4da90a6ab536 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -180,6 +180,7 @@ struct netlink_callback {
 	u16			family;
 	u16			min_dump_alloc;
 	bool			strict_check;
+	u16			answer_flags;
 	unsigned int		prev_seq, seq;
 	long			args[6];
 };
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index e613a9f89600..6bb9f3cde0b0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2257,7 +2257,8 @@ static int netlink_dump(struct sock *sk)
 	}
 
 	nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE,
-			       sizeof(nlk->dump_done_errno), NLM_F_MULTI);
+			       sizeof(nlk->dump_done_errno),
+			       NLM_F_MULTI | cb->answer_flags);
 	if (WARN_ON(!nlh))
 		goto errout_skb;
 
-- 
cgit v1.2.3


From e1cedae1ba6b09ae8376c1486712bf91ea0dfc41 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 15 Oct 2018 18:56:46 -0700
Subject: ipmr: Refactor mr_rtm_dumproute

Move per-table loops from mr_rtm_dumproute to mr_table_dump and export
mr_table_dump for dumps by specific table id.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute_base.h |  6 ++++
 net/ipv4/ipmr_base.c        | 88 ++++++++++++++++++++++++++++-----------------
 2 files changed, 61 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index 6675b9f81979..db85373c8d15 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -283,6 +283,12 @@ void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg);
 
 int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 		   struct mr_mfc *c, struct rtmsg *rtm);
+int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb,
+		  struct netlink_callback *cb,
+		  int (*fill)(struct mr_table *mrt, struct sk_buff *skb,
+			      u32 portid, u32 seq, struct mr_mfc *c,
+			      int cmd, int flags),
+		  spinlock_t *lock);
 int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 		     struct mr_table *(*iter)(struct net *net,
 					      struct mr_table *mrt),
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 1ad9aa62a97b..132dd2613ca5 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -268,6 +268,55 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(mr_fill_mroute);
 
+int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb,
+		  struct netlink_callback *cb,
+		  int (*fill)(struct mr_table *mrt, struct sk_buff *skb,
+			      u32 portid, u32 seq, struct mr_mfc *c,
+			      int cmd, int flags),
+		  spinlock_t *lock)
+{
+	unsigned int e = 0, s_e = cb->args[1];
+	unsigned int flags = NLM_F_MULTI;
+	struct mr_mfc *mfc;
+	int err;
+
+	list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+		if (e < s_e)
+			goto next_entry;
+
+		err = fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+			   cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags);
+		if (err < 0)
+			goto out;
+next_entry:
+		e++;
+	}
+	e = 0;
+	s_e = 0;
+
+	spin_lock_bh(lock);
+	list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
+		if (e < s_e)
+			goto next_entry2;
+
+		err = fill(mrt, skb, NETLINK_CB(cb->skb).portid,
+			   cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags);
+		if (err < 0) {
+			spin_unlock_bh(lock);
+			goto out;
+		}
+next_entry2:
+		e++;
+	}
+	spin_unlock_bh(lock);
+	err = 0;
+	e = 0;
+
+out:
+	cb->args[1] = e;
+	return err;
+}
+
 int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 		     struct mr_table *(*iter)(struct net *net,
 					      struct mr_table *mrt),
@@ -277,51 +326,24 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 				 int cmd, int flags),
 		     spinlock_t *lock)
 {
-	unsigned int t = 0, e = 0, s_t = cb->args[0], s_e = cb->args[1];
+	unsigned int t = 0, s_t = cb->args[0];
 	struct net *net = sock_net(skb->sk);
 	struct mr_table *mrt;
-	struct mr_mfc *mfc;
+	int err;
 
 	rcu_read_lock();
 	for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
 		if (t < s_t)
 			goto next_table;
-		list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
-			if (e < s_e)
-				goto next_entry;
-			if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
-				 cb->nlh->nlmsg_seq, mfc,
-				 RTM_NEWROUTE, NLM_F_MULTI) < 0)
-				goto done;
-next_entry:
-			e++;
-		}
-		e = 0;
-		s_e = 0;
-
-		spin_lock_bh(lock);
-		list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
-			if (e < s_e)
-				goto next_entry2;
-			if (fill(mrt, skb, NETLINK_CB(cb->skb).portid,
-				 cb->nlh->nlmsg_seq, mfc,
-				 RTM_NEWROUTE, NLM_F_MULTI) < 0) {
-				spin_unlock_bh(lock);
-				goto done;
-			}
-next_entry2:
-			e++;
-		}
-		spin_unlock_bh(lock);
-		e = 0;
-		s_e = 0;
+
+		err = mr_table_dump(mrt, skb, cb, fill, lock);
+		if (err < 0)
+			break;
 next_table:
 		t++;
 	}
-done:
 	rcu_read_unlock();
 
-	cb->args[1] = e;
 	cb->args[0] = t;
 
 	return skb->len;
-- 
cgit v1.2.3


From cb167893f41e21e6bd283d78e53489289dc0592d Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Mon, 15 Oct 2018 18:56:47 -0700
Subject: net: Plumb support for filtering ipv4 and ipv6 multicast route dumps

Implement kernel side filtering of routes by egress device index and
table id. If the table id is given in the filter, lookup table and
call mr_table_dump directly for it.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute_base.h |  7 ++++---
 net/ipv4/ipmr.c             | 18 +++++++++++++++---
 net/ipv4/ipmr_base.c        | 43 ++++++++++++++++++++++++++++++++++++++++---
 net/ipv6/ip6mr.c            | 18 +++++++++++++++---
 4 files changed, 74 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h
index db85373c8d15..34de06b426ef 100644
--- a/include/linux/mroute_base.h
+++ b/include/linux/mroute_base.h
@@ -7,6 +7,7 @@
 #include <net/net_namespace.h>
 #include <net/sock.h>
 #include <net/fib_notifier.h>
+#include <net/ip_fib.h>
 
 /**
  * struct vif_device - interface representor for multicast routing
@@ -288,7 +289,7 @@ int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb,
 		  int (*fill)(struct mr_table *mrt, struct sk_buff *skb,
 			      u32 portid, u32 seq, struct mr_mfc *c,
 			      int cmd, int flags),
-		  spinlock_t *lock);
+		  spinlock_t *lock, struct fib_dump_filter *filter);
 int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 		     struct mr_table *(*iter)(struct net *net,
 					      struct mr_table *mrt),
@@ -296,7 +297,7 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 				 struct sk_buff *skb,
 				 u32 portid, u32 seq, struct mr_mfc *c,
 				 int cmd, int flags),
-		     spinlock_t *lock);
+		     spinlock_t *lock, struct fib_dump_filter *filter);
 
 int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
 	    int (*rules_dump)(struct net *net,
@@ -346,7 +347,7 @@ mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 			     struct sk_buff *skb,
 			     u32 portid, u32 seq, struct mr_mfc *c,
 			     int cmd, int flags),
-		 spinlock_t *lock)
+		 spinlock_t *lock, struct fib_dump_filter *filter)
 {
 	return -EINVAL;
 }
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 44d777058960..3fa988e6a3df 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2528,18 +2528,30 @@ errout_free:
 static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct fib_dump_filter filter = {};
+	int err;
 
 	if (cb->strict_check) {
-		int err;
-
 		err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh,
 					    &filter, cb->extack);
 		if (err < 0)
 			return err;
 	}
 
+	if (filter.table_id) {
+		struct mr_table *mrt;
+
+		mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id);
+		if (!mrt) {
+			NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist");
+			return -ENOENT;
+		}
+		err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute,
+				    &mfc_unres_lock, &filter);
+		return skb->len ? : err;
+	}
+
 	return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter,
-				_ipmr_fill_mroute, &mfc_unres_lock);
+				_ipmr_fill_mroute, &mfc_unres_lock, &filter);
 }
 
 static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = {
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 132dd2613ca5..844806120f44 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -268,21 +268,45 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(mr_fill_mroute);
 
+static bool mr_mfc_uses_dev(const struct mr_table *mrt,
+			    const struct mr_mfc *c,
+			    const struct net_device *dev)
+{
+	int ct;
+
+	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
+		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
+			const struct vif_device *vif;
+
+			vif = &mrt->vif_table[ct];
+			if (vif->dev == dev)
+				return true;
+		}
+	}
+	return false;
+}
+
 int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb,
 		  struct netlink_callback *cb,
 		  int (*fill)(struct mr_table *mrt, struct sk_buff *skb,
 			      u32 portid, u32 seq, struct mr_mfc *c,
 			      int cmd, int flags),
-		  spinlock_t *lock)
+		  spinlock_t *lock, struct fib_dump_filter *filter)
 {
 	unsigned int e = 0, s_e = cb->args[1];
 	unsigned int flags = NLM_F_MULTI;
 	struct mr_mfc *mfc;
 	int err;
 
+	if (filter->filter_set)
+		flags |= NLM_F_DUMP_FILTERED;
+
 	list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
 		if (e < s_e)
 			goto next_entry;
+		if (filter->dev &&
+		    !mr_mfc_uses_dev(mrt, mfc, filter->dev))
+			goto next_entry;
 
 		err = fill(mrt, skb, NETLINK_CB(cb->skb).portid,
 			   cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags);
@@ -298,6 +322,9 @@ next_entry:
 	list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) {
 		if (e < s_e)
 			goto next_entry2;
+		if (filter->dev &&
+		    !mr_mfc_uses_dev(mrt, mfc, filter->dev))
+			goto next_entry2;
 
 		err = fill(mrt, skb, NETLINK_CB(cb->skb).portid,
 			   cb->nlh->nlmsg_seq, mfc, RTM_NEWROUTE, flags);
@@ -316,6 +343,7 @@ out:
 	cb->args[1] = e;
 	return err;
 }
+EXPORT_SYMBOL(mr_table_dump);
 
 int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 		     struct mr_table *(*iter)(struct net *net,
@@ -324,19 +352,28 @@ int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb,
 				 struct sk_buff *skb,
 				 u32 portid, u32 seq, struct mr_mfc *c,
 				 int cmd, int flags),
-		     spinlock_t *lock)
+		     spinlock_t *lock, struct fib_dump_filter *filter)
 {
 	unsigned int t = 0, s_t = cb->args[0];
 	struct net *net = sock_net(skb->sk);
 	struct mr_table *mrt;
 	int err;
 
+	/* multicast does not track protocol or have route type other
+	 * than RTN_MULTICAST
+	 */
+	if (filter->filter_set) {
+		if (filter->protocol || filter->flags ||
+		    (filter->rt_type && filter->rt_type != RTN_MULTICAST))
+			return skb->len;
+	}
+
 	rcu_read_lock();
 	for (mrt = iter(net, NULL); mrt; mrt = iter(net, mrt)) {
 		if (t < s_t)
 			goto next_table;
 
-		err = mr_table_dump(mrt, skb, cb, fill, lock);
+		err = mr_table_dump(mrt, skb, cb, fill, lock, filter);
 		if (err < 0)
 			break;
 next_table:
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index dbd5166c5599..9759b0aecdd6 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -2459,16 +2459,28 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	const struct nlmsghdr *nlh = cb->nlh;
 	struct fib_dump_filter filter = {};
+	int err;
 
 	if (cb->strict_check) {
-		int err;
-
 		err = ip_valid_fib_dump_req(sock_net(skb->sk), nlh,
 					    &filter, cb->extack);
 		if (err < 0)
 			return err;
 	}
 
+	if (filter.table_id) {
+		struct mr_table *mrt;
+
+		mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
+		if (!mrt) {
+			NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
+			return -ENOENT;
+		}
+		err = mr_table_dump(mrt, skb, cb, _ip6mr_fill_mroute,
+				    &mfc_unres_lock, &filter);
+		return skb->len ? : err;
+	}
+
 	return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
-				_ip6mr_fill_mroute, &mfc_unres_lock);
+				_ip6mr_fill_mroute, &mfc_unres_lock, &filter);
 }
-- 
cgit v1.2.3


From a218dc82f0b5c6c8ad3d58c9870ed69e26c08b3e Mon Sep 17 00:00:00 2001
From: Fernando Fernandez Mancera <ffmancera@riseup.net>
Date: Wed, 10 Oct 2018 09:57:13 +0200
Subject: netfilter: nft_osf: Add ttl option support

Add ttl option support to the nftables "osf" expression.

Signed-off-by: Fernando Fernandez Mancera <ffmancera@riseup.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nfnetlink_osf.h  |  3 ++-
 include/uapi/linux/netfilter/nf_tables.h |  7 +++++
 net/netfilter/nfnetlink_osf.c            | 46 +++++++++++++++-----------------
 net/netfilter/nft_osf.c                  | 15 ++++++++++-
 4 files changed, 44 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink_osf.h b/include/linux/netfilter/nfnetlink_osf.h
index ecf7dab81e9e..c6000046c966 100644
--- a/include/linux/netfilter/nfnetlink_osf.h
+++ b/include/linux/netfilter/nfnetlink_osf.h
@@ -27,6 +27,7 @@ bool nf_osf_match(const struct sk_buff *skb, u_int8_t family,
 		  const struct list_head *nf_osf_fingers);
 
 const char *nf_osf_find(const struct sk_buff *skb,
-                        const struct list_head *nf_osf_fingers);
+			const struct list_head *nf_osf_fingers,
+			const int ttl_check);
 
 #endif /* _NFOSF_H */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 5444e76870bb..579974b0bf0d 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1511,9 +1511,16 @@ enum nft_flowtable_hook_attributes {
 };
 #define NFTA_FLOWTABLE_HOOK_MAX	(__NFTA_FLOWTABLE_HOOK_MAX - 1)
 
+/**
+ * enum nft_osf_attributes - nftables osf expression netlink attributes
+ *
+ * @NFTA_OSF_DREG: destination register (NLA_U32: nft_registers)
+ * @NFTA_OSF_TTL: Value of the TTL osf option (NLA_U8)
+ */
 enum nft_osf_attributes {
 	NFTA_OSF_UNSPEC,
 	NFTA_OSF_DREG,
+	NFTA_OSF_TTL,
 	__NFTA_OSF_MAX,
 };
 #define NFTA_OSF_MAX (__NFTA_OSF_MAX - 1)
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 00db27dfd2ff..6f41dd74729d 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -30,32 +30,27 @@ EXPORT_SYMBOL_GPL(nf_osf_fingers);
 static inline int nf_osf_ttl(const struct sk_buff *skb,
 			     int ttl_check, unsigned char f_ttl)
 {
+	struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
 	const struct iphdr *ip = ip_hdr(skb);
-
-	if (ttl_check != -1) {
-		if (ttl_check == NF_OSF_TTL_TRUE)
-			return ip->ttl == f_ttl;
-		if (ttl_check == NF_OSF_TTL_NOCHECK)
-			return 1;
-		else if (ip->ttl <= f_ttl)
-			return 1;
-		else {
-			struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
-			int ret = 0;
-
-			for_ifa(in_dev) {
-				if (inet_ifa_match(ip->saddr, ifa)) {
-					ret = (ip->ttl == f_ttl);
-					break;
-				}
-			}
-			endfor_ifa(in_dev);
-
-			return ret;
+	int ret = 0;
+
+	if (ttl_check == NF_OSF_TTL_TRUE)
+		return ip->ttl == f_ttl;
+	if (ttl_check == NF_OSF_TTL_NOCHECK)
+		return 1;
+	else if (ip->ttl <= f_ttl)
+		return 1;
+
+	for_ifa(in_dev) {
+		if (inet_ifa_match(ip->saddr, ifa)) {
+			ret = (ip->ttl == f_ttl);
+			break;
 		}
 	}
 
-	return ip->ttl == f_ttl;
+	endfor_ifa(in_dev);
+
+	return ret;
 }
 
 struct nf_osf_hdr_ctx {
@@ -213,7 +208,7 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family,
 	if (!tcp)
 		return false;
 
-	ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : -1;
+	ttl_check = (info->flags & NF_OSF_TTL) ? info->ttl : 0;
 
 	list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) {
 
@@ -257,7 +252,8 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family,
 EXPORT_SYMBOL_GPL(nf_osf_match);
 
 const char *nf_osf_find(const struct sk_buff *skb,
-			const struct list_head *nf_osf_fingers)
+			const struct list_head *nf_osf_fingers,
+			const int ttl_check)
 {
 	const struct iphdr *ip = ip_hdr(skb);
 	const struct nf_osf_user_finger *f;
@@ -275,7 +271,7 @@ const char *nf_osf_find(const struct sk_buff *skb,
 
 	list_for_each_entry_rcu(kf, &nf_osf_fingers[ctx.df], finger_entry) {
 		f = &kf->finger;
-		if (!nf_osf_match_one(skb, f, -1, &ctx))
+		if (!nf_osf_match_one(skb, f, ttl_check, &ctx))
 			continue;
 
 		genre = f->genre;
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index a35fb59ace73..0b452fd470c4 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -6,10 +6,12 @@
 
 struct nft_osf {
 	enum nft_registers	dreg:8;
+	u8			ttl;
 };
 
 static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = {
 	[NFTA_OSF_DREG]		= { .type = NLA_U32 },
+	[NFTA_OSF_TTL]		= { .type = NLA_U8 },
 };
 
 static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
@@ -33,7 +35,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
 		return;
 	}
 
-	os_name = nf_osf_find(skb, nf_osf_fingers);
+	os_name = nf_osf_find(skb, nf_osf_fingers, priv->ttl);
 	if (!os_name)
 		strncpy((char *)dest, "unknown", NFT_OSF_MAXGENRELEN);
 	else
@@ -46,6 +48,14 @@ static int nft_osf_init(const struct nft_ctx *ctx,
 {
 	struct nft_osf *priv = nft_expr_priv(expr);
 	int err;
+	u8 ttl;
+
+	if (nla_get_u8(tb[NFTA_OSF_TTL])) {
+		ttl = nla_get_u8(tb[NFTA_OSF_TTL]);
+		if (ttl > 2)
+			return -EINVAL;
+		priv->ttl = ttl;
+	}
 
 	priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]);
 	err = nft_validate_register_store(ctx, priv->dreg, NULL,
@@ -60,6 +70,9 @@ static int nft_osf_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
 	const struct nft_osf *priv = nft_expr_priv(expr);
 
+	if (nla_put_u8(skb, NFTA_OSF_TTL, priv->ttl))
+		goto nla_put_failure;
+
 	if (nft_dump_register(skb, NFTA_OSF_DREG, priv->dreg))
 		goto nla_put_failure;
 
-- 
cgit v1.2.3


From 4cf841e398503990df640f7a7c5b2ea56f11c08c Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 15 Oct 2018 16:11:31 -0700
Subject: ACPI/ADXL: Add address translation interface using an ACPI DSM

Some new Intel servers provide an interface so that the OS can ask the
BIOS to translate a system physical address to a memory address (socket,
memory controller, channel, rank, dimm, etc.). This is useful for EDAC
drivers that want to take the address of an error reported in a machine
check bank and let the user know which DIMM may need to be replaced.

Specification for this interface is available at:

  https://cdrdv2.intel.com/v1/dl/getContent/603354

 [ Based on earlier code by Qiuxu Zhuo <qiuxu.zhuo@intel.com>. ]

 [ bp: Make the first pr_info() in adxl_init() pr_debug() so that it
   doesn't pollute every dmesg. ]

Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tested-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
CC: Len Brown <lenb@kernel.org>
CC: linux-acpi@vger.kernel.org
CC: linux-edac@vger.kernel.org
Link: http://lkml.kernel.org/r/20181015202620.23610-1-tony.luck@intel.com
---
 drivers/acpi/Kconfig     |   3 +
 drivers/acpi/Makefile    |   3 +
 drivers/acpi/acpi_adxl.c | 192 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/adxl.h     |  13 ++++
 4 files changed, 211 insertions(+)
 create mode 100644 drivers/acpi/acpi_adxl.c
 create mode 100644 include/linux/adxl.h

(limited to 'include/linux')

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index dd1eea90f67f..09991cc91b89 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -498,6 +498,9 @@ config ACPI_EXTLOG
 	  driver adds support for that functionality with corresponding
 	  tracepoint which carries that information to userspace.
 
+config ACPI_ADXL
+	bool
+
 menuconfig PMIC_OPREGION
 	bool "PMIC (Power Management Integrated Circuit) operation region support"
 	help
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 6d59aa109a91..edc039313cd6 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -61,6 +61,9 @@ acpi-$(CONFIG_ACPI_LPIT)	+= acpi_lpit.o
 acpi-$(CONFIG_ACPI_GENERIC_GSI) += irq.o
 acpi-$(CONFIG_ACPI_WATCHDOG)	+= acpi_watchdog.o
 
+# Address translation
+acpi-$(CONFIG_ACPI_ADXL)	+= acpi_adxl.o
+
 # These are (potentially) separate modules
 
 # IPMI may be used by other drivers, so it has to initialise before them
diff --git a/drivers/acpi/acpi_adxl.c b/drivers/acpi/acpi_adxl.c
new file mode 100644
index 000000000000..13c8f7b50c46
--- /dev/null
+++ b/drivers/acpi/acpi_adxl.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Address translation interface via ACPI DSM.
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Specification for this interface is available at:
+ *
+ *	https://cdrdv2.intel.com/v1/dl/getContent/603354
+ */
+
+#include <linux/acpi.h>
+#include <linux/adxl.h>
+
+#define ADXL_REVISION			0x1
+#define ADXL_IDX_GET_ADDR_PARAMS	0x1
+#define ADXL_IDX_FORWARD_TRANSLATE	0x2
+#define ACPI_ADXL_PATH			"\\_SB.ADXL"
+
+/*
+ * The specification doesn't provide a limit on how many
+ * components are in a memory address. But since we allocate
+ * memory based on the number the BIOS tells us, we should
+ * defend against insane values.
+ */
+#define ADXL_MAX_COMPONENTS		500
+
+#undef pr_fmt
+#define pr_fmt(fmt) "ADXL: " fmt
+
+static acpi_handle handle;
+static union acpi_object *params;
+static const guid_t adxl_guid =
+	GUID_INIT(0xAA3C050A, 0x7EA4, 0x4C1F,
+		  0xAF, 0xDA, 0x12, 0x67, 0xDF, 0xD3, 0xD4, 0x8D);
+
+static int adxl_count;
+static char **adxl_component_names;
+
+static union acpi_object *adxl_dsm(int cmd, union acpi_object argv[])
+{
+	union acpi_object *obj, *o;
+
+	obj = acpi_evaluate_dsm_typed(handle, &adxl_guid, ADXL_REVISION,
+				      cmd, argv, ACPI_TYPE_PACKAGE);
+	if (!obj) {
+		pr_info("DSM call failed for cmd=%d\n", cmd);
+		return NULL;
+	}
+
+	if (obj->package.count != 2) {
+		pr_info("Bad pkg count %d\n", obj->package.count);
+		goto err;
+	}
+
+	o = obj->package.elements;
+	if (o->type != ACPI_TYPE_INTEGER) {
+		pr_info("Bad 1st element type %d\n", o->type);
+		goto err;
+	}
+	if (o->integer.value) {
+		pr_info("Bad ret val %llu\n", o->integer.value);
+		goto err;
+	}
+
+	o = obj->package.elements + 1;
+	if (o->type != ACPI_TYPE_PACKAGE) {
+		pr_info("Bad 2nd element type %d\n", o->type);
+		goto err;
+	}
+	return obj;
+
+err:
+	ACPI_FREE(obj);
+	return NULL;
+}
+
+/**
+ * adxl_get_component_names - get list of memory component names
+ * Returns NULL terminated list of string names
+ *
+ * Give the caller a pointer to the list of memory component names
+ * e.g. { "SystemAddress", "ProcessorSocketId", "ChannelId", ... NULL }
+ * Caller should count how many strings in order to allocate a buffer
+ * for the return from adxl_decode().
+ */
+const char * const *adxl_get_component_names(void)
+{
+	return (const char * const *)adxl_component_names;
+}
+EXPORT_SYMBOL_GPL(adxl_get_component_names);
+
+/**
+ * adxl_decode - ask BIOS to decode a system address to memory address
+ * @addr: the address to decode
+ * @component_values: pointer to array of values for each component
+ * Returns 0 on success, negative error code otherwise
+ *
+ * The index of each value returned in the array matches the index of
+ * each component name returned by adxl_get_component_names().
+ * Components that are not defined for this address translation (e.g.
+ * mirror channel number for a non-mirrored address) are set to ~0ull.
+ */
+int adxl_decode(u64 addr, u64 component_values[])
+{
+	union acpi_object argv4[2], *results, *r;
+	int i, cnt;
+
+	if (!adxl_component_names)
+		return -EOPNOTSUPP;
+
+	argv4[0].type = ACPI_TYPE_PACKAGE;
+	argv4[0].package.count = 1;
+	argv4[0].package.elements = &argv4[1];
+	argv4[1].integer.type = ACPI_TYPE_INTEGER;
+	argv4[1].integer.value = addr;
+
+	results = adxl_dsm(ADXL_IDX_FORWARD_TRANSLATE, argv4);
+	if (!results)
+		return -EINVAL;
+
+	r = results->package.elements + 1;
+	cnt = r->package.count;
+	if (cnt != adxl_count) {
+		ACPI_FREE(results);
+		return -EINVAL;
+	}
+	r = r->package.elements;
+
+	for (i = 0; i < cnt; i++)
+		component_values[i] = r[i].integer.value;
+
+	ACPI_FREE(results);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(adxl_decode);
+
+static int __init adxl_init(void)
+{
+	char *path = ACPI_ADXL_PATH;
+	union acpi_object *p;
+	acpi_status status;
+	int i;
+
+	status = acpi_get_handle(NULL, path, &handle);
+	if (ACPI_FAILURE(status)) {
+		pr_debug("No ACPI handle for path %s\n", path);
+		return -ENODEV;
+	}
+
+	if (!acpi_has_method(handle, "_DSM")) {
+		pr_info("No DSM method\n");
+		return -ENODEV;
+	}
+
+	if (!acpi_check_dsm(handle, &adxl_guid, ADXL_REVISION,
+			    ADXL_IDX_GET_ADDR_PARAMS |
+			    ADXL_IDX_FORWARD_TRANSLATE)) {
+		pr_info("DSM method does not support forward translate\n");
+		return -ENODEV;
+	}
+
+	params = adxl_dsm(ADXL_IDX_GET_ADDR_PARAMS, NULL);
+	if (!params) {
+		pr_info("Failed to get component names\n");
+		return -ENODEV;
+	}
+
+	p = params->package.elements + 1;
+	adxl_count = p->package.count;
+	if (adxl_count > ADXL_MAX_COMPONENTS) {
+		pr_info("Insane number of address component names %d\n", adxl_count);
+		ACPI_FREE(params);
+		return -ENODEV;
+	}
+	p = p->package.elements;
+
+	/*
+	 * Allocate one extra for NULL termination.
+	 */
+	adxl_component_names = kcalloc(adxl_count + 1, sizeof(char *), GFP_KERNEL);
+	if (!adxl_component_names) {
+		ACPI_FREE(params);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < adxl_count; i++)
+		adxl_component_names[i] = p[i].string.pointer;
+
+	return 0;
+}
+subsys_initcall(adxl_init);
diff --git a/include/linux/adxl.h b/include/linux/adxl.h
new file mode 100644
index 000000000000..2a629acb4c3f
--- /dev/null
+++ b/include/linux/adxl.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Address translation interface via ACPI DSM.
+ * Copyright (C) 2018 Intel Corporation
+ */
+
+#ifndef _LINUX_ADXL_H
+#define _LINUX_ADXL_H
+
+const char * const *adxl_get_component_names(void);
+int adxl_decode(u64 addr, u64 component_values[]);
+
+#endif /* _LINUX_ADXL_H */
-- 
cgit v1.2.3


From 084181fe8cc7472695b907f0018f4cd00934cb12 Mon Sep 17 00:00:00 2001
From: Alan Tull <atull@kernel.org>
Date: Mon, 15 Oct 2018 17:20:01 -0500
Subject: fpga: mgr: add devm_fpga_mgr_create

Add devm_fpga_mgr_create() which is the managed
version of fpga_mgr_create().

Change current FPGA manager drivers to use
devm_fpga_mgr_create()

Signed-off-by: Alan Tull <atull@kernel.org>
Suggested-by: Federico Vaga <federico.vaga@cern.ch>
Acked-by: Moritz Fischer <mdf@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/fpga/fpga-mgr.rst | 13 +++---
 drivers/fpga/altera-cvp.c                  |  8 ++--
 drivers/fpga/altera-pr-ip-core.c           |  9 +----
 drivers/fpga/altera-ps-spi.c               | 11 ++---
 drivers/fpga/dfl-fme-mgr.c                 | 11 ++---
 drivers/fpga/fpga-mgr.c                    | 64 ++++++++++++++++++++++++++----
 drivers/fpga/ice40-spi.c                   | 10 ++---
 drivers/fpga/machxo2-spi.c                 | 11 ++---
 drivers/fpga/socfpga-a10.c                 |  5 +--
 drivers/fpga/socfpga.c                     | 10 ++---
 drivers/fpga/ts73xx-fpga.c                 | 11 ++---
 drivers/fpga/xilinx-spi.c                  | 12 ++----
 drivers/fpga/zynq-fpga.c                   |  5 +--
 include/linux/fpga/fpga-mgr.h              |  4 ++
 14 files changed, 97 insertions(+), 87 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/fpga/fpga-mgr.rst b/Documentation/driver-api/fpga/fpga-mgr.rst
index 82b6dbbd31cd..db8885efbc22 100644
--- a/Documentation/driver-api/fpga/fpga-mgr.rst
+++ b/Documentation/driver-api/fpga/fpga-mgr.rst
@@ -49,18 +49,14 @@ probe function calls fpga_mgr_register(), such as::
 		 * them in priv
 		 */
 
-		mgr = fpga_mgr_create(dev, "Altera SOCFPGA FPGA Manager",
-				      &socfpga_fpga_ops, priv);
+		mgr = devm_fpga_mgr_create(dev, "Altera SOCFPGA FPGA Manager",
+					   &socfpga_fpga_ops, priv);
 		if (!mgr)
 			return -ENOMEM;
 
 		platform_set_drvdata(pdev, mgr);
 
-		ret = fpga_mgr_register(mgr);
-		if (ret)
-			fpga_mgr_free(mgr);
-
-		return ret;
+		return fpga_mgr_register(mgr);
 	}
 
 	static int socfpga_fpga_remove(struct platform_device *pdev)
@@ -169,6 +165,9 @@ API for implementing a new FPGA Manager driver
 .. kernel-doc:: include/linux/fpga/fpga-mgr.h
    :functions: fpga_manager_ops
 
+.. kernel-doc:: drivers/fpga/fpga-mgr.c
+   :functions: devm_fpga_mgr_create
+
 .. kernel-doc:: drivers/fpga/fpga-mgr.c
    :functions: fpga_mgr_create
 
diff --git a/drivers/fpga/altera-cvp.c b/drivers/fpga/altera-cvp.c
index 7fa793672a7a..610a1558e0ed 100644
--- a/drivers/fpga/altera-cvp.c
+++ b/drivers/fpga/altera-cvp.c
@@ -453,8 +453,8 @@ static int altera_cvp_probe(struct pci_dev *pdev,
 	snprintf(conf->mgr_name, sizeof(conf->mgr_name), "%s @%s",
 		 ALTERA_CVP_MGR_NAME, pci_name(pdev));
 
-	mgr = fpga_mgr_create(&pdev->dev, conf->mgr_name,
-			      &altera_cvp_ops, conf);
+	mgr = devm_fpga_mgr_create(&pdev->dev, conf->mgr_name,
+				   &altera_cvp_ops, conf);
 	if (!mgr) {
 		ret = -ENOMEM;
 		goto err_unmap;
@@ -463,10 +463,8 @@ static int altera_cvp_probe(struct pci_dev *pdev,
 	pci_set_drvdata(pdev, mgr);
 
 	ret = fpga_mgr_register(mgr);
-	if (ret) {
-		fpga_mgr_free(mgr);
+	if (ret)
 		goto err_unmap;
-	}
 
 	ret = driver_create_file(&altera_cvp_driver.driver,
 				 &driver_attr_chkcfg);
diff --git a/drivers/fpga/altera-pr-ip-core.c b/drivers/fpga/altera-pr-ip-core.c
index 65e0b6a2c031..a7a3bf0b5202 100644
--- a/drivers/fpga/altera-pr-ip-core.c
+++ b/drivers/fpga/altera-pr-ip-core.c
@@ -177,7 +177,6 @@ int alt_pr_register(struct device *dev, void __iomem *reg_base)
 {
 	struct alt_pr_priv *priv;
 	struct fpga_manager *mgr;
-	int ret;
 	u32 val;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -192,17 +191,13 @@ int alt_pr_register(struct device *dev, void __iomem *reg_base)
 		(val & ALT_PR_CSR_STATUS_MSK) >> ALT_PR_CSR_STATUS_SFT,
 		(int)(val & ALT_PR_CSR_PR_START));
 
-	mgr = fpga_mgr_create(dev, dev_name(dev), &alt_pr_ops, priv);
+	mgr = devm_fpga_mgr_create(dev, dev_name(dev), &alt_pr_ops, priv);
 	if (!mgr)
 		return -ENOMEM;
 
 	dev_set_drvdata(dev, mgr);
 
-	ret = fpga_mgr_register(mgr);
-	if (ret)
-		fpga_mgr_free(mgr);
-
-	return ret;
+	return fpga_mgr_register(mgr);
 }
 EXPORT_SYMBOL_GPL(alt_pr_register);
 
diff --git a/drivers/fpga/altera-ps-spi.c b/drivers/fpga/altera-ps-spi.c
index 24b25c626036..33aafda50af5 100644
--- a/drivers/fpga/altera-ps-spi.c
+++ b/drivers/fpga/altera-ps-spi.c
@@ -239,7 +239,6 @@ static int altera_ps_probe(struct spi_device *spi)
 	struct altera_ps_conf *conf;
 	const struct of_device_id *of_id;
 	struct fpga_manager *mgr;
-	int ret;
 
 	conf = devm_kzalloc(&spi->dev, sizeof(*conf), GFP_KERNEL);
 	if (!conf)
@@ -275,18 +274,14 @@ static int altera_ps_probe(struct spi_device *spi)
 	snprintf(conf->mgr_name, sizeof(conf->mgr_name), "%s %s",
 		 dev_driver_string(&spi->dev), dev_name(&spi->dev));
 
-	mgr = fpga_mgr_create(&spi->dev, conf->mgr_name,
-			      &altera_ps_ops, conf);
+	mgr = devm_fpga_mgr_create(&spi->dev, conf->mgr_name,
+				   &altera_ps_ops, conf);
 	if (!mgr)
 		return -ENOMEM;
 
 	spi_set_drvdata(spi, mgr);
 
-	ret = fpga_mgr_register(mgr);
-	if (ret)
-		fpga_mgr_free(mgr);
-
-	return ret;
+	return fpga_mgr_register(mgr);
 }
 
 static int altera_ps_remove(struct spi_device *spi)
diff --git a/drivers/fpga/dfl-fme-mgr.c b/drivers/fpga/dfl-fme-mgr.c
index 9f045d058cfd..76f37709dd1a 100644
--- a/drivers/fpga/dfl-fme-mgr.c
+++ b/drivers/fpga/dfl-fme-mgr.c
@@ -287,7 +287,6 @@ static int fme_mgr_probe(struct platform_device *pdev)
 	struct fme_mgr_priv *priv;
 	struct fpga_manager *mgr;
 	struct resource *res;
-	int ret;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -309,19 +308,15 @@ static int fme_mgr_probe(struct platform_device *pdev)
 
 	fme_mgr_get_compat_id(priv->ioaddr, compat_id);
 
-	mgr = fpga_mgr_create(dev, "DFL FME FPGA Manager",
-			      &fme_mgr_ops, priv);
+	mgr = devm_fpga_mgr_create(dev, "DFL FME FPGA Manager",
+				   &fme_mgr_ops, priv);
 	if (!mgr)
 		return -ENOMEM;
 
 	mgr->compat_id = compat_id;
 	platform_set_drvdata(pdev, mgr);
 
-	ret = fpga_mgr_register(mgr);
-	if (ret)
-		fpga_mgr_free(mgr);
-
-	return ret;
+	return fpga_mgr_register(mgr);
 }
 
 static int fme_mgr_remove(struct platform_device *pdev)
diff --git a/drivers/fpga/fpga-mgr.c b/drivers/fpga/fpga-mgr.c
index a41b07e37884..c3866816456a 100644
--- a/drivers/fpga/fpga-mgr.c
+++ b/drivers/fpga/fpga-mgr.c
@@ -558,6 +558,9 @@ EXPORT_SYMBOL_GPL(fpga_mgr_unlock);
  * @mops:	pointer to structure of fpga manager ops
  * @priv:	fpga manager private data
  *
+ * The caller of this function is responsible for freeing the struct with
+ * fpga_mgr_free().  Using devm_fpga_mgr_create() instead is recommended.
+ *
  * Return: pointer to struct fpga_manager or NULL
  */
 struct fpga_manager *fpga_mgr_create(struct device *dev, const char *name,
@@ -618,8 +621,8 @@ error_kfree:
 EXPORT_SYMBOL_GPL(fpga_mgr_create);
 
 /**
- * fpga_mgr_free - deallocate a FPGA manager
- * @mgr:	fpga manager struct created by fpga_mgr_create
+ * fpga_mgr_free - free a FPGA manager created with fpga_mgr_create()
+ * @mgr:	fpga manager struct
  */
 void fpga_mgr_free(struct fpga_manager *mgr)
 {
@@ -628,9 +631,55 @@ void fpga_mgr_free(struct fpga_manager *mgr)
 }
 EXPORT_SYMBOL_GPL(fpga_mgr_free);
 
+static void devm_fpga_mgr_release(struct device *dev, void *res)
+{
+	struct fpga_manager *mgr = *(struct fpga_manager **)res;
+
+	fpga_mgr_free(mgr);
+}
+
+/**
+ * devm_fpga_mgr_create - create and initialize a managed FPGA manager struct
+ * @dev:	fpga manager device from pdev
+ * @name:	fpga manager name
+ * @mops:	pointer to structure of fpga manager ops
+ * @priv:	fpga manager private data
+ *
+ * This function is intended for use in a FPGA manager driver's probe function.
+ * After the manager driver creates the manager struct with
+ * devm_fpga_mgr_create(), it should register it with fpga_mgr_register().  The
+ * manager driver's remove function should call fpga_mgr_unregister().  The
+ * manager struct allocated with this function will be freed automatically on
+ * driver detach.  This includes the case of a probe function returning error
+ * before calling fpga_mgr_register(), the struct will still get cleaned up.
+ *
+ * Return: pointer to struct fpga_manager or NULL
+ */
+struct fpga_manager *devm_fpga_mgr_create(struct device *dev, const char *name,
+					  const struct fpga_manager_ops *mops,
+					  void *priv)
+{
+	struct fpga_manager **ptr, *mgr;
+
+	ptr = devres_alloc(devm_fpga_mgr_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	mgr = fpga_mgr_create(dev, name, mops, priv);
+	if (!mgr) {
+		devres_free(ptr);
+	} else {
+		*ptr = mgr;
+		devres_add(dev, ptr);
+	}
+
+	return mgr;
+}
+EXPORT_SYMBOL_GPL(devm_fpga_mgr_create);
+
 /**
  * fpga_mgr_register - register a FPGA manager
- * @mgr:	fpga manager struct created by fpga_mgr_create
+ * @mgr: fpga manager struct
  *
  * Return: 0 on success, negative error code otherwise.
  */
@@ -661,8 +710,10 @@ error_device:
 EXPORT_SYMBOL_GPL(fpga_mgr_register);
 
 /**
- * fpga_mgr_unregister - unregister and free a FPGA manager
- * @mgr:	fpga manager struct
+ * fpga_mgr_unregister - unregister a FPGA manager
+ * @mgr: fpga manager struct
+ *
+ * This function is intended for use in a FPGA manager driver's remove function.
  */
 void fpga_mgr_unregister(struct fpga_manager *mgr)
 {
@@ -681,9 +732,6 @@ EXPORT_SYMBOL_GPL(fpga_mgr_unregister);
 
 static void fpga_mgr_dev_release(struct device *dev)
 {
-	struct fpga_manager *mgr = to_fpga_manager(dev);
-
-	fpga_mgr_free(mgr);
 }
 
 static int __init fpga_mgr_class_init(void)
diff --git a/drivers/fpga/ice40-spi.c b/drivers/fpga/ice40-spi.c
index 5981c7ee7a7d..6154661b8f76 100644
--- a/drivers/fpga/ice40-spi.c
+++ b/drivers/fpga/ice40-spi.c
@@ -175,18 +175,14 @@ static int ice40_fpga_probe(struct spi_device *spi)
 		return ret;
 	}
 
-	mgr = fpga_mgr_create(dev, "Lattice iCE40 FPGA Manager",
-			      &ice40_fpga_ops, priv);
+	mgr = devm_fpga_mgr_create(dev, "Lattice iCE40 FPGA Manager",
+				   &ice40_fpga_ops, priv);
 	if (!mgr)
 		return -ENOMEM;
 
 	spi_set_drvdata(spi, mgr);
 
-	ret = fpga_mgr_register(mgr);
-	if (ret)
-		fpga_mgr_free(mgr);
-
-	return ret;
+	return fpga_mgr_register(mgr);
 }
 
 static int ice40_fpga_remove(struct spi_device *spi)
diff --git a/drivers/fpga/machxo2-spi.c b/drivers/fpga/machxo2-spi.c
index a582e0000c97..4d8a87641587 100644
--- a/drivers/fpga/machxo2-spi.c
+++ b/drivers/fpga/machxo2-spi.c
@@ -356,25 +356,20 @@ static int machxo2_spi_probe(struct spi_device *spi)
 {
 	struct device *dev = &spi->dev;
 	struct fpga_manager *mgr;
-	int ret;
 
 	if (spi->max_speed_hz > MACHXO2_MAX_SPEED) {
 		dev_err(dev, "Speed is too high\n");
 		return -EINVAL;
 	}
 
-	mgr = fpga_mgr_create(dev, "Lattice MachXO2 SPI FPGA Manager",
-			      &machxo2_ops, spi);
+	mgr = devm_fpga_mgr_create(dev, "Lattice MachXO2 SPI FPGA Manager",
+				   &machxo2_ops, spi);
 	if (!mgr)
 		return -ENOMEM;
 
 	spi_set_drvdata(spi, mgr);
 
-	ret = fpga_mgr_register(mgr);
-	if (ret)
-		fpga_mgr_free(mgr);
-
-	return ret;
+	return fpga_mgr_register(mgr);
 }
 
 static int machxo2_spi_remove(struct spi_device *spi)
diff --git a/drivers/fpga/socfpga-a10.c b/drivers/fpga/socfpga-a10.c
index be30c48eb6e4..573d88bdf730 100644
--- a/drivers/fpga/socfpga-a10.c
+++ b/drivers/fpga/socfpga-a10.c
@@ -508,8 +508,8 @@ static int socfpga_a10_fpga_probe(struct platform_device *pdev)
 		return -EBUSY;
 	}
 
-	mgr = fpga_mgr_create(dev, "SoCFPGA Arria10 FPGA Manager",
-			      &socfpga_a10_fpga_mgr_ops, priv);
+	mgr = devm_fpga_mgr_create(dev, "SoCFPGA Arria10 FPGA Manager",
+				   &socfpga_a10_fpga_mgr_ops, priv);
 	if (!mgr)
 		return -ENOMEM;
 
@@ -517,7 +517,6 @@ static int socfpga_a10_fpga_probe(struct platform_device *pdev)
 
 	ret = fpga_mgr_register(mgr);
 	if (ret) {
-		fpga_mgr_free(mgr);
 		clk_disable_unprepare(priv->clk);
 		return ret;
 	}
diff --git a/drivers/fpga/socfpga.c b/drivers/fpga/socfpga.c
index 959d71f26896..4a8a2fcd4e6c 100644
--- a/drivers/fpga/socfpga.c
+++ b/drivers/fpga/socfpga.c
@@ -571,18 +571,14 @@ static int socfpga_fpga_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	mgr = fpga_mgr_create(dev, "Altera SOCFPGA FPGA Manager",
-			      &socfpga_fpga_ops, priv);
+	mgr = devm_fpga_mgr_create(dev, "Altera SOCFPGA FPGA Manager",
+				   &socfpga_fpga_ops, priv);
 	if (!mgr)
 		return -ENOMEM;
 
 	platform_set_drvdata(pdev, mgr);
 
-	ret = fpga_mgr_register(mgr);
-	if (ret)
-		fpga_mgr_free(mgr);
-
-	return ret;
+	return fpga_mgr_register(mgr);
 }
 
 static int socfpga_fpga_remove(struct platform_device *pdev)
diff --git a/drivers/fpga/ts73xx-fpga.c b/drivers/fpga/ts73xx-fpga.c
index 08efd1895b1b..dc22a5842609 100644
--- a/drivers/fpga/ts73xx-fpga.c
+++ b/drivers/fpga/ts73xx-fpga.c
@@ -118,7 +118,6 @@ static int ts73xx_fpga_probe(struct platform_device *pdev)
 	struct ts73xx_fpga_priv *priv;
 	struct fpga_manager *mgr;
 	struct resource *res;
-	int ret;
 
 	priv = devm_kzalloc(kdev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -133,18 +132,14 @@ static int ts73xx_fpga_probe(struct platform_device *pdev)
 		return PTR_ERR(priv->io_base);
 	}
 
-	mgr = fpga_mgr_create(kdev, "TS-73xx FPGA Manager",
-			      &ts73xx_fpga_ops, priv);
+	mgr = devm_fpga_mgr_create(kdev, "TS-73xx FPGA Manager",
+				   &ts73xx_fpga_ops, priv);
 	if (!mgr)
 		return -ENOMEM;
 
 	platform_set_drvdata(pdev, mgr);
 
-	ret = fpga_mgr_register(mgr);
-	if (ret)
-		fpga_mgr_free(mgr);
-
-	return ret;
+	return fpga_mgr_register(mgr);
 }
 
 static int ts73xx_fpga_remove(struct platform_device *pdev)
diff --git a/drivers/fpga/xilinx-spi.c b/drivers/fpga/xilinx-spi.c
index 8d1945966533..469486be20c4 100644
--- a/drivers/fpga/xilinx-spi.c
+++ b/drivers/fpga/xilinx-spi.c
@@ -144,7 +144,6 @@ static int xilinx_spi_probe(struct spi_device *spi)
 {
 	struct xilinx_spi_conf *conf;
 	struct fpga_manager *mgr;
-	int ret;
 
 	conf = devm_kzalloc(&spi->dev, sizeof(*conf), GFP_KERNEL);
 	if (!conf)
@@ -167,18 +166,15 @@ static int xilinx_spi_probe(struct spi_device *spi)
 		return PTR_ERR(conf->done);
 	}
 
-	mgr = fpga_mgr_create(&spi->dev, "Xilinx Slave Serial FPGA Manager",
-			      &xilinx_spi_ops, conf);
+	mgr = devm_fpga_mgr_create(&spi->dev,
+				   "Xilinx Slave Serial FPGA Manager",
+				   &xilinx_spi_ops, conf);
 	if (!mgr)
 		return -ENOMEM;
 
 	spi_set_drvdata(spi, mgr);
 
-	ret = fpga_mgr_register(mgr);
-	if (ret)
-		fpga_mgr_free(mgr);
-
-	return ret;
+	return fpga_mgr_register(mgr);
 }
 
 static int xilinx_spi_remove(struct spi_device *spi)
diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c
index 3110e00121ca..bb82efeebb9d 100644
--- a/drivers/fpga/zynq-fpga.c
+++ b/drivers/fpga/zynq-fpga.c
@@ -614,8 +614,8 @@ static int zynq_fpga_probe(struct platform_device *pdev)
 
 	clk_disable(priv->clk);
 
-	mgr = fpga_mgr_create(dev, "Xilinx Zynq FPGA Manager",
-			      &zynq_fpga_ops, priv);
+	mgr = devm_fpga_mgr_create(dev, "Xilinx Zynq FPGA Manager",
+				   &zynq_fpga_ops, priv);
 	if (!mgr)
 		return -ENOMEM;
 
@@ -624,7 +624,6 @@ static int zynq_fpga_probe(struct platform_device *pdev)
 	err = fpga_mgr_register(mgr);
 	if (err) {
 		dev_err(dev, "unable to register FPGA manager\n");
-		fpga_mgr_free(mgr);
 		clk_unprepare(priv->clk);
 		return err;
 	}
diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h
index 8ab5df769923..e8ca62b2cb5b 100644
--- a/include/linux/fpga/fpga-mgr.h
+++ b/include/linux/fpga/fpga-mgr.h
@@ -198,4 +198,8 @@ void fpga_mgr_free(struct fpga_manager *mgr);
 int fpga_mgr_register(struct fpga_manager *mgr);
 void fpga_mgr_unregister(struct fpga_manager *mgr);
 
+struct fpga_manager *devm_fpga_mgr_create(struct device *dev, const char *name,
+					  const struct fpga_manager_ops *mops,
+					  void *priv);
+
 #endif /*_LINUX_FPGA_MGR_H */
-- 
cgit v1.2.3


From 213befe049c70cfcfcbb4f6010bd5276dbc1f7b9 Mon Sep 17 00:00:00 2001
From: Alan Tull <atull@kernel.org>
Date: Mon, 15 Oct 2018 17:20:02 -0500
Subject: fpga: bridge: add devm_fpga_bridge_create

Add devm_fpga_bridge_create() which is the managed
version of fpga_bridge_create().

Change current bridge drivers to use
devm_fpga_bridge_create().

Signed-off-by: Alan Tull <atull@kernel.org>
Suggested-by: Federico Vaga <federico.vaga@cern.ch>
Acked-by: Moritz Fischer <mdf@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/fpga/fpga-bridge.rst |  3 ++
 drivers/fpga/altera-fpga2sdram.c              |  8 ++--
 drivers/fpga/altera-freeze-bridge.c           | 13 ++---
 drivers/fpga/altera-hps2fpga.c                |  7 ++-
 drivers/fpga/dfl-fme-br.c                     | 11 ++---
 drivers/fpga/fpga-bridge.c                    | 68 +++++++++++++++++++++++----
 drivers/fpga/xilinx-pr-decoupler.c            |  4 +-
 include/linux/fpga/fpga-bridge.h              |  4 ++
 8 files changed, 80 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/fpga/fpga-bridge.rst b/Documentation/driver-api/fpga/fpga-bridge.rst
index 2c2aaca894bf..ebbcbde687b3 100644
--- a/Documentation/driver-api/fpga/fpga-bridge.rst
+++ b/Documentation/driver-api/fpga/fpga-bridge.rst
@@ -10,6 +10,9 @@ API to implement a new FPGA bridge
 .. kernel-doc:: include/linux/fpga/fpga-bridge.h
    :functions: fpga_bridge_ops
 
+.. kernel-doc:: drivers/fpga/fpga-bridge.c
+   :functions: devm_fpga_bridge_create
+
 .. kernel-doc:: drivers/fpga/fpga-bridge.c
    :functions: fpga_bridge_create
 
diff --git a/drivers/fpga/altera-fpga2sdram.c b/drivers/fpga/altera-fpga2sdram.c
index 23660ccd634b..a78e49c63c64 100644
--- a/drivers/fpga/altera-fpga2sdram.c
+++ b/drivers/fpga/altera-fpga2sdram.c
@@ -121,18 +121,16 @@ static int alt_fpga_bridge_probe(struct platform_device *pdev)
 	/* Get f2s bridge configuration saved in handoff register */
 	regmap_read(sysmgr, SYSMGR_ISWGRP_HANDOFF3, &priv->mask);
 
-	br = fpga_bridge_create(dev, F2S_BRIDGE_NAME,
-				&altera_fpga2sdram_br_ops, priv);
+	br = devm_fpga_bridge_create(dev, F2S_BRIDGE_NAME,
+				     &altera_fpga2sdram_br_ops, priv);
 	if (!br)
 		return -ENOMEM;
 
 	platform_set_drvdata(pdev, br);
 
 	ret = fpga_bridge_register(br);
-	if (ret) {
-		fpga_bridge_free(br);
+	if (ret)
 		return ret;
-	}
 
 	dev_info(dev, "driver initialized with handoff %08x\n", priv->mask);
 
diff --git a/drivers/fpga/altera-freeze-bridge.c b/drivers/fpga/altera-freeze-bridge.c
index ffd586c48ecf..dd58c4aea92e 100644
--- a/drivers/fpga/altera-freeze-bridge.c
+++ b/drivers/fpga/altera-freeze-bridge.c
@@ -213,7 +213,6 @@ static int altera_freeze_br_probe(struct platform_device *pdev)
 	struct fpga_bridge *br;
 	struct resource *res;
 	u32 status, revision;
-	int ret;
 
 	if (!np)
 		return -ENODEV;
@@ -245,20 +244,14 @@ static int altera_freeze_br_probe(struct platform_device *pdev)
 
 	priv->base_addr = base_addr;
 
-	br = fpga_bridge_create(dev, FREEZE_BRIDGE_NAME,
-				&altera_freeze_br_br_ops, priv);
+	br = devm_fpga_bridge_create(dev, FREEZE_BRIDGE_NAME,
+				     &altera_freeze_br_br_ops, priv);
 	if (!br)
 		return -ENOMEM;
 
 	platform_set_drvdata(pdev, br);
 
-	ret = fpga_bridge_register(br);
-	if (ret) {
-		fpga_bridge_free(br);
-		return ret;
-	}
-
-	return 0;
+	return fpga_bridge_register(br);
 }
 
 static int altera_freeze_br_remove(struct platform_device *pdev)
diff --git a/drivers/fpga/altera-hps2fpga.c b/drivers/fpga/altera-hps2fpga.c
index a974d3f60321..77b95f251821 100644
--- a/drivers/fpga/altera-hps2fpga.c
+++ b/drivers/fpga/altera-hps2fpga.c
@@ -180,7 +180,8 @@ static int alt_fpga_bridge_probe(struct platform_device *pdev)
 		}
 	}
 
-	br = fpga_bridge_create(dev, priv->name, &altera_hps2fpga_br_ops, priv);
+	br = devm_fpga_bridge_create(dev, priv->name,
+				     &altera_hps2fpga_br_ops, priv);
 	if (!br) {
 		ret = -ENOMEM;
 		goto err;
@@ -190,12 +191,10 @@ static int alt_fpga_bridge_probe(struct platform_device *pdev)
 
 	ret = fpga_bridge_register(br);
 	if (ret)
-		goto err_free;
+		goto err;
 
 	return 0;
 
-err_free:
-	fpga_bridge_free(br);
 err:
 	clk_disable_unprepare(priv->clk);
 
diff --git a/drivers/fpga/dfl-fme-br.c b/drivers/fpga/dfl-fme-br.c
index 7cc041def8b3..3ff9f3a687ce 100644
--- a/drivers/fpga/dfl-fme-br.c
+++ b/drivers/fpga/dfl-fme-br.c
@@ -61,7 +61,6 @@ static int fme_br_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct fme_br_priv *priv;
 	struct fpga_bridge *br;
-	int ret;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -69,18 +68,14 @@ static int fme_br_probe(struct platform_device *pdev)
 
 	priv->pdata = dev_get_platdata(dev);
 
-	br = fpga_bridge_create(dev, "DFL FPGA FME Bridge",
-				&fme_bridge_ops, priv);
+	br = devm_fpga_bridge_create(dev, "DFL FPGA FME Bridge",
+				     &fme_bridge_ops, priv);
 	if (!br)
 		return -ENOMEM;
 
 	platform_set_drvdata(pdev, br);
 
-	ret = fpga_bridge_register(br);
-	if (ret)
-		fpga_bridge_free(br);
-
-	return ret;
+	return fpga_bridge_register(br);
 }
 
 static int fme_br_remove(struct platform_device *pdev)
diff --git a/drivers/fpga/fpga-bridge.c b/drivers/fpga/fpga-bridge.c
index c983dac97501..80bd8f1b2aa6 100644
--- a/drivers/fpga/fpga-bridge.c
+++ b/drivers/fpga/fpga-bridge.c
@@ -324,6 +324,9 @@ ATTRIBUTE_GROUPS(fpga_bridge);
  * @br_ops:	pointer to structure of fpga bridge ops
  * @priv:	FPGA bridge private data
  *
+ * The caller of this function is responsible for freeing the bridge with
+ * fpga_bridge_free().  Using devm_fpga_bridge_create() instead is recommended.
+ *
  * Return: struct fpga_bridge or NULL
  */
 struct fpga_bridge *fpga_bridge_create(struct device *dev, const char *name,
@@ -378,8 +381,8 @@ error_kfree:
 EXPORT_SYMBOL_GPL(fpga_bridge_create);
 
 /**
- * fpga_bridge_free - free a fpga bridge and its id
- * @bridge:	FPGA bridge struct created by fpga_bridge_create
+ * fpga_bridge_free - free a fpga bridge created by fpga_bridge_create()
+ * @bridge:	FPGA bridge struct
  */
 void fpga_bridge_free(struct fpga_bridge *bridge)
 {
@@ -388,9 +391,56 @@ void fpga_bridge_free(struct fpga_bridge *bridge)
 }
 EXPORT_SYMBOL_GPL(fpga_bridge_free);
 
+static void devm_fpga_bridge_release(struct device *dev, void *res)
+{
+	struct fpga_bridge *bridge = *(struct fpga_bridge **)res;
+
+	fpga_bridge_free(bridge);
+}
+
 /**
- * fpga_bridge_register - register a fpga bridge
- * @bridge:	FPGA bridge struct created by fpga_bridge_create
+ * devm_fpga_bridge_create - create and init a managed struct fpga_bridge
+ * @dev:	FPGA bridge device from pdev
+ * @name:	FPGA bridge name
+ * @br_ops:	pointer to structure of fpga bridge ops
+ * @priv:	FPGA bridge private data
+ *
+ * This function is intended for use in a FPGA bridge driver's probe function.
+ * After the bridge driver creates the struct with devm_fpga_bridge_create(), it
+ * should register the bridge with fpga_bridge_register().  The bridge driver's
+ * remove function should call fpga_bridge_unregister().  The bridge struct
+ * allocated with this function will be freed automatically on driver detach.
+ * This includes the case of a probe function returning error before calling
+ * fpga_bridge_register(), the struct will still get cleaned up.
+ *
+ *  Return: struct fpga_bridge or NULL
+ */
+struct fpga_bridge
+*devm_fpga_bridge_create(struct device *dev, const char *name,
+			 const struct fpga_bridge_ops *br_ops, void *priv)
+{
+	struct fpga_bridge **ptr, *bridge;
+
+	ptr = devres_alloc(devm_fpga_bridge_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	bridge = fpga_bridge_create(dev, name, br_ops, priv);
+	if (!bridge) {
+		devres_free(ptr);
+	} else {
+		*ptr = bridge;
+		devres_add(dev, ptr);
+	}
+
+	return bridge;
+}
+EXPORT_SYMBOL_GPL(devm_fpga_bridge_create);
+
+/**
+ * fpga_bridge_register - register a FPGA bridge
+ *
+ * @bridge: FPGA bridge struct
  *
  * Return: 0 for success, error code otherwise.
  */
@@ -412,8 +462,11 @@ int fpga_bridge_register(struct fpga_bridge *bridge)
 EXPORT_SYMBOL_GPL(fpga_bridge_register);
 
 /**
- * fpga_bridge_unregister - unregister and free a fpga bridge
- * @bridge:	FPGA bridge struct created by fpga_bridge_create
+ * fpga_bridge_unregister - unregister a FPGA bridge
+ *
+ * @bridge: FPGA bridge struct
+ *
+ * This function is intended for use in a FPGA bridge driver's remove function.
  */
 void fpga_bridge_unregister(struct fpga_bridge *bridge)
 {
@@ -430,9 +483,6 @@ EXPORT_SYMBOL_GPL(fpga_bridge_unregister);
 
 static void fpga_bridge_dev_release(struct device *dev)
 {
-	struct fpga_bridge *bridge = to_fpga_bridge(dev);
-
-	fpga_bridge_free(bridge);
 }
 
 static int __init fpga_bridge_dev_init(void)
diff --git a/drivers/fpga/xilinx-pr-decoupler.c b/drivers/fpga/xilinx-pr-decoupler.c
index 07ba1539e82c..641036135207 100644
--- a/drivers/fpga/xilinx-pr-decoupler.c
+++ b/drivers/fpga/xilinx-pr-decoupler.c
@@ -121,8 +121,8 @@ static int xlnx_pr_decoupler_probe(struct platform_device *pdev)
 
 	clk_disable(priv->clk);
 
-	br = fpga_bridge_create(&pdev->dev, "Xilinx PR Decoupler",
-				&xlnx_pr_decoupler_br_ops, priv);
+	br = devm_fpga_bridge_create(&pdev->dev, "Xilinx PR Decoupler",
+				     &xlnx_pr_decoupler_br_ops, priv);
 	if (!br) {
 		err = -ENOMEM;
 		goto err_clk;
diff --git a/include/linux/fpga/fpga-bridge.h b/include/linux/fpga/fpga-bridge.h
index ce550fcf6360..817600a32c93 100644
--- a/include/linux/fpga/fpga-bridge.h
+++ b/include/linux/fpga/fpga-bridge.h
@@ -69,4 +69,8 @@ void fpga_bridge_free(struct fpga_bridge *br);
 int fpga_bridge_register(struct fpga_bridge *br);
 void fpga_bridge_unregister(struct fpga_bridge *br);
 
+struct fpga_bridge
+*devm_fpga_bridge_create(struct device *dev, const char *name,
+			 const struct fpga_bridge_ops *br_ops, void *priv);
+
 #endif /* _LINUX_FPGA_BRIDGE_H */
-- 
cgit v1.2.3


From fea82b7f6670002ff36bf1bc77d0345b0b2f2d1c Mon Sep 17 00:00:00 2001
From: Alan Tull <atull@kernel.org>
Date: Mon, 15 Oct 2018 17:20:03 -0500
Subject: fpga: add devm_fpga_region_create

Add devm_fpga_region_create() which is the
managed version of fpga_region_create().

Change current region drivers to use
devm_fpga_region_create().

Signed-off-by: Alan Tull <atull@kernel.org>
Suggested-by: Federico Vaga <federico.vaga@cern.ch>
Acked-by: Moritz Fischer <mdf@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/fpga/fpga-region.rst |  3 ++
 drivers/fpga/dfl-fme-region.c                 |  6 +--
 drivers/fpga/dfl.c                            |  6 +--
 drivers/fpga/fpga-region.c                    | 65 +++++++++++++++++++++++----
 drivers/fpga/of-fpga-region.c                 |  6 +--
 include/linux/fpga/fpga-region.h              |  4 ++
 6 files changed, 70 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/driver-api/fpga/fpga-region.rst b/Documentation/driver-api/fpga/fpga-region.rst
index f30333ce828e..dc9f75cc3352 100644
--- a/Documentation/driver-api/fpga/fpga-region.rst
+++ b/Documentation/driver-api/fpga/fpga-region.rst
@@ -89,6 +89,9 @@ API to add a new FPGA region
 .. kernel-doc:: include/linux/fpga/fpga-region.h
    :functions: fpga_region
 
+.. kernel-doc:: drivers/fpga/fpga-region.c
+   :functions: devm_fpga_region_create
+
 .. kernel-doc:: drivers/fpga/fpga-region.c
    :functions: fpga_region_create
 
diff --git a/drivers/fpga/dfl-fme-region.c b/drivers/fpga/dfl-fme-region.c
index 51a5ac2293a7..ec134ec93f08 100644
--- a/drivers/fpga/dfl-fme-region.c
+++ b/drivers/fpga/dfl-fme-region.c
@@ -39,7 +39,7 @@ static int fme_region_probe(struct platform_device *pdev)
 	if (IS_ERR(mgr))
 		return -EPROBE_DEFER;
 
-	region = fpga_region_create(dev, mgr, fme_region_get_bridges);
+	region = devm_fpga_region_create(dev, mgr, fme_region_get_bridges);
 	if (!region) {
 		ret = -ENOMEM;
 		goto eprobe_mgr_put;
@@ -51,14 +51,12 @@ static int fme_region_probe(struct platform_device *pdev)
 
 	ret = fpga_region_register(region);
 	if (ret)
-		goto region_free;
+		goto eprobe_mgr_put;
 
 	dev_dbg(dev, "DFL FME FPGA Region probed\n");
 
 	return 0;
 
-region_free:
-	fpga_region_free(region);
 eprobe_mgr_put:
 	fpga_mgr_put(mgr);
 	return ret;
diff --git a/drivers/fpga/dfl.c b/drivers/fpga/dfl.c
index a9b521bccb06..2c09e502e721 100644
--- a/drivers/fpga/dfl.c
+++ b/drivers/fpga/dfl.c
@@ -899,7 +899,7 @@ dfl_fpga_feature_devs_enumerate(struct dfl_fpga_enum_info *info)
 	if (!cdev)
 		return ERR_PTR(-ENOMEM);
 
-	cdev->region = fpga_region_create(info->dev, NULL, NULL);
+	cdev->region = devm_fpga_region_create(info->dev, NULL, NULL);
 	if (!cdev->region) {
 		ret = -ENOMEM;
 		goto free_cdev_exit;
@@ -911,7 +911,7 @@ dfl_fpga_feature_devs_enumerate(struct dfl_fpga_enum_info *info)
 
 	ret = fpga_region_register(cdev->region);
 	if (ret)
-		goto free_region_exit;
+		goto free_cdev_exit;
 
 	/* create and init build info for enumeration */
 	binfo = devm_kzalloc(info->dev, sizeof(*binfo), GFP_KERNEL);
@@ -942,8 +942,6 @@ dfl_fpga_feature_devs_enumerate(struct dfl_fpga_enum_info *info)
 
 unregister_region_exit:
 	fpga_region_unregister(cdev->region);
-free_region_exit:
-	fpga_region_free(cdev->region);
 free_cdev_exit:
 	devm_kfree(info->dev, cdev);
 	return ERR_PTR(ret);
diff --git a/drivers/fpga/fpga-region.c b/drivers/fpga/fpga-region.c
index 0d65220d5ec5..bde5a9d460c5 100644
--- a/drivers/fpga/fpga-region.c
+++ b/drivers/fpga/fpga-region.c
@@ -185,6 +185,10 @@ ATTRIBUTE_GROUPS(fpga_region);
  * @mgr: manager that programs this region
  * @get_bridges: optional function to get bridges to a list
  *
+ * The caller of this function is responsible for freeing the resulting region
+ * struct with fpga_region_free().  Using devm_fpga_region_create() instead is
+ * recommended.
+ *
  * Return: struct fpga_region or NULL
  */
 struct fpga_region
@@ -230,8 +234,8 @@ err_free:
 EXPORT_SYMBOL_GPL(fpga_region_create);
 
 /**
- * fpga_region_free - free a struct fpga_region
- * @region: FPGA region created by fpga_region_create
+ * fpga_region_free - free a FPGA region created by fpga_region_create()
+ * @region: FPGA region
  */
 void fpga_region_free(struct fpga_region *region)
 {
@@ -240,21 +244,69 @@ void fpga_region_free(struct fpga_region *region)
 }
 EXPORT_SYMBOL_GPL(fpga_region_free);
 
+static void devm_fpga_region_release(struct device *dev, void *res)
+{
+	struct fpga_region *region = *(struct fpga_region **)res;
+
+	fpga_region_free(region);
+}
+
+/**
+ * devm_fpga_region_create - create and initialize a managed FPGA region struct
+ * @dev: device parent
+ * @mgr: manager that programs this region
+ * @get_bridges: optional function to get bridges to a list
+ *
+ * This function is intended for use in a FPGA region driver's probe function.
+ * After the region driver creates the region struct with
+ * devm_fpga_region_create(), it should register it with fpga_region_register().
+ * The region driver's remove function should call fpga_region_unregister().
+ * The region struct allocated with this function will be freed automatically on
+ * driver detach.  This includes the case of a probe function returning error
+ * before calling fpga_region_register(), the struct will still get cleaned up.
+ *
+ * Return: struct fpga_region or NULL
+ */
+struct fpga_region
+*devm_fpga_region_create(struct device *dev,
+			 struct fpga_manager *mgr,
+			 int (*get_bridges)(struct fpga_region *))
+{
+	struct fpga_region **ptr, *region;
+
+	ptr = devres_alloc(devm_fpga_region_release, sizeof(*ptr), GFP_KERNEL);
+	if (!ptr)
+		return NULL;
+
+	region = fpga_region_create(dev, mgr, get_bridges);
+	if (!region) {
+		devres_free(ptr);
+	} else {
+		*ptr = region;
+		devres_add(dev, ptr);
+	}
+
+	return region;
+}
+EXPORT_SYMBOL_GPL(devm_fpga_region_create);
+
 /**
  * fpga_region_register - register a FPGA region
- * @region: FPGA region created by fpga_region_create
+ * @region: FPGA region
+ *
  * Return: 0 or -errno
  */
 int fpga_region_register(struct fpga_region *region)
 {
 	return device_add(&region->dev);
-
 }
 EXPORT_SYMBOL_GPL(fpga_region_register);
 
 /**
- * fpga_region_unregister - unregister and free a FPGA region
+ * fpga_region_unregister - unregister a FPGA region
  * @region: FPGA region
+ *
+ * This function is intended for use in a FPGA region driver's remove function.
  */
 void fpga_region_unregister(struct fpga_region *region)
 {
@@ -264,9 +316,6 @@ EXPORT_SYMBOL_GPL(fpga_region_unregister);
 
 static void fpga_region_dev_release(struct device *dev)
 {
-	struct fpga_region *region = to_fpga_region(dev);
-
-	fpga_region_free(region);
 }
 
 /**
diff --git a/drivers/fpga/of-fpga-region.c b/drivers/fpga/of-fpga-region.c
index 052a1342ab7e..122286fd255a 100644
--- a/drivers/fpga/of-fpga-region.c
+++ b/drivers/fpga/of-fpga-region.c
@@ -410,7 +410,7 @@ static int of_fpga_region_probe(struct platform_device *pdev)
 	if (IS_ERR(mgr))
 		return -EPROBE_DEFER;
 
-	region = fpga_region_create(dev, mgr, of_fpga_region_get_bridges);
+	region = devm_fpga_region_create(dev, mgr, of_fpga_region_get_bridges);
 	if (!region) {
 		ret = -ENOMEM;
 		goto eprobe_mgr_put;
@@ -418,7 +418,7 @@ static int of_fpga_region_probe(struct platform_device *pdev)
 
 	ret = fpga_region_register(region);
 	if (ret)
-		goto eprobe_free;
+		goto eprobe_mgr_put;
 
 	of_platform_populate(np, fpga_region_of_match, NULL, &region->dev);
 	dev_set_drvdata(dev, region);
@@ -427,8 +427,6 @@ static int of_fpga_region_probe(struct platform_device *pdev)
 
 	return 0;
 
-eprobe_free:
-	fpga_region_free(region);
 eprobe_mgr_put:
 	fpga_mgr_put(mgr);
 	return ret;
diff --git a/include/linux/fpga/fpga-region.h b/include/linux/fpga/fpga-region.h
index 0521b7f577a4..27cb706275db 100644
--- a/include/linux/fpga/fpga-region.h
+++ b/include/linux/fpga/fpga-region.h
@@ -44,4 +44,8 @@ void fpga_region_free(struct fpga_region *region);
 int fpga_region_register(struct fpga_region *region);
 void fpga_region_unregister(struct fpga_region *region);
 
+struct fpga_region
+*devm_fpga_region_create(struct device *dev, struct fpga_manager *mgr,
+			int (*get_bridges)(struct fpga_region *));
+
 #endif /* _FPGA_REGION_H */
-- 
cgit v1.2.3


From 8514c470dc18e58f27dee10a787175ef8f31162f Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 11 Jul 2018 15:20:32 +0200
Subject: driver core: add BUS_ATTR_WO() macro

Many bus attributes are write-only, so provide a simple macro for that
to be able to match the other driver core attribute macros.

Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index 8f882549edee..767cf1938b14 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -55,6 +55,8 @@ struct bus_attribute {
 	struct bus_attribute bus_attr_##_name = __ATTR_RW(_name)
 #define BUS_ATTR_RO(_name) \
 	struct bus_attribute bus_attr_##_name = __ATTR_RO(_name)
+#define BUS_ATTR_WO(_name) \
+	struct bus_attribute bus_attr_##_name = __ATTR_WO(_name)
 
 extern int __must_check bus_create_file(struct bus_type *,
 					struct bus_attribute *);
-- 
cgit v1.2.3


From 0571967dfb5d2573c2a06871517d748932a899d1 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <brgl@bgdev.pl>
Date: Sun, 14 Oct 2018 17:20:07 +0200
Subject: devres: constify p in devm_kfree()

Make devm_kfree() signature uniform with that of kfree(). To avoid
compiler warnings: cast p to (void *) when calling devres_destroy().

Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/devres.c  | 5 +++--
 include/linux/device.h | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index f98a097e73f2..438c91a43508 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -885,11 +885,12 @@ EXPORT_SYMBOL_GPL(devm_kasprintf);
  *
  * Free memory allocated with devm_kmalloc().
  */
-void devm_kfree(struct device *dev, void *p)
+void devm_kfree(struct device *dev, const void *p)
 {
 	int rc;
 
-	rc = devres_destroy(dev, devm_kmalloc_release, devm_kmalloc_match, p);
+	rc = devres_destroy(dev, devm_kmalloc_release,
+			    devm_kmalloc_match, (void *)p);
 	WARN_ON(rc);
 }
 EXPORT_SYMBOL_GPL(devm_kfree);
diff --git a/include/linux/device.h b/include/linux/device.h
index 767cf1938b14..c2022c1daef6 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -694,7 +694,7 @@ static inline void *devm_kcalloc(struct device *dev,
 {
 	return devm_kmalloc_array(dev, n, size, flags | __GFP_ZERO);
 }
-extern void devm_kfree(struct device *dev, void *p);
+extern void devm_kfree(struct device *dev, const void *p);
 extern char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) __malloc;
 extern void *devm_kmemdup(struct device *dev, const void *src, size_t len,
 			  gfp_t gfp);
-- 
cgit v1.2.3


From 09d1ea1c7309c8ca91151778bb3efe514f2e03ed Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <brgl@bgdev.pl>
Date: Sun, 14 Oct 2018 17:20:09 +0200
Subject: devres: provide devm_kstrdup_const()

Provide a resource managed version of kstrdup_const(). This variant
internally calls devm_kstrdup() on pointers that are outside of
.rodata section and returns the string as is otherwise.

Make devm_kfree() check if the passed pointer doesn't point to .rodata
and if so - don't actually destroy the resource.

Signed-off-by: Bartosz Golaszewski <brgl@bgdev.pl>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Acked-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Acked-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/devres.c  | 31 +++++++++++++++++++++++++++++++
 include/linux/device.h |  2 ++
 2 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index 438c91a43508..4aaf00d2098b 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -11,6 +11,8 @@
 #include <linux/slab.h>
 #include <linux/percpu.h>
 
+#include <asm/sections.h>
+
 #include "base.h"
 
 struct devres_node {
@@ -822,6 +824,28 @@ char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp)
 }
 EXPORT_SYMBOL_GPL(devm_kstrdup);
 
+/**
+ * devm_kstrdup_const - resource managed conditional string duplication
+ * @dev: device for which to duplicate the string
+ * @s: the string to duplicate
+ * @gfp: the GFP mask used in the kmalloc() call when allocating memory
+ *
+ * Strings allocated by devm_kstrdup_const will be automatically freed when
+ * the associated device is detached.
+ *
+ * RETURNS:
+ * Source string if it is in .rodata section otherwise it falls back to
+ * devm_kstrdup.
+ */
+const char *devm_kstrdup_const(struct device *dev, const char *s, gfp_t gfp)
+{
+	if (is_kernel_rodata((unsigned long)s))
+		return s;
+
+	return devm_kstrdup(dev, s, gfp);
+}
+EXPORT_SYMBOL_GPL(devm_kstrdup_const);
+
 /**
  * devm_kvasprintf - Allocate resource managed space and format a string
  *		     into that.
@@ -889,6 +913,13 @@ void devm_kfree(struct device *dev, const void *p)
 {
 	int rc;
 
+	/*
+	 * Special case: pointer to a string in .rodata returned by
+	 * devm_kstrdup_const().
+	 */
+	if (unlikely(is_kernel_rodata((unsigned long)p)))
+		return;
+
 	rc = devres_destroy(dev, devm_kmalloc_release,
 			    devm_kmalloc_match, (void *)p);
 	WARN_ON(rc);
diff --git a/include/linux/device.h b/include/linux/device.h
index c2022c1daef6..fecd9722400e 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -696,6 +696,8 @@ static inline void *devm_kcalloc(struct device *dev,
 }
 extern void devm_kfree(struct device *dev, const void *p);
 extern char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) __malloc;
+extern const char *devm_kstrdup_const(struct device *dev,
+				      const char *s, gfp_t gfp);
 extern void *devm_kmemdup(struct device *dev, const void *src, size_t len,
 			  gfp_t gfp);
 
-- 
cgit v1.2.3


From 891b7c5fbf619d2a314e424775b3c232eb227e90 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 16 Oct 2018 08:09:58 -0600
Subject: mtd_blkdevs: convert to blk-mq

Straight forward conversion, using an internal list to enable the
driver to pull requests at will.

Dynamically allocate the tag set to avoid having to pull in the
block headers for blktrans.h, since various mtd drivers use
block conflicting names for defines and functions.

Cc: David Woodhouse <dwmw2@infradead.org>
Cc: linux-mtd@lists.infradead.org
Tested-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/mtd/mtd_blkdevs.c    | 100 +++++++++++++++++++++++++++----------------
 include/linux/mtd/blktrans.h |   5 +--
 2 files changed, 65 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 6a41dfa3c36b..b0d44f9214b0 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -27,6 +27,7 @@
 #include <linux/mtd/blktrans.h>
 #include <linux/mtd/mtd.h>
 #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/blkpg.h>
 #include <linux/spinlock.h>
 #include <linux/hdreg.h>
@@ -45,6 +46,8 @@ static void blktrans_dev_release(struct kref *kref)
 
 	dev->disk->private_data = NULL;
 	blk_cleanup_queue(dev->rq);
+	blk_mq_free_tag_set(dev->tag_set);
+	kfree(dev->tag_set);
 	put_disk(dev->disk);
 	list_del(&dev->list);
 	kfree(dev);
@@ -134,28 +137,39 @@ int mtd_blktrans_cease_background(struct mtd_blktrans_dev *dev)
 }
 EXPORT_SYMBOL_GPL(mtd_blktrans_cease_background);
 
-static void mtd_blktrans_work(struct work_struct *work)
+static struct request *mtd_next_request(struct mtd_blktrans_dev *dev)
+{
+	struct request *rq;
+
+	rq = list_first_entry_or_null(&dev->rq_list, struct request, queuelist);
+	if (rq) {
+		list_del_init(&rq->queuelist);
+		blk_mq_start_request(rq);
+		return rq;
+	}
+
+	return NULL;
+}
+
+static void mtd_blktrans_work(struct mtd_blktrans_dev *dev)
+	__releases(&dev->queue_lock)
+	__acquires(&dev->queue_lock)
 {
-	struct mtd_blktrans_dev *dev =
-		container_of(work, struct mtd_blktrans_dev, work);
 	struct mtd_blktrans_ops *tr = dev->tr;
-	struct request_queue *rq = dev->rq;
 	struct request *req = NULL;
 	int background_done = 0;
 
-	spin_lock_irq(rq->queue_lock);
-
 	while (1) {
 		blk_status_t res;
 
 		dev->bg_stop = false;
-		if (!req && !(req = blk_fetch_request(rq))) {
+		if (!req && !(req = mtd_next_request(dev))) {
 			if (tr->background && !background_done) {
-				spin_unlock_irq(rq->queue_lock);
+				spin_unlock_irq(&dev->queue_lock);
 				mutex_lock(&dev->lock);
 				tr->background(dev);
 				mutex_unlock(&dev->lock);
-				spin_lock_irq(rq->queue_lock);
+				spin_lock_irq(&dev->queue_lock);
 				/*
 				 * Do background processing just once per idle
 				 * period.
@@ -166,35 +180,39 @@ static void mtd_blktrans_work(struct work_struct *work)
 			break;
 		}
 
-		spin_unlock_irq(rq->queue_lock);
+		spin_unlock_irq(&dev->queue_lock);
 
 		mutex_lock(&dev->lock);
 		res = do_blktrans_request(dev->tr, dev, req);
 		mutex_unlock(&dev->lock);
 
-		spin_lock_irq(rq->queue_lock);
-
-		if (!__blk_end_request_cur(req, res))
+		if (!blk_update_request(req, res, blk_rq_cur_bytes(req))) {
+			__blk_mq_end_request(req, res);
 			req = NULL;
+		}
 
 		background_done = 0;
+		spin_lock_irq(&dev->queue_lock);
 	}
-
-	spin_unlock_irq(rq->queue_lock);
 }
 
-static void mtd_blktrans_request(struct request_queue *rq)
+static blk_status_t mtd_queue_rq(struct blk_mq_hw_ctx *hctx,
+				 const struct blk_mq_queue_data *bd)
 {
 	struct mtd_blktrans_dev *dev;
-	struct request *req = NULL;
 
-	dev = rq->queuedata;
+	dev = hctx->queue->queuedata;
+	if (!dev) {
+		blk_mq_start_request(bd->rq);
+		return BLK_STS_IOERR;
+	}
+
+	spin_lock_irq(&dev->queue_lock);
+	list_add_tail(&bd->rq->queuelist, &dev->rq_list);
+	mtd_blktrans_work(dev);
+	spin_unlock_irq(&dev->queue_lock);
 
-	if (!dev)
-		while ((req = blk_fetch_request(rq)) != NULL)
-			__blk_end_request_all(req, BLK_STS_IOERR);
-	else
-		queue_work(dev->wq, &dev->work);
+	return BLK_STS_OK;
 }
 
 static int blktrans_open(struct block_device *bdev, fmode_t mode)
@@ -329,6 +347,10 @@ static const struct block_device_operations mtd_block_ops = {
 	.getgeo		= blktrans_getgeo,
 };
 
+static const struct blk_mq_ops mtd_mq_ops = {
+	.queue_rq	= mtd_queue_rq,
+};
+
 int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 {
 	struct mtd_blktrans_ops *tr = new->tr;
@@ -416,11 +438,20 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 
 	/* Create the request queue */
 	spin_lock_init(&new->queue_lock);
-	new->rq = blk_init_queue(mtd_blktrans_request, &new->queue_lock);
+	INIT_LIST_HEAD(&new->rq_list);
 
-	if (!new->rq)
+	new->tag_set = kzalloc(sizeof(*new->tag_set), GFP_KERNEL);
+	if (!new->tag_set)
 		goto error3;
 
+	new->rq = blk_mq_init_sq_queue(new->tag_set, &mtd_mq_ops, 2,
+				BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
+	if (IS_ERR(new->rq)) {
+		ret = PTR_ERR(new->rq);
+		new->rq = NULL;
+		goto error4;
+	}
+
 	if (tr->flush)
 		blk_queue_write_cache(new->rq, true, false);
 
@@ -437,13 +468,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 
 	gd->queue = new->rq;
 
-	/* Create processing workqueue */
-	new->wq = alloc_workqueue("%s%d", 0, 0,
-				  tr->name, new->mtd->index);
-	if (!new->wq)
-		goto error4;
-	INIT_WORK(&new->work, mtd_blktrans_work);
-
 	if (new->readonly)
 		set_disk_ro(gd, 1);
 
@@ -456,7 +480,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 	}
 	return 0;
 error4:
-	blk_cleanup_queue(new->rq);
+	kfree(new->tag_set);
 error3:
 	put_disk(new->disk);
 error2:
@@ -481,15 +505,17 @@ int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old)
 	/* Stop new requests to arrive */
 	del_gendisk(old->disk);
 
-	/* Stop workqueue. This will perform any pending request. */
-	destroy_workqueue(old->wq);
-
 	/* Kill current requests */
 	spin_lock_irqsave(&old->queue_lock, flags);
 	old->rq->queuedata = NULL;
-	blk_start_queue(old->rq);
 	spin_unlock_irqrestore(&old->queue_lock, flags);
 
+	/* freeze+quiesce queue to ensure all requests are flushed */
+	blk_mq_freeze_queue(old->rq);
+	blk_mq_quiesce_queue(old->rq);
+	blk_mq_unquiesce_queue(old->rq);
+	blk_mq_unfreeze_queue(old->rq);
+
 	/* If the device is currently open, tell trans driver to close it,
 		then put mtd device, and don't touch it again */
 	mutex_lock(&old->lock);
diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h
index e93837f647de..1d3ade69d39a 100644
--- a/include/linux/mtd/blktrans.h
+++ b/include/linux/mtd/blktrans.h
@@ -23,7 +23,6 @@
 #include <linux/mutex.h>
 #include <linux/kref.h>
 #include <linux/sysfs.h>
-#include <linux/workqueue.h>
 
 struct hd_geometry;
 struct mtd_info;
@@ -44,9 +43,9 @@ struct mtd_blktrans_dev {
 	struct kref ref;
 	struct gendisk *disk;
 	struct attribute_group *disk_attributes;
-	struct workqueue_struct *wq;
-	struct work_struct work;
 	struct request_queue *rq;
+	struct list_head rq_list;
+	struct blk_mq_tag_set *tag_set;
 	spinlock_t queue_lock;
 	void *priv;
 	fmode_t file_mode;
-- 
cgit v1.2.3


From c87228f16f0acdf242dee62f139013212208473f Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Thu, 11 Oct 2018 12:20:45 -0700
Subject: amiflop: fold headers into C file

amifd.h and amifdreg.h are only used from amiflop.c, and they're pretty
small, so move the contents to amiflop.c and get rid of the .h files.
This is preparation for adding a struct blk_mq_tag_set to struct
amiga_floppy_struct.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/amiflop.c  | 123 +++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/amifd.h    |  63 ------------------------
 include/linux/amifdreg.h |  82 -------------------------------
 3 files changed, 120 insertions(+), 148 deletions(-)
 delete mode 100644 include/linux/amifd.h
 delete mode 100644 include/linux/amifdreg.h

(limited to 'include/linux')

diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 3aaf6af3ec23..a7d6e6a9b12f 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -61,10 +61,8 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/mutex.h>
-#include <linux/amifdreg.h>
-#include <linux/amifd.h>
 #include <linux/fs.h>
-#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 #include <linux/elevator.h>
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
@@ -86,6 +84,125 @@
  *  Defines
  */
 
+/*
+ * CIAAPRA bits (read only)
+ */
+
+#define DSKRDY      (0x1<<5)        /* disk ready when low */
+#define DSKTRACK0   (0x1<<4)        /* head at track zero when low */
+#define DSKPROT     (0x1<<3)        /* disk protected when low */
+#define DSKCHANGE   (0x1<<2)        /* low when disk removed */
+
+/*
+ * CIAAPRB bits (read/write)
+ */
+
+#define DSKMOTOR    (0x1<<7)        /* motor on when low */
+#define DSKSEL3     (0x1<<6)        /* select drive 3 when low */
+#define DSKSEL2     (0x1<<5)        /* select drive 2 when low */
+#define DSKSEL1     (0x1<<4)        /* select drive 1 when low */
+#define DSKSEL0     (0x1<<3)        /* select drive 0 when low */
+#define DSKSIDE     (0x1<<2)        /* side selection: 0 = upper, 1 = lower */
+#define DSKDIREC    (0x1<<1)        /* step direction: 0=in, 1=out (to trk 0) */
+#define DSKSTEP     (0x1)           /* pulse low to step head 1 track */
+
+/*
+ * DSKBYTR bits (read only)
+ */
+
+#define DSKBYT      (1<<15)         /* register contains valid byte when set */
+#define DMAON       (1<<14)         /* disk DMA enabled */
+#define DISKWRITE   (1<<13)         /* disk write bit in DSKLEN enabled */
+#define WORDEQUAL   (1<<12)         /* DSKSYNC register match when true */
+/* bits 7-0 are data */
+
+/*
+ * ADKCON/ADKCONR bits
+ */
+
+#ifndef SETCLR
+#define ADK_SETCLR      (1<<15)     /* control bit */
+#endif
+#define ADK_PRECOMP1    (1<<14)     /* precompensation selection */
+#define ADK_PRECOMP0    (1<<13)     /* 00=none, 01=140ns, 10=280ns, 11=500ns */
+#define ADK_MFMPREC     (1<<12)     /* 0=GCR precomp., 1=MFM precomp. */
+#define ADK_WORDSYNC    (1<<10)     /* enable DSKSYNC auto DMA */
+#define ADK_MSBSYNC     (1<<9)      /* when 1, enable sync on MSbit (for GCR) */
+#define ADK_FAST        (1<<8)      /* bit cell: 0=2us (GCR), 1=1us (MFM) */
+
+/*
+ * DSKLEN bits
+ */
+
+#define DSKLEN_DMAEN    (1<<15)
+#define DSKLEN_WRITE    (1<<14)
+
+/*
+ * INTENA/INTREQ bits
+ */
+
+#define DSKINDEX    (0x1<<4)        /* DSKINDEX bit */
+
+/*
+ * Misc
+ */
+
+#define MFM_SYNC    0x4489          /* standard MFM sync value */
+
+/* Values for FD_COMMAND */
+#define FD_RECALIBRATE		0x07	/* move to track 0 */
+#define FD_SEEK			0x0F	/* seek track */
+#define FD_READ			0xE6	/* read with MT, MFM, SKip deleted */
+#define FD_WRITE		0xC5	/* write with MT, MFM */
+#define FD_SENSEI		0x08	/* Sense Interrupt Status */
+#define FD_SPECIFY		0x03	/* specify HUT etc */
+#define FD_FORMAT		0x4D	/* format one track */
+#define FD_VERSION		0x10	/* get version code */
+#define FD_CONFIGURE		0x13	/* configure FIFO operation */
+#define FD_PERPENDICULAR	0x12	/* perpendicular r/w mode */
+
+#define FD_MAX_UNITS    4	/* Max. Number of drives */
+#define FLOPPY_MAX_SECTORS	22	/* Max. Number of sectors per track */
+
+struct fd_data_type {
+	char *name;		/* description of data type */
+	int sects;		/* sectors per track */
+	int (*read_fkt)(int);	/* read whole track */
+	void (*write_fkt)(int);	/* write whole track */
+};
+
+struct fd_drive_type {
+	unsigned long code;		/* code returned from drive */
+	char *name;			/* description of drive */
+	unsigned int tracks;	/* number of tracks */
+	unsigned int heads;		/* number of heads */
+	unsigned int read_size;	/* raw read size for one track */
+	unsigned int write_size;	/* raw write size for one track */
+	unsigned int sect_mult;	/* sectors and gap multiplier (HD = 2) */
+	unsigned int precomp1;	/* start track for precomp 1 */
+	unsigned int precomp2;	/* start track for precomp 2 */
+	unsigned int step_delay;	/* time (in ms) for delay after step */
+	unsigned int settle_time;	/* time to settle after dir change */
+	unsigned int side_time;	/* time needed to change sides */
+};
+
+struct amiga_floppy_struct {
+	struct fd_drive_type *type;	/* type of floppy for this unit */
+	struct fd_data_type *dtype;	/* type of floppy for this unit */
+	int track;			/* current track (-1 == unknown) */
+	unsigned char *trackbuf;	/* current track (kmaloc()'d */
+
+	int blocks;			/* total # blocks on disk */
+
+	int changed;			/* true when not known */
+	int disk;			/* disk in drive (-1 == unknown) */
+	int motor;			/* true when motor is at speed */
+	int busy;			/* true when drive is active */
+	int dirty;			/* true when trackbuf is not on disk */
+	int status;			/* current error code for unit */
+	struct gendisk *gendisk;
+};
+
 /*
  *  Error codes
  */
diff --git a/include/linux/amifd.h b/include/linux/amifd.h
deleted file mode 100644
index 202a77dbe46d..000000000000
--- a/include/linux/amifd.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _AMIFD_H
-#define _AMIFD_H
-
-/* Definitions for the Amiga floppy driver */
-
-#include <linux/fd.h>
-
-#define FD_MAX_UNITS    4	/* Max. Number of drives */
-#define FLOPPY_MAX_SECTORS	22	/* Max. Number of sectors per track */
-
-#ifndef ASSEMBLER
-
-struct fd_data_type {
-    char *name;			/* description of data type */
-    int sects;			/* sectors per track */
-#ifdef __STDC__
-    int (*read_fkt)(int);
-    void (*write_fkt)(int);
-#else
-    int (*read_fkt)();		/* read whole track */
-    void (*write_fkt)();		/* write whole track */
-#endif
-};
-
-/*
-** Floppy type descriptions
-*/
-
-struct fd_drive_type {
-    unsigned long code;		/* code returned from drive */
-    char *name;			/* description of drive */
-    unsigned int tracks;	/* number of tracks */
-    unsigned int heads;		/* number of heads */
-    unsigned int read_size;	/* raw read size for one track */
-    unsigned int write_size;	/* raw write size for one track */
-    unsigned int sect_mult;	/* sectors and gap multiplier (HD = 2) */
-    unsigned int precomp1;	/* start track for precomp 1 */
-    unsigned int precomp2;	/* start track for precomp 2 */
-    unsigned int step_delay;	/* time (in ms) for delay after step */
-    unsigned int settle_time;	/* time to settle after dir change */
-    unsigned int side_time;	/* time needed to change sides */
-};
-
-struct amiga_floppy_struct {
-    struct fd_drive_type *type;	/* type of floppy for this unit */
-    struct fd_data_type *dtype;	/* type of floppy for this unit */
-    int track;			/* current track (-1 == unknown) */
-    unsigned char *trackbuf;    /* current track (kmaloc()'d */
-
-    int blocks;			/* total # blocks on disk */
-
-    int changed;		/* true when not known */
-    int disk;			/* disk in drive (-1 == unknown) */
-    int motor;			/* true when motor is at speed */
-    int busy;			/* true when drive is active */
-    int dirty;			/* true when trackbuf is not on disk */
-    int status;			/* current error code for unit */
-    struct gendisk *gendisk;
-};
-#endif
-
-#endif
diff --git a/include/linux/amifdreg.h b/include/linux/amifdreg.h
deleted file mode 100644
index 9b514d05ec70..000000000000
--- a/include/linux/amifdreg.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_AMIFDREG_H
-#define _LINUX_AMIFDREG_H
-
-/*
-** CIAAPRA bits (read only)
-*/
-
-#define DSKRDY      (0x1<<5)        /* disk ready when low */
-#define DSKTRACK0   (0x1<<4)        /* head at track zero when low */
-#define DSKPROT     (0x1<<3)        /* disk protected when low */
-#define DSKCHANGE   (0x1<<2)        /* low when disk removed */
-
-/*
-** CIAAPRB bits (read/write)
-*/
-
-#define DSKMOTOR    (0x1<<7)        /* motor on when low */
-#define DSKSEL3     (0x1<<6)        /* select drive 3 when low */
-#define DSKSEL2     (0x1<<5)        /* select drive 2 when low */
-#define DSKSEL1     (0x1<<4)        /* select drive 1 when low */
-#define DSKSEL0     (0x1<<3)        /* select drive 0 when low */
-#define DSKSIDE     (0x1<<2)        /* side selection: 0 = upper, 1 = lower */
-#define DSKDIREC    (0x1<<1)        /* step direction: 0=in, 1=out (to trk 0) */
-#define DSKSTEP     (0x1)           /* pulse low to step head 1 track */
-
-/*
-** DSKBYTR bits (read only)
-*/
-
-#define DSKBYT      (1<<15)         /* register contains valid byte when set */
-#define DMAON       (1<<14)         /* disk DMA enabled */
-#define DISKWRITE   (1<<13)         /* disk write bit in DSKLEN enabled */
-#define WORDEQUAL   (1<<12)         /* DSKSYNC register match when true */
-/* bits 7-0 are data */
-
-/*
-** ADKCON/ADKCONR bits
-*/
-
-#ifndef SETCLR
-#define ADK_SETCLR      (1<<15)     /* control bit */
-#endif
-#define ADK_PRECOMP1    (1<<14)     /* precompensation selection */
-#define ADK_PRECOMP0    (1<<13)     /* 00=none, 01=140ns, 10=280ns, 11=500ns */
-#define ADK_MFMPREC     (1<<12)     /* 0=GCR precomp., 1=MFM precomp. */
-#define ADK_WORDSYNC    (1<<10)     /* enable DSKSYNC auto DMA */
-#define ADK_MSBSYNC     (1<<9)      /* when 1, enable sync on MSbit (for GCR) */
-#define ADK_FAST        (1<<8)      /* bit cell: 0=2us (GCR), 1=1us (MFM) */
- 
-/*
-** DSKLEN bits
-*/
-
-#define DSKLEN_DMAEN    (1<<15)
-#define DSKLEN_WRITE    (1<<14)
-
-/*
-** INTENA/INTREQ bits
-*/
-
-#define DSKINDEX    (0x1<<4)        /* DSKINDEX bit */
-
-/*
-** Misc
-*/
- 
-#define MFM_SYNC    0x4489          /* standard MFM sync value */
-
-/* Values for FD_COMMAND */
-#define FD_RECALIBRATE		0x07	/* move to track 0 */
-#define FD_SEEK			0x0F	/* seek track */
-#define FD_READ			0xE6	/* read with MT, MFM, SKip deleted */
-#define FD_WRITE		0xC5	/* write with MT, MFM */
-#define FD_SENSEI		0x08	/* Sense Interrupt Status */
-#define FD_SPECIFY		0x03	/* specify HUT etc */
-#define FD_FORMAT		0x4D	/* format one track */
-#define FD_VERSION		0x10	/* get version code */
-#define FD_CONFIGURE		0x13	/* configure FIFO operation */
-#define FD_PERPENDICULAR	0x12	/* perpendicular r/w mode */
-
-#endif /* _LINUX_AMIFDREG_H */
-- 
cgit v1.2.3


From 4354994f097d068a894aa1a0860da54571df3582 Mon Sep 17 00:00:00 2001
From: Daniel Rosenberg <drosen@google.com>
Date: Mon, 20 Aug 2018 19:21:43 -0700
Subject: f2fs: checkpoint disabling

Note that, it requires "f2fs: return correct errno in f2fs_gc".

This adds a lightweight non-persistent snapshotting scheme to f2fs.

To use, mount with the option checkpoint=disable, and to return to
normal operation, remount with checkpoint=enable. If the filesystem
is shut down before remounting with checkpoint=enable, it will revert
back to its apparent state when it was first mounted with
checkpoint=disable. This is useful for situations where you wish to be
able to roll back the state of the disk in case of some critical
failure.

Signed-off-by: Daniel Rosenberg <drosen@google.com>
[Jaegeuk Kim: use SB_RDONLY instead of MS_RDONLY]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/filesystems/f2fs.txt |   5 ++
 fs/f2fs/checkpoint.c               |  12 ++++
 fs/f2fs/data.c                     |  14 ++++-
 fs/f2fs/debug.c                    |   3 +-
 fs/f2fs/f2fs.h                     |  18 +++++-
 fs/f2fs/file.c                     |  12 +++-
 fs/f2fs/gc.c                       |   9 ++-
 fs/f2fs/inode.c                    |   6 +-
 fs/f2fs/namei.c                    |  19 ++++++
 fs/f2fs/segment.c                  |  98 +++++++++++++++++++++++++++--
 fs/f2fs/segment.h                  |  15 +++++
 fs/f2fs/super.c                    | 126 ++++++++++++++++++++++++++++++++++++-
 include/linux/f2fs_fs.h            |   1 +
 13 files changed, 324 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
index bde3e91e5372..e46c2147ddf8 100644
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -212,6 +212,11 @@ fsync_mode=%s          Control the policy of fsync. Currently supports "posix",
                        non-atomic files likewise "nobarrier" mount option.
 test_dummy_encryption  Enable dummy encryption, which provides a fake fscrypt
                        context. The fake fscrypt context is used by xfstests.
+checkpoint=%s          Set to "disable" to turn off checkpointing. Set to "enable"
+                       to reenable checkpointing. Is enabled by default. While
+                       disabled, any unmounting or unexpected shutdowns will cause
+                       the filesystem contents to appear as they did when the
+                       filesystem was mounted with that option.
 
 ================================================================================
 DEBUGFS ENTRIES
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 97b429ba2911..eb6ac79640f8 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1210,6 +1210,11 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
 		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);
 
+	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED))
+		__set_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+	else
+		__clear_ckpt_flags(ckpt, CP_DISABLED_FLAG);
+
 	/* set this flag to activate crc|cp_ver for recovery */
 	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
 	__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
@@ -1417,6 +1422,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	clear_sbi_flag(sbi, SBI_IS_DIRTY);
 	clear_sbi_flag(sbi, SBI_NEED_CP);
+	sbi->unusable_block_count = 0;
 	__set_cp_next_pack(sbi);
 
 	/*
@@ -1441,6 +1447,12 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	unsigned long long ckpt_ver;
 	int err = 0;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+		if (cpc->reason != CP_PAUSE)
+			return 0;
+		f2fs_msg(sbi->sb, KERN_WARNING,
+				"Start checkpoint disabled!");
+	}
 	mutex_lock(&sbi->cp_mutex);
 
 	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 873f9ea7769f..9ef6f1f01eda 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -537,7 +537,8 @@ skip:
 	if (fio->in_list)
 		goto next;
 out:
-	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
+	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
+				f2fs_is_checkpoint_ready(sbi))
 		__submit_merged_bio(io);
 	up_write(&io->io_rwsem);
 }
@@ -1703,6 +1704,10 @@ static inline bool check_inplace_update_policy(struct inode *inode,
 			is_inode_flag_set(inode, FI_NEED_IPU))
 		return true;
 
+	if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+			!f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+		return true;
+
 	return false;
 }
 
@@ -1733,6 +1738,9 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
 			return true;
 		if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
 			return true;
+		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+			f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
+			return true;
 	}
 	return false;
 }
@@ -2353,6 +2361,10 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 
 	trace_f2fs_write_begin(inode, pos, len, flags);
 
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		goto fail;
+
 	if ((f2fs_is_atomic_file(inode) &&
 			!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
 			is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 75bc62edc4c1..026e10f30889 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -272,7 +272,8 @@ static int stat_show(struct seq_file *s, void *v)
 		seq_printf(s, "\n=====[ partition info(%pg). #%d, %s, CP: %s]=====\n",
 			si->sbi->sb->s_bdev, i++,
 			f2fs_readonly(si->sbi->sb) ? "RO": "RW",
-			f2fs_cp_error(si->sbi) ? "Error": "Good");
+			is_set_ckpt_flags(si->sbi, CP_DISABLED_FLAG) ?
+			"Disabled": (f2fs_cp_error(si->sbi) ? "Error": "Good"));
 		seq_printf(s, "[SB: 1] [CP: 2] [SIT: %d] [NAT: %d] ",
 			   si->sit_area_segs, si->nat_area_segs);
 		seq_printf(s, "[SSA: %d] [MAIN: %d",
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 95d9edd8ff6e..ff540f523a3b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -99,6 +99,7 @@ extern char *f2fs_fault_name[FAULT_MAX];
 #define F2FS_MOUNT_QUOTA		0x00400000
 #define F2FS_MOUNT_INLINE_XATTR_SIZE	0x00800000
 #define F2FS_MOUNT_RESERVE_ROOT		0x01000000
+#define F2FS_MOUNT_DISABLE_CHECKPOINT	0x02000000
 
 #define F2FS_OPTION(sbi)	((sbi)->mount_opt)
 #define clear_opt(sbi, option)	(F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -178,6 +179,7 @@ enum {
 #define	CP_RECOVERY	0x00000008
 #define	CP_DISCARD	0x00000010
 #define CP_TRIMMED	0x00000020
+#define CP_PAUSE	0x00000040
 
 #define MAX_DISCARD_BLOCKS(sbi)		BLKS_PER_SEC(sbi)
 #define DEF_MAX_DISCARD_REQUEST		8	/* issue 8 discards per round */
@@ -187,6 +189,7 @@ enum {
 #define DEF_DISCARD_URGENT_UTIL		80	/* do more discard over 80% */
 #define DEF_CP_INTERVAL			60	/* 60 secs */
 #define DEF_IDLE_INTERVAL		5	/* 5 secs */
+#define DEF_DISABLE_INTERVAL		5	/* 5 secs */
 
 struct cp_control {
 	int reason;
@@ -1092,6 +1095,7 @@ enum {
 	SBI_NEED_CP,				/* need to checkpoint */
 	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
 	SBI_IS_RECOVERED,			/* recovered orphan/data */
+	SBI_CP_DISABLED,			/* CP was disabled last mount */
 };
 
 enum {
@@ -1099,6 +1103,7 @@ enum {
 	REQ_TIME,
 	DISCARD_TIME,
 	GC_TIME,
+	DISABLE_TIME,
 	MAX_TIME,
 };
 
@@ -1225,6 +1230,9 @@ struct f2fs_sb_info {
 	block_t reserved_blocks;		/* configurable reserved blocks */
 	block_t current_reserved_blocks;	/* current reserved blocks */
 
+	/* Additional tracking for no checkpoint mode */
+	block_t unusable_block_count;		/* # of blocks saved by last cp */
+
 	unsigned int nquota_files;		/* # of quota sysfile */
 
 	u32 s_next_generation;			/* for NFS support */
@@ -1735,7 +1743,8 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi,
 
 	if (!__allow_reserved_blocks(sbi, inode, true))
 		avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks;
-
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		avail_user_block_count -= sbi->unusable_block_count;
 	if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) {
 		diff = sbi->total_valid_block_count - avail_user_block_count;
 		if (diff > *count)
@@ -1942,6 +1951,8 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 
 	if (!__allow_reserved_blocks(sbi, inode, false))
 		valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks;
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		valid_block_count += sbi->unusable_block_count;
 
 	if (unlikely(valid_block_count > sbi->user_block_count)) {
 		spin_unlock(&sbi->stat_lock);
@@ -2945,6 +2956,8 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
 bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
 					struct cp_control *cpc);
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi);
 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
@@ -3532,6 +3545,9 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
 	if (test_opt(sbi, LFS) && (rw == WRITE) &&
 				block_unaligned_IO(inode, iocb, iter))
 		return true;
+	if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED))
+		return true;
+
 	return false;
 }
 
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index e29715ea736f..b1aaa73e1eeb 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -210,7 +210,8 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
 	};
 	unsigned int seq_id = 0;
 
-	if (unlikely(f2fs_readonly(inode->i_sb)))
+	if (unlikely(f2fs_readonly(inode->i_sb) ||
+				is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
 		return 0;
 
 	trace_f2fs_sync_file_enter(inode);
@@ -2157,6 +2158,12 @@ static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+		f2fs_msg(sbi->sb, KERN_INFO,
+			"Skipping Checkpoint. Checkpoints currently disabled.");
+		return -EINVAL;
+	}
+
 	ret = mnt_want_write_file(filp);
 	if (ret)
 		return ret;
@@ -2528,6 +2535,9 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return -EINVAL;
+
 	if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg,
 							sizeof(range)))
 		return -EFAULT;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 99ed8a5d9249..78288c54b68c 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -370,6 +370,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
 
 		if (sec_usage_check(sbi, secno))
 			goto next;
+		/* Don't touch checkpointed data */
+		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
+					get_ckpt_valid_blocks(sbi, segno)))
+			goto next;
 		if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
 			goto next;
 
@@ -1189,7 +1193,8 @@ gc_more:
 		 * threshold, we can make them free by checkpoint. Then, we
 		 * secure free segments which doesn't need fggc any more.
 		 */
-		if (prefree_segments(sbi)) {
+		if (prefree_segments(sbi) &&
+				!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
 			ret = f2fs_write_checkpoint(sbi, &cpc);
 			if (ret)
 				goto stop;
@@ -1241,7 +1246,7 @@ gc_more:
 			segno = NULL_SEGNO;
 			goto gc_more;
 		}
-		if (gc_type == FG_GC)
+		if (gc_type == FG_GC && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))
 			ret = f2fs_write_checkpoint(sbi, &cpc);
 	}
 stop:
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 86e7333d60c1..4ee9d6c4b719 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -607,6 +607,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	if (!is_inode_flag_set(inode, FI_DIRTY_INODE))
 		return 0;
 
+	if (f2fs_is_checkpoint_ready(sbi))
+		return -ENOSPC;
+
 	/*
 	 * We need to balance fs here to prevent from producing dirty node pages
 	 * during the urgent cleaning time when runing out of free sections.
@@ -688,7 +691,8 @@ no_delete:
 	stat_dec_inline_dir(inode);
 	stat_dec_inline_inode(inode);
 
-	if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG)))
+	if (likely(!is_set_ckpt_flags(sbi, CP_ERROR_FLAG) &&
+				!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
 		f2fs_bug_on(sbi, is_inode_flag_set(inode, FI_DIRTY_INODE));
 	else
 		f2fs_inode_synced(inode);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index ded185baa9ae..a14632744a6a 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -16,6 +16,7 @@
 
 #include "f2fs.h"
 #include "node.h"
+#include "segment.h"
 #include "xattr.h"
 #include "acl.h"
 #include <trace/events/f2fs.h>
@@ -269,6 +270,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	err = dquot_initialize(dir);
 	if (err)
@@ -315,6 +319,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	err = fscrypt_prepare_link(old_dentry, dir, dentry);
 	if (err)
@@ -561,6 +568,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
 				      &disk_link);
@@ -690,6 +700,9 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	err = dquot_initialize(dir);
 	if (err)
@@ -824,6 +837,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	if (is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
 			(!projid_eq(F2FS_I(new_dir)->i_projid,
@@ -1014,6 +1030,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return -EIO;
+	err = f2fs_is_checkpoint_ready(sbi);
+	if (err)
+		return err;
 
 	if ((is_inode_flag_set(new_dir, FI_PROJ_INHERIT) &&
 			!projid_eq(F2FS_I(new_dir)->i_projid,
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index e16dae0f0a5b..195dc8142bff 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -176,6 +176,8 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
 		return false;
 	if (sbi->gc_mode == GC_URGENT)
 		return true;
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return true;
 
 	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
 			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
@@ -480,6 +482,9 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
 	if (need && excess_cached_nats(sbi))
 		f2fs_balance_fs_bg(sbi);
 
+	if (f2fs_is_checkpoint_ready(sbi))
+		return;
+
 	/*
 	 * We should do GC or end up with checkpoint, if there are so many dirty
 	 * dir/node pages without enough free segments.
@@ -796,7 +801,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 {
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
-	unsigned short valid_blocks;
+	unsigned short valid_blocks, ckpt_valid_blocks;
 
 	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
 		return;
@@ -804,8 +809,10 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 	mutex_lock(&dirty_i->seglist_lock);
 
 	valid_blocks = get_valid_blocks(sbi, segno, false);
+	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
 
-	if (valid_blocks == 0) {
+	if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
+				ckpt_valid_blocks == sbi->blocks_per_seg)) {
 		__locate_dirty_segment(sbi, segno, PRE);
 		__remove_dirty_segment(sbi, segno, DIRTY);
 	} else if (valid_blocks < sbi->blocks_per_seg) {
@@ -818,6 +825,66 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
 	mutex_unlock(&dirty_i->seglist_lock);
 }
 
+/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
+void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned int segno;
+
+	mutex_lock(&dirty_i->seglist_lock);
+	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+		if (get_valid_blocks(sbi, segno, false))
+			continue;
+		if (IS_CURSEG(sbi, segno))
+			continue;
+		__locate_dirty_segment(sbi, segno, PRE);
+		__remove_dirty_segment(sbi, segno, DIRTY);
+	}
+	mutex_unlock(&dirty_i->seglist_lock);
+}
+
+int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
+	block_t holes[2] = {0, 0};	/* DATA and NODE */
+	struct seg_entry *se;
+	unsigned int segno;
+
+	mutex_lock(&dirty_i->seglist_lock);
+	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+		se = get_seg_entry(sbi, segno);
+		if (IS_NODESEG(se->type))
+			holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
+		else
+			holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
+	}
+	mutex_unlock(&dirty_i->seglist_lock);
+
+	if (holes[DATA] > ovp || holes[NODE] > ovp)
+		return -EAGAIN;
+	return 0;
+}
+
+/* This is only used by SBI_CP_DISABLED */
+static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
+{
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	unsigned int segno = 0;
+
+	mutex_lock(&dirty_i->seglist_lock);
+	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
+		if (get_valid_blocks(sbi, segno, false))
+			continue;
+		if (get_ckpt_valid_blocks(sbi, segno))
+			continue;
+		mutex_unlock(&dirty_i->seglist_lock);
+		return segno;
+	}
+	mutex_unlock(&dirty_i->seglist_lock);
+	return NULL_SEGNO;
+}
+
 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
 		struct block_device *bdev, block_t lstart,
 		block_t start, block_t len)
@@ -2028,7 +2095,8 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 			sbi->discard_blks--;
 
 		/* don't overwrite by SSR to keep node chain */
-		if (IS_NODESEG(se->type)) {
+		if (IS_NODESEG(se->type) &&
+				!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
 			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
 				se->ckpt_valid_blocks++;
 		}
@@ -2050,6 +2118,15 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
 			f2fs_bug_on(sbi, 1);
 			se->valid_blocks++;
 			del = 0;
+		} else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+			/*
+			 * If checkpoints are off, we must not reuse data that
+			 * was used in the previous checkpoint. If it was used
+			 * before, we must track that to know how much space we
+			 * really have.
+			 */
+			if (f2fs_test_bit(offset, se->ckpt_valid_map))
+				sbi->unusable_block_count++;
 		}
 
 		if (f2fs_test_and_clear_bit(offset, se->discard_map))
@@ -2332,6 +2409,9 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
 	if (sbi->segs_per_sec != 1)
 		return CURSEG_I(sbi, type)->segno;
 
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return 0;
+
 	if (test_opt(sbi, NOHEAP) &&
 		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
 		return 0;
@@ -2476,6 +2556,15 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
 			return 1;
 		}
 	}
+
+	/* find valid_blocks=0 in dirty list */
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+		segno = get_free_segment(sbi);
+		if (segno != NULL_SEGNO) {
+			curseg->next_segno = segno;
+			return 1;
+		}
+	}
 	return 0;
 }
 
@@ -2493,7 +2582,8 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
 	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
 					type == CURSEG_WARM_NODE)
 		new_curseg(sbi, type, false);
-	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
+	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
+			likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
 		new_curseg(sbi, type, false);
 	else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
 		change_curseg(sbi, type);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 086150028c6d..ab3465faddf1 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -339,6 +339,12 @@ static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi,
 		return get_seg_entry(sbi, segno)->valid_blocks;
 }
 
+static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
+				unsigned int segno)
+{
+	return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+}
+
 static inline void seg_info_from_raw_sit(struct seg_entry *se,
 					struct f2fs_sit_entry *rs)
 {
@@ -576,6 +582,15 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
 		reserved_sections(sbi) + needed);
 }
 
+static inline int f2fs_is_checkpoint_ready(struct f2fs_sb_info *sbi)
+{
+	if (likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return 0;
+	if (likely(!has_not_enough_free_secs(sbi, 0, 0)))
+		return 0;
+	return -ENOSPC;
+}
+
 static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi)
 {
 	return prefree_segments(sbi) > SM_I(sbi)->rec_prefree_segments;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index a44913224e3b..19933d839008 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -136,6 +136,7 @@ enum {
 	Opt_alloc,
 	Opt_fsync,
 	Opt_test_dummy_encryption,
+	Opt_checkpoint,
 	Opt_err,
 };
 
@@ -194,6 +195,7 @@ static match_table_t f2fs_tokens = {
 	{Opt_alloc, "alloc_mode=%s"},
 	{Opt_fsync, "fsync_mode=%s"},
 	{Opt_test_dummy_encryption, "test_dummy_encryption"},
+	{Opt_checkpoint, "checkpoint=%s"},
 	{Opt_err, NULL},
 };
 
@@ -769,6 +771,23 @@ static int parse_options(struct super_block *sb, char *options)
 					"Test dummy encryption mount option ignored");
 #endif
 			break;
+		case Opt_checkpoint:
+			name = match_strdup(&args[0]);
+			if (!name)
+				return -ENOMEM;
+
+			if (strlen(name) == 6 &&
+					!strncmp(name, "enable", 6)) {
+				clear_opt(sbi, DISABLE_CHECKPOINT);
+			} else if (strlen(name) == 7 &&
+					!strncmp(name, "disable", 7)) {
+				set_opt(sbi, DISABLE_CHECKPOINT);
+			} else {
+				kfree(name);
+				return -EINVAL;
+			}
+			kfree(name);
+			break;
 		default:
 			f2fs_msg(sb, KERN_ERR,
 				"Unrecognized mount option \"%s\" or missing value",
@@ -827,6 +846,12 @@ static int parse_options(struct super_block *sb, char *options)
 		}
 	}
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT) && test_opt(sbi, LFS)) {
+		f2fs_msg(sb, KERN_ERR,
+				"LFS not compatible with checkpoint=disable\n");
+		return -EINVAL;
+	}
+
 	/* Not pass down write hints if the number of active logs is lesser
 	 * than NR_CURSEG_TYPE.
 	 */
@@ -1014,8 +1039,8 @@ static void f2fs_put_super(struct super_block *sb)
 	 * But, the previous checkpoint was not done by umount, it needs to do
 	 * clean checkpoint again.
 	 */
-	if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
-			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+	if ((is_sbi_flag_set(sbi, SBI_IS_DIRTY) ||
+			!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG))) {
 		struct cp_control cpc = {
 			.reason = CP_UMOUNT,
 		};
@@ -1087,6 +1112,8 @@ int f2fs_sync_fs(struct super_block *sb, int sync)
 
 	if (unlikely(f2fs_cp_error(sbi)))
 		return 0;
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return 0;
 
 	trace_f2fs_sync_fs(sb, sync);
 
@@ -1186,6 +1213,11 @@ static int f2fs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	buf->f_blocks = total_count - start_count;
 	buf->f_bfree = user_block_count - valid_user_blocks(sbi) -
 						sbi->current_reserved_blocks;
+	if (unlikely(buf->f_bfree <= sbi->unusable_block_count))
+		buf->f_bfree = 0;
+	else
+		buf->f_bfree -= sbi->unusable_block_count;
+
 	if (buf->f_bfree > F2FS_OPTION(sbi).root_reserved_blocks)
 		buf->f_bavail = buf->f_bfree -
 				F2FS_OPTION(sbi).root_reserved_blocks;
@@ -1365,6 +1397,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 	else if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
 		seq_printf(seq, ",alloc_mode=%s", "reuse");
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT))
+		seq_puts(seq, ",checkpoint=disable");
+
 	if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_POSIX)
 		seq_printf(seq, ",fsync_mode=%s", "posix");
 	else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_STRICT)
@@ -1392,6 +1427,7 @@ static void default_options(struct f2fs_sb_info *sbi)
 	set_opt(sbi, INLINE_DENTRY);
 	set_opt(sbi, EXTENT_CACHE);
 	set_opt(sbi, NOHEAP);
+	clear_opt(sbi, DISABLE_CHECKPOINT);
 	sbi->sb->s_flags |= SB_LAZYTIME;
 	set_opt(sbi, FLUSH_MERGE);
 	set_opt(sbi, DISCARD);
@@ -1413,6 +1449,57 @@ static void default_options(struct f2fs_sb_info *sbi)
 #ifdef CONFIG_QUOTA
 static int f2fs_enable_quotas(struct super_block *sb);
 #endif
+
+static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
+{
+	struct cp_control cpc;
+	int err;
+
+	sbi->sb->s_flags |= SB_ACTIVE;
+
+	mutex_lock(&sbi->gc_mutex);
+	f2fs_update_time(sbi, DISABLE_TIME);
+
+	while (!f2fs_time_over(sbi, DISABLE_TIME)) {
+		err = f2fs_gc(sbi, true, false, NULL_SEGNO);
+		if (err == -ENODATA)
+			break;
+		if (err && err != -EAGAIN) {
+			mutex_unlock(&sbi->gc_mutex);
+			return err;
+		}
+	}
+	mutex_unlock(&sbi->gc_mutex);
+
+	err = sync_filesystem(sbi->sb);
+	if (err)
+		return err;
+
+	if (f2fs_disable_cp_again(sbi))
+		return -EAGAIN;
+
+	mutex_lock(&sbi->gc_mutex);
+	cpc.reason = CP_PAUSE;
+	set_sbi_flag(sbi, SBI_CP_DISABLED);
+	f2fs_write_checkpoint(sbi, &cpc);
+
+	sbi->unusable_block_count = 0;
+	mutex_unlock(&sbi->gc_mutex);
+	return 0;
+}
+
+static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
+{
+	mutex_lock(&sbi->gc_mutex);
+	f2fs_dirty_to_prefree(sbi);
+
+	clear_sbi_flag(sbi, SBI_CP_DISABLED);
+	set_sbi_flag(sbi, SBI_IS_DIRTY);
+	mutex_unlock(&sbi->gc_mutex);
+
+	f2fs_sync_fs(sbi->sb, 1);
+}
+
 static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 {
 	struct f2fs_sb_info *sbi = F2FS_SB(sb);
@@ -1422,6 +1509,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	bool need_restart_gc = false;
 	bool need_stop_gc = false;
 	bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
+	bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
+	bool checkpoint_changed;
 #ifdef CONFIG_QUOTA
 	int i, j;
 #endif
@@ -1466,6 +1555,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	err = parse_options(sb, data);
 	if (err)
 		goto restore_opts;
+	checkpoint_changed =
+			disable_checkpoint != test_opt(sbi, DISABLE_CHECKPOINT);
 
 	/*
 	 * Previous and new state of filesystem is RO,
@@ -1479,7 +1570,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 		err = dquot_suspend(sb, -1);
 		if (err < 0)
 			goto restore_opts;
-	} else if (f2fs_readonly(sb) && !(*flags & MS_RDONLY)) {
+	} else if (f2fs_readonly(sb) && !(*flags & SB_RDONLY)) {
 		/* dquot_resume needs RW */
 		sb->s_flags &= ~SB_RDONLY;
 		if (sb_any_quota_suspended(sb)) {
@@ -1499,6 +1590,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 		goto restore_opts;
 	}
 
+	if ((*flags & SB_RDONLY) && test_opt(sbi, DISABLE_CHECKPOINT)) {
+		err = -EINVAL;
+		f2fs_msg(sbi->sb, KERN_WARNING,
+			"disabling checkpoint not compatible with read-only");
+		goto restore_opts;
+	}
+
 	/*
 	 * We stop the GC thread if FS is mounted as RO
 	 * or if background_gc = off is passed in mount
@@ -1527,6 +1625,16 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 		clear_sbi_flag(sbi, SBI_IS_CLOSE);
 	}
 
+	if (checkpoint_changed) {
+		if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+			err = f2fs_disable_checkpoint(sbi);
+			if (err)
+				goto restore_gc;
+		} else {
+			f2fs_enable_checkpoint(sbi);
+		}
+	}
+
 	/*
 	 * We stop issue flush thread if FS is mounted as RO
 	 * or if flush_merge is not passed in mount option.
@@ -2485,6 +2593,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
 	sbi->interval_time[REQ_TIME] = DEF_IDLE_INTERVAL;
 	sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL;
 	sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL;
+	sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
 	clear_sbi_flag(sbi, SBI_NEED_FSCK);
 
 	for (i = 0; i < NR_COUNT_TYPE; i++)
@@ -3093,6 +3202,9 @@ try_onemore:
 	if (err)
 		goto free_meta;
 
+	if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)))
+		goto skip_recovery;
+
 	/* recover fsynced data */
 	if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
 		/*
@@ -3132,6 +3244,14 @@ skip_recovery:
 	/* f2fs_recover_fsync_data() cleared this already */
 	clear_sbi_flag(sbi, SBI_POR_DOING);
 
+	if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+		err = f2fs_disable_checkpoint(sbi);
+		if (err)
+			goto free_meta;
+	} else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) {
+		f2fs_enable_checkpoint(sbi);
+	}
+
 	/*
 	 * If filesystem is not mounted as read-only then
 	 * do start the gc_thread.
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 1db13ff9a3f4..8b9c7dc0260c 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -116,6 +116,7 @@ struct f2fs_super_block {
 /*
  * For checkpoint
  */
+#define CP_DISABLED_FLAG		0x00001000
 #define CP_LARGE_NAT_BITMAP_FLAG	0x00000400
 #define CP_NOCRC_RECOVERY_FLAG	0x00000200
 #define CP_TRIMMED_FLAG		0x00000100
-- 
cgit v1.2.3


From c56a8be7e7aa855ebcccf0e9d9eba2216514d399 Mon Sep 17 00:00:00 2001
From: Rahul Verma <Rahul.Verma@cavium.com>
Date: Tue, 16 Oct 2018 03:59:20 -0700
Subject: qed: Add supported link and advertise link to display in ethtool.

	Added transceiver type, speed capability and board types
	in HSI, are utilizing to display the accurate link
	information in ethtool.

Signed-off-by: Rahul Verma <Rahul.Verma@cavium.com>
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_main.c      | 199 ++++++++++++++++++------
 drivers/net/ethernet/qlogic/qed/qed_mcp.c       | 182 ++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.h       |  46 ++++++
 drivers/net/ethernet/qlogic/qede/qede_ethtool.c |  31 +++-
 include/linux/qed/qed_if.h                      |  26 +++-
 5 files changed, 426 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 8c7cbbde65a6..e762881fdb38 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -58,6 +58,7 @@
 #include "qed_iscsi.h"
 
 #include "qed_mcp.h"
+#include "qed_reg_addr.h"
 #include "qed_hw.h"
 #include "qed_selftest.h"
 #include "qed_debug.h"
@@ -1330,8 +1331,7 @@ static int qed_set_link(struct qed_dev *cdev, struct qed_link_params *params)
 		link_params->speed.autoneg = params->autoneg;
 	if (params->override_flags & QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS) {
 		link_params->speed.advertised_speeds = 0;
-		if ((params->adv_speeds & QED_LM_1000baseT_Half_BIT) ||
-		    (params->adv_speeds & QED_LM_1000baseT_Full_BIT))
+		if (params->adv_speeds & QED_LM_1000baseT_Full_BIT)
 			link_params->speed.advertised_speeds |=
 			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
 		if (params->adv_speeds & QED_LM_10000baseKR_Full_BIT)
@@ -1462,13 +1462,149 @@ static int qed_get_link_data(struct qed_hwfn *hwfn,
 	return 0;
 }
 
+static void qed_fill_link_capability(struct qed_hwfn *hwfn,
+				     struct qed_ptt *ptt, u32 capability,
+				     u32 *if_capability)
+{
+	u32 media_type, tcvr_state, tcvr_type;
+	u32 speed_mask, board_cfg;
+
+	if (qed_mcp_get_media_type(hwfn, ptt, &media_type))
+		media_type = MEDIA_UNSPECIFIED;
+
+	if (qed_mcp_get_transceiver_data(hwfn, ptt, &tcvr_state, &tcvr_type))
+		tcvr_type = ETH_TRANSCEIVER_STATE_UNPLUGGED;
+
+	if (qed_mcp_trans_speed_mask(hwfn, ptt, &speed_mask))
+		speed_mask = 0xFFFFFFFF;
+
+	if (qed_mcp_get_board_config(hwfn, ptt, &board_cfg))
+		board_cfg = NVM_CFG1_PORT_PORT_TYPE_UNDEFINED;
+
+	DP_VERBOSE(hwfn->cdev, NETIF_MSG_DRV,
+		   "Media_type = 0x%x tcvr_state = 0x%x tcvr_type = 0x%x speed_mask = 0x%x board_cfg = 0x%x\n",
+		   media_type, tcvr_state, tcvr_type, speed_mask, board_cfg);
+
+	switch (media_type) {
+	case MEDIA_DA_TWINAX:
+		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G)
+			*if_capability |= QED_LM_20000baseKR2_Full_BIT;
+		/* For DAC media multiple speed capabilities are supported*/
+		capability = capability & speed_mask;
+		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
+			*if_capability |= QED_LM_1000baseKX_Full_BIT;
+		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
+			*if_capability |= QED_LM_10000baseCR_Full_BIT;
+		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
+			*if_capability |= QED_LM_40000baseCR4_Full_BIT;
+		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
+			*if_capability |= QED_LM_25000baseCR_Full_BIT;
+		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
+			*if_capability |= QED_LM_50000baseCR2_Full_BIT;
+		if (capability &
+			NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
+			*if_capability |= QED_LM_100000baseCR4_Full_BIT;
+		break;
+	case MEDIA_BASE_T:
+		if (board_cfg & NVM_CFG1_PORT_PORT_TYPE_EXT_PHY) {
+			if (capability &
+			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) {
+				*if_capability |= QED_LM_1000baseT_Full_BIT;
+			}
+			if (capability &
+			    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) {
+				*if_capability |= QED_LM_10000baseT_Full_BIT;
+			}
+		}
+		if (board_cfg & NVM_CFG1_PORT_PORT_TYPE_MODULE) {
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_1000BASET)
+				*if_capability |= QED_LM_1000baseT_Full_BIT;
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_BASET)
+				*if_capability |= QED_LM_10000baseT_Full_BIT;
+		}
+		break;
+	case MEDIA_SFP_1G_FIBER:
+	case MEDIA_SFPP_10G_FIBER:
+	case MEDIA_XFP_FIBER:
+	case MEDIA_MODULE_FIBER:
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) {
+			if ((tcvr_type == ETH_TRANSCEIVER_TYPE_1G_LX) ||
+			    (tcvr_type == ETH_TRANSCEIVER_TYPE_1G_SX))
+				*if_capability |= QED_LM_1000baseKX_Full_BIT;
+		}
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) {
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_SR)
+				*if_capability |= QED_LM_10000baseSR_Full_BIT;
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_LR)
+				*if_capability |= QED_LM_10000baseLR_Full_BIT;
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_LRM)
+				*if_capability |= QED_LM_10000baseLRM_Full_BIT;
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_10G_ER)
+				*if_capability |= QED_LM_10000baseR_FEC_BIT;
+		}
+		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G)
+			*if_capability |= QED_LM_20000baseKR2_Full_BIT;
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) {
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_25G_SR)
+				*if_capability |= QED_LM_25000baseSR_Full_BIT;
+		}
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) {
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_40G_LR4)
+				*if_capability |= QED_LM_40000baseLR4_Full_BIT;
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_40G_SR4)
+				*if_capability |= QED_LM_40000baseSR4_Full_BIT;
+		}
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
+			*if_capability |= QED_LM_50000baseKR2_Full_BIT;
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) {
+			if (tcvr_type == ETH_TRANSCEIVER_TYPE_100G_SR4)
+				*if_capability |= QED_LM_100000baseSR4_Full_BIT;
+		}
+
+		break;
+	case MEDIA_KR:
+		if (capability & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G)
+			*if_capability |= QED_LM_20000baseKR2_Full_BIT;
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
+			*if_capability |= QED_LM_1000baseKX_Full_BIT;
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
+			*if_capability |= QED_LM_10000baseKR_Full_BIT;
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
+			*if_capability |= QED_LM_25000baseKR_Full_BIT;
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
+			*if_capability |= QED_LM_40000baseKR4_Full_BIT;
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
+			*if_capability |= QED_LM_50000baseKR2_Full_BIT;
+		if (capability &
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
+			*if_capability |= QED_LM_100000baseKR4_Full_BIT;
+		break;
+	case MEDIA_UNSPECIFIED:
+	case MEDIA_NOT_PRESENT:
+		DP_VERBOSE(hwfn->cdev, QED_MSG_DEBUG,
+			   "Unknown media and transceiver type;\n");
+		break;
+	}
+}
+
 static void qed_fill_link(struct qed_hwfn *hwfn,
 			  struct qed_ptt *ptt,
 			  struct qed_link_output *if_link)
 {
+	struct qed_mcp_link_capabilities link_caps;
 	struct qed_mcp_link_params params;
 	struct qed_mcp_link_state link;
-	struct qed_mcp_link_capabilities link_caps;
 	u32 media_type;
 
 	memset(if_link, 0, sizeof(*if_link));
@@ -1499,51 +1635,13 @@ static void qed_fill_link(struct qed_hwfn *hwfn,
 		if_link->advertised_caps |= QED_LM_Autoneg_BIT;
 	else
 		if_link->advertised_caps &= ~QED_LM_Autoneg_BIT;
-	if (params.speed.advertised_speeds &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
-		if_link->advertised_caps |= QED_LM_1000baseT_Half_BIT |
-		    QED_LM_1000baseT_Full_BIT;
-	if (params.speed.advertised_speeds &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
-		if_link->advertised_caps |= QED_LM_10000baseKR_Full_BIT;
-	if (params.speed.advertised_speeds &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G)
-		if_link->advertised_caps |= QED_LM_20000baseKR2_Full_BIT;
-	if (params.speed.advertised_speeds &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
-		if_link->advertised_caps |= QED_LM_25000baseKR_Full_BIT;
-	if (params.speed.advertised_speeds &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
-		if_link->advertised_caps |= QED_LM_40000baseLR4_Full_BIT;
-	if (params.speed.advertised_speeds &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
-		if_link->advertised_caps |= QED_LM_50000baseKR2_Full_BIT;
-	if (params.speed.advertised_speeds &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
-		if_link->advertised_caps |= QED_LM_100000baseKR4_Full_BIT;
-
-	if (link_caps.speed_capabilities &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G)
-		if_link->supported_caps |= QED_LM_1000baseT_Half_BIT |
-		    QED_LM_1000baseT_Full_BIT;
-	if (link_caps.speed_capabilities &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G)
-		if_link->supported_caps |= QED_LM_10000baseKR_Full_BIT;
-	if (link_caps.speed_capabilities &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G)
-		if_link->supported_caps |= QED_LM_20000baseKR2_Full_BIT;
-	if (link_caps.speed_capabilities &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G)
-		if_link->supported_caps |= QED_LM_25000baseKR_Full_BIT;
-	if (link_caps.speed_capabilities &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G)
-		if_link->supported_caps |= QED_LM_40000baseLR4_Full_BIT;
-	if (link_caps.speed_capabilities &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G)
-		if_link->supported_caps |= QED_LM_50000baseKR2_Full_BIT;
-	if (link_caps.speed_capabilities &
-	    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G)
-		if_link->supported_caps |= QED_LM_100000baseKR4_Full_BIT;
+
+	/* Fill link advertised capability*/
+	qed_fill_link_capability(hwfn, ptt, params.speed.advertised_speeds,
+				 &if_link->advertised_caps);
+	/* Fill link supported capability*/
+	qed_fill_link_capability(hwfn, ptt, link_caps.speed_capabilities,
+				 &if_link->supported_caps);
 
 	if (link.link_up)
 		if_link->speed = link.speed;
@@ -1563,9 +1661,8 @@ static void qed_fill_link(struct qed_hwfn *hwfn,
 		if_link->pause_config |= QED_LINK_PAUSE_TX_ENABLE;
 
 	/* Link partner capabilities */
-	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_HD)
-		if_link->lp_caps |= QED_LM_1000baseT_Half_BIT;
-	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_FD)
+	if (link.partner_adv_speed &
+	    QED_LINK_PARTNER_SPEED_1G_FD)
 		if_link->lp_caps |= QED_LM_1000baseT_Full_BIT;
 	if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_10G)
 		if_link->lp_caps |= QED_LM_10000baseKR_Full_BIT;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 92c5950ad156..554d57ac1629 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1870,6 +1870,8 @@ int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn,
 int qed_mcp_get_media_type(struct qed_hwfn *p_hwfn,
 			   struct qed_ptt *p_ptt, u32 *p_media_type)
 {
+	*p_media_type = MEDIA_UNSPECIFIED;
+
 	if (IS_VF(p_hwfn->cdev))
 		return -EINVAL;
 
@@ -1891,6 +1893,186 @@ int qed_mcp_get_media_type(struct qed_hwfn *p_hwfn,
 	return 0;
 }
 
+int qed_mcp_get_transceiver_data(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt,
+				 u32 *p_transceiver_state,
+				 u32 *p_transceiver_type)
+{
+	u32 transceiver_info;
+
+	if (IS_VF(p_hwfn->cdev))
+		return -EINVAL;
+
+	if (!qed_mcp_is_init(p_hwfn)) {
+		DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
+		return -EBUSY;
+	}
+
+	*p_transceiver_type = ETH_TRANSCEIVER_TYPE_NONE;
+	*p_transceiver_state = ETH_TRANSCEIVER_STATE_UPDATING;
+
+	transceiver_info = qed_rd(p_hwfn, p_ptt,
+				  p_hwfn->mcp_info->port_addr +
+				  offsetof(struct public_port,
+					   transceiver_data));
+
+	*p_transceiver_state = (transceiver_info &
+				ETH_TRANSCEIVER_STATE_MASK) >>
+				ETH_TRANSCEIVER_STATE_OFFSET;
+
+	if (*p_transceiver_state == ETH_TRANSCEIVER_STATE_PRESENT)
+		*p_transceiver_type = (transceiver_info &
+				       ETH_TRANSCEIVER_TYPE_MASK) >>
+				       ETH_TRANSCEIVER_TYPE_OFFSET;
+	else
+		*p_transceiver_type = ETH_TRANSCEIVER_TYPE_UNKNOWN;
+
+	return 0;
+}
+static bool qed_is_transceiver_ready(u32 transceiver_state,
+				     u32 transceiver_type)
+{
+	if ((transceiver_state & ETH_TRANSCEIVER_STATE_PRESENT) &&
+	    ((transceiver_state & ETH_TRANSCEIVER_STATE_UPDATING) == 0x0) &&
+	    (transceiver_type != ETH_TRANSCEIVER_TYPE_NONE))
+		return true;
+
+	return false;
+}
+
+int qed_mcp_trans_speed_mask(struct qed_hwfn *p_hwfn,
+			     struct qed_ptt *p_ptt, u32 *p_speed_mask)
+{
+	u32 transceiver_type, transceiver_state;
+
+	qed_mcp_get_transceiver_data(p_hwfn, p_ptt, &transceiver_state,
+				     &transceiver_type);
+
+	if (qed_is_transceiver_ready(transceiver_state, transceiver_type) ==
+				     false)
+		return -EINVAL;
+
+	switch (transceiver_type) {
+	case ETH_TRANSCEIVER_TYPE_1G_LX:
+	case ETH_TRANSCEIVER_TYPE_1G_SX:
+	case ETH_TRANSCEIVER_TYPE_1G_PCC:
+	case ETH_TRANSCEIVER_TYPE_1G_ACC:
+	case ETH_TRANSCEIVER_TYPE_1000BASET:
+		*p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_10G_SR:
+	case ETH_TRANSCEIVER_TYPE_10G_LR:
+	case ETH_TRANSCEIVER_TYPE_10G_LRM:
+	case ETH_TRANSCEIVER_TYPE_10G_ER:
+	case ETH_TRANSCEIVER_TYPE_10G_PCC:
+	case ETH_TRANSCEIVER_TYPE_10G_ACC:
+	case ETH_TRANSCEIVER_TYPE_4x10G:
+		*p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_40G_LR4:
+	case ETH_TRANSCEIVER_TYPE_40G_SR4:
+	case ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_SR:
+	case ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_LR:
+		*p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_100G_AOC:
+	case ETH_TRANSCEIVER_TYPE_100G_SR4:
+	case ETH_TRANSCEIVER_TYPE_100G_LR4:
+	case ETH_TRANSCEIVER_TYPE_100G_ER4:
+	case ETH_TRANSCEIVER_TYPE_100G_ACC:
+		*p_speed_mask =
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_25G_SR:
+	case ETH_TRANSCEIVER_TYPE_25G_LR:
+	case ETH_TRANSCEIVER_TYPE_25G_AOC:
+	case ETH_TRANSCEIVER_TYPE_25G_ACC_S:
+	case ETH_TRANSCEIVER_TYPE_25G_ACC_M:
+	case ETH_TRANSCEIVER_TYPE_25G_ACC_L:
+		*p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_25G_CA_N:
+	case ETH_TRANSCEIVER_TYPE_25G_CA_S:
+	case ETH_TRANSCEIVER_TYPE_25G_CA_L:
+	case ETH_TRANSCEIVER_TYPE_4x25G_CR:
+		*p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_40G_CR4:
+	case ETH_TRANSCEIVER_TYPE_MULTI_RATE_10G_40G_CR:
+		*p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_100G_CR4:
+	case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_CR:
+		*p_speed_mask =
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_20G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_SR:
+	case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_LR:
+	case ETH_TRANSCEIVER_TYPE_MULTI_RATE_40G_100G_AOC:
+		*p_speed_mask =
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_XLPPI:
+		*p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G;
+		break;
+	case ETH_TRANSCEIVER_TYPE_10G_BASET:
+		*p_speed_mask = NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G |
+		    NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G;
+		break;
+	default:
+		DP_INFO(p_hwfn, "Unknown transcevier type 0x%x\n",
+			transceiver_type);
+		*p_speed_mask = 0xff;
+		break;
+	}
+
+	return 0;
+}
+
+int qed_mcp_get_board_config(struct qed_hwfn *p_hwfn,
+			     struct qed_ptt *p_ptt, u32 *p_board_config)
+{
+	u32 nvm_cfg_addr, nvm_cfg1_offset, port_cfg_addr;
+
+	if (IS_VF(p_hwfn->cdev))
+		return -EINVAL;
+
+	if (!qed_mcp_is_init(p_hwfn)) {
+		DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
+		return -EBUSY;
+	}
+	if (!p_ptt) {
+		*p_board_config = NVM_CFG1_PORT_PORT_TYPE_UNDEFINED;
+		return -EINVAL;
+	}
+
+	nvm_cfg_addr = qed_rd(p_hwfn, p_ptt, MISC_REG_GEN_PURP_CR0);
+	nvm_cfg1_offset = qed_rd(p_hwfn, p_ptt, nvm_cfg_addr + 4);
+	port_cfg_addr = MCP_REG_SCRATCH + nvm_cfg1_offset +
+			offsetof(struct nvm_cfg1, port[MFW_PORT(p_hwfn)]);
+	*p_board_config = qed_rd(p_hwfn, p_ptt,
+				 port_cfg_addr +
+				 offsetof(struct nvm_cfg1_port,
+					  board_cfg));
+
+	return 0;
+}
+
 /* Old MFW has a global configuration for all PFs regarding RDMA support */
 static void
 qed_mcp_get_shmem_proto_legacy(struct qed_hwfn *p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 80a6b5d1ff33..1adfe52b3905 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -332,6 +332,52 @@ int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn,
 int qed_mcp_get_media_type(struct qed_hwfn *p_hwfn,
 			   struct qed_ptt *p_ptt, u32 *media_type);
 
+/**
+ * @brief Get transceiver data of the port.
+ *
+ * @param cdev      - qed dev pointer
+ * @param p_ptt
+ * @param p_transceiver_state - transceiver state.
+ * @param p_transceiver_type - media type value
+ *
+ * @return int -
+ *      0 - Operation was successful.
+ *      -EBUSY - Operation failed
+ */
+int qed_mcp_get_transceiver_data(struct qed_hwfn *p_hwfn,
+				 struct qed_ptt *p_ptt,
+				 u32 *p_transceiver_state,
+				 u32 *p_tranceiver_type);
+
+/**
+ * @brief Get transceiver supported speed mask.
+ *
+ * @param cdev      - qed dev pointer
+ * @param p_ptt
+ * @param p_speed_mask - Bit mask of all supported speeds.
+ *
+ * @return int -
+ *      0 - Operation was successful.
+ *      -EBUSY - Operation failed
+ */
+
+int qed_mcp_trans_speed_mask(struct qed_hwfn *p_hwfn,
+			     struct qed_ptt *p_ptt, u32 *p_speed_mask);
+
+/**
+ * @brief Get board configuration.
+ *
+ * @param cdev      - qed dev pointer
+ * @param p_ptt
+ * @param p_board_config - Board config.
+ *
+ * @return int -
+ *      0 - Operation was successful.
+ *      -EBUSY - Operation failed
+ */
+int qed_mcp_get_board_config(struct qed_hwfn *p_hwfn,
+			     struct qed_ptt *p_ptt, u32 *p_board_config);
+
 /**
  * @brief General function for sending commands to the MCP
  *        mailbox. It acquire mutex lock for the entire
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 7ff50b4488f6..df3ad591140d 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -413,19 +413,42 @@ struct qede_link_mode_mapping {
 };
 
 static const struct qede_link_mode_mapping qed_lm_map[] = {
-	{QED_LM_FIBRE_BIT, ETHTOOL_LINK_MODE_FIBRE_BIT},
 	{QED_LM_Autoneg_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT},
 	{QED_LM_Asym_Pause_BIT, ETHTOOL_LINK_MODE_Asym_Pause_BIT},
 	{QED_LM_Pause_BIT, ETHTOOL_LINK_MODE_Pause_BIT},
-	{QED_LM_1000baseT_Half_BIT, ETHTOOL_LINK_MODE_1000baseT_Half_BIT},
 	{QED_LM_1000baseT_Full_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT},
+	{QED_LM_10000baseT_Full_BIT, ETHTOOL_LINK_MODE_10000baseT_Full_BIT},
+	{QED_LM_2500baseX_Full_BIT, ETHTOOL_LINK_MODE_2500baseX_Full_BIT},
+	{QED_LM_Backplane_BIT, ETHTOOL_LINK_MODE_Backplane_BIT},
+	{QED_LM_1000baseKX_Full_BIT, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT},
+	{QED_LM_10000baseKX4_Full_BIT, ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT},
 	{QED_LM_10000baseKR_Full_BIT, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT},
+	{QED_LM_10000baseKR_Full_BIT, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT},
+	{QED_LM_10000baseR_FEC_BIT, ETHTOOL_LINK_MODE_10000baseR_FEC_BIT},
 	{QED_LM_20000baseKR2_Full_BIT, ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT},
-	{QED_LM_25000baseKR_Full_BIT, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT},
+	{QED_LM_40000baseKR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT},
+	{QED_LM_40000baseCR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT},
+	{QED_LM_40000baseSR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT},
 	{QED_LM_40000baseLR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT},
+	{QED_LM_25000baseCR_Full_BIT, ETHTOOL_LINK_MODE_25000baseCR_Full_BIT},
+	{QED_LM_25000baseKR_Full_BIT, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT},
+	{QED_LM_25000baseSR_Full_BIT, ETHTOOL_LINK_MODE_25000baseSR_Full_BIT},
+	{QED_LM_50000baseCR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT},
 	{QED_LM_50000baseKR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT},
 	{QED_LM_100000baseKR4_Full_BIT,
-	 ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT},
+		ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT},
+	{QED_LM_100000baseSR4_Full_BIT,
+		ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT},
+	{QED_LM_100000baseCR4_Full_BIT,
+		ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT},
+	{QED_LM_100000baseLR4_ER4_Full_BIT,
+		ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT},
+	{QED_LM_50000baseSR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT},
+	{QED_LM_1000baseX_Full_BIT, ETHTOOL_LINK_MODE_1000baseX_Full_BIT},
+	{QED_LM_10000baseCR_Full_BIT, ETHTOOL_LINK_MODE_10000baseCR_Full_BIT},
+	{QED_LM_10000baseSR_Full_BIT, ETHTOOL_LINK_MODE_10000baseSR_Full_BIT},
+	{QED_LM_10000baseLR_Full_BIT, ETHTOOL_LINK_MODE_10000baseLR_Full_BIT},
+	{QED_LM_10000baseLRM_Full_BIT, ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT},
 };
 
 #define QEDE_DRV_TO_ETHTOOL_CAPS(caps, lk_ksettings, name)	\
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index dee3c9c744f7..a47321a0d572 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -667,15 +667,35 @@ enum qed_link_mode_bits {
 	QED_LM_Autoneg_BIT = BIT(1),
 	QED_LM_Asym_Pause_BIT = BIT(2),
 	QED_LM_Pause_BIT = BIT(3),
-	QED_LM_1000baseT_Half_BIT = BIT(4),
-	QED_LM_1000baseT_Full_BIT = BIT(5),
+	QED_LM_1000baseT_Full_BIT = BIT(4),
+	QED_LM_10000baseT_Full_BIT = BIT(5),
 	QED_LM_10000baseKR_Full_BIT = BIT(6),
 	QED_LM_20000baseKR2_Full_BIT = BIT(7),
 	QED_LM_25000baseKR_Full_BIT = BIT(8),
 	QED_LM_40000baseLR4_Full_BIT = BIT(9),
 	QED_LM_50000baseKR2_Full_BIT = BIT(10),
 	QED_LM_100000baseKR4_Full_BIT = BIT(11),
-	QED_LM_COUNT = 11
+	QED_LM_2500baseX_Full_BIT = BIT(12),
+	QED_LM_Backplane_BIT = BIT(13),
+	QED_LM_1000baseKX_Full_BIT = BIT(14),
+	QED_LM_10000baseKX4_Full_BIT = BIT(15),
+	QED_LM_10000baseR_FEC_BIT = BIT(16),
+	QED_LM_40000baseKR4_Full_BIT = BIT(17),
+	QED_LM_40000baseCR4_Full_BIT = BIT(18),
+	QED_LM_40000baseSR4_Full_BIT = BIT(19),
+	QED_LM_25000baseCR_Full_BIT = BIT(20),
+	QED_LM_25000baseSR_Full_BIT = BIT(21),
+	QED_LM_50000baseCR2_Full_BIT = BIT(22),
+	QED_LM_100000baseSR4_Full_BIT = BIT(23),
+	QED_LM_100000baseCR4_Full_BIT = BIT(24),
+	QED_LM_100000baseLR4_ER4_Full_BIT = BIT(25),
+	QED_LM_50000baseSR2_Full_BIT = BIT(26),
+	QED_LM_1000baseX_Full_BIT = BIT(27),
+	QED_LM_10000baseCR_Full_BIT = BIT(28),
+	QED_LM_10000baseSR_Full_BIT = BIT(29),
+	QED_LM_10000baseLR_Full_BIT = BIT(30),
+	QED_LM_10000baseLRM_Full_BIT = BIT(31),
+	QED_LM_COUNT = 32
 };
 
 struct qed_link_params {
-- 
cgit v1.2.3


From 94a04d1d3d3681adde1a3e022b25dbac7b345b7e Mon Sep 17 00:00:00 2001
From: Yonatan Cohen <yonatanc@mellanox.com>
Date: Tue, 9 Oct 2018 12:05:12 +0300
Subject: net/mlx5: Expose DC scatter to CQE capability bit

dc_req_scat_data_cqe capability bit determines
if requester scatter to cqe is available for 64 bytes CQE over
DC transport type.

Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Reviewed-by: Guy Levi <guyle@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 68f4d5f9d929..0f460fb22c31 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1005,7 +1005,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         umr_modify_atomic_disabled[0x1];
 	u8         umr_indirect_mkey_disabled[0x1];
 	u8         umr_fence[0x2];
-	u8         reserved_at_20c[0x3];
+	u8         dc_req_scat_data_cqe[0x1];
+	u8         reserved_at_20d[0x2];
 	u8         drain_sigerr[0x1];
 	u8         cmdif_checksum[0x2];
 	u8         sigerr_cqe[0x1];
-- 
cgit v1.2.3


From 616e45df7c4aa71279c07cc803a1d51f43f89f37 Mon Sep 17 00:00:00 2001
From: Dong Aisheng <aisheng.dong@nxp.com>
Date: Fri, 31 Aug 2018 12:45:54 +0800
Subject: clk: add new APIs to operate on all available clocks

This patch introduces of_clk_bulk_get_all and clk_bulk_x_all APIs
to users who just want to handle all available clocks from device tree
without need to know the detailed clock information likes clock numbers
and names. This is useful in writing some generic drivers to handle clock
part.

Cc: Stephen Boyd <sboyd@codeaurora.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Tested-by: Thor Thayer <thor.thayer@linux.intel.com>
Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/clk-bulk.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/clk.h    | 42 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 92 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/clk/clk-bulk.c b/drivers/clk/clk-bulk.c
index 4460ac52199f..6a7118d4250a 100644
--- a/drivers/clk/clk-bulk.c
+++ b/drivers/clk/clk-bulk.c
@@ -17,9 +17,11 @@
  */
 
 #include <linux/clk.h>
+#include <linux/clk-provider.h>
 #include <linux/device.h>
 #include <linux/export.h>
 #include <linux/of.h>
+#include <linux/slab.h>
 
 static int __must_check of_clk_bulk_get(struct device_node *np, int num_clks,
 					struct clk_bulk_data *clks)
@@ -49,6 +51,32 @@ err:
 	return ret;
 }
 
+static int __must_check of_clk_bulk_get_all(struct device_node *np,
+					    struct clk_bulk_data **clks)
+{
+	struct clk_bulk_data *clk_bulk;
+	int num_clks;
+	int ret;
+
+	num_clks = of_clk_get_parent_count(np);
+	if (!num_clks)
+		return 0;
+
+	clk_bulk = kmalloc_array(num_clks, sizeof(*clk_bulk), GFP_KERNEL);
+	if (!clk_bulk)
+		return -ENOMEM;
+
+	ret = of_clk_bulk_get(np, num_clks, clk_bulk);
+	if (ret) {
+		kfree(clk_bulk);
+		return ret;
+	}
+
+	*clks = clk_bulk;
+
+	return num_clks;
+}
+
 void clk_bulk_put(int num_clks, struct clk_bulk_data *clks)
 {
 	while (--num_clks >= 0) {
@@ -88,6 +116,29 @@ err:
 }
 EXPORT_SYMBOL(clk_bulk_get);
 
+void clk_bulk_put_all(int num_clks, struct clk_bulk_data *clks)
+{
+	if (IS_ERR_OR_NULL(clks))
+		return;
+
+	clk_bulk_put(num_clks, clks);
+
+	kfree(clks);
+}
+EXPORT_SYMBOL(clk_bulk_put_all);
+
+int __must_check clk_bulk_get_all(struct device *dev,
+				  struct clk_bulk_data **clks)
+{
+	struct device_node *np = dev_of_node(dev);
+
+	if (!np)
+		return 0;
+
+	return of_clk_bulk_get_all(np, clks);
+}
+EXPORT_SYMBOL(clk_bulk_get_all);
+
 #ifdef CONFIG_HAVE_CLK_PREPARE
 
 /**
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 4f750c481b82..e9433c76757b 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -312,7 +312,26 @@ struct clk *clk_get(struct device *dev, const char *id);
  */
 int __must_check clk_bulk_get(struct device *dev, int num_clks,
 			      struct clk_bulk_data *clks);
-
+/**
+ * clk_bulk_get_all - lookup and obtain all available references to clock
+ *		      producer.
+ * @dev: device for clock "consumer"
+ * @clks: pointer to the clk_bulk_data table of consumer
+ *
+ * This helper function allows drivers to get all clk consumers in one
+ * operation. If any of the clk cannot be acquired then any clks
+ * that were obtained will be freed before returning to the caller.
+ *
+ * Returns a positive value for the number of clocks obtained while the
+ * clock references are stored in the clk_bulk_data table in @clks field.
+ * Returns 0 if there're none and a negative value if something failed.
+ *
+ * Drivers must assume that the clock source is not enabled.
+ *
+ * clk_bulk_get should not be called from within interrupt context.
+ */
+int __must_check clk_bulk_get_all(struct device *dev,
+				  struct clk_bulk_data **clks);
 /**
  * devm_clk_bulk_get - managed get multiple clk consumers
  * @dev: device for clock "consumer"
@@ -487,6 +506,19 @@ void clk_put(struct clk *clk);
  */
 void clk_bulk_put(int num_clks, struct clk_bulk_data *clks);
 
+/**
+ * clk_bulk_put_all - "free" all the clock source
+ * @num_clks: the number of clk_bulk_data
+ * @clks: the clk_bulk_data table of consumer
+ *
+ * Note: drivers must ensure that all clk_bulk_enable calls made on this
+ * clock source are balanced by clk_bulk_disable calls prior to calling
+ * this function.
+ *
+ * clk_bulk_put_all should not be called from within interrupt context.
+ */
+void clk_bulk_put_all(int num_clks, struct clk_bulk_data *clks);
+
 /**
  * devm_clk_put	- "free" a managed clock source
  * @dev: device used to acquire the clock
@@ -642,6 +674,12 @@ static inline int __must_check clk_bulk_get(struct device *dev, int num_clks,
 	return 0;
 }
 
+static inline int __must_check clk_bulk_get_all(struct device *dev,
+					 struct clk_bulk_data **clks)
+{
+	return 0;
+}
+
 static inline struct clk *devm_clk_get(struct device *dev, const char *id)
 {
 	return NULL;
@@ -663,6 +701,8 @@ static inline void clk_put(struct clk *clk) {}
 
 static inline void clk_bulk_put(int num_clks, struct clk_bulk_data *clks) {}
 
+static inline void clk_bulk_put_all(int num_clks, struct clk_bulk_data *clks) {}
+
 static inline void devm_clk_put(struct device *dev, struct clk *clk) {}
 
 
-- 
cgit v1.2.3


From f08c2e2865f6f9b172f37d7bbf24716f9ebad553 Mon Sep 17 00:00:00 2001
From: Dong Aisheng <aisheng.dong@nxp.com>
Date: Fri, 31 Aug 2018 12:45:55 +0800
Subject: clk: add managed version of clk_bulk_get_all

This patch introduces the managed version of clk_bulk_get_all.

Cc: Michael Turquette <mturquette@baylibre.com>
Cc: Stephen Boyd <sboyd@codeaurora.org>
Tested-by: Thor Thayer <thor.thayer@linux.intel.com>
Signed-off-by: Dong Aisheng <aisheng.dong@nxp.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/clk-devres.c | 24 ++++++++++++++++++++++++
 include/linux/clk.h      | 23 +++++++++++++++++++++++
 2 files changed, 47 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c
index d854e26a8ddb..12c87457eca1 100644
--- a/drivers/clk/clk-devres.c
+++ b/drivers/clk/clk-devres.c
@@ -70,6 +70,30 @@ int __must_check devm_clk_bulk_get(struct device *dev, int num_clks,
 }
 EXPORT_SYMBOL_GPL(devm_clk_bulk_get);
 
+int __must_check devm_clk_bulk_get_all(struct device *dev,
+				       struct clk_bulk_data **clks)
+{
+	struct clk_bulk_devres *devres;
+	int ret;
+
+	devres = devres_alloc(devm_clk_bulk_release,
+			      sizeof(*devres), GFP_KERNEL);
+	if (!devres)
+		return -ENOMEM;
+
+	ret = clk_bulk_get_all(dev, &devres->clks);
+	if (ret > 0) {
+		*clks = devres->clks;
+		devres->num_clks = ret;
+		devres_add(dev, devres);
+	} else {
+		devres_free(devres);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(devm_clk_bulk_get_all);
+
 static int devm_clk_match(struct device *dev, void *res, void *data)
 {
 	struct clk **c = res;
diff --git a/include/linux/clk.h b/include/linux/clk.h
index e9433c76757b..c705271dc1f2 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -346,6 +346,22 @@ int __must_check clk_bulk_get_all(struct device *dev,
  */
 int __must_check devm_clk_bulk_get(struct device *dev, int num_clks,
 				   struct clk_bulk_data *clks);
+/**
+ * devm_clk_bulk_get_all - managed get multiple clk consumers
+ * @dev: device for clock "consumer"
+ * @clks: pointer to the clk_bulk_data table of consumer
+ *
+ * Returns a positive value for the number of clocks obtained while the
+ * clock references are stored in the clk_bulk_data table in @clks field.
+ * Returns 0 if there're none and a negative value if something failed.
+ *
+ * This helper function allows drivers to get several clk
+ * consumers in one operation with management, the clks will
+ * automatically be freed when the device is unbound.
+ */
+
+int __must_check devm_clk_bulk_get_all(struct device *dev,
+				       struct clk_bulk_data **clks);
 
 /**
  * devm_clk_get - lookup and obtain a managed reference to a clock producer.
@@ -691,6 +707,13 @@ static inline int __must_check devm_clk_bulk_get(struct device *dev, int num_clk
 	return 0;
 }
 
+static inline int __must_check devm_clk_bulk_get_all(struct device *dev,
+						     struct clk_bulk_data **clks)
+{
+
+	return 0;
+}
+
 static inline struct clk *devm_get_clk_from_child(struct device *dev,
 				struct device_node *np, const char *con_id)
 {
-- 
cgit v1.2.3


From 3f4c3127d332000530349db4843deece27fe5e0c Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Tue, 16 Oct 2018 10:36:01 -0700
Subject: bpf: sockmap, fix skmsg recvmsg handler to track size correctly

When converting sockmap to new skmsg generic data structures we missed
that the recvmsg handler did not correctly use sg.size and instead was
using individual elements length. The result is if a sock is closed
with outstanding data we omit the call to sk_mem_uncharge() and can
get the warning below.

[   66.728282] WARNING: CPU: 6 PID: 5783 at net/core/stream.c:206 sk_stream_kill_queues+0x1fa/0x210

To fix this correct the redirect handler to xfer the size along with
the scatterlist and also decrement the size from the recvmsg handler.
Now when a sock is closed the remaining 'size' will be decremented
with sk_mem_uncharge().

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/skmsg.h | 1 +
 net/ipv4/tcp_bpf.c    | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 0b919f0bc6d6..31df0d9fa536 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -176,6 +176,7 @@ static inline void sk_msg_xfer(struct sk_msg *dst, struct sk_msg *src,
 {
 	dst->sg.data[which] = src->sg.data[which];
 	dst->sg.data[which].length  = size;
+	dst->sg.size		   += size;
 	src->sg.data[which].length -= size;
 	src->sg.data[which].offset += size;
 }
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 80debb0daf37..f9d3cf185827 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -73,6 +73,7 @@ int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 			sge->offset += copy;
 			sge->length -= copy;
 			sk_mem_uncharge(sk, copy);
+			msg_rx->sg.size -= copy;
 			if (!sge->length) {
 				i++;
 				if (i == MAX_SKB_FRAGS)
-- 
cgit v1.2.3


From 8734a162c13b1a893e7dff8de0df81fed04c51a6 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Tue, 16 Oct 2018 11:07:59 -0700
Subject: bpf: skmsg, improve sk_msg_used_element to work in cork context

Currently sk_msg_used_element is only called in zerocopy context where
cork is not possible and if this case happens we fallback to copy
mode. However the helper is more useful if it works in all contexts.

This patch resolved the case where if end == head indicating a full
or empty ring the helper always reports an empty ring. To fix this
add a test for the full ring case to avoid reporting a full ring
has 0 elements. This additional functionality will be used in the
next patches from recvmsg context where end = head with a full ring
is a valid case.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/skmsg.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 31df0d9fa536..22347b08e1f8 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -187,18 +187,21 @@ static inline void sk_msg_xfer_full(struct sk_msg *dst, struct sk_msg *src)
 	sk_msg_init(src);
 }
 
+static inline bool sk_msg_full(const struct sk_msg *msg)
+{
+	return (msg->sg.end == msg->sg.start) && msg->sg.size;
+}
+
 static inline u32 sk_msg_elem_used(const struct sk_msg *msg)
 {
+	if (sk_msg_full(msg))
+		return MAX_MSG_FRAGS;
+
 	return msg->sg.end >= msg->sg.start ?
 		msg->sg.end - msg->sg.start :
 		msg->sg.end + (MAX_MSG_FRAGS - msg->sg.start);
 }
 
-static inline bool sk_msg_full(const struct sk_msg *msg)
-{
-	return (msg->sg.end == msg->sg.start) && msg->sg.size;
-}
-
 static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which)
 {
 	return &msg->sg.data[which];
-- 
cgit v1.2.3


From a60109dc9a954ef9eddba6577e2d2e9e7952e487 Mon Sep 17 00:00:00 2001
From: Yonatan Cohen <yonatanc@mellanox.com>
Date: Wed, 10 Oct 2018 09:25:16 +0300
Subject: IB/mlx5: Add support for extended atomic operations

Extended atomic operations cmp&swp and fetch&add is a Mellanox
feature extending the standard atomic operation to use, varied
operand sizes, as apposed to normal atomic operation that use
an 8 byte operand only.
Extended atomics allows masking the results and arguments.

This patch configures QP to support extended atomic operation
with the maximum size possible, as exposed by HCA capabilities.

Signed-off-by: Yonatan Cohen <yonatanc@mellanox.com>
Reviewed-by: Guy Levi <guyle@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
---
 drivers/infiniband/hw/mlx5/qp.c | 96 +++++++++++++++++++++++++++++++++++------
 include/linux/mlx5/driver.h     | 23 +++++-----
 2 files changed, 95 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 368728e6f980..6841c0f9237f 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1733,6 +1733,53 @@ static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
 		MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE);
 }
 
+static int atomic_size_to_mode(int size_mask)
+{
+	/* driver does not support atomic_size > 256B
+	 * and does not know how to translate bigger sizes
+	 */
+	int supported_size_mask = size_mask & 0x1ff;
+	int log_max_size;
+
+	if (!supported_size_mask)
+		return -EOPNOTSUPP;
+
+	log_max_size = __fls(supported_size_mask);
+
+	if (log_max_size > 3)
+		return log_max_size;
+
+	return MLX5_ATOMIC_MODE_8B;
+}
+
+static int get_atomic_mode(struct mlx5_ib_dev *dev,
+			   enum ib_qp_type qp_type)
+{
+	u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
+	u8 atomic = MLX5_CAP_GEN(dev->mdev, atomic);
+	int atomic_mode = -EOPNOTSUPP;
+	int atomic_size_mask;
+
+	if (!atomic)
+		return -EOPNOTSUPP;
+
+	if (qp_type == MLX5_IB_QPT_DCT)
+		atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
+	else
+		atomic_size_mask = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
+
+	if ((atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_CMP_SWAP) ||
+	    (atomic_operations & MLX5_ATOMIC_OPS_EXTENDED_FETCH_ADD))
+		atomic_mode = atomic_size_to_mode(atomic_size_mask);
+
+	if (atomic_mode <= 0 &&
+	    (atomic_operations & MLX5_ATOMIC_OPS_CMP_SWAP &&
+	     atomic_operations & MLX5_ATOMIC_OPS_FETCH_ADD))
+		atomic_mode = MLX5_ATOMIC_MODE_IB_COMP;
+
+	return atomic_mode;
+}
+
 static inline bool check_flags_mask(uint64_t input, uint64_t supported)
 {
 	return (input & ~supported) == 0;
@@ -2562,13 +2609,15 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp)
 	return 0;
 }
 
-static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_attr *attr,
-				   int attr_mask)
+static int to_mlx5_access_flags(struct mlx5_ib_qp *qp,
+				const struct ib_qp_attr *attr,
+				int attr_mask, __be32 *hw_access_flags)
 {
-	u32 hw_access_flags = 0;
 	u8 dest_rd_atomic;
 	u32 access_flags;
 
+	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
+
 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
 		dest_rd_atomic = attr->max_dest_rd_atomic;
 	else
@@ -2583,13 +2632,25 @@ static __be32 to_mlx5_access_flags(struct mlx5_ib_qp *qp, const struct ib_qp_att
 		access_flags &= IB_ACCESS_REMOTE_WRITE;
 
 	if (access_flags & IB_ACCESS_REMOTE_READ)
-		hw_access_flags |= MLX5_QP_BIT_RRE;
-	if (access_flags & IB_ACCESS_REMOTE_ATOMIC)
-		hw_access_flags |= (MLX5_QP_BIT_RAE | MLX5_ATOMIC_MODE_CX);
+		*hw_access_flags |= MLX5_QP_BIT_RRE;
+	if ((access_flags & IB_ACCESS_REMOTE_ATOMIC) &&
+	    qp->ibqp.qp_type == IB_QPT_RC) {
+		int atomic_mode;
+
+		atomic_mode = get_atomic_mode(dev, qp->ibqp.qp_type);
+		if (atomic_mode < 0)
+			return -EOPNOTSUPP;
+
+		*hw_access_flags |= MLX5_QP_BIT_RAE;
+		*hw_access_flags |= atomic_mode << MLX5_ATOMIC_MODE_OFFSET;
+	}
+
 	if (access_flags & IB_ACCESS_REMOTE_WRITE)
-		hw_access_flags |= MLX5_QP_BIT_RWE;
+		*hw_access_flags |= MLX5_QP_BIT_RWE;
+
+	*hw_access_flags = cpu_to_be32(*hw_access_flags);
 
-	return cpu_to_be32(hw_access_flags);
+	return 0;
 }
 
 enum {
@@ -3287,8 +3348,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 				cpu_to_be32(fls(attr->max_dest_rd_atomic - 1) << 21);
 	}
 
-	if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC))
-		context->params2 |= to_mlx5_access_flags(qp, attr, attr_mask);
+	if (attr_mask & (IB_QP_ACCESS_FLAGS | IB_QP_MAX_DEST_RD_ATOMIC)) {
+		__be32 access_flags = 0;
+
+		err = to_mlx5_access_flags(qp, attr, attr_mask, &access_flags);
+		if (err)
+			goto out;
+
+		context->params2 |= access_flags;
+	}
 
 	if (attr_mask & IB_QP_MIN_RNR_TIMER)
 		context->rnr_nextrecvpsn |= cpu_to_be32(attr->min_rnr_timer << 24);
@@ -3504,10 +3572,14 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
 			MLX5_SET(dctc, dctc, rwe, 1);
 		if (attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) {
-			if (!mlx5_ib_dc_atomic_is_supported(dev))
+			int atomic_mode;
+
+			atomic_mode = get_atomic_mode(dev, MLX5_IB_QPT_DCT);
+			if (atomic_mode < 0)
 				return -EOPNOTSUPP;
+
+			MLX5_SET(dctc, dctc, atomic_mode, atomic_mode);
 			MLX5_SET(dctc, dctc, rae, 1);
-			MLX5_SET(dctc, dctc, atomic_mode, MLX5_ATOMIC_MODE_DCT_CX);
 		}
 		MLX5_SET(dctc, dctc, pkey_index, attr->pkey_index);
 		MLX5_SET(dctc, dctc, port, attr->port_num);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 8fb072aa8671..a73c701edd16 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -97,14 +97,15 @@ enum {
 };
 
 enum {
-	MLX5_ATOMIC_MODE_IB_COMP	= 1 << 16,
-	MLX5_ATOMIC_MODE_CX		= 2 << 16,
-	MLX5_ATOMIC_MODE_8B		= 3 << 16,
-	MLX5_ATOMIC_MODE_16B		= 4 << 16,
-	MLX5_ATOMIC_MODE_32B		= 5 << 16,
-	MLX5_ATOMIC_MODE_64B		= 6 << 16,
-	MLX5_ATOMIC_MODE_128B		= 7 << 16,
-	MLX5_ATOMIC_MODE_256B		= 8 << 16,
+	MLX5_ATOMIC_MODE_OFFSET = 16,
+	MLX5_ATOMIC_MODE_IB_COMP = 1,
+	MLX5_ATOMIC_MODE_CX = 2,
+	MLX5_ATOMIC_MODE_8B = 3,
+	MLX5_ATOMIC_MODE_16B = 4,
+	MLX5_ATOMIC_MODE_32B = 5,
+	MLX5_ATOMIC_MODE_64B = 6,
+	MLX5_ATOMIC_MODE_128B = 7,
+	MLX5_ATOMIC_MODE_256B = 8,
 };
 
 enum {
@@ -162,13 +163,11 @@ enum mlx5_dcbx_oper_mode {
 	MLX5E_DCBX_PARAM_VER_OPER_AUTO  = 0x3,
 };
 
-enum mlx5_dct_atomic_mode {
-	MLX5_ATOMIC_MODE_DCT_CX         = 2,
-};
-
 enum {
 	MLX5_ATOMIC_OPS_CMP_SWAP	= 1 << 0,
 	MLX5_ATOMIC_OPS_FETCH_ADD	= 1 << 1,
+	MLX5_ATOMIC_OPS_EXTENDED_CMP_SWAP = 1 << 2,
+	MLX5_ATOMIC_OPS_EXTENDED_FETCH_ADD = 1 << 3,
 };
 
 enum mlx5_page_fault_resume_flags {
-- 
cgit v1.2.3


From 977196b8c5b20b901acb0042579e30d7fa55790a Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Thu, 4 Oct 2018 15:27:37 -0600
Subject: PCI/P2PDMA: Add PCI p2pmem DMA mappings to adjust the bus offset

The DMA address used when mapping PCI P2P memory must be the PCI bus
address.  Thus, introduce pci_p2pmem_map_sg() to map the correct addresses
when using P2P memory.

Memory mapped in this way does not need to be unmapped and thus if we
provided pci_p2pmem_unmap_sg() it would be empty.  This breaks the expected
balance between map/unmap but was left out as an empty function doesn't
really provide any benefit.  In the future, if this call becomes necessary
it can be added without much difficulty.

For this, we assume that an SGL passed to these functions contain all P2P
memory or no P2P memory.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 drivers/pci/p2pdma.c       | 43 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/memremap.h   |  1 +
 include/linux/pci-p2pdma.h |  7 +++++++
 3 files changed, 51 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index a8d484ddc5ad..09b3146c145c 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -194,6 +194,8 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 	pgmap->res.flags = pci_resource_flags(pdev, bar);
 	pgmap->ref = &pdev->p2pdma->devmap_ref;
 	pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
+	pgmap->pci_p2pdma_bus_offset = pci_bus_address(pdev, bar) -
+		pci_resource_start(pdev, bar);
 
 	addr = devm_memremap_pages(&pdev->dev, pgmap);
 	if (IS_ERR(addr)) {
@@ -678,3 +680,44 @@ void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
 		pdev->p2pdma->p2pmem_published = publish;
 }
 EXPORT_SYMBOL_GPL(pci_p2pmem_publish);
+
+/**
+ * pci_p2pdma_map_sg - map a PCI peer-to-peer scatterlist for DMA
+ * @dev: device doing the DMA request
+ * @sg: scatter list to map
+ * @nents: elements in the scatterlist
+ * @dir: DMA direction
+ *
+ * Scatterlists mapped with this function should not be unmapped in any way.
+ *
+ * Returns the number of SG entries mapped or 0 on error.
+ */
+int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+		      enum dma_data_direction dir)
+{
+	struct dev_pagemap *pgmap;
+	struct scatterlist *s;
+	phys_addr_t paddr;
+	int i;
+
+	/*
+	 * p2pdma mappings are not compatible with devices that use
+	 * dma_virt_ops. If the upper layers do the right thing
+	 * this should never happen because it will be prevented
+	 * by the check in pci_p2pdma_add_client()
+	 */
+	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_DMA_VIRT_OPS) &&
+			 dev->dma_ops == &dma_virt_ops))
+		return 0;
+
+	for_each_sg(sg, s, nents, i) {
+		pgmap = sg_page(s)->pgmap;
+		paddr = sg_phys(s);
+
+		s->dma_address = paddr - pgmap->pci_p2pdma_bus_offset;
+		sg_dma_len(s) = s->length;
+	}
+
+	return nents;
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg);
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 9553370ebdad..0ac69ddf5fc4 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -125,6 +125,7 @@ struct dev_pagemap {
 	struct device *dev;
 	void *data;
 	enum memory_type type;
+	u64 pci_p2pdma_bus_offset;
 };
 
 #ifdef CONFIG_ZONE_DEVICE
diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h
index 7bdaacfd5892..b6dfb6dc2e53 100644
--- a/include/linux/pci-p2pdma.h
+++ b/include/linux/pci-p2pdma.h
@@ -30,6 +30,8 @@ struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev,
 					 unsigned int *nents, u32 length);
 void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl);
 void pci_p2pmem_publish(struct pci_dev *pdev, bool publish);
+int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
+		      enum dma_data_direction dir);
 #else /* CONFIG_PCI_P2PDMA */
 static inline int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar,
 		size_t size, u64 offset)
@@ -75,6 +77,11 @@ static inline void pci_p2pmem_free_sgl(struct pci_dev *pdev,
 static inline void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
 {
 }
+static inline int pci_p2pdma_map_sg(struct device *dev,
+		struct scatterlist *sg, int nents, enum dma_data_direction dir)
+{
+	return 0;
+}
 #endif /* CONFIG_PCI_P2PDMA */
 
 
-- 
cgit v1.2.3


From 2d7bc010f450d803db9fed1a25da6144ff6140d3 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Thu, 4 Oct 2018 15:27:38 -0600
Subject: PCI/P2PDMA: Introduce configfs/sysfs enable attribute helpers

Users of the P2PDMA infrastructure will typically need a way for the user
to tell the kernel to use P2P resources.  Typically this will be a simple
on/off boolean operation but sometimes it may be desirable for the user to
specify the exact device to use for the P2P operation.

Add new helpers for attributes which take a boolean or a PCI device.  Any
boolean as accepted by strtobool() turn P2P on or off (such as 'y', 'n',
'1', '0', etc).  Specifying a full PCI device name/BDF will select the
specific device.

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 drivers/pci/p2pdma.c       | 82 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pci-p2pdma.h | 15 +++++++++
 2 files changed, 97 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 09b3146c145c..ae3c5b25dcc7 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -8,6 +8,8 @@
  * Copyright (c) 2018, Eideticom Inc.
  */
 
+#define pr_fmt(fmt) "pci-p2pdma: " fmt
+#include <linux/ctype.h>
 #include <linux/pci-p2pdma.h>
 #include <linux/module.h>
 #include <linux/slab.h>
@@ -721,3 +723,83 @@ int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 	return nents;
 }
 EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg);
+
+/**
+ * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store
+ *		to enable p2pdma
+ * @page: contents of the value to be stored
+ * @p2p_dev: returns the PCI device that was selected to be used
+ *		(if one was specified in the stored value)
+ * @use_p2pdma: returns whether to enable p2pdma or not
+ *
+ * Parses an attribute value to decide whether to enable p2pdma.
+ * The value can select a PCI device (using it's full BDF device
+ * name) or a boolean (in any format strtobool() accepts). A false
+ * value disables p2pdma, a true value expects the caller
+ * to automatically find a compatible device and specifying a PCI device
+ * expects the caller to use the specific provider.
+ *
+ * pci_p2pdma_enable_show() should be used as the show operation for
+ * the attribute.
+ *
+ * Returns 0 on success
+ */
+int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev,
+			    bool *use_p2pdma)
+{
+	struct device *dev;
+
+	dev = bus_find_device_by_name(&pci_bus_type, NULL, page);
+	if (dev) {
+		*use_p2pdma = true;
+		*p2p_dev = to_pci_dev(dev);
+
+		if (!pci_has_p2pmem(*p2p_dev)) {
+			pci_err(*p2p_dev,
+				"PCI device has no peer-to-peer memory: %s\n",
+				page);
+			pci_dev_put(*p2p_dev);
+			return -ENODEV;
+		}
+
+		return 0;
+	} else if ((page[0] == '0' || page[0] == '1') && !iscntrl(page[1])) {
+		/*
+		 * If the user enters a PCI device that  doesn't exist
+		 * like "0000:01:00.1", we don't want strtobool to think
+		 * it's a '0' when it's clearly not what the user wanted.
+		 * So we require 0's and 1's to be exactly one character.
+		 */
+	} else if (!strtobool(page, use_p2pdma)) {
+		return 0;
+	}
+
+	pr_err("No such PCI device: %.*s\n", (int)strcspn(page, "\n"), page);
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_enable_store);
+
+/**
+ * pci_p2pdma_enable_show - show a configfs/sysfs attribute indicating
+ *		whether p2pdma is enabled
+ * @page: contents of the stored value
+ * @p2p_dev: the selected p2p device (NULL if no device is selected)
+ * @use_p2pdma: whether p2pdme has been enabled
+ *
+ * Attributes that use pci_p2pdma_enable_store() should use this function
+ * to show the value of the attribute.
+ *
+ * Returns 0 on success
+ */
+ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev,
+			       bool use_p2pdma)
+{
+	if (!use_p2pdma)
+		return sprintf(page, "0\n");
+
+	if (!p2p_dev)
+		return sprintf(page, "1\n");
+
+	return sprintf(page, "%s\n", pci_name(p2p_dev));
+}
+EXPORT_SYMBOL_GPL(pci_p2pdma_enable_show);
diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h
index b6dfb6dc2e53..bca9bc3e5be7 100644
--- a/include/linux/pci-p2pdma.h
+++ b/include/linux/pci-p2pdma.h
@@ -32,6 +32,10 @@ void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl);
 void pci_p2pmem_publish(struct pci_dev *pdev, bool publish);
 int pci_p2pdma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
 		      enum dma_data_direction dir);
+int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev,
+			    bool *use_p2pdma);
+ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev,
+			       bool use_p2pdma);
 #else /* CONFIG_PCI_P2PDMA */
 static inline int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar,
 		size_t size, u64 offset)
@@ -82,6 +86,17 @@ static inline int pci_p2pdma_map_sg(struct device *dev,
 {
 	return 0;
 }
+static inline int pci_p2pdma_enable_store(const char *page,
+		struct pci_dev **p2p_dev, bool *use_p2pdma)
+{
+	*use_p2pdma = false;
+	return 0;
+}
+static inline ssize_t pci_p2pdma_enable_show(char *page,
+		struct pci_dev *p2p_dev, bool use_p2pdma)
+{
+	return sprintf(page, "none\n");
+}
 #endif /* CONFIG_PCI_P2PDMA */
 
 
-- 
cgit v1.2.3


From 49d92c0dd64ae769c2d67fe27ac260ae31259ba6 Mon Sep 17 00:00:00 2001
From: Logan Gunthorpe <logang@deltatee.com>
Date: Thu, 4 Oct 2018 15:27:41 -0600
Subject: block: Add PCI P2P flag for request queue

Add QUEUE_FLAG_PCI_P2P, meaning a driver's request queue supports targeting
P2P memory.  This will be used by P2P providers and orchestrators (in
subsequent patches) to ensure block devices can support P2P memory before
submitting P2P-backed pages to submit_bio().

Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blkdev.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6980014357d4..c32f7171899b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -699,6 +699,7 @@ struct request_queue {
 #define QUEUE_FLAG_SCSI_PASSTHROUGH 27	/* queue supports SCSI commands */
 #define QUEUE_FLAG_QUIESCED    28	/* queue has been quiesced */
 #define QUEUE_FLAG_PREEMPT_ONLY	29	/* only process REQ_PREEMPT requests */
+#define QUEUE_FLAG_PCI_P2PDMA  30	/* device supports PCI p2p requests */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
@@ -731,6 +732,8 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
 #define blk_queue_dax(q)	test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
 #define blk_queue_scsi_passthrough(q)	\
 	test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
+#define blk_queue_pci_p2pdma(q)	\
+	test_bit(QUEUE_FLAG_PCI_P2PDMA, &(q)->queue_flags)
 
 #define blk_noretry_request(rq) \
 	((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
-- 
cgit v1.2.3


From 9c0be3f6b5d776dfe3ed249862c244a4486414dc Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Sat, 13 Oct 2018 15:10:50 -0400
Subject: tracepoint: Fix tracepoint array element size mismatch

commit 46e0c9be206f ("kernel: tracepoints: add support for relative
references") changes the layout of the __tracepoint_ptrs section on
architectures supporting relative references. However, it does so
without turning struct tracepoint * const into const int elsewhere in
the tracepoint code, which has the following side-effect:

Setting mod->num_tracepoints is done in by module.c:

    mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs",
                                         sizeof(*mod->tracepoints_ptrs),
                                         &mod->num_tracepoints);

Basically, since sizeof(*mod->tracepoints_ptrs) is a pointer size
(rather than sizeof(int)), num_tracepoints is erroneously set to half the
size it should be on 64-bit arch. So a module with an odd number of
tracepoints misses the last tracepoint due to effect of integer
division.

So in the module going notifier:

        for_each_tracepoint_range(mod->tracepoints_ptrs,
                mod->tracepoints_ptrs + mod->num_tracepoints,
                tp_module_going_check_quiescent, NULL);

the expression (mod->tracepoints_ptrs + mod->num_tracepoints) actually
evaluates to something within the bounds of the array, but miss the
last tracepoint if the number of tracepoints is odd on 64-bit arch.

Fix this by introducing a new typedef: tracepoint_ptr_t, which
is either "const int" on architectures that have PREL32 relocations,
or "struct tracepoint * const" on architectures that does not have
this feature.

Also provide a new tracepoint_ptr_defer() static inline to
encapsulate deferencing this type rather than duplicate code and
ugly idefs within the for_each_tracepoint_range() implementation.

This issue appears in 4.19-rc kernels, and should ideally be fixed
before the end of the rc cycle.

Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Acked-by: Jessica Yu <jeyu@kernel.org>
Link: http://lkml.kernel.org/r/20181013191050.22389-1-mathieu.desnoyers@efficios.com
Link: http://lkml.kernel.org/r/20180704083651.24360-7-ard.biesheuvel@linaro.org
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: James Morris <james.morris@microsoft.com>
Cc: James Morris <jmorris@namei.org>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nicolas Pitre <nico@linaro.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Russell King <linux@armlinux.org.uk>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Thomas Garnier <thgarnie@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/module.h          |  3 ++-
 include/linux/tracepoint-defs.h |  6 ++++++
 include/linux/tracepoint.h      | 36 +++++++++++++++++++++++-------------
 kernel/tracepoint.c             | 24 ++++++++----------------
 4 files changed, 39 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index f807f15bebbe..e19ae08c7fb8 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -20,6 +20,7 @@
 #include <linux/export.h>
 #include <linux/rbtree_latch.h>
 #include <linux/error-injection.h>
+#include <linux/tracepoint-defs.h>
 
 #include <linux/percpu.h>
 #include <asm/module.h>
@@ -430,7 +431,7 @@ struct module {
 
 #ifdef CONFIG_TRACEPOINTS
 	unsigned int num_tracepoints;
-	struct tracepoint * const *tracepoints_ptrs;
+	tracepoint_ptr_t *tracepoints_ptrs;
 #endif
 #ifdef HAVE_JUMP_LABEL
 	struct jump_entry *jump_entries;
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index 22c5a46e9693..49ba9cde7e4b 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -35,6 +35,12 @@ struct tracepoint {
 	struct tracepoint_func __rcu *funcs;
 };
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+typedef const int tracepoint_ptr_t;
+#else
+typedef struct tracepoint * const tracepoint_ptr_t;
+#endif
+
 struct bpf_raw_event_map {
 	struct tracepoint	*tp;
 	void			*bpf_func;
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 041f7e56a289..538ba1a58f5b 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -99,6 +99,29 @@ extern void syscall_unregfunc(void);
 #define TRACE_DEFINE_ENUM(x)
 #define TRACE_DEFINE_SIZEOF(x)
 
+#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
+static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
+{
+	return offset_to_ptr(p);
+}
+
+#define __TRACEPOINT_ENTRY(name)					\
+	asm("	.section \"__tracepoints_ptrs\", \"a\"		\n"	\
+	    "	.balign 4					\n"	\
+	    "	.long 	__tracepoint_" #name " - .		\n"	\
+	    "	.previous					\n")
+#else
+static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
+{
+	return *p;
+}
+
+#define __TRACEPOINT_ENTRY(name)					 \
+	static tracepoint_ptr_t __tracepoint_ptr_##name __used		 \
+	__attribute__((section("__tracepoints_ptrs"))) =		 \
+		&__tracepoint_##name
+#endif
+
 #endif /* _LINUX_TRACEPOINT_H */
 
 /*
@@ -253,19 +276,6 @@ extern void syscall_unregfunc(void);
 		return static_key_false(&__tracepoint_##name.key);	\
 	}
 
-#ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
-#define __TRACEPOINT_ENTRY(name)					\
-	asm("	.section \"__tracepoints_ptrs\", \"a\"		\n"	\
-	    "	.balign 4					\n"	\
-	    "	.long 	__tracepoint_" #name " - .		\n"	\
-	    "	.previous					\n")
-#else
-#define __TRACEPOINT_ENTRY(name)					 \
-	static struct tracepoint * const __tracepoint_ptr_##name __used	 \
-	__attribute__((section("__tracepoints_ptrs"))) =		 \
-		&__tracepoint_##name
-#endif
-
 /*
  * We have no guarantee that gcc and the linker won't up-align the tracepoint
  * structures, so we create an array of pointers that will be used for iteration
diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index bf2c06ef9afc..a3be42304485 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -28,8 +28,8 @@
 #include <linux/sched/task.h>
 #include <linux/static_key.h>
 
-extern struct tracepoint * const __start___tracepoints_ptrs[];
-extern struct tracepoint * const __stop___tracepoints_ptrs[];
+extern tracepoint_ptr_t __start___tracepoints_ptrs[];
+extern tracepoint_ptr_t __stop___tracepoints_ptrs[];
 
 DEFINE_SRCU(tracepoint_srcu);
 EXPORT_SYMBOL_GPL(tracepoint_srcu);
@@ -371,25 +371,17 @@ int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data)
 }
 EXPORT_SYMBOL_GPL(tracepoint_probe_unregister);
 
-static void for_each_tracepoint_range(struct tracepoint * const *begin,
-		struct tracepoint * const *end,
+static void for_each_tracepoint_range(
+		tracepoint_ptr_t *begin, tracepoint_ptr_t *end,
 		void (*fct)(struct tracepoint *tp, void *priv),
 		void *priv)
 {
+	tracepoint_ptr_t *iter;
+
 	if (!begin)
 		return;
-
-	if (IS_ENABLED(CONFIG_HAVE_ARCH_PREL32_RELOCATIONS)) {
-		const int *iter;
-
-		for (iter = (const int *)begin; iter < (const int *)end; iter++)
-			fct(offset_to_ptr(iter), priv);
-	} else {
-		struct tracepoint * const *iter;
-
-		for (iter = begin; iter < end; iter++)
-			fct(*iter, priv);
-	}
+	for (iter = begin; iter < end; iter++)
+		fct(tracepoint_ptr_deref(iter), priv);
 }
 
 #ifdef CONFIG_MODULES
-- 
cgit v1.2.3


From f366d322aea782cf786aa821d5accdc1609f9e10 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 6 Sep 2018 10:19:30 +0100
Subject: UAPI: ndctl: Remove use of PAGE_SIZE

The macro PAGE_SIZE isn't valid outside of the kernel, so it should not
appear in UAPI headers.

Furthermore, the actual machine page size could theoretically change from
an application's point of view if it's running in a container that gets
migrated to another machine (say 4K/ppc64 to 64K/ppc64).

Fixes: f2ba5a5baecf ("libnvdimm, namespace: make min namespace size 4K")
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/ndctl.h      | 22 ++++++++++++++++++++++
 include/uapi/linux/ndctl.h |  4 ----
 2 files changed, 22 insertions(+), 4 deletions(-)
 create mode 100644 include/linux/ndctl.h

(limited to 'include/linux')

diff --git a/include/linux/ndctl.h b/include/linux/ndctl.h
new file mode 100644
index 000000000000..cd5a293ce3ae
--- /dev/null
+++ b/include/linux/ndctl.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2014-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU Lesser General Public License,
+ * version 2.1, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+ * more details.
+ */
+#ifndef _LINUX_NDCTL_H
+#define _LINUX_NDCTL_H
+
+#include <uapi/linux/ndctl.h>
+
+enum {
+	ND_MIN_NAMESPACE_SIZE = PAGE_SIZE,
+};
+
+#endif /* _LINUX_NDCTL_H */
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 2f2c43d633c5..f57c9e434d2d 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -202,10 +202,6 @@ enum nd_driver_flags {
 	ND_DRIVER_DAX_PMEM	  = 1 << ND_DEVICE_DAX_PMEM,
 };
 
-enum {
-	ND_MIN_NAMESPACE_SIZE = PAGE_SIZE,
-};
-
 enum ars_masks {
 	ARS_STATUS_MASK = 0x0000FFFF,
 	ARS_EXT_STATUS_SHIFT = 16,
-- 
cgit v1.2.3


From b8aee82250b7d90a32b11ba208656f52dbaca342 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Tue, 2 Oct 2018 22:57:24 +0000
Subject: net/mlx5: E-Switch, Get counters for offloaded flows from callers

There's no real reason for the e-switch logic to manage the creation of
counters for offloaded flows. The API already has the directive for the
caller to denote they want to attach a counter to the created flow.
As such, we go and move the management of flow counters to the mlx5e
tc offload logic. This also lets us remove an inelegant interface where
the FS layer had to provide a way to retrieve a counter from a flow rule.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    | 32 ++++++++++++++++++++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  1 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 20 ++------------
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c  | 15 ----------
 include/linux/mlx5/fs.h                            |  1 -
 5 files changed, 33 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index acf7a847f561..8a27c0813a18 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -61,6 +61,7 @@ struct mlx5_nic_flow_attr {
 	u32 hairpin_tirn;
 	u8 match_level;
 	struct mlx5_flow_table	*hairpin_ft;
+	struct mlx5_fc		*counter;
 };
 
 #define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1)
@@ -721,6 +722,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
 		dest[dest_ix].counter = counter;
 		dest_ix++;
+		attr->counter = counter;
 	}
 
 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
@@ -797,7 +799,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
 	struct mlx5_nic_flow_attr *attr = flow->nic_attr;
 	struct mlx5_fc *counter = NULL;
 
-	counter = mlx5_flow_rule_counter(flow->rule[0]);
+	counter = attr->counter;
 	mlx5_del_flow_rules(flow->rule[0]);
 	mlx5_fc_destroy(priv->mdev, counter);
 
@@ -833,6 +835,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
 	struct net_device *out_dev, *encap_dev = NULL;
 	struct mlx5_flow_handle *rule = NULL;
+	struct mlx5_fc *counter = NULL;
 	struct mlx5e_rep_priv *rpriv;
 	struct mlx5e_priv *out_priv;
 	int err;
@@ -868,6 +871,16 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 		}
 	}
 
+	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+		counter = mlx5_fc_create(esw->dev, true);
+		if (IS_ERR(counter)) {
+			rule = ERR_CAST(counter);
+			goto err_create_counter;
+		}
+
+		attr->counter = counter;
+	}
+
 	/* we get here if (1) there's no error (rule being null) or when
 	 * (2) there's an encap action and we're on -EAGAIN (no valid neigh)
 	 */
@@ -888,6 +901,8 @@ err_fwd_rule:
 	mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
 	rule = flow->rule[1];
 err_add_rule:
+	mlx5_fc_destroy(esw->dev, counter);
+err_create_counter:
 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 		mlx5e_detach_mod_hdr(priv, flow);
 err_mod_hdr:
@@ -921,6 +936,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
 
 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
 		mlx5e_detach_mod_hdr(priv, flow);
+
+	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+		mlx5_fc_destroy(esw->dev, attr->counter);
 }
 
 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
@@ -992,6 +1010,14 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
 	}
 }
 
+static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
+{
+	if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+		return flow->esw_attr->counter;
+	else
+		return flow->nic_attr->counter;
+}
+
 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
 {
 	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
@@ -1017,7 +1043,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
 			continue;
 		list_for_each_entry(flow, &e->flows, encap) {
 			if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
-				counter = mlx5_flow_rule_counter(flow->rule[0]);
+				counter = mlx5e_tc_get_counter(flow);
 				mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
 				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
 					neigh_used = true;
@@ -3019,7 +3045,7 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv,
 	if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED))
 		return 0;
 
-	counter = mlx5_flow_rule_counter(flow->rule[0]);
+	counter = mlx5e_tc_get_counter(flow);
 	if (!counter)
 		return 0;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index dfc642de4e6d..c1b627577003 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -266,6 +266,7 @@ struct mlx5_esw_flow_attr {
 	u32	encap_id;
 	u32	mod_hdr_id;
 	u8	match_level;
+	struct mlx5_fc *counter;
 	struct mlx5e_tc_flow_parse_attr *parse_attr;
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 0741683f7d70..a2f2d726c99b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -51,7 +51,6 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 	struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
 	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_table *ft = NULL;
-	struct mlx5_fc *counter = NULL;
 	struct mlx5_flow_handle *rule;
 	int j, i = 0;
 	void *misc;
@@ -91,13 +90,8 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 		}
 	}
 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
-		counter = mlx5_fc_create(esw->dev, true);
-		if (IS_ERR(counter)) {
-			rule = ERR_CAST(counter);
-			goto err_counter_alloc;
-		}
 		dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
-		dest[i].counter = counter;
+		dest[i].counter = attr->counter;
 		i++;
 	}
 
@@ -132,15 +126,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 
 	rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, i);
 	if (IS_ERR(rule))
-		goto err_add_rule;
+		goto out;
 	else
 		esw->offloads.num_flows++;
 
-	return rule;
-
-err_add_rule:
-	mlx5_fc_destroy(esw->dev, counter);
-err_counter_alloc:
+out:
 	return rule;
 }
 
@@ -200,11 +190,7 @@ mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
 				struct mlx5_flow_handle *rule,
 				struct mlx5_esw_flow_attr *attr)
 {
-	struct mlx5_fc *counter = NULL;
-
-	counter = mlx5_flow_rule_counter(rule);
 	mlx5_del_flow_rules(rule);
-	mlx5_fc_destroy(esw->dev, counter);
 	esw->offloads.num_flows--;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 8d340e5181f8..9e18e6c0a8b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1474,21 +1474,6 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
 	return handle;
 }
 
-struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle)
-{
-	struct mlx5_flow_rule *dst;
-	struct fs_fte *fte;
-
-	fs_get_obj(fte, handle->rule[0]->node.parent);
-
-	fs_for_each_dst(dst, fte) {
-		if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
-			return dst->dest_attr.counter;
-	}
-
-	return NULL;
-}
-
 static bool counter_is_valid(struct mlx5_fc *counter, u32 action)
 {
 	if (!(action & MLX5_FLOW_CONTEXT_ACTION_COUNT))
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index b1c026f1c8ba..74d0ea146c9a 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -186,7 +186,6 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler,
 				 struct mlx5_flow_destination *new_dest,
 				 struct mlx5_flow_destination *old_dest);
 
-struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handler);
 struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging);
 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter);
 void mlx5_fc_query_cached(struct mlx5_fc *counter,
-- 
cgit v1.2.3


From 171c7625bef999848ee6032c6dde96e7330c4d15 Mon Sep 17 00:00:00 2001
From: Mark Bloch <markb@mellanox.com>
Date: Wed, 3 Oct 2018 00:03:35 +0000
Subject: net/mlx5: Use flow counter IDs and not the wrapping cache object

Currently, when a flow rule is created using the FS core layer, the caller
has to pass the entire flow counter object and not just the counter HW
handle (ID). This requires both the FS core and the caller to have
knowledge about the inner implementation of the FS layer flow counters
cache and limits the possible users.

Move to use the counter ID across the place when dealing with flows.

Doing this decoupling, now can we privatize the inner implementation
of the flow counters.

Signed-off-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c                            |  7 +++++--
 drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h |  6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c              |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c            |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c             |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c            | 10 ++--------
 drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c        |  6 ++++++
 include/linux/mlx5/fs.h                                      |  3 ++-
 9 files changed, 23 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 5d9b7f62a0ba..5ced0cc46ba1 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3320,15 +3320,18 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 	}
 
 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+		struct mlx5_ib_mcounters *mcounters;
+
 		err = flow_counters_set_data(flow_act.counters, ucmd);
 		if (err)
 			goto free;
 
+		mcounters = to_mcounters(flow_act.counters);
 		handler->ibcounters = flow_act.counters;
 		dest_arr[dest_num].type =
 			MLX5_FLOW_DESTINATION_TYPE_COUNTER;
-		dest_arr[dest_num].counter =
-			to_mcounters(flow_act.counters)->hw_cntrs_hndl;
+		dest_arr[dest_num].counter_id =
+			mlx5_fc_id(mcounters->hw_cntrs_hndl);
 		dest_num++;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
index e83dda441a81..d027ce00c8ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -252,10 +252,10 @@ TRACE_EVENT(mlx5_fs_add_rule,
 			   memcpy(__entry->destination,
 				  &rule->dest_attr,
 				  sizeof(__entry->destination));
-			   if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
-			       rule->dest_attr.counter)
+			   if (rule->dest_attr.type &
+			       MLX5_FLOW_DESTINATION_TYPE_COUNTER)
 				__entry->counter_id =
-				rule->dest_attr.counter->id;
+					rule->dest_attr.counter_id;
 	    ),
 	    TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n",
 		      __entry->rule, __entry->fte, __entry->index,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 8a27c0813a18..5ce87f54852d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -720,7 +720,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 			goto err_fc_create;
 		}
 		dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
-		dest[dest_ix].counter = counter;
+		dest[dest_ix].counter_id = mlx5_fc_id(counter);
 		dest_ix++;
 		attr->counter = counter;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index e1d47fa5ab83..9c893d7d273e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1198,7 +1198,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 	if (counter) {
 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
 		drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
-		drop_ctr_dst.counter = counter;
+		drop_ctr_dst.counter_id = mlx5_fc_id(counter);
 		dst = &drop_ctr_dst;
 		dest_num++;
 	}
@@ -1285,7 +1285,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 	if (counter) {
 		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
 		drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
-		drop_ctr_dst.counter = counter;
+		drop_ctr_dst.counter_id = mlx5_fc_id(counter);
 		dst = &drop_ctr_dst;
 		dest_num++;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index a2f2d726c99b..39932dce15cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -91,7 +91,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 	}
 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
 		dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
-		dest[i].counter = attr->counter;
+		dest[i].counter_id = mlx5_fc_id(attr->counter);
 		i++;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index dc8d7f6b52c2..08a891f9aade 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -419,7 +419,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 				continue;
 
 			MLX5_SET(flow_counter_list, in_dests, flow_counter_id,
-				 dst->dest_attr.counter->id);
+				 dst->dest_attr.counter_id);
 			in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
 			list_size++;
 		}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 9e18e6c0a8b3..cdcbf9d0ae6c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1474,14 +1474,8 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
 	return handle;
 }
 
-static bool counter_is_valid(struct mlx5_fc *counter, u32 action)
+static bool counter_is_valid(u32 action)
 {
-	if (!(action & MLX5_FLOW_CONTEXT_ACTION_COUNT))
-		return !counter;
-
-	if (!counter)
-		return false;
-
 	return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP |
 			  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST));
 }
@@ -1491,7 +1485,7 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest,
 			  struct mlx5_flow_table *ft)
 {
 	if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER))
-		return counter_is_valid(dest->counter, action);
+		return counter_is_valid(action);
 
 	if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
 		return true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 09206c4acd9a..1329bc5b7969 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -258,6 +258,12 @@ err_out:
 }
 EXPORT_SYMBOL(mlx5_fc_create);
 
+u32 mlx5_fc_id(struct mlx5_fc *counter)
+{
+	return counter->id;
+}
+EXPORT_SYMBOL(mlx5_fc_id);
+
 void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
 {
 	struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 74d0ea146c9a..a5fc62184195 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -92,7 +92,7 @@ struct mlx5_flow_destination {
 		u32			tir_num;
 		u32			ft_num;
 		struct mlx5_flow_table	*ft;
-		struct mlx5_fc		*counter;
+		u32			counter_id;
 		struct {
 			u16		num;
 			u16		vhca_id;
@@ -192,6 +192,7 @@ void mlx5_fc_query_cached(struct mlx5_fc *counter,
 			  u64 *bytes, u64 *packets, u64 *lastuse);
 int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
 		  u64 *packets, u64 *bytes);
+u32 mlx5_fc_id(struct mlx5_fc *counter);
 
 int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
 int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn);
-- 
cgit v1.2.3


From b9aa0ba17af5afa13605eb6ea91f1974da97a2e2 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@mellanox.com>
Date: Thu, 31 May 2018 11:50:23 +0300
Subject: net/mlx5: Add cap bits for multi fdb encap

If set, the firmware supports creating of flow tables with encap
enabled while VFs are configured, if we already created one
(restriction still applies on the first creation).

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 15e36198f85f..963611820006 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -584,7 +584,9 @@ struct mlx5_ifc_flow_table_nic_cap_bits {
 struct mlx5_ifc_flow_table_eswitch_cap_bits {
 	u8      reserved_at_0[0x1c];
 	u8      fdb_multi_path_to_table[0x1];
-	u8      reserved_at_1d[0x1e3];
+	u8      reserved_at_1d[0x1];
+	u8      multi_fdb_encap[0x1];
+	u8      reserved_at_1e[0x1e1];
 
 	struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_esw_fdb;
 
-- 
cgit v1.2.3


From 328edb499f99126946845ece477c9c1afe8631af Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@mellanox.com>
Date: Tue, 3 Jul 2018 11:13:00 +0300
Subject: net/mlx5: Split FDB fast path prio to multiple namespaces

Towards supporting multi-chains and priorities, split the FDB fast path
to multiple namespaces (sub namespaces), each with multiple priorities.

This patch adds a new flow steering type, FS_TYPE_PRIO_CHAINS, which is
like current FS_TYPE_PRIO, but may contain only namespaces, and those
will be in parallel to one another in terms of managing of the flow
tables connections inside them. Meaning, while searching for the next
or previous flow table to connect for a new table inside such namespace
we skip the parallel namespaces in the same level under the
FS_TYPE_PRIO_CHAINS prio we originated from.

We use this new type for splitting the fast path prio into multiple
parallel namespaces, each containing normal prios.
The prios inside them (and their tables) will be connected to one
another, but not from one parallel namespace to another, instead the
last prio in each namespace will be connected to the next prio in
the containing FDB namespace, which is the slow path prio.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Acked-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h |  7 ++
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 88 ++++++++++++++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 13 ++++
 include/linux/mlx5/fs.h                           |  2 +
 5 files changed, 101 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 9c893d7d273e..d004957328f9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -263,7 +263,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
 	esw_debug(dev, "Create FDB log_max_size(%d)\n",
 		  MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
 
-	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+	root_ns = mlx5_get_fdb_sub_ns(dev, 0);
 	if (!root_ns) {
 		esw_warn(dev, "Failed to get FDB flow namespace\n");
 		return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index c1b627577003..1698a322a7c4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -59,6 +59,9 @@
 #define mlx5_esw_has_fwd_fdb(dev) \
 	MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table)
 
+#define FDB_MAX_CHAIN 3
+#define FDB_MAX_PRIO 16
+
 struct vport_ingress {
 	struct mlx5_flow_table *acl;
 	struct mlx5_flow_group *allow_untagged_spoofchk_grp;
@@ -319,6 +322,10 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
 static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {}
 static inline int  mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
 static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
+
+#define FDB_MAX_CHAIN 1
+#define FDB_MAX_PRIO 1
+
 #endif /* CONFIG_MLX5_ESWITCH */
 
 #endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index cdcbf9d0ae6c..7eb6d58733ac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -40,6 +40,7 @@
 #include "diag/fs_tracepoint.h"
 #include "accel/ipsec.h"
 #include "fpga/ipsec.h"
+#include "eswitch.h"
 
 #define INIT_TREE_NODE_ARRAY_SIZE(...)	(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
 					 sizeof(struct init_tree_node))
@@ -713,7 +714,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node  *root,
 	struct fs_node *iter = list_entry(start, struct fs_node, list);
 	struct mlx5_flow_table *ft = NULL;
 
-	if (!root)
+	if (!root || root->type == FS_TYPE_PRIO_CHAINS)
 		return NULL;
 
 	list_for_each_advance_continue(iter, &root->children, reverse) {
@@ -1973,6 +1974,18 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
 			       fg->id);
 }
 
+struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev,
+						int n)
+{
+	struct mlx5_flow_steering *steering = dev->priv.steering;
+
+	if (!steering || !steering->fdb_sub_ns)
+		return NULL;
+
+	return steering->fdb_sub_ns[n];
+}
+EXPORT_SYMBOL(mlx5_get_fdb_sub_ns);
+
 struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 						    enum mlx5_flow_namespace_type type)
 {
@@ -2051,8 +2064,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d
 	}
 }
 
-static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
-				      unsigned int prio, int num_levels)
+static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns,
+				       unsigned int prio,
+				       int num_levels,
+				       enum fs_node_type type)
 {
 	struct fs_prio *fs_prio;
 
@@ -2060,7 +2075,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
 	if (!fs_prio)
 		return ERR_PTR(-ENOMEM);
 
-	fs_prio->node.type = FS_TYPE_PRIO;
+	fs_prio->node.type = type;
 	tree_init_node(&fs_prio->node, NULL, del_sw_prio);
 	tree_add_node(&fs_prio->node, &ns->node);
 	fs_prio->num_levels = num_levels;
@@ -2070,6 +2085,19 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
 	return fs_prio;
 }
 
+static struct fs_prio *fs_create_prio_chained(struct mlx5_flow_namespace *ns,
+					      unsigned int prio,
+					      int num_levels)
+{
+	return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO_CHAINS);
+}
+
+static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
+				      unsigned int prio, int num_levels)
+{
+	return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO);
+}
+
 static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace
 						     *ns)
 {
@@ -2374,6 +2402,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
 	cleanup_egress_acls_root_ns(dev);
 	cleanup_ingress_acls_root_ns(dev);
 	cleanup_root_ns(steering->fdb_root_ns);
+	steering->fdb_root_ns = NULL;
+	kfree(steering->fdb_sub_ns);
+	steering->fdb_sub_ns = NULL;
 	cleanup_root_ns(steering->sniffer_rx_root_ns);
 	cleanup_root_ns(steering->sniffer_tx_root_ns);
 	cleanup_root_ns(steering->egress_root_ns);
@@ -2419,27 +2450,64 @@ static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
 
 static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
 {
-	struct fs_prio *prio;
+	struct mlx5_flow_namespace *ns;
+	struct fs_prio *maj_prio;
+	struct fs_prio *min_prio;
+	int levels;
+	int chain;
+	int prio;
+	int err;
 
 	steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB);
 	if (!steering->fdb_root_ns)
 		return -ENOMEM;
 
-	prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 2);
-	if (IS_ERR(prio))
+	steering->fdb_sub_ns = kzalloc(sizeof(steering->fdb_sub_ns) *
+				       FDB_MAX_CHAIN + 1, GFP_KERNEL);
+	if (!steering->fdb_sub_ns)
+		return -ENOMEM;
+
+	levels = 2 * FDB_MAX_PRIO * (FDB_MAX_CHAIN + 1);
+	maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, 0,
+					  levels);
+	if (IS_ERR(maj_prio)) {
+		err = PTR_ERR(maj_prio);
 		goto out_err;
+	}
 
-	prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1);
-	if (IS_ERR(prio))
+	for (chain = 0; chain <= FDB_MAX_CHAIN; chain++) {
+		ns = fs_create_namespace(maj_prio);
+		if (IS_ERR(ns)) {
+			err = PTR_ERR(ns);
+			goto out_err;
+		}
+
+		for (prio = 0; prio < FDB_MAX_PRIO * (chain + 1); prio++) {
+			min_prio = fs_create_prio(ns, prio, 2);
+			if (IS_ERR(min_prio)) {
+				err = PTR_ERR(min_prio);
+				goto out_err;
+			}
+		}
+
+		steering->fdb_sub_ns[chain] = ns;
+	}
+
+	maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1);
+	if (IS_ERR(maj_prio)) {
+		err = PTR_ERR(maj_prio);
 		goto out_err;
+	}
 
 	set_prio_attrs(steering->fdb_root_ns);
 	return 0;
 
 out_err:
 	cleanup_root_ns(steering->fdb_root_ns);
+	kfree(steering->fdb_sub_ns);
+	steering->fdb_sub_ns = NULL;
 	steering->fdb_root_ns = NULL;
-	return PTR_ERR(prio);
+	return err;
 }
 
 static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index a06f83c0c2b6..b51ad217da32 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -38,9 +38,21 @@
 #include <linux/rhashtable.h>
 #include <linux/llist.h>
 
+/* FS_TYPE_PRIO_CHAINS is a PRIO that will have namespaces only,
+ * and those are in parallel to one another when going over them to connect
+ * a new flow table. Meaning the last flow table in a TYPE_PRIO prio in one
+ * parallel namespace will not automatically connect to the first flow table
+ * found in any prio in any next namespace, but skip the entire containing
+ * TYPE_PRIO_CHAINS prio.
+ *
+ * This is used to implement tc chains, each chain of prios is a different
+ * namespace inside a containing TYPE_PRIO_CHAINS prio.
+ */
+
 enum fs_node_type {
 	FS_TYPE_NAMESPACE,
 	FS_TYPE_PRIO,
+	FS_TYPE_PRIO_CHAINS,
 	FS_TYPE_FLOW_TABLE,
 	FS_TYPE_FLOW_GROUP,
 	FS_TYPE_FLOW_ENTRY,
@@ -73,6 +85,7 @@ struct mlx5_flow_steering {
 	struct kmem_cache               *ftes_cache;
 	struct mlx5_flow_root_namespace *root_ns;
 	struct mlx5_flow_root_namespace *fdb_root_ns;
+	struct mlx5_flow_namespace	**fdb_sub_ns;
 	struct mlx5_flow_root_namespace **esw_egress_root_ns;
 	struct mlx5_flow_root_namespace **esw_ingress_root_ns;
 	struct mlx5_flow_root_namespace	*sniffer_tx_root_ns;
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index a5fc62184195..f8d00872c7d3 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -101,6 +101,8 @@ struct mlx5_flow_destination {
 	};
 };
 
+struct mlx5_flow_namespace *
+mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev, int n);
 struct mlx5_flow_namespace *
 mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
 			enum mlx5_flow_namespace_type type);
-- 
cgit v1.2.3


From d5634fee245f9e92787e3a34ef621fc12b2cbf16 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@mellanox.com>
Date: Thu, 20 Sep 2018 12:17:48 +0200
Subject: net/mlx5: Add a no-append flow insertion mode

If no-append flag is set, we will add a new FTE, instead of appending
the actions of the inserted rule when the same match already exists.

While here, move the has_flow_tag boolean indicator to be a flag too.

This patch doesn't change any functionality.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanmox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c                    |  6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c      |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c    |  9 ++++++++-
 include/linux/mlx5/fs.h                              | 14 +++++++++++---
 5 files changed, 24 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 5ced0cc46ba1..af32899bb72a 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2793,7 +2793,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
 			return -EINVAL;
 
 		action->flow_tag = ib_spec->flow_tag.tag_id;
-		action->has_flow_tag = true;
+		action->flags |= FLOW_ACT_HAS_TAG;
 		break;
 	case IB_FLOW_SPEC_ACTION_DROP:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
@@ -2886,7 +2886,7 @@ is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
 		return egress ? VALID_SPEC_INVALID : VALID_SPEC_NA;
 
 	return is_crypto && is_ipsec &&
-		(!egress || (!is_drop && !flow_act->has_flow_tag)) ?
+		(!egress || (!is_drop && !(flow_act->flags & FLOW_ACT_HAS_TAG))) ?
 		VALID_SPEC_VALID : VALID_SPEC_INVALID;
 }
 
@@ -3349,7 +3349,7 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 					MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
 	}
 
-	if (flow_act.has_flow_tag &&
+	if ((flow_act.flags & FLOW_ACT_HAS_TAG)  &&
 	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
 	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
 		mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 6c04e11f9a05..a9c68b7859b4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -684,9 +684,9 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 	struct mlx5_flow_destination dest[2] = {};
 	struct mlx5_flow_act flow_act = {
 		.action = attr->action,
-		.has_flow_tag = true,
 		.flow_tag = attr->flow_tag,
 		.reformat_id = 0,
+		.flags    = FLOW_ACT_HAS_TAG,
 	};
 	struct mlx5_fc *counter = NULL;
 	bool table_created = false;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 5645a4facad2..28aa8c968a80 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -650,7 +650,7 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
 	    (match_criteria_enable &
 	     ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) ||
 	    (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) ||
-	     flow_act->has_flow_tag)
+	     (flow_act->flags & FLOW_ACT_HAS_TAG))
 		return false;
 
 	return true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 7eb6d58733ac..67ba4c975d81 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1428,7 +1428,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act
 		return -EEXIST;
 	}
 
-	if (flow_act->has_flow_tag &&
+	if ((flow_act->flags & FLOW_ACT_HAS_TAG) &&
 	    fte->action.flow_tag != flow_act->flow_tag) {
 		mlx5_core_warn(get_dev(&fte->node),
 			       "FTE flow tag %u already exists with different flow tag %u\n",
@@ -1628,6 +1628,8 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
 
 search_again_locked:
 	version = matched_fgs_get_version(match_head);
+	if (flow_act->flags & FLOW_ACT_NO_APPEND)
+		goto skip_search;
 	/* Try to find a fg that already contains a matching fte */
 	list_for_each_entry(iter, match_head, list) {
 		struct fs_fte *fte_tmp;
@@ -1644,6 +1646,11 @@ search_again_locked:
 		return rule;
 	}
 
+skip_search:
+	/* No group with matching fte found, or we skipped the search.
+	 * Try to add a new fte to any matching fg.
+	 */
+
 	/* Check the ft version, for case that new flow group
 	 * was added while the fgs weren't locked
 	 */
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index f8d00872c7d3..5660f07d3be0 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -158,20 +158,28 @@ struct mlx5_fs_vlan {
 
 #define MLX5_FS_VLAN_DEPTH	2
 
+enum {
+	FLOW_ACT_HAS_TAG   = BIT(0),
+	FLOW_ACT_NO_APPEND = BIT(1),
+};
+
 struct mlx5_flow_act {
 	u32 action;
-	bool has_flow_tag;
 	u32 flow_tag;
 	u32 reformat_id;
 	u32 modify_id;
 	uintptr_t esp_id;
+	u32 flags;
 	struct mlx5_fs_vlan vlan[MLX5_FS_VLAN_DEPTH];
 	struct ib_counters *counters;
 };
 
 #define MLX5_DECLARE_FLOW_ACT(name) \
-	struct mlx5_flow_act name = {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\
-				     MLX5_FS_DEFAULT_FLOW_TAG, 0, 0}
+	struct mlx5_flow_act name = { .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\
+				      .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, \
+				      .reformat_id = 0, \
+				      .modify_id = 0, \
+				      .flags =  0, }
 
 /* Single destination per rule.
  * Group ID is implied by the match criteria.
-- 
cgit v1.2.3


From 82385b0d2d2504aee51aa3fb40ebfb03603f64c3 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Wed, 17 Oct 2018 15:01:37 +0200
Subject: net: skbuff.h: Mark expected switch fall-throughs

In preparation to enabling -Wimplicit-fallthrough, mark switch cases
where we are expecting to fall through.

Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 119d092c6b13..0ba687454267 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3505,13 +3505,19 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a,
 #define __it(x, op) (x -= sizeof(u##op))
 #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op))
 	case 32: diffs |= __it_diff(a, b, 64);
+		 /* fall through */
 	case 24: diffs |= __it_diff(a, b, 64);
+		 /* fall through */
 	case 16: diffs |= __it_diff(a, b, 64);
+		 /* fall through */
 	case  8: diffs |= __it_diff(a, b, 64);
 		break;
 	case 28: diffs |= __it_diff(a, b, 64);
+		 /* fall through */
 	case 20: diffs |= __it_diff(a, b, 64);
+		 /* fall through */
 	case 12: diffs |= __it_diff(a, b, 64);
+		 /* fall through */
 	case  4: diffs |= __it_diff(a, b, 32);
 		break;
 	}
-- 
cgit v1.2.3


From eb66ae030829605d61fbef1909ce310e29f78821 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 12 Oct 2018 15:22:59 -0700
Subject: mremap: properly flush TLB before releasing the page

Jann Horn points out that our TLB flushing was subtly wrong for the
mremap() case.  What makes mremap() special is that we don't follow the
usual "add page to list of pages to be freed, then flush tlb, and then
free pages".  No, mremap() obviously just _moves_ the page from one page
table location to another.

That matters, because mremap() thus doesn't directly control the
lifetime of the moved page with a freelist: instead, the lifetime of the
page is controlled by the page table locking, that serializes access to
the entry.

As a result, we need to flush the TLB not just before releasing the lock
for the source location (to avoid any concurrent accesses to the entry),
but also before we release the destination page table lock (to avoid the
TLB being flushed after somebody else has already done something to that
page).

This also makes the whole "need_flush" logic unnecessary, since we now
always end up flushing the TLB for every valid entry.

Reported-and-tested-by: Jann Horn <jannh@google.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Tested-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/huge_mm.h |  2 +-
 mm/huge_memory.c        | 10 ++++------
 mm/mremap.c             | 30 +++++++++++++-----------------
 3 files changed, 18 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 99c19b06d9a4..fdcb45999b26 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -43,7 +43,7 @@ extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned char *vec);
 extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 			 unsigned long new_addr, unsigned long old_end,
-			 pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush);
+			 pmd_t *old_pmd, pmd_t *new_pmd);
 extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 			unsigned long addr, pgprot_t newprot,
 			int prot_numa);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 58269f8ba7c4..deed97fba979 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1780,7 +1780,7 @@ static pmd_t move_soft_dirty_pmd(pmd_t pmd)
 
 bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		  unsigned long new_addr, unsigned long old_end,
-		  pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush)
+		  pmd_t *old_pmd, pmd_t *new_pmd)
 {
 	spinlock_t *old_ptl, *new_ptl;
 	pmd_t pmd;
@@ -1811,7 +1811,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		if (new_ptl != old_ptl)
 			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
 		pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
-		if (pmd_present(pmd) && pmd_dirty(pmd))
+		if (pmd_present(pmd))
 			force_flush = true;
 		VM_BUG_ON(!pmd_none(*new_pmd));
 
@@ -1822,12 +1822,10 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		}
 		pmd = move_soft_dirty_pmd(pmd);
 		set_pmd_at(mm, new_addr, new_pmd, pmd);
-		if (new_ptl != old_ptl)
-			spin_unlock(new_ptl);
 		if (force_flush)
 			flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
-		else
-			*need_flush = true;
+		if (new_ptl != old_ptl)
+			spin_unlock(new_ptl);
 		spin_unlock(old_ptl);
 		return true;
 	}
diff --git a/mm/mremap.c b/mm/mremap.c
index 5c2e18505f75..a9617e72e6b7 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -115,7 +115,7 @@ static pte_t move_soft_dirty_pte(pte_t pte)
 static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		unsigned long old_addr, unsigned long old_end,
 		struct vm_area_struct *new_vma, pmd_t *new_pmd,
-		unsigned long new_addr, bool need_rmap_locks, bool *need_flush)
+		unsigned long new_addr, bool need_rmap_locks)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *old_pte, *new_pte, pte;
@@ -163,15 +163,17 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 
 		pte = ptep_get_and_clear(mm, old_addr, old_pte);
 		/*
-		 * If we are remapping a dirty PTE, make sure
+		 * If we are remapping a valid PTE, make sure
 		 * to flush TLB before we drop the PTL for the
-		 * old PTE or we may race with page_mkclean().
+		 * PTE.
 		 *
-		 * This check has to be done after we removed the
-		 * old PTE from page tables or another thread may
-		 * dirty it after the check and before the removal.
+		 * NOTE! Both old and new PTL matter: the old one
+		 * for racing with page_mkclean(), the new one to
+		 * make sure the physical page stays valid until
+		 * the TLB entry for the old mapping has been
+		 * flushed.
 		 */
-		if (pte_present(pte) && pte_dirty(pte))
+		if (pte_present(pte))
 			force_flush = true;
 		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
 		pte = move_soft_dirty_pte(pte);
@@ -179,13 +181,11 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	}
 
 	arch_leave_lazy_mmu_mode();
+	if (force_flush)
+		flush_tlb_range(vma, old_end - len, old_end);
 	if (new_ptl != old_ptl)
 		spin_unlock(new_ptl);
 	pte_unmap(new_pte - 1);
-	if (force_flush)
-		flush_tlb_range(vma, old_end - len, old_end);
-	else
-		*need_flush = true;
 	pte_unmap_unlock(old_pte - 1, old_ptl);
 	if (need_rmap_locks)
 		drop_rmap_locks(vma);
@@ -198,7 +198,6 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 {
 	unsigned long extent, next, old_end;
 	pmd_t *old_pmd, *new_pmd;
-	bool need_flush = false;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 
@@ -229,8 +228,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 				if (need_rmap_locks)
 					take_rmap_locks(vma);
 				moved = move_huge_pmd(vma, old_addr, new_addr,
-						    old_end, old_pmd, new_pmd,
-						    &need_flush);
+						    old_end, old_pmd, new_pmd);
 				if (need_rmap_locks)
 					drop_rmap_locks(vma);
 				if (moved)
@@ -246,10 +244,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 		if (extent > next - new_addr)
 			extent = next - new_addr;
 		move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
-			  new_pmd, new_addr, need_rmap_locks, &need_flush);
+			  new_pmd, new_addr, need_rmap_locks);
 	}
-	if (need_flush)
-		flush_tlb_range(vma, old_end-len, old_addr);
 
 	mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
 
-- 
cgit v1.2.3


From e5089c2c73a157997eb6ecca982adea9d0673075 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 3 Oct 2018 16:38:16 +0200
Subject: PM / Domains: Document flags for genpd

The current documented description of the GENPD_FLAG_* flags, are too
simplified, so let's extend them.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_domain.h | 35 ++++++++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 776c546d581a..3b5d7280e52e 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -17,11 +17,36 @@
 #include <linux/notifier.h>
 #include <linux/spinlock.h>
 
-/* Defines used for the flags field in the struct generic_pm_domain */
-#define GENPD_FLAG_PM_CLK	 (1U << 0) /* PM domain uses PM clk */
-#define GENPD_FLAG_IRQ_SAFE	 (1U << 1) /* PM domain operates in atomic */
-#define GENPD_FLAG_ALWAYS_ON	 (1U << 2) /* PM domain is always powered on */
-#define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3) /* Keep devices active if wakeup */
+/*
+ * Flags to control the behaviour of a genpd.
+ *
+ * These flags may be set in the struct generic_pm_domain's flags field by a
+ * genpd backend driver. The flags must be set before it calls pm_genpd_init(),
+ * which initializes a genpd.
+ *
+ * GENPD_FLAG_PM_CLK:		Instructs genpd to use the PM clk framework,
+ *				while powering on/off attached devices.
+ *
+ * GENPD_FLAG_IRQ_SAFE:		This informs genpd that its backend callbacks,
+ *				->power_on|off(), doesn't sleep. Hence, these
+ *				can be invoked from within atomic context, which
+ *				enables genpd to power on/off the PM domain,
+ *				even when pm_runtime_is_irq_safe() returns true,
+ *				for any of its attached devices. Note that, a
+ *				genpd having this flag set, requires its
+ *				masterdomains to also have it set.
+ *
+ * GENPD_FLAG_ALWAYS_ON:	Instructs genpd to always keep the PM domain
+ *				powered on.
+ *
+ * GENPD_FLAG_ACTIVE_WAKEUP:	Instructs genpd to keep the PM domain powered
+ *				on, in case any of its attached devices is used
+ *				in the wakeup path to serve system wakeups.
+ */
+#define GENPD_FLAG_PM_CLK	 (1U << 0)
+#define GENPD_FLAG_IRQ_SAFE	 (1U << 1)
+#define GENPD_FLAG_ALWAYS_ON	 (1U << 2)
+#define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3)
 
 enum gpd_status {
 	GPD_STATE_ACTIVE = 0,	/* PM domain is active */
-- 
cgit v1.2.3


From f349b0a3e1f0d184374936f1b2a49352f8a4b1c8 Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Tue, 9 Oct 2018 22:13:42 +0200
Subject: dm: add dm_table_device_name()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a shortcut for dm_device_name(dm_table_get_md(t)).

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 drivers/md/dm-table.c         | 6 ++++++
 include/linux/device-mapper.h | 1 +
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 618edfc3846f..49ab0cbef739 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -2059,6 +2059,12 @@ struct mapped_device *dm_table_get_md(struct dm_table *t)
 }
 EXPORT_SYMBOL(dm_table_get_md);
 
+const char *dm_table_device_name(struct dm_table *t)
+{
+	return dm_device_name(t->md);
+}
+EXPORT_SYMBOL_GPL(dm_table_device_name);
+
 void dm_table_run_md_queue_async(struct dm_table *t)
 {
 	struct mapped_device *md;
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 8d937754aa0c..d7bee8669f10 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -489,6 +489,7 @@ sector_t dm_table_get_size(struct dm_table *t);
 unsigned int dm_table_get_num_targets(struct dm_table *t);
 fmode_t dm_table_get_mode(struct dm_table *t);
 struct mapped_device *dm_table_get_md(struct dm_table *t);
+const char *dm_table_device_name(struct dm_table *t);
 
 /*
  * Trigger an event.
-- 
cgit v1.2.3


From 4972e6fa3a04032830bc3d6bb343d08ab3546773 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Wed, 12 Sep 2018 15:36:41 +0300
Subject: net/mlx5: Refactor fragmented buffer struct fields and init flow

Take struct mlx5_frag_buf out of mlx5_frag_buf_ctrl, as it is not
needed to manage and control the datapath of the fragmented buffers API.

struct mlx5_frag_buf contains control info to manage the allocation
and de-allocation of the fragmented buffer.
Its fields are not relevant for datapath, so here I take them out of the
struct mlx5_frag_buf_ctrl, except for the fragments array itself.

In addition, modified mlx5_fill_fbc to initialise the frags pointers
as well. This implies that the buffer must be allocated before the
function is called.

A set of type-specific *_get_byte_size() functions are replaced by
a generic one.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/cq.c              |  31 +++----
 drivers/infiniband/hw/mlx5/mlx5_ib.h         |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/wq.c | 120 +++++++++++----------------
 include/linux/mlx5/driver.h                  |  22 ++---
 4 files changed, 69 insertions(+), 105 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 088205d7f1a1..cca1820802b8 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -393,7 +393,7 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
 
 static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
 {
-	mlx5_frag_buf_free(dev->mdev, &buf->fbc.frag_buf);
+	mlx5_frag_buf_free(dev->mdev, &buf->frag_buf);
 }
 
 static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
@@ -728,16 +728,11 @@ static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
 			     int nent,
 			     int cqe_size)
 {
-	struct mlx5_frag_buf_ctrl *c = &buf->fbc;
-	struct mlx5_frag_buf *frag_buf = &c->frag_buf;
-	u32 cqc_buff[MLX5_ST_SZ_DW(cqc)] = {0};
+	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
+	u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0);
+	u8 log_wq_sz     = ilog2(cqe_size);
 	int err;
 
-	MLX5_SET(cqc, cqc_buff, log_cq_size, ilog2(cqe_size));
-	MLX5_SET(cqc, cqc_buff, cqe_sz, (cqe_size == 128) ? 1 : 0);
-
-	mlx5_core_init_cq_frag_buf(&buf->fbc, cqc_buff);
-
 	err = mlx5_frag_buf_alloc_node(dev->mdev,
 				       nent * cqe_size,
 				       frag_buf,
@@ -745,6 +740,8 @@ static int alloc_cq_frag_buf(struct mlx5_ib_dev *dev,
 	if (err)
 		return err;
 
+	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
+
 	buf->cqe_size = cqe_size;
 	buf->nent = nent;
 
@@ -934,7 +931,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 
 	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
 		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
-		 cq->buf.fbc.frag_buf.npages;
+		 cq->buf.frag_buf.npages;
 	*cqb = kvzalloc(*inlen, GFP_KERNEL);
 	if (!*cqb) {
 		err = -ENOMEM;
@@ -942,11 +939,11 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 	}
 
 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
-	mlx5_fill_page_frag_array(&cq->buf.fbc.frag_buf, pas);
+	mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas);
 
 	cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
 	MLX5_SET(cqc, cqc, log_page_size,
-		 cq->buf.fbc.frag_buf.page_shift -
+		 cq->buf.frag_buf.page_shift -
 		 MLX5_ADAPTER_PAGE_SHIFT);
 
 	*index = dev->mdev->priv.uar->index;
@@ -1365,11 +1362,10 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 		cqe_size = 64;
 		err = resize_kernel(dev, cq, entries, cqe_size);
 		if (!err) {
-			struct mlx5_frag_buf_ctrl *c;
+			struct mlx5_frag_buf *frag_buf = &cq->resize_buf->frag_buf;
 
-			c = &cq->resize_buf->fbc;
-			npas = c->frag_buf.npages;
-			page_shift = c->frag_buf.page_shift;
+			npas = frag_buf->npages;
+			page_shift = frag_buf->page_shift;
 		}
 	}
 
@@ -1390,8 +1386,7 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 		mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
 				     pas, 0);
 	else
-		mlx5_fill_page_frag_array(&cq->resize_buf->fbc.frag_buf,
-					  pas);
+		mlx5_fill_page_frag_array(&cq->resize_buf->frag_buf, pas);
 
 	MLX5_SET(modify_cq_in, in,
 		 modify_field_select_resize_field_select.resize_field_select.resize_field_select,
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 320d4dfe8c2f..289c18db2611 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -435,6 +435,7 @@ struct mlx5_ib_qp {
 
 struct mlx5_ib_cq_buf {
 	struct mlx5_frag_buf_ctrl fbc;
+	struct mlx5_frag_buf    frag_buf;
 	struct ib_umem		*umem;
 	int			cqe_size;
 	int			nent;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 68e7f8df2a6d..9007e91ad53f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -54,54 +54,37 @@ u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
 	return (u32)wq->fbc.sz_m1 + 1;
 }
 
-static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq)
+static u32 wq_get_byte_sz(u8 log_sz, u8 log_stride)
 {
-	return mlx5_wq_cyc_get_size(wq) << wq->fbc.log_stride;
-}
-
-static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq)
-{
-	return mlx5_wq_cyc_get_byte_size(&wq->rq) +
-	       mlx5_wq_cyc_get_byte_size(&wq->sq);
-}
-
-static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq)
-{
-	return mlx5_cqwq_get_size(wq) << wq->fbc.log_stride;
-}
-
-static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq)
-{
-	return mlx5_wq_ll_get_size(wq) << wq->fbc.log_stride;
+	return ((u32)1 << log_sz) << log_stride;
 }
 
 int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		       void *wqc, struct mlx5_wq_cyc *wq,
 		       struct mlx5_wq_ctrl *wq_ctrl)
 {
+	u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride);
+	u8 log_wq_sz     = MLX5_GET(wq, wqc, log_wq_sz);
 	struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
 	int err;
 
-	mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride),
-		      MLX5_GET(wq, wqc, log_wq_sz),
-		      fbc);
-	wq->sz    = wq->fbc.sz_m1 + 1;
-
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
 		mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
 		return err;
 	}
 
-	err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq),
+	wq->db  = wq_ctrl->db.db;
+
+	err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride),
 				       &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
 		mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
-	fbc->frag_buf = wq_ctrl->buf;
-	wq->db  = wq_ctrl->db.db;
+	mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc);
+	wq->sz = mlx5_wq_cyc_get_size(wq);
 
 	wq_ctrl->mdev = mdev;
 
@@ -113,46 +96,19 @@ err_db_free:
 	return err;
 }
 
-static void mlx5_qp_set_frag_buf(struct mlx5_frag_buf *buf,
-				 struct mlx5_wq_qp *qp)
-{
-	struct mlx5_frag_buf_ctrl *sq_fbc;
-	struct mlx5_frag_buf *rqb, *sqb;
-
-	rqb  = &qp->rq.fbc.frag_buf;
-	*rqb = *buf;
-	rqb->size   = mlx5_wq_cyc_get_byte_size(&qp->rq);
-	rqb->npages = DIV_ROUND_UP(rqb->size, PAGE_SIZE);
-
-	sq_fbc = &qp->sq.fbc;
-	sqb    = &sq_fbc->frag_buf;
-	*sqb   = *buf;
-	sqb->size   = mlx5_wq_cyc_get_byte_size(&qp->sq);
-	sqb->npages = DIV_ROUND_UP(sqb->size, PAGE_SIZE);
-	sqb->frags += rqb->npages; /* first part is for the rq */
-	if (sq_fbc->strides_offset)
-		sqb->frags--;
-}
-
 int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *qpc, struct mlx5_wq_qp *wq,
 		      struct mlx5_wq_ctrl *wq_ctrl)
 {
-	u16 sq_strides_offset;
-	u32 rq_pg_remainder;
-	int err;
+	u8 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4;
+	u8 log_rq_sz     = MLX5_GET(qpc, qpc, log_rq_size);
+	u8 log_sq_stride = ilog2(MLX5_SEND_WQE_BB);
+	u8 log_sq_sz     = MLX5_GET(qpc, qpc, log_sq_size);
 
-	mlx5_fill_fbc(MLX5_GET(qpc, qpc, log_rq_stride) + 4,
-		      MLX5_GET(qpc, qpc, log_rq_size),
-		      &wq->rq.fbc);
+	u32 rq_byte_size;
+	int err;
 
-	rq_pg_remainder   = mlx5_wq_cyc_get_byte_size(&wq->rq) % PAGE_SIZE;
-	sq_strides_offset = rq_pg_remainder / MLX5_SEND_WQE_BB;
 
-	mlx5_fill_fbc_offset(ilog2(MLX5_SEND_WQE_BB),
-			     MLX5_GET(qpc, qpc, log_sq_size),
-			     sq_strides_offset,
-			     &wq->sq.fbc);
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
@@ -160,14 +116,32 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		return err;
 	}
 
-	err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq),
+	err = mlx5_frag_buf_alloc_node(mdev,
+				       wq_get_byte_sz(log_rq_sz, log_rq_stride) +
+				       wq_get_byte_sz(log_sq_sz, log_sq_stride),
 				       &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
 		mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
-	mlx5_qp_set_frag_buf(&wq_ctrl->buf, wq);
+	mlx5_init_fbc(wq_ctrl->buf.frags, log_rq_stride, log_rq_sz, &wq->rq.fbc);
+
+	rq_byte_size = wq_get_byte_sz(log_rq_sz, log_rq_stride);
+
+	if (rq_byte_size < PAGE_SIZE) {
+		/* SQ starts within the same page of the RQ */
+		u16 sq_strides_offset = rq_byte_size / MLX5_SEND_WQE_BB;
+
+		mlx5_init_fbc_offset(wq_ctrl->buf.frags,
+				     log_sq_stride, log_sq_sz, sq_strides_offset,
+				     &wq->sq.fbc);
+	} else {
+		u16 rq_npages = rq_byte_size >> PAGE_SHIFT;
+
+		mlx5_init_fbc(wq_ctrl->buf.frags + rq_npages,
+			      log_sq_stride, log_sq_sz, &wq->sq.fbc);
+	}
 
 	wq->rq.db  = &wq_ctrl->db.db[MLX5_RCV_DBR];
 	wq->sq.db  = &wq_ctrl->db.db[MLX5_SND_DBR];
@@ -186,17 +160,19 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		     void *cqc, struct mlx5_cqwq *wq,
 		     struct mlx5_wq_ctrl *wq_ctrl)
 {
+	u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) + 6;
+	u8 log_wq_sz     = MLX5_GET(cqc, cqc, log_cq_size);
 	int err;
 
-	mlx5_core_init_cq_frag_buf(&wq->fbc, cqc);
-
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
 		mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
 		return err;
 	}
 
-	err = mlx5_frag_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
+	wq->db  = wq_ctrl->db.db;
+
+	err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride),
 				       &wq_ctrl->buf,
 				       param->buf_numa_node);
 	if (err) {
@@ -205,8 +181,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		goto err_db_free;
 	}
 
-	wq->fbc.frag_buf = wq_ctrl->buf;
-	wq->db  = wq_ctrl->db.db;
+	mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, &wq->fbc);
 
 	wq_ctrl->mdev = mdev;
 
@@ -222,30 +197,29 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      void *wqc, struct mlx5_wq_ll *wq,
 		      struct mlx5_wq_ctrl *wq_ctrl)
 {
+	u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride);
+	u8 log_wq_sz     = MLX5_GET(wq, wqc, log_wq_sz);
 	struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
 	struct mlx5_wqe_srq_next_seg *next_seg;
 	int err;
 	int i;
 
-	mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride),
-		      MLX5_GET(wq, wqc, log_wq_sz),
-		      fbc);
-
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
 		mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
 		return err;
 	}
 
-	err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq),
+	wq->db  = wq_ctrl->db.db;
+
+	err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride),
 				       &wq_ctrl->buf, param->buf_numa_node);
 	if (err) {
 		mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
 		goto err_db_free;
 	}
 
-	wq->fbc.frag_buf = wq_ctrl->buf;
-	wq->db  = wq_ctrl->db.db;
+	mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc);
 
 	for (i = 0; i < fbc->sz_m1; i++) {
 		next_seg = mlx5_wq_ll_get_wqe(wq, i);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 94ffd02af7cd..e10f61a1f77d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -357,7 +357,7 @@ struct mlx5_frag_buf {
 };
 
 struct mlx5_frag_buf_ctrl {
-	struct mlx5_frag_buf	frag_buf;
+	struct mlx5_buf_list   *frags;
 	u32			sz_m1;
 	u16			frag_sz_m1;
 	u16			strides_offset;
@@ -994,10 +994,12 @@ static inline u32 mlx5_base_mkey(const u32 key)
 	return key & 0xffffff00u;
 }
 
-static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz,
+static inline void mlx5_init_fbc_offset(struct mlx5_buf_list *frags,
+					u8 log_stride, u8 log_sz,
 					u16 strides_offset,
 					struct mlx5_frag_buf_ctrl *fbc)
 {
+	fbc->frags      = frags;
 	fbc->log_stride = log_stride;
 	fbc->log_sz     = log_sz;
 	fbc->sz_m1	= (1 << fbc->log_sz) - 1;
@@ -1006,18 +1008,11 @@ static inline void mlx5_fill_fbc_offset(u8 log_stride, u8 log_sz,
 	fbc->strides_offset = strides_offset;
 }
 
-static inline void mlx5_fill_fbc(u8 log_stride, u8 log_sz,
+static inline void mlx5_init_fbc(struct mlx5_buf_list *frags,
+				 u8 log_stride, u8 log_sz,
 				 struct mlx5_frag_buf_ctrl *fbc)
 {
-	mlx5_fill_fbc_offset(log_stride, log_sz, 0, fbc);
-}
-
-static inline void mlx5_core_init_cq_frag_buf(struct mlx5_frag_buf_ctrl *fbc,
-					      void *cqc)
-{
-	mlx5_fill_fbc(6 + MLX5_GET(cqc, cqc, cqe_sz),
-		      MLX5_GET(cqc, cqc, log_cq_size),
-		      fbc);
+	mlx5_init_fbc_offset(frags, log_stride, log_sz, 0, fbc);
 }
 
 static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
@@ -1028,8 +1023,7 @@ static inline void *mlx5_frag_buf_get_wqe(struct mlx5_frag_buf_ctrl *fbc,
 	ix  += fbc->strides_offset;
 	frag = ix >> fbc->log_frag_strides;
 
-	return fbc->frag_buf.frags[frag].buf +
-		((fbc->frag_sz_m1 & ix) << fbc->log_stride);
+	return fbc->frags[frag].buf + ((fbc->frag_sz_m1 & ix) << fbc->log_stride);
 }
 
 int mlx5_cmd_init(struct mlx5_core_dev *dev);
-- 
cgit v1.2.3


From 4b5b9c7d972e8a7b1e7691c7c921ec0d6dec33b9 Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayag@mellanox.com>
Date: Tue, 9 Oct 2018 14:16:43 +0300
Subject: net/mlx5: Add FEC fields to Port Phy Link Mode (PPLM) reg

Added FEC related fields to PPLM layout.
These fields are needed to set and query FEC policy
for different link speeds.

Signed-off-by: Shay Agroskin <shayag@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h   |  1 +
 include/linux/mlx5/mlx5_ifc.h | 39 ++++++++++++++++++++++++++++-----------
 2 files changed, 29 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index e10f61a1f77d..696ed3f7f894 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -133,6 +133,7 @@ enum {
 	MLX5_REG_PVLC		 = 0x500f,
 	MLX5_REG_PCMR		 = 0x5041,
 	MLX5_REG_PMLP		 = 0x5002,
+	MLX5_REG_PPLM		 = 0x5023,
 	MLX5_REG_PCAM		 = 0x507f,
 	MLX5_REG_NODE_DESC	 = 0x6001,
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 963611820006..47b09a742ae5 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -7828,20 +7828,34 @@ struct mlx5_ifc_pplr_reg_bits {
 
 struct mlx5_ifc_pplm_reg_bits {
 	u8         reserved_at_0[0x8];
-	u8         local_port[0x8];
-	u8         reserved_at_10[0x10];
+	u8	   local_port[0x8];
+	u8	   reserved_at_10[0x10];
 
-	u8         reserved_at_20[0x20];
+	u8	   reserved_at_20[0x20];
 
-	u8         port_profile_mode[0x8];
-	u8         static_port_profile[0x8];
-	u8         active_port_profile[0x8];
-	u8         reserved_at_58[0x8];
+	u8	   port_profile_mode[0x8];
+	u8	   static_port_profile[0x8];
+	u8	   active_port_profile[0x8];
+	u8	   reserved_at_58[0x8];
 
-	u8         retransmission_active[0x8];
-	u8         fec_mode_active[0x18];
+	u8	   retransmission_active[0x8];
+	u8	   fec_mode_active[0x18];
 
-	u8         reserved_at_80[0x20];
+	u8	   rs_fec_correction_bypass_cap[0x4];
+	u8	   reserved_at_84[0x8];
+	u8	   fec_override_cap_56g[0x4];
+	u8	   fec_override_cap_100g[0x4];
+	u8	   fec_override_cap_50g[0x4];
+	u8	   fec_override_cap_25g[0x4];
+	u8	   fec_override_cap_10g_40g[0x4];
+
+	u8	   rs_fec_correction_bypass_admin[0x4];
+	u8	   reserved_at_a4[0x8];
+	u8	   fec_override_admin_56g[0x4];
+	u8	   fec_override_admin_100g[0x4];
+	u8	   fec_override_admin_50g[0x4];
+	u8	   fec_override_admin_25g[0x4];
+	u8	   fec_override_admin_10g_40g[0x4];
 };
 
 struct mlx5_ifc_ppcnt_reg_bits {
@@ -8137,7 +8151,10 @@ struct mlx5_ifc_pcam_enhanced_features_bits {
 struct mlx5_ifc_pcam_regs_5000_to_507f_bits {
 	u8         port_access_reg_cap_mask_127_to_96[0x20];
 	u8         port_access_reg_cap_mask_95_to_64[0x20];
-	u8         port_access_reg_cap_mask_63_to_32[0x20];
+
+	u8         port_access_reg_cap_mask_63_to_36[0x1c];
+	u8         pplm[0x1];
+	u8         port_access_reg_cap_mask_34_to_32[0x3];
 
 	u8         port_access_reg_cap_mask_31_to_13[0x13];
 	u8         pbmc[0x1];
-- 
cgit v1.2.3


From 67daf1186086ad4b2ec09b8078b835936977d06a Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayag@mellanox.com>
Date: Sun, 30 Sep 2018 09:58:08 +0300
Subject: net/mlx5: Added "per_lane_error_counters" cap bit to PCAM

Added "Per lane raw errors" capability bit in
Ports Capabilities Mask (PCAM) enhanced features
layout.

This bit determines if the fields "phy_raw_errors_laneX"
in "Physical Layer statistical" counters group are supported.

Signed-off-by: Shay Agroskin <shayag@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 47b09a742ae5..dbff9ff28f2c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -8140,7 +8140,8 @@ struct mlx5_ifc_pcam_enhanced_features_bits {
 	u8         rx_icrc_encapsulated_counter[0x1];
 	u8	   reserved_at_6e[0x8];
 	u8         pfcc_mask[0x1];
-	u8         reserved_at_77[0x4];
+	u8         reserved_at_77[0x3];
+	u8         per_lane_error_counters[0x1];
 	u8         rx_buffer_fullness_counters[0x1];
 	u8         ptys_connector_type[0x1];
 	u8         reserved_at_7d[0x1];
-- 
cgit v1.2.3


From 01a14bda11add9dcd4a59200f13834d634559935 Mon Sep 17 00:00:00 2001
From: Waiman Long <longman@redhat.com>
Date: Thu, 18 Oct 2018 21:45:18 -0400
Subject: locking/lockdep: Make global debug_locks* variables read-mostly

Make the frequently used lockdep global variable debug_locks read-mostly.
As debug_locks_silent is sometime used together with debug_locks,
it is also made read-mostly so that they can be close together.

With false cacheline sharing, cacheline contention problem can happen
depending on what get put into the same cacheline as debug_locks.

Signed-off-by: Waiman Long <longman@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Link: http://lkml.kernel.org/r/1539913518-15598-2-git-send-email-longman@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/debug_locks.h | 4 ++--
 lib/debug_locks.c           | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index 120225e9a366..257ab3c92cb8 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -8,8 +8,8 @@
 
 struct task_struct;
 
-extern int debug_locks;
-extern int debug_locks_silent;
+extern int debug_locks __read_mostly;
+extern int debug_locks_silent __read_mostly;
 
 
 static inline int __debug_locks_off(void)
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index 124fdf238b3d..ce51749cc145 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -21,7 +21,7 @@
  * that would just muddy the log. So we report the first one and
  * shut up after that.
  */
-int debug_locks = 1;
+int debug_locks __read_mostly = 1;
 EXPORT_SYMBOL_GPL(debug_locks);
 
 /*
@@ -29,7 +29,7 @@ EXPORT_SYMBOL_GPL(debug_locks);
  * 'silent failure': nothing is printed to the console when
  * a locking bug is detected.
  */
-int debug_locks_silent;
+int debug_locks_silent __read_mostly;
 EXPORT_SYMBOL_GPL(debug_locks_silent);
 
 /*
-- 
cgit v1.2.3


From b65125c6acf38388d3342b37c18c3b6cc97eeb75 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 12 Apr 2018 14:49:23 +0200
Subject: swiotlb: mark is_swiotlb_buffer static

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/linux/swiotlb.h | 1 -
 kernel/dma/swiotlb.c    | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 965be92c33b5..7ef541ce8f34 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -121,7 +121,6 @@ static inline unsigned int swiotlb_max_segment(void) { return 0; }
 #endif
 
 extern void swiotlb_print_info(void);
-extern int is_swiotlb_buffer(phys_addr_t paddr);
 extern void swiotlb_set_max_segment(unsigned int);
 
 extern const struct dma_map_ops swiotlb_dma_ops;
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 9062b14bc7f4..26d3af52956f 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -429,7 +429,7 @@ void __init swiotlb_exit(void)
 	max_segment = 0;
 }
 
-int is_swiotlb_buffer(phys_addr_t paddr)
+static int is_swiotlb_buffer(phys_addr_t paddr)
 {
 	return paddr >= io_tlb_start && paddr < io_tlb_end;
 }
-- 
cgit v1.2.3


From dff8d6c1ed584de65aac40494d3e7468c50980c3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 16 Aug 2018 15:30:39 +0300
Subject: swiotlb: remove the overflow buffer

Like all other dma mapping drivers just return an error code instead
of an actual memory buffer.  The reason for the overflow buffer was
that at the time swiotlb was invented there was no way to check for
dma mapping errors, but this has long been fixed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/arm64/mm/dma-mapping.c       |  2 +-
 arch/powerpc/kernel/dma-swiotlb.c |  4 +--
 include/linux/dma-direct.h        |  2 ++
 include/linux/swiotlb.h           |  3 --
 kernel/dma/direct.c               |  2 --
 kernel/dma/swiotlb.c              | 59 ++-------------------------------------
 6 files changed, 8 insertions(+), 64 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 072c51fb07d7..8d91b927e09e 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -324,7 +324,7 @@ static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
 static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr)
 {
 	if (swiotlb)
-		return swiotlb_dma_mapping_error(hwdev, addr);
+		return dma_direct_mapping_error(hwdev, addr);
 	return 0;
 }
 
diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c
index 88f3963ca30f..5fc335f4d9cd 100644
--- a/arch/powerpc/kernel/dma-swiotlb.c
+++ b/arch/powerpc/kernel/dma-swiotlb.c
@@ -11,7 +11,7 @@
  *
  */
 
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
 #include <linux/memblock.h>
 #include <linux/pfn.h>
 #include <linux/of_platform.h>
@@ -59,7 +59,7 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = {
 	.sync_single_for_device = swiotlb_sync_single_for_device,
 	.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
 	.sync_sg_for_device = swiotlb_sync_sg_for_device,
-	.mapping_error = swiotlb_dma_mapping_error,
+	.mapping_error = dma_direct_mapping_error,
 	.get_required_mask = swiotlb_powerpc_get_required,
 };
 
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index fbca184ff5a0..bd73e7a91410 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -5,6 +5,8 @@
 #include <linux/dma-mapping.h>
 #include <linux/mem_encrypt.h>
 
+#define DIRECT_MAPPING_ERROR		0
+
 #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
 #include <asm/dma-direct.h>
 #else
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 7ef541ce8f34..f847c1b265c4 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -106,9 +106,6 @@ extern void
 swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
 			   int nelems, enum dma_data_direction dir);
 
-extern int
-swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
-
 extern int
 swiotlb_dma_supported(struct device *hwdev, u64 mask);
 
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 87a6bc2a96c0..f14c376937e5 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -14,8 +14,6 @@
 #include <linux/pfn.h>
 #include <linux/set_memory.h>
 
-#define DIRECT_MAPPING_ERROR		0
-
 /*
  * Most architectures use ZONE_DMA for the first 16 Megabytes, but
  * some use it for entirely different regions:
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 69bf305ee5f8..11dbcd80b4a6 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -72,13 +72,6 @@ static phys_addr_t io_tlb_start, io_tlb_end;
  */
 static unsigned long io_tlb_nslabs;
 
-/*
- * When the IOMMU overflows we return a fallback buffer. This sets the size.
- */
-static unsigned long io_tlb_overflow = 32*1024;
-
-static phys_addr_t io_tlb_overflow_buffer;
-
 /*
  * This is a free list describing the number of free entries available from
  * each index
@@ -126,7 +119,6 @@ setup_io_tlb_npages(char *str)
 	return 0;
 }
 early_param("swiotlb", setup_io_tlb_npages);
-/* make io_tlb_overflow tunable too? */
 
 unsigned long swiotlb_nr_tbl(void)
 {
@@ -194,16 +186,10 @@ void __init swiotlb_update_mem_attributes(void)
 	bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
 	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
 	memset(vaddr, 0, bytes);
-
-	vaddr = phys_to_virt(io_tlb_overflow_buffer);
-	bytes = PAGE_ALIGN(io_tlb_overflow);
-	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-	memset(vaddr, 0, bytes);
 }
 
 int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 {
-	void *v_overflow_buffer;
 	unsigned long i, bytes;
 
 	bytes = nslabs << IO_TLB_SHIFT;
@@ -212,17 +198,6 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 	io_tlb_start = __pa(tlb);
 	io_tlb_end = io_tlb_start + bytes;
 
-	/*
-	 * Get the overflow emergency buffer
-	 */
-	v_overflow_buffer = memblock_virt_alloc_low_nopanic(
-						PAGE_ALIGN(io_tlb_overflow),
-						PAGE_SIZE);
-	if (!v_overflow_buffer)
-		return -ENOMEM;
-
-	io_tlb_overflow_buffer = __pa(v_overflow_buffer);
-
 	/*
 	 * Allocate and initialize the free list array.  This array is used
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
@@ -330,7 +305,6 @@ int
 swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 {
 	unsigned long i, bytes;
-	unsigned char *v_overflow_buffer;
 
 	bytes = nslabs << IO_TLB_SHIFT;
 
@@ -341,19 +315,6 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
 	memset(tlb, 0, bytes);
 
-	/*
-	 * Get the overflow emergency buffer
-	 */
-	v_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
-						     get_order(io_tlb_overflow));
-	if (!v_overflow_buffer)
-		goto cleanup2;
-
-	set_memory_decrypted((unsigned long)v_overflow_buffer,
-			io_tlb_overflow >> PAGE_SHIFT);
-	memset(v_overflow_buffer, 0, io_tlb_overflow);
-	io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
-
 	/*
 	 * Allocate and initialize the free list array.  This array is used
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
@@ -390,10 +351,6 @@ cleanup4:
 	                                                 sizeof(int)));
 	io_tlb_list = NULL;
 cleanup3:
-	free_pages((unsigned long)v_overflow_buffer,
-		   get_order(io_tlb_overflow));
-	io_tlb_overflow_buffer = 0;
-cleanup2:
 	io_tlb_end = 0;
 	io_tlb_start = 0;
 	io_tlb_nslabs = 0;
@@ -407,8 +364,6 @@ void __init swiotlb_exit(void)
 		return;
 
 	if (late_alloc) {
-		free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer),
-			   get_order(io_tlb_overflow));
 		free_pages((unsigned long)io_tlb_orig_addr,
 			   get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
 		free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
@@ -416,8 +371,6 @@ void __init swiotlb_exit(void)
 		free_pages((unsigned long)phys_to_virt(io_tlb_start),
 			   get_order(io_tlb_nslabs << IO_TLB_SHIFT));
 	} else {
-		memblock_free_late(io_tlb_overflow_buffer,
-				   PAGE_ALIGN(io_tlb_overflow));
 		memblock_free_late(__pa(io_tlb_orig_addr),
 				   PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
 		memblock_free_late(__pa(io_tlb_list),
@@ -790,7 +743,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	/* Oh well, have to allocate and map a bounce buffer. */
 	map = map_single(dev, phys, size, dir, attrs);
 	if (map == SWIOTLB_MAP_ERROR)
-		return __phys_to_dma(dev, io_tlb_overflow_buffer);
+		return DIRECT_MAPPING_ERROR;
 
 	dev_addr = __phys_to_dma(dev, map);
 
@@ -801,7 +754,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	attrs |= DMA_ATTR_SKIP_CPU_SYNC;
 	swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
 
-	return __phys_to_dma(dev, io_tlb_overflow_buffer);
+	return DIRECT_MAPPING_ERROR;
 }
 
 /*
@@ -985,12 +938,6 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
 	swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
 }
 
-int
-swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
-{
-	return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer));
-}
-
 /*
  * Return whether the given device DMA address mask can be supported
  * properly.  For example, if your device can only drive the low 24-bits
@@ -1033,7 +980,7 @@ void swiotlb_free(struct device *dev, size_t size, void *vaddr,
 }
 
 const struct dma_map_ops swiotlb_dma_ops = {
-	.mapping_error		= swiotlb_dma_mapping_error,
+	.mapping_error		= dma_direct_mapping_error,
 	.alloc			= swiotlb_alloc,
 	.free			= swiotlb_free,
 	.sync_single_for_cpu	= swiotlb_sync_single_for_cpu,
-- 
cgit v1.2.3


From fe0640eb30b7da261ae84d252ed9ed3c7e68dfd8 Mon Sep 17 00:00:00 2001
From: "ndesaulniers@google.com" <ndesaulniers@google.com>
Date: Mon, 15 Oct 2018 10:22:21 -0700
Subject: compiler.h: update definition of unreachable()

Fixes the objtool warning seen with Clang:
arch/x86/mm/fault.o: warning: objtool: no_context()+0x220: unreachable
instruction

Fixes commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h
mutually exclusive")

Josh noted that the fallback definition was meant to work around a
pre-gcc-4.6 bug. GCC still needs to work around
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365, so compiler-gcc.h
defines its own version of unreachable().  Clang and ICC can use this
shared definition.

Link: https://github.com/ClangBuiltLinux/linux/issues/204
Suggested-by: Andy Lutomirski <luto@amacapital.net>
Suggested-by: Josh Poimboeuf <jpoimboe@redhat.com>
Tested-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index b5fb034fa6fa..2e0b6322588b 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -124,7 +124,10 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
 # define ASM_UNREACHABLE
 #endif
 #ifndef unreachable
-# define unreachable() do { annotate_reachable(); do { } while (1); } while (0)
+# define unreachable() do {		\
+	annotate_unreachable();		\
+	__builtin_unreachable();	\
+} while (0)
 #endif
 
 /*
-- 
cgit v1.2.3


From 1ff2fea5e30ca15752777441ecb64a169fe22e9e Mon Sep 17 00:00:00 2001
From: "ndesaulniers@google.com" <ndesaulniers@google.com>
Date: Mon, 15 Oct 2018 14:24:27 -0700
Subject: compiler-gcc: remove comment about gcc 4.5 from unreachable()

Remove the comment about being unable to detect __builtin_unreachable.
__builtin_unreachable was implemented in the GCC 4.5 timeframe. The
kernel's minimum supported version of GCC is 4.6 since commit
cafa0010cd51 ("Raise the minimum required gcc version to 4.6"). Commit
cb984d101b30 ("compiler-gcc: integrate the various compiler-gcc[345].h
files") shows that unreachable() had different guards based on GCC
version.

Suggested-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler-gcc.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index cfac027e1625..2010493e1040 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -96,10 +96,6 @@
  * Mark a position in code as unreachable.  This can be used to
  * suppress control flow warnings after asm blocks that transfer
  * control elsewhere.
- *
- * Early snapshots of gcc 4.5 don't support this and we can't detect
- * this in the preprocessor, but we can live with this because they're
- * unreleased.  Really, we need to have autoconf for the kernel.
  */
 #define unreachable() \
 	do {					\
-- 
cgit v1.2.3


From fafadcd16595c1df82df399f62421718ec9bf70a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 30 Sep 2018 16:13:33 -0700
Subject: swiotlb: don't dip into swiotlb pool for coherent allocations

All architectures that support swiotlb also have a zone that backs up
these less than full addressing allocations (usually ZONE_DMA32).

Because of that it is rather pointless to fall back to the global swiotlb
buffer if the normal dma direct allocation failed - the only thing this
will do is to eat up bounce buffers that would be more useful to serve
streaming mappings.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/arm64/mm/dma-mapping.c |   6 +--
 include/linux/swiotlb.h     |   5 ---
 kernel/dma/swiotlb.c        | 105 +-------------------------------------------
 3 files changed, 5 insertions(+), 111 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 8d91b927e09e..eee6cfcfde9e 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -112,7 +112,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
 		return addr;
 	}
 
-	ptr = swiotlb_alloc(dev, size, dma_handle, flags, attrs);
+	ptr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
 	if (!ptr)
 		goto no_mem;
 
@@ -133,7 +133,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
 	return coherent_ptr;
 
 no_map:
-	swiotlb_free(dev, size, ptr, *dma_handle, attrs);
+	dma_direct_free_pages(dev, size, ptr, *dma_handle, attrs);
 no_mem:
 	return NULL;
 }
@@ -151,7 +151,7 @@ static void __dma_free(struct device *dev, size_t size,
 			return;
 		vunmap(vaddr);
 	}
-	swiotlb_free(dev, size, swiotlb_addr, dma_handle, attrs);
+	dma_direct_free_pages(dev, size, swiotlb_addr, dma_handle, attrs);
 }
 
 static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index f847c1b265c4..a387b59640a4 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -67,11 +67,6 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev,
 
 /* Accessory functions. */
 
-void *swiotlb_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle,
-		gfp_t flags, unsigned long attrs);
-void swiotlb_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_addr, unsigned long attrs);
-
 extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 				   unsigned long offset, size_t size,
 				   enum dma_data_direction dir,
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 57507b18caa4..1a01b0ac0a5e 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -622,78 +622,6 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
 	}
 }
 
-static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr,
-		size_t size)
-{
-	u64 mask = DMA_BIT_MASK(32);
-
-	if (dev && dev->coherent_dma_mask)
-		mask = dev->coherent_dma_mask;
-	return addr + size - 1 <= mask;
-}
-
-static void *
-swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		unsigned long attrs)
-{
-	phys_addr_t phys_addr;
-
-	if (swiotlb_force == SWIOTLB_NO_FORCE)
-		goto out_warn;
-
-	phys_addr = swiotlb_tbl_map_single(dev,
-			__phys_to_dma(dev, io_tlb_start),
-			0, size, DMA_FROM_DEVICE, attrs);
-	if (phys_addr == SWIOTLB_MAP_ERROR)
-		goto out_warn;
-
-	*dma_handle = __phys_to_dma(dev, phys_addr);
-	if (!dma_coherent_ok(dev, *dma_handle, size))
-		goto out_unmap;
-
-	memset(phys_to_virt(phys_addr), 0, size);
-	return phys_to_virt(phys_addr);
-
-out_unmap:
-	dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
-		(unsigned long long)dev->coherent_dma_mask,
-		(unsigned long long)*dma_handle);
-
-	/*
-	 * DMA_TO_DEVICE to avoid memcpy in unmap_single.
-	 * DMA_ATTR_SKIP_CPU_SYNC is optional.
-	 */
-	swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
-			DMA_ATTR_SKIP_CPU_SYNC);
-out_warn:
-	if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
-		dev_warn(dev,
-			"swiotlb: coherent allocation failed, size=%zu\n",
-			size);
-		dump_stack();
-	}
-	return NULL;
-}
-
-static bool swiotlb_free_buffer(struct device *dev, size_t size,
-		dma_addr_t dma_addr)
-{
-	phys_addr_t phys_addr = dma_to_phys(dev, dma_addr);
-
-	WARN_ON_ONCE(irqs_disabled());
-
-	if (!is_swiotlb_buffer(phys_addr))
-		return false;
-
-	/*
-	 * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single.
-	 * DMA_ATTR_SKIP_CPU_SYNC is optional.
-	 */
-	swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
-				 DMA_ATTR_SKIP_CPU_SYNC);
-	return true;
-}
-
 static dma_addr_t swiotlb_bounce_page(struct device *dev, phys_addr_t *phys,
 		size_t size, enum dma_data_direction dir, unsigned long attrs)
 {
@@ -926,39 +854,10 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask)
 	return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
 }
 
-void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-		gfp_t gfp, unsigned long attrs)
-{
-	void *vaddr;
-
-	/* temporary workaround: */
-	if (gfp & __GFP_NOWARN)
-		attrs |= DMA_ATTR_NO_WARN;
-
-	/*
-	 * Don't print a warning when the first allocation attempt fails.
-	 * swiotlb_alloc_coherent() will print a warning when the DMA memory
-	 * allocation ultimately failed.
-	 */
-	gfp |= __GFP_NOWARN;
-
-	vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
-	if (!vaddr)
-		vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs);
-	return vaddr;
-}
-
-void swiotlb_free(struct device *dev, size_t size, void *vaddr,
-		dma_addr_t dma_addr, unsigned long attrs)
-{
-	if (!swiotlb_free_buffer(dev, size, dma_addr))
-		dma_direct_free(dev, size, vaddr, dma_addr, attrs);
-}
-
 const struct dma_map_ops swiotlb_dma_ops = {
 	.mapping_error		= dma_direct_mapping_error,
-	.alloc			= swiotlb_alloc,
-	.free			= swiotlb_free,
+	.alloc			= dma_direct_alloc,
+	.free			= dma_direct_free,
 	.sync_single_for_cpu	= swiotlb_sync_single_for_cpu,
 	.sync_single_for_device	= swiotlb_sync_single_for_device,
 	.sync_sg_for_cpu	= swiotlb_sync_sg_for_cpu,
-- 
cgit v1.2.3


From cdf6b11daa77d4b55ddf0530842a551cc5562a93 Mon Sep 17 00:00:00 2001
From: Ben Whitten <ben.whitten@gmail.com>
Date: Fri, 19 Oct 2018 10:33:50 +0100
Subject: regmap: Add regmap_noinc_write API

The regmap API had a noinc_read function added for instances where devices
supported returning data from an internal FIFO in a single read.

This commit adds the noinc_write variant to allow writing to a non
incrementing register, this is used in devices such as the sx1301 for
loading firmware.

Signed-off-by: Ben Whitten <ben.whitten@lairdtech.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/base/regmap/internal.h |  3 ++
 drivers/base/regmap/regmap.c   | 77 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/regmap.h         | 19 +++++++++++
 3 files changed, 99 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h
index a6bf34d6394e..404f123cbe55 100644
--- a/drivers/base/regmap/internal.h
+++ b/drivers/base/regmap/internal.h
@@ -94,11 +94,13 @@ struct regmap {
 	bool (*readable_reg)(struct device *dev, unsigned int reg);
 	bool (*volatile_reg)(struct device *dev, unsigned int reg);
 	bool (*precious_reg)(struct device *dev, unsigned int reg);
+	bool (*writeable_noinc_reg)(struct device *dev, unsigned int reg);
 	bool (*readable_noinc_reg)(struct device *dev, unsigned int reg);
 	const struct regmap_access_table *wr_table;
 	const struct regmap_access_table *rd_table;
 	const struct regmap_access_table *volatile_table;
 	const struct regmap_access_table *precious_table;
+	const struct regmap_access_table *wr_noinc_table;
 	const struct regmap_access_table *rd_noinc_table;
 
 	int (*reg_read)(void *context, unsigned int reg, unsigned int *val);
@@ -183,6 +185,7 @@ bool regmap_writeable(struct regmap *map, unsigned int reg);
 bool regmap_readable(struct regmap *map, unsigned int reg);
 bool regmap_volatile(struct regmap *map, unsigned int reg);
 bool regmap_precious(struct regmap *map, unsigned int reg);
+bool regmap_writeable_noinc(struct regmap *map, unsigned int reg);
 bool regmap_readable_noinc(struct regmap *map, unsigned int reg);
 
 int _regmap_write(struct regmap *map, unsigned int reg,
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 0360a90ad6b6..d4f1fc642600 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -168,6 +168,17 @@ bool regmap_precious(struct regmap *map, unsigned int reg)
 	return false;
 }
 
+bool regmap_writeable_noinc(struct regmap *map, unsigned int reg)
+{
+	if (map->writeable_noinc_reg)
+		return map->writeable_noinc_reg(map->dev, reg);
+
+	if (map->wr_noinc_table)
+		return regmap_check_range_table(map, reg, map->wr_noinc_table);
+
+	return true;
+}
+
 bool regmap_readable_noinc(struct regmap *map, unsigned int reg)
 {
 	if (map->readable_noinc_reg)
@@ -777,11 +788,13 @@ struct regmap *__regmap_init(struct device *dev,
 	map->rd_table = config->rd_table;
 	map->volatile_table = config->volatile_table;
 	map->precious_table = config->precious_table;
+	map->wr_noinc_table = config->wr_noinc_table;
 	map->rd_noinc_table = config->rd_noinc_table;
 	map->writeable_reg = config->writeable_reg;
 	map->readable_reg = config->readable_reg;
 	map->volatile_reg = config->volatile_reg;
 	map->precious_reg = config->precious_reg;
+	map->writeable_noinc_reg = config->writeable_noinc_reg;
 	map->readable_noinc_reg = config->readable_noinc_reg;
 	map->cache_type = config->cache_type;
 
@@ -1298,6 +1311,7 @@ int regmap_reinit_cache(struct regmap *map, const struct regmap_config *config)
 	map->readable_reg = config->readable_reg;
 	map->volatile_reg = config->volatile_reg;
 	map->precious_reg = config->precious_reg;
+	map->writeable_noinc_reg = config->writeable_noinc_reg;
 	map->readable_noinc_reg = config->readable_noinc_reg;
 	map->cache_type = config->cache_type;
 
@@ -1897,6 +1911,69 @@ int regmap_raw_write(struct regmap *map, unsigned int reg,
 }
 EXPORT_SYMBOL_GPL(regmap_raw_write);
 
+/**
+ * regmap_noinc_write(): Write data from a register without incrementing the
+ *			register number
+ *
+ * @map: Register map to write to
+ * @reg: Register to write to
+ * @val: Pointer to data buffer
+ * @val_len: Length of output buffer in bytes.
+ *
+ * The regmap API usually assumes that bulk bus write operations will write a
+ * range of registers. Some devices have certain registers for which a write
+ * operation can write to an internal FIFO.
+ *
+ * The target register must be volatile but registers after it can be
+ * completely unrelated cacheable registers.
+ *
+ * This will attempt multiple writes as required to write val_len bytes.
+ *
+ * A value of zero will be returned on success, a negative errno will be
+ * returned in error cases.
+ */
+int regmap_noinc_write(struct regmap *map, unsigned int reg,
+		      const void *val, size_t val_len)
+{
+	size_t write_len;
+	int ret;
+
+	if (!map->bus)
+		return -EINVAL;
+	if (!map->bus->write)
+		return -ENOTSUPP;
+	if (val_len % map->format.val_bytes)
+		return -EINVAL;
+	if (!IS_ALIGNED(reg, map->reg_stride))
+		return -EINVAL;
+	if (val_len == 0)
+		return -EINVAL;
+
+	map->lock(map->lock_arg);
+
+	if (!regmap_volatile(map, reg) || !regmap_writeable_noinc(map, reg)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	while (val_len) {
+		if (map->max_raw_write && map->max_raw_write < val_len)
+			write_len = map->max_raw_write;
+		else
+			write_len = val_len;
+		ret = _regmap_raw_write(map, reg, val, write_len);
+		if (ret)
+			goto out_unlock;
+		val = ((u8 *)val) + write_len;
+		val_len -= write_len;
+	}
+
+out_unlock:
+	map->unlock(map->lock_arg);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(regmap_noinc_write);
+
 /**
  * regmap_field_update_bits_base() - Perform a read/modify/write cycle a
  *                                   register field.
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 379505a53722..de04dc46c7a9 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -268,6 +268,13 @@ typedef void (*regmap_unlock)(void *);
  *                field is NULL but precious_table (see below) is not, the
  *                check is performed on such table (a register is precious if
  *                it belongs to one of the ranges specified by precious_table).
+ * @writeable_noinc_reg: Optional callback returning true if the register
+ *			supports multiple write operations without incrementing
+ *			the register number. If this field is NULL but
+ *			wr_noinc_table (see below) is not, the check is
+ *			performed on such table (a register is no increment
+ *			writeable if it belongs to one of the ranges specified
+ *			by wr_noinc_table).
  * @readable_noinc_reg: Optional callback returning true if the register
  *			supports multiple read operations without incrementing
  *			the register number. If this field is NULL but
@@ -302,6 +309,7 @@ typedef void (*regmap_unlock)(void *);
  * @rd_table:     As above, for read access.
  * @volatile_table: As above, for volatile registers.
  * @precious_table: As above, for precious registers.
+ * @wr_noinc_table: As above, for no increment writeable registers.
  * @rd_noinc_table: As above, for no increment readable registers.
  * @reg_defaults: Power on reset values for registers (for use with
  *                register cache support).
@@ -352,6 +360,7 @@ struct regmap_config {
 	bool (*readable_reg)(struct device *dev, unsigned int reg);
 	bool (*volatile_reg)(struct device *dev, unsigned int reg);
 	bool (*precious_reg)(struct device *dev, unsigned int reg);
+	bool (*writeable_noinc_reg)(struct device *dev, unsigned int reg);
 	bool (*readable_noinc_reg)(struct device *dev, unsigned int reg);
 
 	bool disable_locking;
@@ -369,6 +378,7 @@ struct regmap_config {
 	const struct regmap_access_table *rd_table;
 	const struct regmap_access_table *volatile_table;
 	const struct regmap_access_table *precious_table;
+	const struct regmap_access_table *wr_noinc_table;
 	const struct regmap_access_table *rd_noinc_table;
 	const struct reg_default *reg_defaults;
 	unsigned int num_reg_defaults;
@@ -979,6 +989,8 @@ int regmap_write(struct regmap *map, unsigned int reg, unsigned int val);
 int regmap_write_async(struct regmap *map, unsigned int reg, unsigned int val);
 int regmap_raw_write(struct regmap *map, unsigned int reg,
 		     const void *val, size_t val_len);
+int regmap_noinc_write(struct regmap *map, unsigned int reg,
+		     const void *val, size_t val_len);
 int regmap_bulk_write(struct regmap *map, unsigned int reg, const void *val,
 			size_t val_count);
 int regmap_multi_reg_write(struct regmap *map, const struct reg_sequence *regs,
@@ -1222,6 +1234,13 @@ static inline int regmap_raw_write_async(struct regmap *map, unsigned int reg,
 	return -EINVAL;
 }
 
+static inline int regmap_noinc_write(struct regmap *map, unsigned int reg,
+				    const void *val, size_t val_len)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+	return -EINVAL;
+}
+
 static inline int regmap_bulk_write(struct regmap *map, unsigned int reg,
 				    const void *val, size_t val_count)
 {
-- 
cgit v1.2.3


From bcb047ebf28453da56f0265aeeb9edc52b797ea7 Mon Sep 17 00:00:00 2001
From: Axel Lin <axel.lin@ingics.com>
Date: Thu, 4 Oct 2018 15:25:58 +0800
Subject: regulator: bd718x7: Remove struct bd718xx_pmic

All the fields in struct bd718xx_pmic are not really necessary.
Remove struct bd718xx_pmic to simplify the code.

Signed-off-by: Axel Lin <axel.lin@ingics.com>
Reviewed-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 drivers/regulator/bd718x7-regulator.c | 59 ++++++++++++-----------------------
 include/linux/mfd/rohm-bd718x7.h      |  2 --
 2 files changed, 20 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/regulator/bd718x7-regulator.c b/drivers/regulator/bd718x7-regulator.c
index d2522d4e1505..3a47e0372e77 100644
--- a/drivers/regulator/bd718x7-regulator.c
+++ b/drivers/regulator/bd718x7-regulator.c
@@ -15,13 +15,6 @@
 #include <linux/regulator/of_regulator.h>
 #include <linux/slab.h>
 
-struct bd718xx_pmic {
-	struct bd718xx_regulator_data *rdata;
-	struct bd718xx *mfd;
-	struct platform_device *pdev;
-	struct regulator_dev *rdev[BD718XX_REGULATOR_AMOUNT];
-};
-
 /*
  * BUCK1/2/3/4
  * BUCK1RAMPRATE[1:0] BUCK1 DVS ramp rate setting
@@ -33,12 +26,10 @@ struct bd718xx_pmic {
 static int bd718xx_buck1234_set_ramp_delay(struct regulator_dev *rdev,
 					   int ramp_delay)
 {
-	struct bd718xx_pmic *pmic = rdev_get_drvdata(rdev);
-	struct bd718xx *mfd = pmic->mfd;
 	int id = rdev->desc->id;
 	unsigned int ramp_value = BUCK_RAMPRATE_10P00MV;
 
-	dev_dbg(&pmic->pdev->dev, "Buck[%d] Set Ramp = %d\n", id + 1,
+	dev_dbg(&rdev->dev, "Buck[%d] Set Ramp = %d\n", id + 1,
 		ramp_delay);
 	switch (ramp_delay) {
 	case 1 ... 1250:
@@ -55,12 +46,12 @@ static int bd718xx_buck1234_set_ramp_delay(struct regulator_dev *rdev,
 		break;
 	default:
 		ramp_value = BUCK_RAMPRATE_10P00MV;
-		dev_err(&pmic->pdev->dev,
+		dev_err(&rdev->dev,
 			"%s: ramp_delay: %d not supported, setting 10000mV//us\n",
 			rdev->desc->name, ramp_delay);
 	}
 
-	return regmap_update_bits(mfd->regmap, BD718XX_REG_BUCK1_CTRL + id,
+	return regmap_update_bits(rdev->regmap, BD718XX_REG_BUCK1_CTRL + id,
 				  BUCK_RAMPRATE_MASK, ramp_value << 6);
 }
 
@@ -1022,7 +1013,7 @@ struct bd718xx_pmic_inits {
 
 static int bd718xx_probe(struct platform_device *pdev)
 {
-	struct bd718xx_pmic *pmic;
+	struct bd718xx *mfd;
 	struct regulator_config config = { 0 };
 	struct bd718xx_pmic_inits pmic_regulators[] = {
 		[BD718XX_TYPE_BD71837] = {
@@ -1037,54 +1028,46 @@ static int bd718xx_probe(struct platform_device *pdev)
 
 	int i, j, err;
 
-	pmic = devm_kzalloc(&pdev->dev, sizeof(*pmic), GFP_KERNEL);
-	if (!pmic)
-		return -ENOMEM;
-
-	pmic->pdev = pdev;
-	pmic->mfd = dev_get_drvdata(pdev->dev.parent);
-
-	if (!pmic->mfd) {
+	mfd = dev_get_drvdata(pdev->dev.parent);
+	if (!mfd) {
 		dev_err(&pdev->dev, "No MFD driver data\n");
 		err = -EINVAL;
 		goto err;
 	}
-	if (pmic->mfd->chip_type >= BD718XX_TYPE_AMOUNT ||
-	    !pmic_regulators[pmic->mfd->chip_type].r_datas) {
+
+	if (mfd->chip_type >= BD718XX_TYPE_AMOUNT ||
+	    !pmic_regulators[mfd->chip_type].r_datas) {
 		dev_err(&pdev->dev, "Unsupported chip type\n");
 		err = -EINVAL;
 		goto err;
 	}
 
-	platform_set_drvdata(pdev, pmic);
-
 	/* Register LOCK release */
-	err = regmap_update_bits(pmic->mfd->regmap, BD718XX_REG_REGLOCK,
+	err = regmap_update_bits(mfd->regmap, BD718XX_REG_REGLOCK,
 				 (REGLOCK_PWRSEQ | REGLOCK_VREG), 0);
 	if (err) {
-		dev_err(&pmic->pdev->dev, "Failed to unlock PMIC (%d)\n", err);
+		dev_err(&pdev->dev, "Failed to unlock PMIC (%d)\n", err);
 		goto err;
 	} else {
-		dev_dbg(&pmic->pdev->dev, "Unlocked lock register 0x%x\n",
+		dev_dbg(&pdev->dev, "Unlocked lock register 0x%x\n",
 			BD718XX_REG_REGLOCK);
 	}
 
-	for (i = 0; i < pmic_regulators[pmic->mfd->chip_type].r_amount; i++) {
+	for (i = 0; i < pmic_regulators[mfd->chip_type].r_amount; i++) {
 
 		const struct regulator_desc *desc;
 		struct regulator_dev *rdev;
 		const struct bd718xx_regulator_data *r;
 
-		r = &(*pmic_regulators[pmic->mfd->chip_type].r_datas)[i];
+		r = &(*pmic_regulators[mfd->chip_type].r_datas)[i];
 		desc = &r->desc;
 
 		config.dev = pdev->dev.parent;
-		config.driver_data = pmic;
-		config.regmap = pmic->mfd->regmap;
+		config.regmap = mfd->regmap;
 
 		rdev = devm_regulator_register(&pdev->dev, desc, &config);
 		if (IS_ERR(rdev)) {
-			dev_err(pmic->mfd->dev,
+			dev_err(&pdev->dev,
 				"failed to register %s regulator\n",
 				desc->name);
 			err = PTR_ERR(rdev);
@@ -1096,28 +1079,26 @@ static int bd718xx_probe(struct platform_device *pdev)
 		 * can now switch the control from PMIC state machine to the
 		 * register interface
 		 */
-		err = regmap_update_bits(pmic->mfd->regmap, r->init.reg,
+		err = regmap_update_bits(mfd->regmap, r->init.reg,
 					 r->init.mask, r->init.val);
 		if (err) {
-			dev_err(&pmic->pdev->dev,
+			dev_err(&pdev->dev,
 				"Failed to write BUCK/LDO SEL bit for (%s)\n",
 				desc->name);
 			goto err;
 		}
 		for (j = 0; j < r->additional_init_amnt; j++) {
-			err = regmap_update_bits(pmic->mfd->regmap,
+			err = regmap_update_bits(mfd->regmap,
 						 r->additional_inits[j].reg,
 						 r->additional_inits[j].mask,
 						 r->additional_inits[j].val);
 			if (err) {
-				dev_err(&pmic->pdev->dev,
+				dev_err(&pdev->dev,
 					"Buck (%s) initialization failed\n",
 					desc->name);
 				goto err;
 			}
 		}
-
-		pmic->rdev[i] = rdev;
 	}
 
 err:
diff --git a/include/linux/mfd/rohm-bd718x7.h b/include/linux/mfd/rohm-bd718x7.h
index 26acf9a92498..fd194bfc836f 100644
--- a/include/linux/mfd/rohm-bd718x7.h
+++ b/include/linux/mfd/rohm-bd718x7.h
@@ -321,7 +321,6 @@ enum {
 	BD718XX_PWRBTN_LONG_PRESS_15S
 };
 
-struct bd718xx_pmic;
 struct bd718xx_clk;
 
 struct bd718xx {
@@ -333,7 +332,6 @@ struct bd718xx {
 	int chip_irq;
 	struct regmap_irq_chip_data *irq_data;
 
-	struct bd718xx_pmic *pmic;
 	struct bd718xx_clk *clk;
 };
 
-- 
cgit v1.2.3


From 99c228a994ec8b1580c43631866fd2c5440f5bfd Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Thu, 18 Oct 2018 14:22:55 +0300
Subject: fs: group frequently accessed fields of struct super_block together

Kernel test robot reported [1] a 6% performance regression in a
concurrent unlink(2) workload on commit 60f7ed8c7c4d ("fsnotify: send
path type events to group with super block marks").

The performance test was run with no fsnotify marks at all on the
data set, so the only extra instructions added by the offending
commit are tests of the super_block fields s_fsnotify_{marks,mask}
and these tests happen on almost every single inode access.

When adding those fields to the super_block struct, we did not give much
thought of placing them on a hot cache lines (we just placed them at the
end of the struct).

Re-organize struct super_block to try and keep some frequently accessed
fields on the same cache line.

Move the frequently accessed fields s_fsnotify_{marks,mask} near the
frequently accessed fields s_fs_info,s_time_gran, while filling a 64bit
alignment hole after s_time_gran.

Move the seldom accessed fields s_id,s_uuid,s_max_links,s_mode near the
seldom accessed fields s_vfs_rename_mutex,s_subtype.

Rong Chen confirmed that this patch solved the reported problem.

[1] https://lkml.org/lkml/2018/9/30/206

Reported-by: kernel test robot <rong.a.chen@intel.com>
Tested-by: kernel test robot <rong.a.chen@intel.com>
Fixes: 1e6cb72399 ("fsnotify: add super block object type")
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 include/linux/fs.h | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2c14801d0aa3..6da94deb957f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1393,17 +1393,26 @@ struct super_block {
 
 	struct sb_writers	s_writers;
 
+	/*
+	 * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and
+	 * s_fsnotify_marks together for cache efficiency. They are frequently
+	 * accessed and rarely modified.
+	 */
+	void			*s_fs_info;	/* Filesystem private info */
+
+	/* Granularity of c/m/atime in ns (cannot be worse than a second) */
+	u32			s_time_gran;
+#ifdef CONFIG_FSNOTIFY
+	__u32			s_fsnotify_mask;
+	struct fsnotify_mark_connector __rcu	*s_fsnotify_marks;
+#endif
+
 	char			s_id[32];	/* Informational name */
 	uuid_t			s_uuid;		/* UUID */
 
-	void 			*s_fs_info;	/* Filesystem private info */
 	unsigned int		s_max_links;
 	fmode_t			s_mode;
 
-	/* Granularity of c/m/atime in ns.
-	   Cannot be worse than a second */
-	u32		   s_time_gran;
-
 	/*
 	 * The next field is for VFS *only*. No filesystems have any business
 	 * even looking at it. You had been warned.
@@ -1464,11 +1473,6 @@ struct super_block {
 
 	spinlock_t		s_inode_wblist_lock;
 	struct list_head	s_inodes_wb;	/* writeback inodes */
-
-#ifdef CONFIG_FSNOTIFY
-	__u32			s_fsnotify_mask;
-	struct fsnotify_mark_connector __rcu	*s_fsnotify_marks;
-#endif
 } __randomize_layout;
 
 /* Helper functions so that in most cases filesystems will
-- 
cgit v1.2.3


From 3952105df4723abbd36b57e88c8dad42cf6c8b59 Mon Sep 17 00:00:00 2001
From: Sibi Sankar <sibis@codeaurora.org>
Date: Wed, 17 Oct 2018 19:25:23 +0530
Subject: remoteproc: Introduce custom dump function for each remoteproc
 segment

Introduce custom dump function and private data per remoteproc dump
segment. The dump function is responsible for filling the device memory
segment associated with coredump

Signed-off-by: Sibi Sankar <sibis@codeaurora.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 18 +++++++++++-------
 include/linux/remoteproc.h           |  6 ++++++
 2 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index f77a42f6a8aa..6bed40d01dbf 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -1508,14 +1508,18 @@ static void rproc_coredump(struct rproc *rproc)
 		phdr->p_flags = PF_R | PF_W | PF_X;
 		phdr->p_align = 0;
 
-		ptr = rproc_da_to_va(rproc, segment->da, segment->size);
-		if (!ptr) {
-			dev_err(&rproc->dev,
-				"invalid coredump segment (%pad, %zu)\n",
-				&segment->da, segment->size);
-			memset(data + offset, 0xff, segment->size);
+		if (segment->dump) {
+			segment->dump(rproc, segment, data + offset);
 		} else {
-			memcpy(data + offset, ptr, segment->size);
+			ptr = rproc_da_to_va(rproc, segment->da, segment->size);
+			if (!ptr) {
+				dev_err(&rproc->dev,
+					"invalid coredump segment (%pad, %zu)\n",
+					&segment->da, segment->size);
+				memset(data + offset, 0xff, segment->size);
+			} else {
+				memcpy(data + offset, ptr, segment->size);
+			}
 		}
 
 		offset += phdr->p_filesz;
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 8bb0cf0416f1..2d036adab7ff 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -412,6 +412,9 @@ enum rproc_crash_type {
  * @node:	list node related to the rproc segment list
  * @da:		device address of the segment
  * @size:	size of the segment
+ * @priv:	private data associated with the dump_segment
+ * @dump:	custom dump function to fill device memory segment associated
+ *		with coredump
  */
 struct rproc_dump_segment {
 	struct list_head node;
@@ -419,6 +422,9 @@ struct rproc_dump_segment {
 	dma_addr_t da;
 	size_t size;
 
+	void *priv;
+	void (*dump)(struct rproc *rproc, struct rproc_dump_segment *segment,
+		     void *dest);
 	loff_t offset;
 };
 
-- 
cgit v1.2.3


From ab8f873bb90da7bbe40e2f41c92a4971c4f0dc76 Mon Sep 17 00:00:00 2001
From: Sibi Sankar <sibis@codeaurora.org>
Date: Wed, 17 Oct 2018 19:25:24 +0530
Subject: remoteproc: Add mechanism for custom dump function assignment

This patch adds a mechanism for assigning each rproc dump segment with
a custom dump function and private data. The dump function is to be
called for each rproc segment during coredump if assigned.

Signed-off-by: Sibi Sankar <sibis@codeaurora.org>
[bjorn: reordred arguments to rproc_coredump_add_custom_segment()]
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
---
 drivers/remoteproc/remoteproc_core.c | 38 ++++++++++++++++++++++++++++++++++++
 include/linux/remoteproc.h           |  6 ++++++
 2 files changed, 44 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c
index 6bed40d01dbf..54ec38fc5dca 100644
--- a/drivers/remoteproc/remoteproc_core.c
+++ b/drivers/remoteproc/remoteproc_core.c
@@ -1446,6 +1446,44 @@ int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size)
 }
 EXPORT_SYMBOL(rproc_coredump_add_segment);
 
+/**
+ * rproc_coredump_add_custom_segment() - add custom coredump segment
+ * @rproc:	handle of a remote processor
+ * @da:		device address
+ * @size:	size of segment
+ * @dumpfn:	custom dump function called for each segment during coredump
+ * @priv:	private data
+ *
+ * Add device memory to the list of segments to be included in the coredump
+ * and associate the segment with the given custom dump function and private
+ * data.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int rproc_coredump_add_custom_segment(struct rproc *rproc,
+				      dma_addr_t da, size_t size,
+				      void (*dumpfn)(struct rproc *rproc,
+						     struct rproc_dump_segment *segment,
+						     void *dest),
+				      void *priv)
+{
+	struct rproc_dump_segment *segment;
+
+	segment = kzalloc(sizeof(*segment), GFP_KERNEL);
+	if (!segment)
+		return -ENOMEM;
+
+	segment->da = da;
+	segment->size = size;
+	segment->priv = priv;
+	segment->dump = dumpfn;
+
+	list_add_tail(&segment->node, &rproc->dump_segments);
+
+	return 0;
+}
+EXPORT_SYMBOL(rproc_coredump_add_custom_segment);
+
 /**
  * rproc_coredump() - perform coredump
  * @rproc:	rproc handle
diff --git a/include/linux/remoteproc.h b/include/linux/remoteproc.h
index 2d036adab7ff..507a2b524208 100644
--- a/include/linux/remoteproc.h
+++ b/include/linux/remoteproc.h
@@ -592,6 +592,12 @@ int rproc_boot(struct rproc *rproc);
 void rproc_shutdown(struct rproc *rproc);
 void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type);
 int rproc_coredump_add_segment(struct rproc *rproc, dma_addr_t da, size_t size);
+int rproc_coredump_add_custom_segment(struct rproc *rproc,
+				      dma_addr_t da, size_t size,
+				      void (*dumpfn)(struct rproc *rproc,
+						     struct rproc_dump_segment *segment,
+						     void *dest),
+				      void *priv);
 
 static inline struct rproc_vdev *vdev_to_rvdev(struct virtio_device *vdev)
 {
-- 
cgit v1.2.3


From 144991602e6a14d667b295f1b099e609ce857772 Mon Sep 17 00:00:00 2001
From: Mauricio Vasquez B <mauricio.vasquez@polito.it>
Date: Thu, 18 Oct 2018 15:16:09 +0200
Subject: bpf: rename stack trace map operations

In the following patches queue and stack maps (FIFO and LIFO
datastructures) will be implemented.  In order to avoid confusion and
a possible name clash rename stack_map_ops to stack_trace_map_ops

Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_types.h | 2 +-
 kernel/bpf/stackmap.c     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index fa48343a5ea1..7bad4e1947ed 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -51,7 +51,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops)
 #ifdef CONFIG_PERF_EVENTS
-BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index b2ade10f7ec3..90daf285de03 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -600,7 +600,7 @@ static void stack_map_free(struct bpf_map *map)
 	put_callchain_buffers();
 }
 
-const struct bpf_map_ops stack_map_ops = {
+const struct bpf_map_ops stack_trace_map_ops = {
 	.map_alloc = stack_map_alloc,
 	.map_free = stack_map_free,
 	.map_get_next_key = stack_map_get_next_key,
-- 
cgit v1.2.3


From 2ea864c58f19bf70a0e2415f9f1c53814e07f1b4 Mon Sep 17 00:00:00 2001
From: Mauricio Vasquez B <mauricio.vasquez@polito.it>
Date: Thu, 18 Oct 2018 15:16:20 +0200
Subject: bpf/verifier: add ARG_PTR_TO_UNINIT_MAP_VALUE

ARG_PTR_TO_UNINIT_MAP_VALUE argument is a pointer to a memory zone
used to save the value of a map.  Basically the same as
ARG_PTR_TO_UNINIT_MEM, but the size has not be passed as an extra
argument.

This will be used in the following patch that implements some new
helpers that receive a pointer to be filled with a map value.

Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h   | 1 +
 kernel/bpf/verifier.c | 9 ++++++---
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e60fff48288b..0f8b863e0229 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -138,6 +138,7 @@ enum bpf_arg_type {
 	ARG_CONST_MAP_PTR,	/* const argument used as pointer to bpf_map */
 	ARG_PTR_TO_MAP_KEY,	/* pointer to stack used as map key */
 	ARG_PTR_TO_MAP_VALUE,	/* pointer to stack used as map value */
+	ARG_PTR_TO_UNINIT_MAP_VALUE,	/* pointer to valid memory used to store a map value */
 
 	/* the following constraints used to prototype bpf_memcmp() and other
 	 * functions that access data on eBPF program stack
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 3f93a548a642..d84c91ac3b70 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2117,7 +2117,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 	}
 
 	if (arg_type == ARG_PTR_TO_MAP_KEY ||
-	    arg_type == ARG_PTR_TO_MAP_VALUE) {
+	    arg_type == ARG_PTR_TO_MAP_VALUE ||
+	    arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
 		expected_type = PTR_TO_STACK;
 		if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
 		    type != expected_type)
@@ -2187,7 +2188,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		err = check_helper_mem_access(env, regno,
 					      meta->map_ptr->key_size, false,
 					      NULL);
-	} else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
+	} else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
+		   arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
 		 * check [value, value + map->value_size) validity
 		 */
@@ -2196,9 +2198,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			verbose(env, "invalid map_ptr to access map->value\n");
 			return -EACCES;
 		}
+		meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
 		err = check_helper_mem_access(env, regno,
 					      meta->map_ptr->value_size, false,
-					      NULL);
+					      meta);
 	} else if (arg_type_is_mem_size(arg_type)) {
 		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
 
-- 
cgit v1.2.3


From f1a2e44a3aeccb3ff18d3ccc0b0203e70b95bd92 Mon Sep 17 00:00:00 2001
From: Mauricio Vasquez B <mauricio.vasquez@polito.it>
Date: Thu, 18 Oct 2018 15:16:25 +0200
Subject: bpf: add queue and stack maps

Queue/stack maps implement a FIFO/LIFO data storage for ebpf programs.
These maps support peek, pop and push operations that are exposed to eBPF
programs through the new bpf_map[peek/pop/push] helpers.  Those operations
are exposed to userspace applications through the already existing
syscalls in the following way:

BPF_MAP_LOOKUP_ELEM            -> peek
BPF_MAP_LOOKUP_AND_DELETE_ELEM -> pop
BPF_MAP_UPDATE_ELEM            -> push

Queue/stack maps are implemented using a buffer, tail and head indexes,
hence BPF_F_NO_PREALLOC is not supported.

As opposite to other maps, queue and stack do not use RCU for protecting
maps values, the bpf_map[peek/pop] have a ARG_PTR_TO_UNINIT_MAP_VALUE
argument that is a pointer to a memory zone where to save the value of a
map.  Basically the same as ARG_PTR_TO_UNINIT_MEM, but the size has not
be passed as an extra argument.

Our main motivation for implementing queue/stack maps was to keep track
of a pool of elements, like network ports in a SNAT, however we forsee
other use cases, like for exampling saving last N kernel events in a map
and then analysing from userspace.

Signed-off-by: Mauricio Vasquez B <mauricio.vasquez@polito.it>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h           |   6 +
 include/linux/bpf_types.h     |   2 +
 include/uapi/linux/bpf.h      |  29 ++++-
 kernel/bpf/Makefile           |   2 +-
 kernel/bpf/core.c             |   3 +
 kernel/bpf/helpers.c          |  43 +++++++
 kernel/bpf/queue_stack_maps.c | 288 ++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c          |   6 +
 kernel/bpf/verifier.c         |  19 ++-
 net/core/filter.c             |   6 +
 10 files changed, 401 insertions(+), 3 deletions(-)
 create mode 100644 kernel/bpf/queue_stack_maps.c

(limited to 'include/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0f8b863e0229..33014ae73103 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -39,6 +39,9 @@ struct bpf_map_ops {
 	void *(*map_lookup_elem)(struct bpf_map *map, void *key);
 	int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
 	int (*map_delete_elem)(struct bpf_map *map, void *key);
+	int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
+	int (*map_pop_elem)(struct bpf_map *map, void *value);
+	int (*map_peek_elem)(struct bpf_map *map, void *value);
 
 	/* funcs called by prog_array and perf_event_array map */
 	void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -811,6 +814,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
 extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
 extern const struct bpf_func_proto bpf_map_delete_elem_proto;
+extern const struct bpf_func_proto bpf_map_push_elem_proto;
+extern const struct bpf_func_proto bpf_map_pop_elem_proto;
+extern const struct bpf_func_proto bpf_map_peek_elem_proto;
 
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 7bad4e1947ed..44d9ab4809bd 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
 #endif
 #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 5e46f6732781..70082cb626b4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -128,6 +128,8 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_CGROUP_STORAGE,
 	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
 	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+	BPF_MAP_TYPE_QUEUE,
+	BPF_MAP_TYPE_STACK,
 };
 
 enum bpf_prog_type {
@@ -462,6 +464,28 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * 	Description
+ * 		Push an element *value* in *map*. *flags* is one of:
+ *
+ * 		**BPF_EXIST**
+ * 		If the queue/stack is full, the oldest element is removed to
+ * 		make room for this.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * 	Description
+ * 		Pop an element from *map*.
+ * Return
+ * 		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * 	Description
+ * 		Get an element from *map* without removing it.
+ * Return
+ * 		0 on success, or a negative error in case of failure.
+ *
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  * 	Description
  * 		For tracing programs, safely attempt to read *size* bytes from
@@ -2303,7 +2327,10 @@ union bpf_attr {
 	FN(skb_ancestor_cgroup_id),	\
 	FN(sk_lookup_tcp),		\
 	FN(sk_lookup_udp),		\
-	FN(sk_release),
+	FN(sk_release),			\
+	FN(map_push_elem),		\
+	FN(map_pop_elem),		\
+	FN(map_peek_elem),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index ff8262626b8f..4c2fa3ac56f6 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -3,7 +3,7 @@ obj-y := core.o
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
-obj-$(CONFIG_BPF_SYSCALL) += local_storage.o
+obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
 ifeq ($(CONFIG_NET),y)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index defcf4df6d91..7c7eeea8cffc 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1783,6 +1783,9 @@ BPF_CALL_0(bpf_user_rnd_u32)
 const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
 const struct bpf_func_proto bpf_map_update_elem_proto __weak;
 const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
+const struct bpf_func_proto bpf_map_push_elem_proto __weak;
+const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
+const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
 
 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
 const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 6502115e8f55..ab0d5e3f9892 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -76,6 +76,49 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = {
 	.arg2_type	= ARG_PTR_TO_MAP_KEY,
 };
 
+BPF_CALL_3(bpf_map_push_elem, struct bpf_map *, map, void *, value, u64, flags)
+{
+	return map->ops->map_push_elem(map, value, flags);
+}
+
+const struct bpf_func_proto bpf_map_push_elem_proto = {
+	.func		= bpf_map_push_elem,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_MAP_VALUE,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_map_pop_elem, struct bpf_map *, map, void *, value)
+{
+	return map->ops->map_pop_elem(map, value);
+}
+
+const struct bpf_func_proto bpf_map_pop_elem_proto = {
+	.func		= bpf_map_pop_elem,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
+};
+
+BPF_CALL_2(bpf_map_peek_elem, struct bpf_map *, map, void *, value)
+{
+	return map->ops->map_peek_elem(map, value);
+}
+
+const struct bpf_func_proto bpf_map_peek_elem_proto = {
+	.func		= bpf_map_pop_elem,
+	.gpl_only	= false,
+	.pkt_access	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MAP_VALUE,
+};
+
 const struct bpf_func_proto bpf_get_prandom_u32_proto = {
 	.func		= bpf_user_rnd_u32,
 	.gpl_only	= false,
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
new file mode 100644
index 000000000000..12a93fb37449
--- /dev/null
+++ b/kernel/bpf/queue_stack_maps.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * queue_stack_maps.c: BPF queue and stack maps
+ *
+ * Copyright (c) 2018 Politecnico di Torino
+ */
+#include <linux/bpf.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include "percpu_freelist.h"
+
+#define QUEUE_STACK_CREATE_FLAG_MASK \
+	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+
+
+struct bpf_queue_stack {
+	struct bpf_map map;
+	raw_spinlock_t lock;
+	u32 head, tail;
+	u32 size; /* max_entries + 1 */
+
+	char elements[0] __aligned(8);
+};
+
+static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
+{
+	return container_of(map, struct bpf_queue_stack, map);
+}
+
+static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs)
+{
+	return qs->head == qs->tail;
+}
+
+static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
+{
+	u32 head = qs->head + 1;
+
+	if (unlikely(head >= qs->size))
+		head = 0;
+
+	return head == qs->tail;
+}
+
+/* Called from syscall */
+static int queue_stack_map_alloc_check(union bpf_attr *attr)
+{
+	/* check sanity of attributes */
+	if (attr->max_entries == 0 || attr->key_size != 0 ||
+	    attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
+		return -EINVAL;
+
+	if (attr->value_size > KMALLOC_MAX_SIZE)
+		/* if value_size is bigger, the user space won't be able to
+		 * access the elements.
+		 */
+		return -E2BIG;
+
+	return 0;
+}
+
+static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
+{
+	int ret, numa_node = bpf_map_attr_numa_node(attr);
+	struct bpf_queue_stack *qs;
+	u32 size, value_size;
+	u64 queue_size, cost;
+
+	size = attr->max_entries + 1;
+	value_size = attr->value_size;
+
+	queue_size = sizeof(*qs) + (u64) value_size * size;
+
+	cost = queue_size;
+	if (cost >= U32_MAX - PAGE_SIZE)
+		return ERR_PTR(-E2BIG);
+
+	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+	ret = bpf_map_precharge_memlock(cost);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
+	qs = bpf_map_area_alloc(queue_size, numa_node);
+	if (!qs)
+		return ERR_PTR(-ENOMEM);
+
+	memset(qs, 0, sizeof(*qs));
+
+	bpf_map_init_from_attr(&qs->map, attr);
+
+	qs->map.pages = cost;
+	qs->size = size;
+
+	raw_spin_lock_init(&qs->lock);
+
+	return &qs->map;
+}
+
+/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
+static void queue_stack_map_free(struct bpf_map *map)
+{
+	struct bpf_queue_stack *qs = bpf_queue_stack(map);
+
+	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+	 * so the programs (can be more than one that used this map) were
+	 * disconnected from events. Wait for outstanding critical sections in
+	 * these programs to complete
+	 */
+	synchronize_rcu();
+
+	bpf_map_area_free(qs);
+}
+
+static int __queue_map_get(struct bpf_map *map, void *value, bool delete)
+{
+	struct bpf_queue_stack *qs = bpf_queue_stack(map);
+	unsigned long flags;
+	int err = 0;
+	void *ptr;
+
+	raw_spin_lock_irqsave(&qs->lock, flags);
+
+	if (queue_stack_map_is_empty(qs)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	ptr = &qs->elements[qs->tail * qs->map.value_size];
+	memcpy(value, ptr, qs->map.value_size);
+
+	if (delete) {
+		if (unlikely(++qs->tail >= qs->size))
+			qs->tail = 0;
+	}
+
+out:
+	raw_spin_unlock_irqrestore(&qs->lock, flags);
+	return err;
+}
+
+
+static int __stack_map_get(struct bpf_map *map, void *value, bool delete)
+{
+	struct bpf_queue_stack *qs = bpf_queue_stack(map);
+	unsigned long flags;
+	int err = 0;
+	void *ptr;
+	u32 index;
+
+	raw_spin_lock_irqsave(&qs->lock, flags);
+
+	if (queue_stack_map_is_empty(qs)) {
+		err = -ENOENT;
+		goto out;
+	}
+
+	index = qs->head - 1;
+	if (unlikely(index >= qs->size))
+		index = qs->size - 1;
+
+	ptr = &qs->elements[index * qs->map.value_size];
+	memcpy(value, ptr, qs->map.value_size);
+
+	if (delete)
+		qs->head = index;
+
+out:
+	raw_spin_unlock_irqrestore(&qs->lock, flags);
+	return err;
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_map_peek_elem(struct bpf_map *map, void *value)
+{
+	return __queue_map_get(map, value, false);
+}
+
+/* Called from syscall or from eBPF program */
+static int stack_map_peek_elem(struct bpf_map *map, void *value)
+{
+	return __stack_map_get(map, value, false);
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_map_pop_elem(struct bpf_map *map, void *value)
+{
+	return __queue_map_get(map, value, true);
+}
+
+/* Called from syscall or from eBPF program */
+static int stack_map_pop_elem(struct bpf_map *map, void *value)
+{
+	return __stack_map_get(map, value, true);
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_stack_map_push_elem(struct bpf_map *map, void *value,
+				     u64 flags)
+{
+	struct bpf_queue_stack *qs = bpf_queue_stack(map);
+	unsigned long irq_flags;
+	int err = 0;
+	void *dst;
+
+	/* BPF_EXIST is used to force making room for a new element in case the
+	 * map is full
+	 */
+	bool replace = (flags & BPF_EXIST);
+
+	/* Check supported flags for queue and stack maps */
+	if (flags & BPF_NOEXIST || flags > BPF_EXIST)
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&qs->lock, irq_flags);
+
+	if (queue_stack_map_is_full(qs)) {
+		if (!replace) {
+			err = -E2BIG;
+			goto out;
+		}
+		/* advance tail pointer to overwrite oldest element */
+		if (unlikely(++qs->tail >= qs->size))
+			qs->tail = 0;
+	}
+
+	dst = &qs->elements[qs->head * qs->map.value_size];
+	memcpy(dst, value, qs->map.value_size);
+
+	if (unlikely(++qs->head >= qs->size))
+		qs->head = 0;
+
+out:
+	raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
+	return err;
+}
+
+/* Called from syscall or from eBPF program */
+static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	return NULL;
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_stack_map_update_elem(struct bpf_map *map, void *key,
+				       void *value, u64 flags)
+{
+	return -EINVAL;
+}
+
+/* Called from syscall or from eBPF program */
+static int queue_stack_map_delete_elem(struct bpf_map *map, void *key)
+{
+	return -EINVAL;
+}
+
+/* Called from syscall */
+static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
+					void *next_key)
+{
+	return -EINVAL;
+}
+
+const struct bpf_map_ops queue_map_ops = {
+	.map_alloc_check = queue_stack_map_alloc_check,
+	.map_alloc = queue_stack_map_alloc,
+	.map_free = queue_stack_map_free,
+	.map_lookup_elem = queue_stack_map_lookup_elem,
+	.map_update_elem = queue_stack_map_update_elem,
+	.map_delete_elem = queue_stack_map_delete_elem,
+	.map_push_elem = queue_stack_map_push_elem,
+	.map_pop_elem = queue_map_pop_elem,
+	.map_peek_elem = queue_map_peek_elem,
+	.map_get_next_key = queue_stack_map_get_next_key,
+};
+
+const struct bpf_map_ops stack_map_ops = {
+	.map_alloc_check = queue_stack_map_alloc_check,
+	.map_alloc = queue_stack_map_alloc,
+	.map_free = queue_stack_map_free,
+	.map_lookup_elem = queue_stack_map_lookup_elem,
+	.map_update_elem = queue_stack_map_update_elem,
+	.map_delete_elem = queue_stack_map_delete_elem,
+	.map_push_elem = queue_stack_map_push_elem,
+	.map_pop_elem = stack_map_pop_elem,
+	.map_peek_elem = stack_map_peek_elem,
+	.map_get_next_key = queue_stack_map_get_next_key,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 78d9dd95e25f..1617407f9ee5 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -727,6 +727,9 @@ static int map_lookup_elem(union bpf_attr *attr)
 		err = bpf_fd_htab_map_lookup_elem(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
 		err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
+	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+		   map->map_type == BPF_MAP_TYPE_STACK) {
+		err = map->ops->map_peek_elem(map, value);
 	} else {
 		rcu_read_lock();
 		ptr = map->ops->map_lookup_elem(map, key);
@@ -857,6 +860,9 @@ static int map_update_elem(union bpf_attr *attr)
 		/* rcu_read_lock() is not needed */
 		err = bpf_fd_reuseport_array_update_elem(map, key, value,
 							 attr->flags);
+	} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
+		   map->map_type == BPF_MAP_TYPE_STACK) {
+		err = map->ops->map_push_elem(map, value, attr->flags);
 	} else {
 		rcu_read_lock();
 		err = map->ops->map_update_elem(map, key, value, attr->flags);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d84c91ac3b70..7d6d9cf9ebd5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2324,6 +2324,13 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (func_id != BPF_FUNC_sk_select_reuseport)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_QUEUE:
+	case BPF_MAP_TYPE_STACK:
+		if (func_id != BPF_FUNC_map_peek_elem &&
+		    func_id != BPF_FUNC_map_pop_elem &&
+		    func_id != BPF_FUNC_map_push_elem)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -2380,6 +2387,13 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
 			goto error;
 		break;
+	case BPF_FUNC_map_peek_elem:
+	case BPF_FUNC_map_pop_elem:
+	case BPF_FUNC_map_push_elem:
+		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
+		    map->map_type != BPF_MAP_TYPE_STACK)
+			goto error;
+		break;
 	default:
 		break;
 	}
@@ -2675,7 +2689,10 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 	if (func_id != BPF_FUNC_tail_call &&
 	    func_id != BPF_FUNC_map_lookup_elem &&
 	    func_id != BPF_FUNC_map_update_elem &&
-	    func_id != BPF_FUNC_map_delete_elem)
+	    func_id != BPF_FUNC_map_delete_elem &&
+	    func_id != BPF_FUNC_map_push_elem &&
+	    func_id != BPF_FUNC_map_pop_elem &&
+	    func_id != BPF_FUNC_map_peek_elem)
 		return 0;
 
 	if (meta->map_ptr == NULL) {
diff --git a/net/core/filter.c b/net/core/filter.c
index 1a3ac6c46873..ea48ec789b5c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4876,6 +4876,12 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_map_update_elem_proto;
 	case BPF_FUNC_map_delete_elem:
 		return &bpf_map_delete_elem_proto;
+	case BPF_FUNC_map_push_elem:
+		return &bpf_map_push_elem_proto;
+	case BPF_FUNC_map_pop_elem:
+		return &bpf_map_pop_elem_proto;
+	case BPF_FUNC_map_peek_elem:
+		return &bpf_map_peek_elem_proto;
 	case BPF_FUNC_get_prandom_u32:
 		return &bpf_get_prandom_u32_proto;
 	case BPF_FUNC_get_smp_processor_id:
-- 
cgit v1.2.3


From b39b5f411dcfce28ff954e5d6acb2c11be3cb0ec Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 19 Oct 2018 09:57:57 -0700
Subject: bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB

BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the
skb. This patch enables direct access of skb for these programs.

Two helper functions bpf_compute_and_save_data_end() and
bpf_restore_data_end() are introduced. There are used in
__cgroup_bpf_run_filter_skb(), to compute proper data_end for the
BPF program, and restore original data afterwards.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/filter.h | 21 +++++++++++++++++++++
 kernel/bpf/cgroup.c    |  6 ++++++
 net/core/filter.c      | 36 +++++++++++++++++++++++++++++++++++-
 3 files changed, 62 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5771874bc01e..91b4c934f02e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -548,6 +548,27 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb)
 	cb->data_end  = skb->data + skb_headlen(skb);
 }
 
+/* Similar to bpf_compute_data_pointers(), except that save orginal
+ * data in cb->data and cb->meta_data for restore.
+ */
+static inline void bpf_compute_and_save_data_end(
+	struct sk_buff *skb, void **saved_data_end)
+{
+	struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+	*saved_data_end = cb->data_end;
+	cb->data_end  = skb->data + skb_headlen(skb);
+}
+
+/* Restore data saved by bpf_compute_data_pointers(). */
+static inline void bpf_restore_data_end(
+	struct sk_buff *skb, void *saved_data_end)
+{
+	struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+	cb->data_end = saved_data_end;
+}
+
 static inline u8 *bpf_skb_cb(struct sk_buff *skb)
 {
 	/* eBPF programs may read/write skb->cb[] area to transfer meta
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 00f6ed2e4f9a..9425c2fb872f 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -553,6 +553,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 {
 	unsigned int offset = skb->data - skb_network_header(skb);
 	struct sock *save_sk;
+	void *saved_data_end;
 	struct cgroup *cgrp;
 	int ret;
 
@@ -566,8 +567,13 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 	save_sk = skb->sk;
 	skb->sk = sk;
 	__skb_push(skb, offset);
+
+	/* compute pointers for the bpf prog */
+	bpf_compute_and_save_data_end(skb, &saved_data_end);
+
 	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
 				 bpf_prog_run_save_cb);
+	bpf_restore_data_end(skb, saved_data_end);
 	__skb_pull(skb, offset);
 	skb->sk = save_sk;
 	return ret == 1 ? 0 : -EPERM;
diff --git a/net/core/filter.c b/net/core/filter.c
index ea48ec789b5c..5fd5139e8638 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5352,6 +5352,40 @@ static bool sk_filter_is_valid_access(int off, int size,
 	return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
+static bool cg_skb_is_valid_access(int off, int size,
+				   enum bpf_access_type type,
+				   const struct bpf_prog *prog,
+				   struct bpf_insn_access_aux *info)
+{
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, tc_classid):
+	case bpf_ctx_range(struct __sk_buff, data_meta):
+	case bpf_ctx_range(struct __sk_buff, flow_keys):
+		return false;
+	}
+	if (type == BPF_WRITE) {
+		switch (off) {
+		case bpf_ctx_range(struct __sk_buff, mark):
+		case bpf_ctx_range(struct __sk_buff, priority):
+		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+			break;
+		default:
+			return false;
+		}
+	}
+
+	switch (off) {
+	case bpf_ctx_range(struct __sk_buff, data):
+		info->reg_type = PTR_TO_PACKET;
+		break;
+	case bpf_ctx_range(struct __sk_buff, data_end):
+		info->reg_type = PTR_TO_PACKET_END;
+		break;
+	}
+
+	return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
 static bool lwt_is_valid_access(int off, int size,
 				enum bpf_access_type type,
 				const struct bpf_prog *prog,
@@ -7044,7 +7078,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
 
 const struct bpf_verifier_ops cg_skb_verifier_ops = {
 	.get_func_proto		= cg_skb_func_proto,
-	.is_valid_access	= sk_filter_is_valid_access,
+	.is_valid_access	= cg_skb_is_valid_access,
 	.convert_ctx_access	= bpf_convert_ctx_access,
 };
 
-- 
cgit v1.2.3


From 5032d079909d1ac5c2535acc32d5f01cd245d8ea Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Thu, 18 Oct 2018 13:58:35 -0700
Subject: bpf: skmsg, fix psock create on existing kcm/tls port

Before using the psock returned by sk_psock_get() when adding it to a
sockmap we need to ensure it is actually a sockmap based psock.
Previously we were only checking this after incrementing the reference
counter which was an error. This resulted in a slab-out-of-bounds
error when the psock was not actually a sockmap type.

This moves the check up so the reference counter is only used
if it is a sockmap psock.

Eric reported the following KASAN BUG,

BUG: KASAN: slab-out-of-bounds in atomic_read include/asm-generic/atomic-instrumented.h:21 [inline]
BUG: KASAN: slab-out-of-bounds in refcount_inc_not_zero_checked+0x97/0x2f0 lib/refcount.c:120
Read of size 4 at addr ffff88019548be58 by task syz-executor4/22387

CPU: 1 PID: 22387 Comm: syz-executor4 Not tainted 4.19.0-rc7+ #264
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113
 print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256
 kasan_report_error mm/kasan/report.c:354 [inline]
 kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412
 check_memory_region_inline mm/kasan/kasan.c:260 [inline]
 check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267
 kasan_check_read+0x11/0x20 mm/kasan/kasan.c:272
 atomic_read include/asm-generic/atomic-instrumented.h:21 [inline]
 refcount_inc_not_zero_checked+0x97/0x2f0 lib/refcount.c:120
 sk_psock_get include/linux/skmsg.h:379 [inline]
 sock_map_link.isra.6+0x41f/0xe30 net/core/sock_map.c:178
 sock_hash_update_common+0x19b/0x11e0 net/core/sock_map.c:669
 sock_hash_update_elem+0x306/0x470 net/core/sock_map.c:738
 map_update_elem+0x819/0xdf0 kernel/bpf/syscall.c:818

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/skmsg.h | 25 ++++++++++++++++++++-----
 net/core/sock_map.c   | 11 ++++++-----
 2 files changed, 26 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 22347b08e1f8..84e18863f6a4 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -270,11 +270,6 @@ static inline struct sk_psock *sk_psock(const struct sock *sk)
 	return rcu_dereference_sk_user_data(sk);
 }
 
-static inline bool sk_has_psock(struct sock *sk)
-{
-	return sk_psock(sk) != NULL && sk->sk_prot->recvmsg == tcp_bpf_recvmsg;
-}
-
 static inline void sk_psock_queue_msg(struct sk_psock *psock,
 				      struct sk_msg *msg)
 {
@@ -374,6 +369,26 @@ static inline bool sk_psock_test_state(const struct sk_psock *psock,
 	return test_bit(bit, &psock->state);
 }
 
+static inline struct sk_psock *sk_psock_get_checked(struct sock *sk)
+{
+	struct sk_psock *psock;
+
+	rcu_read_lock();
+	psock = sk_psock(sk);
+	if (psock) {
+		if (sk->sk_prot->recvmsg != tcp_bpf_recvmsg) {
+			psock = ERR_PTR(-EBUSY);
+			goto out;
+		}
+
+		if (!refcount_inc_not_zero(&psock->refcnt))
+			psock = ERR_PTR(-EBUSY);
+	}
+out:
+	rcu_read_unlock();
+	return psock;
+}
+
 static inline struct sk_psock *sk_psock_get(struct sock *sk)
 {
 	struct sk_psock *psock;
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 3c0e44cb811a..be6092ac69f8 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -175,12 +175,13 @@ static int sock_map_link(struct bpf_map *map, struct sk_psock_progs *progs,
 		}
 	}
 
-	psock = sk_psock_get(sk);
+	psock = sk_psock_get_checked(sk);
+	if (IS_ERR(psock)) {
+		ret = PTR_ERR(psock);
+		goto out_progs;
+	}
+
 	if (psock) {
-		if (!sk_has_psock(sk)) {
-			ret = -EBUSY;
-			goto out_progs;
-		}
 		if ((msg_parser && READ_ONCE(psock->progs.msg_parser)) ||
 		    (skb_progs  && READ_ONCE(psock->progs.skb_parser))) {
 			sk_psock_put(sk, psock);
-- 
cgit v1.2.3


From c9fbd71f73094311b31ee703a918e9e0df502cef Mon Sep 17 00:00:00 2001
From: Debabrata Banerjee <dbanerje@akamai.com>
Date: Thu, 18 Oct 2018 11:18:26 -0400
Subject: netpoll: allow cleanup to be synchronous

This fixes a problem introduced by:
commit 2cde6acd49da ("netpoll: Fix __netpoll_rcu_free so that it can hold the rtnl lock")

When using netconsole on a bond, __netpoll_cleanup can asynchronously
recurse multiple times, each __netpoll_free_async call can result in
more __netpoll_free_async's. This means there is now a race between
cleanup_work queues on multiple netpoll_info's on multiple devices and
the configuration of a new netpoll. For example if a netconsole is set
to enable 0, reconfigured, and enable 1 immediately, this netconsole
will likely not work.

Given the reason for __netpoll_free_async is it can be called when rtnl
is not locked, if it is locked, we should be able to execute
synchronously. It appears to be locked everywhere it's called from.

Generalize the design pattern from the teaming driver for current
callers of __netpoll_free_async.

CC: Neil Horman <nhorman@tuxdriver.com>
CC: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Debabrata Banerjee <dbanerje@akamai.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c |  3 ++-
 drivers/net/macvlan.c           |  2 +-
 drivers/net/team/team.c         |  5 +----
 include/linux/netpoll.h         |  4 +---
 net/8021q/vlan_dev.c            |  3 +--
 net/bridge/br_device.c          |  2 +-
 net/core/netpoll.c              | 21 +++++----------------
 net/dsa/slave.c                 |  2 +-
 8 files changed, 13 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index ee28ec9e0aba..ffa37adb7681 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -963,7 +963,8 @@ static inline void slave_disable_netpoll(struct slave *slave)
 		return;
 
 	slave->np = NULL;
-	__netpoll_free_async(np);
+
+	__netpoll_free(np);
 }
 
 static void bond_poll_controller(struct net_device *bond_dev)
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index cfda146f3b3b..fc8d5f1ee1ad 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1077,7 +1077,7 @@ static void macvlan_dev_netpoll_cleanup(struct net_device *dev)
 
 	vlan->netpoll = NULL;
 
-	__netpoll_free_async(netpoll);
+	__netpoll_free(netpoll);
 }
 #endif	/* CONFIG_NET_POLL_CONTROLLER */
 
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index d887016e54b6..db633ae9f784 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1104,10 +1104,7 @@ static void team_port_disable_netpoll(struct team_port *port)
 		return;
 	port->np = NULL;
 
-	/* Wait for transmitting packets to finish before freeing. */
-	synchronize_rcu_bh();
-	__netpoll_cleanup(np);
-	kfree(np);
+	__netpoll_free(np);
 }
 #else
 static int team_port_enable_netpoll(struct team_port *port)
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 3ef82d3a78db..676f1ff161a9 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -31,8 +31,6 @@ struct netpoll {
 	bool ipv6;
 	u16 local_port, remote_port;
 	u8 remote_mac[ETH_ALEN];
-
-	struct work_struct cleanup_work;
 };
 
 struct netpoll_info {
@@ -63,7 +61,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt);
 int __netpoll_setup(struct netpoll *np, struct net_device *ndev);
 int netpoll_setup(struct netpoll *np);
 void __netpoll_cleanup(struct netpoll *np);
-void __netpoll_free_async(struct netpoll *np);
+void __netpoll_free(struct netpoll *np);
 void netpoll_cleanup(struct netpoll *np);
 void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 			     struct net_device *dev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 546af0e73ac3..ff720f1ebf73 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -756,8 +756,7 @@ static void vlan_dev_netpoll_cleanup(struct net_device *dev)
 		return;
 
 	vlan->netpoll = NULL;
-
-	__netpoll_free_async(netpoll);
+	__netpoll_free(netpoll);
 }
 #endif /* CONFIG_NET_POLL_CONTROLLER */
 
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index e053a4e43758..c6abf927f0c9 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -344,7 +344,7 @@ void br_netpoll_disable(struct net_bridge_port *p)
 
 	p->np = NULL;
 
-	__netpoll_free_async(np);
+	__netpoll_free(np);
 }
 
 #endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 3ae899805f8b..5da9552b186b 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -57,7 +57,6 @@ DEFINE_STATIC_SRCU(netpoll_srcu);
 	 MAX_UDP_CHUNK)
 
 static void zap_completion_queue(void);
-static void netpoll_async_cleanup(struct work_struct *work);
 
 static unsigned int carrier_timeout = 4;
 module_param(carrier_timeout, uint, 0644);
@@ -589,7 +588,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
 
 	np->dev = ndev;
 	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
-	INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);
 
 	if (ndev->priv_flags & IFF_DISABLE_NETPOLL) {
 		np_err(np, "%s doesn't support polling, aborting\n",
@@ -788,10 +786,6 @@ void __netpoll_cleanup(struct netpoll *np)
 {
 	struct netpoll_info *npinfo;
 
-	/* rtnl_dereference would be preferable here but
-	 * rcu_cleanup_netpoll path can put us in here safely without
-	 * holding the rtnl, so plain rcu_dereference it is
-	 */
 	npinfo = rtnl_dereference(np->dev->npinfo);
 	if (!npinfo)
 		return;
@@ -812,21 +806,16 @@ void __netpoll_cleanup(struct netpoll *np)
 }
 EXPORT_SYMBOL_GPL(__netpoll_cleanup);
 
-static void netpoll_async_cleanup(struct work_struct *work)
+void __netpoll_free(struct netpoll *np)
 {
-	struct netpoll *np = container_of(work, struct netpoll, cleanup_work);
+	ASSERT_RTNL();
 
-	rtnl_lock();
+	/* Wait for transmitting packets to finish before freeing. */
+	synchronize_rcu_bh();
 	__netpoll_cleanup(np);
-	rtnl_unlock();
 	kfree(np);
 }
-
-void __netpoll_free_async(struct netpoll *np)
-{
-	schedule_work(&np->cleanup_work);
-}
-EXPORT_SYMBOL_GPL(__netpoll_free_async);
+EXPORT_SYMBOL_GPL(__netpoll_free);
 
 void netpoll_cleanup(struct netpoll *np)
 {
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 5428ef529019..7d0c19e7edcf 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -722,7 +722,7 @@ static void dsa_slave_netpoll_cleanup(struct net_device *dev)
 
 	p->netpoll = NULL;
 
-	__netpoll_free_async(netpoll);
+	__netpoll_free(netpoll);
 }
 
 static void dsa_slave_poll_controller(struct net_device *dev)
-- 
cgit v1.2.3


From bff5b4b3737219195ca0caef4ff7884303cb5dc1 Mon Sep 17 00:00:00 2001
From: Yuiko Oshino <yuiko.oshino@microchip.com>
Date: Thu, 18 Oct 2018 15:06:01 -0400
Subject: net: phy: micrel: add Microchip KSZ9131 initial driver

Add support for Microchip Technology KSZ9131 10/100/1000 Ethernet PHY

Signed-off-by: Yuiko Oshino <yuiko.oshino@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c   | 130 ++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/micrel_phy.h |   1 +
 2 files changed, 130 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 3db06b40580d..9265dea79412 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -14,7 +14,7 @@
  * option) any later version.
  *
  * Support : Micrel Phys:
- *		Giga phys: ksz9021, ksz9031
+ *		Giga phys: ksz9021, ksz9031, ksz9131
  *		100/10 Phys : ksz8001, ksz8721, ksz8737, ksz8041
  *			   ksz8021, ksz8031, ksz8051,
  *			   ksz8081, ksz8091,
@@ -609,6 +609,116 @@ err_force_master:
 	return result;
 }
 
+#define KSZ9131_SKEW_5BIT_MAX	2400
+#define KSZ9131_SKEW_4BIT_MAX	800
+#define KSZ9131_OFFSET		700
+#define KSZ9131_STEP		100
+
+static int ksz9131_of_load_skew_values(struct phy_device *phydev,
+				       struct device_node *of_node,
+				       u16 reg, size_t field_sz,
+				       char *field[], u8 numfields)
+{
+	int val[4] = {-(1 + KSZ9131_OFFSET), -(2 + KSZ9131_OFFSET),
+		      -(3 + KSZ9131_OFFSET), -(4 + KSZ9131_OFFSET)};
+	int skewval, skewmax = 0;
+	int matches = 0;
+	u16 maxval;
+	u16 newval;
+	u16 mask;
+	int i;
+
+	/* psec properties in dts should mean x pico seconds */
+	if (field_sz == 5)
+		skewmax = KSZ9131_SKEW_5BIT_MAX;
+	else
+		skewmax = KSZ9131_SKEW_4BIT_MAX;
+
+	for (i = 0; i < numfields; i++)
+		if (!of_property_read_s32(of_node, field[i], &skewval)) {
+			if (skewval < -KSZ9131_OFFSET)
+				skewval = -KSZ9131_OFFSET;
+			else if (skewval > skewmax)
+				skewval = skewmax;
+
+			val[i] = skewval + KSZ9131_OFFSET;
+			matches++;
+		}
+
+	if (!matches)
+		return 0;
+
+	if (matches < numfields)
+		newval = ksz9031_extended_read(phydev, OP_DATA, 2, reg);
+	else
+		newval = 0;
+
+	maxval = (field_sz == 4) ? 0xf : 0x1f;
+	for (i = 0; i < numfields; i++)
+		if (val[i] != -(i + 1 + KSZ9131_OFFSET)) {
+			mask = 0xffff;
+			mask ^= maxval << (field_sz * i);
+			newval = (newval & mask) |
+				(((val[i] / KSZ9131_STEP) & maxval)
+					<< (field_sz * i));
+		}
+
+	return ksz9031_extended_write(phydev, OP_DATA, 2, reg, newval);
+}
+
+static int ksz9131_config_init(struct phy_device *phydev)
+{
+	const struct device *dev = &phydev->mdio.dev;
+	struct device_node *of_node = dev->of_node;
+	char *clk_skews[2] = {"rxc-skew-psec", "txc-skew-psec"};
+	char *rx_data_skews[4] = {
+		"rxd0-skew-psec", "rxd1-skew-psec",
+		"rxd2-skew-psec", "rxd3-skew-psec"
+	};
+	char *tx_data_skews[4] = {
+		"txd0-skew-psec", "txd1-skew-psec",
+		"txd2-skew-psec", "txd3-skew-psec"
+	};
+	char *control_skews[2] = {"txen-skew-psec", "rxdv-skew-psec"};
+	const struct device *dev_walker;
+	int ret;
+
+	dev_walker = &phydev->mdio.dev;
+	do {
+		of_node = dev_walker->of_node;
+		dev_walker = dev_walker->parent;
+	} while (!of_node && dev_walker);
+
+	if (!of_node)
+		return 0;
+
+	ret = ksz9131_of_load_skew_values(phydev, of_node,
+					  MII_KSZ9031RN_CLK_PAD_SKEW, 5,
+					  clk_skews, 2);
+	if (ret < 0)
+		return ret;
+
+	ret = ksz9131_of_load_skew_values(phydev, of_node,
+					  MII_KSZ9031RN_CONTROL_PAD_SKEW, 4,
+					  control_skews, 2);
+	if (ret < 0)
+		return ret;
+
+	ret = ksz9131_of_load_skew_values(phydev, of_node,
+					  MII_KSZ9031RN_RX_DATA_PAD_SKEW, 4,
+					  rx_data_skews, 4);
+	if (ret < 0)
+		return ret;
+
+	ret = ksz9131_of_load_skew_values(phydev, of_node,
+					  MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4,
+					  tx_data_skews, 4);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
 #define KSZ8873MLL_GLOBAL_CONTROL_4	0x06
 #define KSZ8873MLL_GLOBAL_CONTROL_4_DUPLEX	BIT(6)
 #define KSZ8873MLL_GLOBAL_CONTROL_4_SPEED	BIT(4)
@@ -974,6 +1084,23 @@ static struct phy_driver ksphy_driver[] = {
 	.get_stats	= kszphy_get_stats,
 	.suspend	= genphy_suspend,
 	.resume		= kszphy_resume,
+}, {
+	.phy_id		= PHY_ID_KSZ9131,
+	.phy_id_mask	= MICREL_PHY_ID_MASK,
+	.name		= "Microchip KSZ9131 Gigabit PHY",
+	.features	= PHY_GBIT_FEATURES,
+	.flags		= PHY_HAS_INTERRUPT,
+	.driver_data	= &ksz9021_type,
+	.probe		= kszphy_probe,
+	.config_init	= ksz9131_config_init,
+	.read_status	= ksz9031_read_status,
+	.ack_interrupt	= kszphy_ack_interrupt,
+	.config_intr	= kszphy_config_intr,
+	.get_sset_count = kszphy_get_sset_count,
+	.get_strings	= kszphy_get_strings,
+	.get_stats	= kszphy_get_stats,
+	.suspend	= genphy_suspend,
+	.resume		= kszphy_resume,
 }, {
 	.phy_id		= PHY_ID_KSZ8873MLL,
 	.phy_id_mask	= MICREL_PHY_ID_MASK,
@@ -1022,6 +1149,7 @@ MODULE_LICENSE("GPL");
 static struct mdio_device_id __maybe_unused micrel_tbl[] = {
 	{ PHY_ID_KSZ9021, 0x000ffffe },
 	{ PHY_ID_KSZ9031, MICREL_PHY_ID_MASK },
+	{ PHY_ID_KSZ9131, MICREL_PHY_ID_MASK },
 	{ PHY_ID_KSZ8001, 0x00fffffc },
 	{ PHY_ID_KS8737, MICREL_PHY_ID_MASK },
 	{ PHY_ID_KSZ8021, 0x00ffffff },
diff --git a/include/linux/micrel_phy.h b/include/linux/micrel_phy.h
index 472fa4d4ea62..7361cd3fddc1 100644
--- a/include/linux/micrel_phy.h
+++ b/include/linux/micrel_phy.h
@@ -31,6 +31,7 @@
 #define PHY_ID_KSZ8081		0x00221560
 #define PHY_ID_KSZ8061		0x00221570
 #define PHY_ID_KSZ9031		0x00221620
+#define PHY_ID_KSZ9131		0x00221640
 
 #define PHY_ID_KSZ886X		0x00221430
 #define PHY_ID_KSZ8863		0x00221435
-- 
cgit v1.2.3


From 6fff607e2f14bd7c63c06c464a6f93b8efbabe28 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 19 Oct 2018 19:56:49 -0700
Subject: bpf: sk_msg program helper bpf_msg_push_data

This allows user to push data into a msg using sk_msg program types.
The format is as follows,

	bpf_msg_push_data(msg, offset, len, flags)

this will insert 'len' bytes at offset 'offset'. For example to
prepend 10 bytes at the front of the message the user can,

	bpf_msg_push_data(msg, 0, 10, 0);

This will invalidate data bounds so BPF user will have to then recheck
data bounds after calling this. After this the msg size will have been
updated and the user is free to write into the added bytes. We allow
any offset/len as long as it is within the (data, data_end) range.
However, a copy will be required if the ring is full and its possible
for the helper to fail with ENOMEM or EINVAL errors which need to be
handled by the BPF program.

This can be used similar to XDP metadata to pass data between sk_msg
layer and lower layers.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/skmsg.h    |   5 ++
 include/uapi/linux/bpf.h |  20 ++++++-
 net/core/filter.c        | 134 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 158 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 84e18863f6a4..2a11e9d91dfa 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -207,6 +207,11 @@ static inline struct scatterlist *sk_msg_elem(struct sk_msg *msg, int which)
 	return &msg->sg.data[which];
 }
 
+static inline struct scatterlist sk_msg_elem_cpy(struct sk_msg *msg, int which)
+{
+	return msg->sg.data[which];
+}
+
 static inline struct page *sk_msg_page(struct sk_msg *msg, int which)
 {
 	return sg_page(sk_msg_elem(msg, which));
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a2fb333290dc..852dc17ab47a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2240,6 +2240,23 @@ union bpf_attr {
  *		pointer that was returned from bpf_sk_lookup_xxx\ ().
  *	Return
  *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_push_data(struct sk_buff *skb, u32 start, u32 len, u64 flags)
+ *	Description
+ *		For socket policies, insert *len* bytes into msg at offset
+ *		*start*.
+ *
+ *		If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ *		*msg* it may want to insert metadata or options into the msg.
+ *		This can later be read and used by any of the lower layer BPF
+ *		hooks.
+ *
+ *		This helper may fail if under memory pressure (a malloc
+ *		fails) in these cases BPF programs will get an appropriate
+ *		error and BPF programs will need to handle them.
+ *
+ *	Return
+ *		0 on success, or a negative error in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2331,7 +2348,8 @@ union bpf_attr {
 	FN(sk_release),			\
 	FN(map_push_elem),		\
 	FN(map_pop_elem),		\
-	FN(map_peek_elem),
+	FN(map_peek_elem),		\
+	FN(msg_push_data),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 5fd5139e8638..35c6933c2622 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2297,6 +2297,137 @@ static const struct bpf_func_proto bpf_msg_pull_data_proto = {
 	.arg4_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
+	   u32, len, u64, flags)
+{
+	struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge;
+	u32 new, i = 0, l, space, copy = 0, offset = 0;
+	u8 *raw, *to, *from;
+	struct page *page;
+
+	if (unlikely(flags))
+		return -EINVAL;
+
+	/* First find the starting scatterlist element */
+	i = msg->sg.start;
+	do {
+		l = sk_msg_elem(msg, i)->length;
+
+		if (start < offset + l)
+			break;
+		offset += l;
+		sk_msg_iter_var_next(i);
+	} while (i != msg->sg.end);
+
+	if (start >= offset + l)
+		return -EINVAL;
+
+	space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
+
+	/* If no space available will fallback to copy, we need at
+	 * least one scatterlist elem available to push data into
+	 * when start aligns to the beginning of an element or two
+	 * when it falls inside an element. We handle the start equals
+	 * offset case because its the common case for inserting a
+	 * header.
+	 */
+	if (!space || (space == 1 && start != offset))
+		copy = msg->sg.data[i].length;
+
+	page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
+			   get_order(copy + len));
+	if (unlikely(!page))
+		return -ENOMEM;
+
+	if (copy) {
+		int front, back;
+
+		raw = page_address(page);
+
+		psge = sk_msg_elem(msg, i);
+		front = start - offset;
+		back = psge->length - front;
+		from = sg_virt(psge);
+
+		if (front)
+			memcpy(raw, from, front);
+
+		if (back) {
+			from += front;
+			to = raw + front + len;
+
+			memcpy(to, from, back);
+		}
+
+		put_page(sg_page(psge));
+	} else if (start - offset) {
+		psge = sk_msg_elem(msg, i);
+		rsge = sk_msg_elem_cpy(msg, i);
+
+		psge->length = start - offset;
+		rsge.length -= psge->length;
+		rsge.offset += start;
+
+		sk_msg_iter_var_next(i);
+		sg_unmark_end(psge);
+		sk_msg_iter_next(msg, end);
+	}
+
+	/* Slot(s) to place newly allocated data */
+	new = i;
+
+	/* Shift one or two slots as needed */
+	if (!copy) {
+		sge = sk_msg_elem_cpy(msg, i);
+
+		sk_msg_iter_var_next(i);
+		sg_unmark_end(&sge);
+		sk_msg_iter_next(msg, end);
+
+		nsge = sk_msg_elem_cpy(msg, i);
+		if (rsge.length) {
+			sk_msg_iter_var_next(i);
+			nnsge = sk_msg_elem_cpy(msg, i);
+		}
+
+		while (i != msg->sg.end) {
+			msg->sg.data[i] = sge;
+			sge = nsge;
+			sk_msg_iter_var_next(i);
+			if (rsge.length) {
+				nsge = nnsge;
+				nnsge = sk_msg_elem_cpy(msg, i);
+			} else {
+				nsge = sk_msg_elem_cpy(msg, i);
+			}
+		}
+	}
+
+	/* Place newly allocated data buffer */
+	sk_mem_charge(msg->sk, len);
+	msg->sg.size += len;
+	msg->sg.copy[new] = false;
+	sg_set_page(&msg->sg.data[new], page, len + copy, 0);
+	if (rsge.length) {
+		get_page(sg_page(&rsge));
+		sk_msg_iter_var_next(new);
+		msg->sg.data[new] = rsge;
+	}
+
+	sk_msg_compute_data_pointers(msg);
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_msg_push_data_proto = {
+	.func		= bpf_msg_push_data,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
 {
 	return task_get_classid(skb);
@@ -4854,6 +4985,7 @@ bool bpf_helper_changes_pkt_data(void *func)
 	    func == bpf_xdp_adjust_head ||
 	    func == bpf_xdp_adjust_meta ||
 	    func == bpf_msg_pull_data ||
+	    func == bpf_msg_push_data ||
 	    func == bpf_xdp_adjust_tail ||
 #if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
 	    func == bpf_lwt_seg6_store_bytes ||
@@ -5130,6 +5262,8 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_msg_cork_bytes_proto;
 	case BPF_FUNC_msg_pull_data:
 		return &bpf_msg_pull_data_proto;
+	case BPF_FUNC_msg_push_data:
+		return &bpf_msg_push_data_proto;
 	case BPF_FUNC_get_local_storage:
 		return &bpf_get_local_storage_proto;
 	default:
-- 
cgit v1.2.3


From b2c3fa546705944e748666b474ffdaebaec0569f Mon Sep 17 00:00:00 2001
From: Dennis Zhou <dennis@kernel.org>
Date: Sat, 20 Oct 2018 14:56:11 -0400
Subject: blkcg: fix edge case for blk_get_rl() under memory pressure

It is possible for blkg creation to fail when in blk_get_rl(). In this
situation, the fallback logic returns the nearest created blkg. There is
however special handling for the request_list for the root blkcg. This
fixes the missing edge case from the earlier series changing
blk_get_rl().

Fixes: e2b0989954ae ("blkcg: cleanup and make blk_get_rl use blkg_lookup_create")
Signed-off-by: Dennis Zhou <dennis@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/blk-cgroup.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index b7fd08013de2..1e76ceebeb5d 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -597,7 +597,7 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
 	if (unlikely(!blkg))
 		blkg = __blkg_lookup_create(blkcg, q);
 
-	if (!blkg_tryget(blkg))
+	if (blkg->blkcg == &blkcg_root || !blkg_tryget(blkg))
 		goto rl_use_root;
 
 	rcu_read_unlock();
-- 
cgit v1.2.3


From d459d853c2edc793135e4bfa4e345c758f1cc859 Mon Sep 17 00:00:00 2001
From: Dennis Zhou <dennis@kernel.org>
Date: Sat, 20 Oct 2018 14:56:12 -0400
Subject: blkcg: reassociate bios when make_request() is called recursively

When submitting a bio, multiple recursive calls to make_request() may
occur. This causes the initial associate done in blkcg_bio_issue_check()
to be incorrect and reference the prior request_queue. This introduces
a helper to do reassociation when make_request() is recursively called.

Fixes: a7b39b4e961c ("blkcg: always associate a bio with a blkg")
Reported-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Signed-off-by: Dennis Zhou <dennis@kernel.org>
Tested-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 20 ++++++++++++++++++++
 block/blk-core.c    |  1 +
 include/linux/bio.h |  3 +++
 3 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 17a8b0aa7050..bbfeb4ee2892 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -2083,6 +2083,26 @@ int bio_associate_create_blkg(struct request_queue *q, struct bio *bio)
 	return ret;
 }
 
+/**
+ * bio_reassociate_blkg - reassociate a bio with a blkg from q
+ * @q: request_queue where bio is going
+ * @bio: target bio
+ *
+ * When submitting a bio, multiple recursive calls to make_request() may occur.
+ * This causes the initial associate done in blkcg_bio_issue_check() to be
+ * incorrect and reference the prior request_queue.  This performs reassociation
+ * when this situation happens.
+ */
+int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
+{
+	if (bio->bi_blkg) {
+		blkg_put(bio->bi_blkg);
+		bio->bi_blkg = NULL;
+	}
+
+	return bio_associate_create_blkg(q, bio);
+}
+
 /**
  * bio_disassociate_task - undo bio_associate_current()
  * @bio: target bio
diff --git a/block/blk-core.c b/block/blk-core.c
index cdfabc5646da..3ed60723e242 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2433,6 +2433,7 @@ blk_qc_t generic_make_request(struct bio *bio)
 			if (q)
 				blk_queue_exit(q);
 			q = bio->bi_disk->queue;
+			bio_reassociate_blkg(q, bio);
 			flags = 0;
 			if (bio->bi_opf & REQ_NOWAIT)
 				flags = BLK_MQ_REQ_NOWAIT;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index f447b0ebb288..b47c7f716731 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -514,6 +514,7 @@ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
 int bio_associate_blkg_from_css(struct bio *bio,
 				struct cgroup_subsys_state *css);
 int bio_associate_create_blkg(struct request_queue *q, struct bio *bio);
+int bio_reassociate_blkg(struct request_queue *q, struct bio *bio);
 void bio_disassociate_task(struct bio *bio);
 void bio_clone_blkg_association(struct bio *dst, struct bio *src);
 #else	/* CONFIG_BLK_CGROUP */
@@ -522,6 +523,8 @@ static inline int bio_associate_blkg_from_css(struct bio *bio,
 { return 0; }
 static inline int bio_associate_create_blkg(struct request_queue *q,
 					    struct bio *bio) { return 0; }
+static inline int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
+{ return 0; }
 static inline void bio_disassociate_task(struct bio *bio) { }
 static inline void bio_clone_blkg_association(struct bio *dst,
 					      struct bio *src) { }
-- 
cgit v1.2.3


From f8d5d0cc145cc21bfc56ef807dc28102aebbf228 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 7 Nov 2017 16:30:10 -0500
Subject: xarray: Add definition of struct xarray

This is a direct replacement for struct radix_tree_root.  Some of the
struct members have changed name; convert those, and use a #define so
that radix_tree users continue to work without change.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Josef Bacik <jbacik@fb.com>
---
 include/linux/radix-tree.h               | 28 ++++--------
 include/linux/xarray.h                   | 70 +++++++++++++++++++++++++++++
 lib/Makefile                             |  2 +-
 lib/idr.c                                |  4 +-
 lib/radix-tree.c                         | 75 ++++++++++++++++----------------
 lib/xarray.c                             | 44 +++++++++++++++++++
 tools/testing/radix-tree/Makefile        |  5 ++-
 tools/testing/radix-tree/linux/bug.h     |  1 +
 tools/testing/radix-tree/linux/kconfig.h |  1 +
 tools/testing/radix-tree/multiorder.c    |  6 +--
 tools/testing/radix-tree/test.c          |  6 +--
 tools/testing/radix-tree/xarray.c        |  7 +++
 12 files changed, 181 insertions(+), 68 deletions(-)
 create mode 100644 lib/xarray.c
 create mode 100644 tools/testing/radix-tree/linux/kconfig.h
 create mode 100644 tools/testing/radix-tree/xarray.c

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 60f3d8eb2cb7..0b080bd88ab7 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -30,6 +30,9 @@
 #include <linux/types.h>
 #include <linux/xarray.h>
 
+/* Keep unconverted code working */
+#define radix_tree_root		xarray
+
 /*
  * The bottom two bits of the slot determine how the remaining bits in the
  * slot are interpreted:
@@ -92,36 +95,21 @@ struct radix_tree_node {
 	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
 };
 
-/* The IDR tag is stored in the low bits of the GFP flags */
+/* The IDR tag is stored in the low bits of xa_flags */
 #define ROOT_IS_IDR	((__force gfp_t)4)
-/* The top bits of gfp_mask are used to store the root tags */
+/* The top bits of xa_flags are used to store the root tags */
 #define ROOT_TAG_SHIFT	(__GFP_BITS_SHIFT)
 
-struct radix_tree_root {
-	spinlock_t		xa_lock;
-	gfp_t			gfp_mask;
-	struct radix_tree_node	__rcu *rnode;
-};
-
-#define RADIX_TREE_INIT(name, mask)	{				\
-	.xa_lock = __SPIN_LOCK_UNLOCKED(name.xa_lock),			\
-	.gfp_mask = (mask),						\
-	.rnode = NULL,							\
-}
+#define RADIX_TREE_INIT(name, mask)	XARRAY_INIT(name, mask)
 
 #define RADIX_TREE(name, mask) \
 	struct radix_tree_root name = RADIX_TREE_INIT(name, mask)
 
-#define INIT_RADIX_TREE(root, mask)					\
-do {									\
-	spin_lock_init(&(root)->xa_lock);				\
-	(root)->gfp_mask = (mask);					\
-	(root)->rnode = NULL;						\
-} while (0)
+#define INIT_RADIX_TREE(root, mask) xa_init_flags(root, mask)
 
 static inline bool radix_tree_empty(const struct radix_tree_root *root)
 {
-	return root->rnode == NULL;
+	return root->xa_head == NULL;
 }
 
 /**
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 4d1cd7a083e8..9122cf8bf52a 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -10,6 +10,8 @@
  */
 
 #include <linux/bug.h>
+#include <linux/compiler.h>
+#include <linux/kconfig.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
@@ -153,6 +155,74 @@ static inline bool xa_is_internal(const void *entry)
 	return ((unsigned long)entry & 3) == 2;
 }
 
+/**
+ * struct xarray - The anchor of the XArray.
+ * @xa_lock: Lock that protects the contents of the XArray.
+ *
+ * To use the xarray, define it statically or embed it in your data structure.
+ * It is a very small data structure, so it does not usually make sense to
+ * allocate it separately and keep a pointer to it in your data structure.
+ *
+ * You may use the xa_lock to protect your own data structures as well.
+ */
+/*
+ * If all of the entries in the array are NULL, @xa_head is a NULL pointer.
+ * If the only non-NULL entry in the array is at index 0, @xa_head is that
+ * entry.  If any other entry in the array is non-NULL, @xa_head points
+ * to an @xa_node.
+ */
+struct xarray {
+	spinlock_t	xa_lock;
+/* private: The rest of the data structure is not to be used directly. */
+	gfp_t		xa_flags;
+	void __rcu *	xa_head;
+};
+
+#define XARRAY_INIT(name, flags) {				\
+	.xa_lock = __SPIN_LOCK_UNLOCKED(name.xa_lock),		\
+	.xa_flags = flags,					\
+	.xa_head = NULL,					\
+}
+
+/**
+ * DEFINE_XARRAY_FLAGS() - Define an XArray with custom flags.
+ * @name: A string that names your XArray.
+ * @flags: XA_FLAG values.
+ *
+ * This is intended for file scope definitions of XArrays.  It declares
+ * and initialises an empty XArray with the chosen name and flags.  It is
+ * equivalent to calling xa_init_flags() on the array, but it does the
+ * initialisation at compiletime instead of runtime.
+ */
+#define DEFINE_XARRAY_FLAGS(name, flags)				\
+	struct xarray name = XARRAY_INIT(name, flags)
+
+/**
+ * DEFINE_XARRAY() - Define an XArray.
+ * @name: A string that names your XArray.
+ *
+ * This is intended for file scope definitions of XArrays.  It declares
+ * and initialises an empty XArray with the chosen name.  It is equivalent
+ * to calling xa_init() on the array, but it does the initialisation at
+ * compiletime instead of runtime.
+ */
+#define DEFINE_XARRAY(name) DEFINE_XARRAY_FLAGS(name, 0)
+
+void xa_init_flags(struct xarray *, gfp_t flags);
+
+/**
+ * xa_init() - Initialise an empty XArray.
+ * @xa: XArray.
+ *
+ * An empty XArray is full of NULL entries.
+ *
+ * Context: Any context.
+ */
+static inline void xa_init(struct xarray *xa)
+{
+	xa_init_flags(xa, 0);
+}
+
 #define xa_trylock(xa)		spin_trylock(&(xa)->xa_lock)
 #define xa_lock(xa)		spin_lock(&(xa)->xa_lock)
 #define xa_unlock(xa)		spin_unlock(&(xa)->xa_lock)
diff --git a/lib/Makefile b/lib/Makefile
index ca3f7ebb900d..057385f1f145 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -18,7 +18,7 @@ KCOV_INSTRUMENT_debugobjects.o := n
 KCOV_INSTRUMENT_dynamic_debug.o := n
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
-	 rbtree.o radix-tree.o timerqueue.o\
+	 rbtree.o radix-tree.o timerqueue.o xarray.o \
 	 idr.o int_sqrt.o extable.o \
 	 sha1.o chacha20.o irq_regs.o argv_split.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
diff --git a/lib/idr.c b/lib/idr.c
index 88419fbc5737..9a366b5be2c2 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -39,8 +39,8 @@ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid,
 	unsigned int base = idr->idr_base;
 	unsigned int id = *nextid;
 
-	if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR)))
-		idr->idr_rt.gfp_mask |= IDR_RT_MARKER;
+	if (WARN_ON_ONCE(!(idr->idr_rt.xa_flags & ROOT_IS_IDR)))
+		idr->idr_rt.xa_flags |= IDR_RT_MARKER;
 
 	id = (id < base) ? 0 : id - base;
 	radix_tree_iter_init(&iter, id);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 59b28111eabc..299d4bdba109 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -124,7 +124,7 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
 
 static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
 {
-	return root->gfp_mask & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
+	return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
 }
 
 static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
@@ -147,32 +147,32 @@ static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
 
 static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
 {
-	root->gfp_mask |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
+	root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
 }
 
 static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
 {
-	root->gfp_mask &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
+	root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
 }
 
 static inline void root_tag_clear_all(struct radix_tree_root *root)
 {
-	root->gfp_mask &= (1 << ROOT_TAG_SHIFT) - 1;
+	root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1);
 }
 
 static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
 {
-	return (__force int)root->gfp_mask & (1 << (tag + ROOT_TAG_SHIFT));
+	return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT));
 }
 
 static inline unsigned root_tags_get(const struct radix_tree_root *root)
 {
-	return (__force unsigned)root->gfp_mask >> ROOT_TAG_SHIFT;
+	return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT;
 }
 
 static inline bool is_idr(const struct radix_tree_root *root)
 {
-	return !!(root->gfp_mask & ROOT_IS_IDR);
+	return !!(root->xa_flags & ROOT_IS_IDR);
 }
 
 /*
@@ -291,12 +291,12 @@ static void dump_node(struct radix_tree_node *node, unsigned long index)
 /* For debug */
 static void radix_tree_dump(struct radix_tree_root *root)
 {
-	pr_debug("radix root: %p rnode %p tags %x\n",
-			root, root->rnode,
-			root->gfp_mask >> ROOT_TAG_SHIFT);
-	if (!radix_tree_is_internal_node(root->rnode))
+	pr_debug("radix root: %p xa_head %p tags %x\n",
+			root, root->xa_head,
+			root->xa_flags >> ROOT_TAG_SHIFT);
+	if (!radix_tree_is_internal_node(root->xa_head))
 		return;
-	dump_node(entry_to_node(root->rnode), 0);
+	dump_node(entry_to_node(root->xa_head), 0);
 }
 
 static void dump_ida_node(void *entry, unsigned long index)
@@ -340,9 +340,9 @@ static void dump_ida_node(void *entry, unsigned long index)
 static void ida_dump(struct ida *ida)
 {
 	struct radix_tree_root *root = &ida->ida_rt;
-	pr_debug("ida: %p node %p free %d\n", ida, root->rnode,
-				root->gfp_mask >> ROOT_TAG_SHIFT);
-	dump_ida_node(root->rnode, 0);
+	pr_debug("ida: %p node %p free %d\n", ida, root->xa_head,
+				root->xa_flags >> ROOT_TAG_SHIFT);
+	dump_ida_node(root->xa_head, 0);
 }
 #endif
 
@@ -576,7 +576,7 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
 static unsigned radix_tree_load_root(const struct radix_tree_root *root,
 		struct radix_tree_node **nodep, unsigned long *maxindex)
 {
-	struct radix_tree_node *node = rcu_dereference_raw(root->rnode);
+	struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
 
 	*nodep = node;
 
@@ -605,7 +605,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
 	while (index > shift_maxindex(maxshift))
 		maxshift += RADIX_TREE_MAP_SHIFT;
 
-	entry = rcu_dereference_raw(root->rnode);
+	entry = rcu_dereference_raw(root->xa_head);
 	if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
 		goto out;
 
@@ -633,7 +633,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
 		if (radix_tree_is_internal_node(entry)) {
 			entry_to_node(entry)->parent = node;
 		} else if (xa_is_value(entry)) {
-			/* Moving an exceptional root->rnode to a node */
+			/* Moving an exceptional root->xa_head to a node */
 			node->exceptional = 1;
 		}
 		/*
@@ -642,7 +642,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
 		 */
 		node->slots[0] = (void __rcu *)entry;
 		entry = node_to_entry(node);
-		rcu_assign_pointer(root->rnode, entry);
+		rcu_assign_pointer(root->xa_head, entry);
 		shift += RADIX_TREE_MAP_SHIFT;
 	} while (shift <= maxshift);
 out:
@@ -659,7 +659,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
 	bool shrunk = false;
 
 	for (;;) {
-		struct radix_tree_node *node = rcu_dereference_raw(root->rnode);
+		struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
 		struct radix_tree_node *child;
 
 		if (!radix_tree_is_internal_node(node))
@@ -695,9 +695,9 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
 		 * moving the node from one part of the tree to another: if it
 		 * was safe to dereference the old pointer to it
 		 * (node->slots[0]), it will be safe to dereference the new
-		 * one (root->rnode) as far as dependent read barriers go.
+		 * one (root->xa_head) as far as dependent read barriers go.
 		 */
-		root->rnode = (void __rcu *)child;
+		root->xa_head = (void __rcu *)child;
 		if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
 			root_tag_clear(root, IDR_FREE);
 
@@ -745,9 +745,8 @@ static bool delete_node(struct radix_tree_root *root,
 
 		if (node->count) {
 			if (node_to_entry(node) ==
-					rcu_dereference_raw(root->rnode))
-				deleted |= radix_tree_shrink(root,
-								update_node);
+					rcu_dereference_raw(root->xa_head))
+				deleted |= radix_tree_shrink(root, update_node);
 			return deleted;
 		}
 
@@ -762,7 +761,7 @@ static bool delete_node(struct radix_tree_root *root,
 			 */
 			if (!is_idr(root))
 				root_tag_clear_all(root);
-			root->rnode = NULL;
+			root->xa_head = NULL;
 		}
 
 		WARN_ON_ONCE(!list_empty(&node->private_list));
@@ -787,7 +786,7 @@ static bool delete_node(struct radix_tree_root *root,
  *	at position @index in the radix tree @root.
  *
  *	Until there is more than one item in the tree, no nodes are
- *	allocated and @root->rnode is used as a direct slot instead of
+ *	allocated and @root->xa_head is used as a direct slot instead of
  *	pointing to a node, in which case *@nodep will be NULL.
  *
  *	Returns -ENOMEM, or 0 for success.
@@ -797,7 +796,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
 			void __rcu ***slotp)
 {
 	struct radix_tree_node *node = NULL, *child;
-	void __rcu **slot = (void __rcu **)&root->rnode;
+	void __rcu **slot = (void __rcu **)&root->xa_head;
 	unsigned long maxindex;
 	unsigned int shift, offset = 0;
 	unsigned long max = index | ((1UL << order) - 1);
@@ -813,7 +812,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
 		if (error < 0)
 			return error;
 		shift = error;
-		child = rcu_dereference_raw(root->rnode);
+		child = rcu_dereference_raw(root->xa_head);
 	}
 
 	while (shift > order) {
@@ -1004,7 +1003,7 @@ EXPORT_SYMBOL(__radix_tree_insert);
  *	tree @root.
  *
  *	Until there is more than one item in the tree, no nodes are
- *	allocated and @root->rnode is used as a direct slot instead of
+ *	allocated and @root->xa_head is used as a direct slot instead of
  *	pointing to a node, in which case *@nodep will be NULL.
  */
 void *__radix_tree_lookup(const struct radix_tree_root *root,
@@ -1017,7 +1016,7 @@ void *__radix_tree_lookup(const struct radix_tree_root *root,
 
  restart:
 	parent = NULL;
-	slot = (void __rcu **)&root->rnode;
+	slot = (void __rcu **)&root->xa_head;
 	radix_tree_load_root(root, &node, &maxindex);
 	if (index > maxindex)
 		return NULL;
@@ -1168,9 +1167,9 @@ void __radix_tree_replace(struct radix_tree_root *root,
 	/*
 	 * This function supports replacing exceptional entries and
 	 * deleting entries, but that needs accounting against the
-	 * node unless the slot is root->rnode.
+	 * node unless the slot is root->xa_head.
 	 */
-	WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->rnode) &&
+	WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) &&
 			(count || exceptional));
 	replace_slot(slot, item, node, count, exceptional);
 
@@ -1722,7 +1721,7 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
 		iter->tags = 1;
 		iter->node = NULL;
 		__set_iter_shift(iter, 0);
-		return (void __rcu **)&root->rnode;
+		return (void __rcu **)&root->xa_head;
 	}
 
 	do {
@@ -2109,7 +2108,7 @@ void __rcu **idr_get_free(struct radix_tree_root *root,
 			      unsigned long max)
 {
 	struct radix_tree_node *node = NULL, *child;
-	void __rcu **slot = (void __rcu **)&root->rnode;
+	void __rcu **slot = (void __rcu **)&root->xa_head;
 	unsigned long maxindex, start = iter->next_index;
 	unsigned int shift, offset = 0;
 
@@ -2125,7 +2124,7 @@ void __rcu **idr_get_free(struct radix_tree_root *root,
 		if (error < 0)
 			return ERR_PTR(error);
 		shift = error;
-		child = rcu_dereference_raw(root->rnode);
+		child = rcu_dereference_raw(root->xa_head);
 	}
 	if (start == 0 && shift == 0)
 		shift = RADIX_TREE_MAP_SHIFT;
@@ -2190,10 +2189,10 @@ void __rcu **idr_get_free(struct radix_tree_root *root,
  */
 void idr_destroy(struct idr *idr)
 {
-	struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.rnode);
+	struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head);
 	if (radix_tree_is_internal_node(node))
 		radix_tree_free_nodes(node);
-	idr->idr_rt.rnode = NULL;
+	idr->idr_rt.xa_head = NULL;
 	root_tag_set(&idr->idr_rt, IDR_FREE);
 }
 EXPORT_SYMBOL(idr_destroy);
diff --git a/lib/xarray.c b/lib/xarray.c
new file mode 100644
index 000000000000..862f4c64c754
--- /dev/null
+++ b/lib/xarray.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * XArray implementation
+ * Copyright (c) 2017 Microsoft Corporation
+ * Author: Matthew Wilcox <willy@infradead.org>
+ */
+
+#include <linux/export.h>
+#include <linux/xarray.h>
+
+/*
+ * Coding conventions in this file:
+ *
+ * @xa is used to refer to the entire xarray.
+ * @xas is the 'xarray operation state'.  It may be either a pointer to
+ * an xa_state, or an xa_state stored on the stack.  This is an unfortunate
+ * ambiguity.
+ * @index is the index of the entry being operated on
+ * @mark is an xa_mark_t; a small number indicating one of the mark bits.
+ * @node refers to an xa_node; usually the primary one being operated on by
+ * this function.
+ * @offset is the index into the slots array inside an xa_node.
+ * @parent refers to the @xa_node closer to the head than @node.
+ * @entry refers to something stored in a slot in the xarray
+ */
+
+/**
+ * xa_init_flags() - Initialise an empty XArray with flags.
+ * @xa: XArray.
+ * @flags: XA_FLAG values.
+ *
+ * If you need to initialise an XArray with special flags (eg you need
+ * to take the lock from interrupt context), use this function instead
+ * of xa_init().
+ *
+ * Context: Any context.
+ */
+void xa_init_flags(struct xarray *xa, gfp_t flags)
+{
+	spin_lock_init(&xa->xa_lock);
+	xa->xa_flags = flags;
+	xa->xa_head = NULL;
+}
+EXPORT_SYMBOL(xa_init_flags);
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 12adcf9ffe86..c0cf1c79efd5 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -5,7 +5,7 @@ CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \
 LDFLAGS += -fsanitize=address -fsanitize=undefined
 LDLIBS+= -lpthread -lurcu
 TARGETS = main idr-test multiorder
-CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
+CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o
 OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
 	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
 
@@ -35,6 +35,7 @@ vpath %.c ../../lib
 $(OFILES): Makefile *.h */*.h generated/map-shift.h \
 	../../include/linux/*.h \
 	../../include/asm/*.h \
+	../../../include/linux/xarray.h \
 	../../../include/linux/radix-tree.h \
 	../../../include/linux/idr.h
 
@@ -44,6 +45,8 @@ radix-tree.c: ../../../lib/radix-tree.c
 idr.c: ../../../lib/idr.c
 	sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
 
+xarray.o: ../../../lib/xarray.c
+
 generated/map-shift.h:
 	@if ! grep -qws $(SHIFT) generated/map-shift.h; then		\
 		echo "#define XA_CHUNK_SHIFT $(SHIFT)" >		\
diff --git a/tools/testing/radix-tree/linux/bug.h b/tools/testing/radix-tree/linux/bug.h
index 23b8ed52f8c8..03dc8a57eb99 100644
--- a/tools/testing/radix-tree/linux/bug.h
+++ b/tools/testing/radix-tree/linux/bug.h
@@ -1 +1,2 @@
+#include <stdio.h>
 #include "asm/bug.h"
diff --git a/tools/testing/radix-tree/linux/kconfig.h b/tools/testing/radix-tree/linux/kconfig.h
new file mode 100644
index 000000000000..6c8675859913
--- /dev/null
+++ b/tools/testing/radix-tree/linux/kconfig.h
@@ -0,0 +1 @@
+#include "../../../../include/linux/kconfig.h"
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 2b4f4dba1882..080aea450430 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -192,13 +192,13 @@ static void multiorder_shrink(unsigned long index, int order)
 
 	assert(item_insert_order(&tree, 0, order) == 0);
 
-	node = tree.rnode;
+	node = tree.xa_head;
 
 	assert(item_insert(&tree, index) == 0);
-	assert(node != tree.rnode);
+	assert(node != tree.xa_head);
 
 	assert(item_delete(&tree, index) != 0);
-	assert(node == tree.rnode);
+	assert(node == tree.xa_head);
 
 	for (i = 0; i < max; i++) {
 		struct item *item = item_lookup(&tree, i);
diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c
index 62de66c314b7..70ddf964d51c 100644
--- a/tools/testing/radix-tree/test.c
+++ b/tools/testing/radix-tree/test.c
@@ -281,7 +281,7 @@ static int verify_node(struct radix_tree_node *slot, unsigned int tag,
 
 void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag)
 {
-	struct radix_tree_node *node = root->rnode;
+	struct radix_tree_node *node = root->xa_head;
 	if (!radix_tree_is_internal_node(node))
 		return;
 	verify_node(node, tag, !!root_tag_get(root, tag));
@@ -311,13 +311,13 @@ void item_kill_tree(struct radix_tree_root *root)
 		}
 	}
 	assert(radix_tree_gang_lookup(root, (void **)items, 0, 32) == 0);
-	assert(root->rnode == NULL);
+	assert(root->xa_head == NULL);
 }
 
 void tree_verify_min_height(struct radix_tree_root *root, int maxindex)
 {
 	unsigned shift;
-	struct radix_tree_node *node = root->rnode;
+	struct radix_tree_node *node = root->xa_head;
 	if (!radix_tree_is_internal_node(node)) {
 		assert(maxindex == 0);
 		return;
diff --git a/tools/testing/radix-tree/xarray.c b/tools/testing/radix-tree/xarray.c
new file mode 100644
index 000000000000..9bbd667172a7
--- /dev/null
+++ b/tools/testing/radix-tree/xarray.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * xarray.c: Userspace shim for XArray test-suite
+ * Copyright (c) 2018 Matthew Wilcox <willy@infradead.org>
+ */
+
+#include "../../../lib/xarray.c"
-- 
cgit v1.2.3


From 01959dfe771c6893365482ec78dc1d9cbbbe6de8 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Thu, 9 Nov 2017 09:23:56 -0500
Subject: xarray: Define struct xa_node

This is a direct replacement for struct radix_tree_node.  A couple of
struct members have changed name, so convert those.  Use a #define so
that radix tree users continue to work without change.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Josef Bacik <jbacik@fb.com>
---
 include/linux/radix-tree.h            | 29 +++------------------
 include/linux/xarray.h                | 27 ++++++++++++++++++++
 lib/radix-tree.c                      | 48 +++++++++++++++++------------------
 mm/workingset.c                       | 16 ++++++------
 tools/testing/radix-tree/multiorder.c | 30 +++++++++++-----------
 5 files changed, 77 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 0b080bd88ab7..15388b7e38b9 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -32,6 +32,7 @@
 
 /* Keep unconverted code working */
 #define radix_tree_root		xarray
+#define radix_tree_node		xa_node
 
 /*
  * The bottom two bits of the slot determine how the remaining bits in the
@@ -60,41 +61,17 @@ static inline bool radix_tree_is_internal_node(void *ptr)
 
 /*** radix-tree API starts here ***/
 
-#define RADIX_TREE_MAX_TAGS 3
-
 #define RADIX_TREE_MAP_SHIFT	XA_CHUNK_SHIFT
 #define RADIX_TREE_MAP_SIZE	(1UL << RADIX_TREE_MAP_SHIFT)
 #define RADIX_TREE_MAP_MASK	(RADIX_TREE_MAP_SIZE-1)
 
-#define RADIX_TREE_TAG_LONGS	\
-	((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG)
+#define RADIX_TREE_MAX_TAGS	XA_MAX_MARKS
+#define RADIX_TREE_TAG_LONGS	XA_MARK_LONGS
 
 #define RADIX_TREE_INDEX_BITS  (8 /* CHAR_BIT */ * sizeof(unsigned long))
 #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
 					  RADIX_TREE_MAP_SHIFT))
 
-/*
- * @count is the count of every non-NULL element in the ->slots array
- * whether that is a value entry, a retry entry, a user pointer,
- * a sibling entry or a pointer to the next level of the tree.
- * @exceptional is the count of every element in ->slots which is
- * either a value entry or a sibling of a value entry.
- */
-struct radix_tree_node {
-	unsigned char	shift;		/* Bits remaining in each slot */
-	unsigned char	offset;		/* Slot offset in parent */
-	unsigned char	count;		/* Total entry count */
-	unsigned char	exceptional;	/* Exceptional entry count */
-	struct radix_tree_node *parent;		/* Used when ascending tree */
-	struct radix_tree_root *root;		/* The tree we belong to */
-	union {
-		struct list_head private_list;	/* For tree user */
-		struct rcu_head	rcu_head;	/* Used when freeing node */
-	};
-	void __rcu	*slots[RADIX_TREE_MAP_SIZE];
-	unsigned long	tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
-};
-
 /* The IDR tag is stored in the low bits of xa_flags */
 #define ROOT_IS_IDR	((__force gfp_t)4)
 /* The top bits of xa_flags are used to store the root tags */
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 9122cf8bf52a..52141dfc5a90 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -252,6 +252,33 @@ static inline void xa_init(struct xarray *xa)
 #endif
 #define XA_CHUNK_SIZE		(1UL << XA_CHUNK_SHIFT)
 #define XA_CHUNK_MASK		(XA_CHUNK_SIZE - 1)
+#define XA_MAX_MARKS		3
+#define XA_MARK_LONGS		DIV_ROUND_UP(XA_CHUNK_SIZE, BITS_PER_LONG)
+
+/*
+ * @count is the count of every non-NULL element in the ->slots array
+ * whether that is a value entry, a retry entry, a user pointer,
+ * a sibling entry or a pointer to the next level of the tree.
+ * @nr_values is the count of every element in ->slots which is
+ * either a value entry or a sibling of a value entry.
+ */
+struct xa_node {
+	unsigned char	shift;		/* Bits remaining in each slot */
+	unsigned char	offset;		/* Slot offset in parent */
+	unsigned char	count;		/* Total entry count */
+	unsigned char	nr_values;	/* Value entry count */
+	struct xa_node __rcu *parent;	/* NULL at top of tree */
+	struct xarray	*array;		/* The array we belong to */
+	union {
+		struct list_head private_list;	/* For tree user */
+		struct rcu_head	rcu_head;	/* Used when freeing node */
+	};
+	void __rcu	*slots[XA_CHUNK_SIZE];
+	union {
+		unsigned long	tags[XA_MAX_MARKS][XA_MARK_LONGS];
+		unsigned long	marks[XA_MAX_MARKS][XA_MARK_LONGS];
+	};
+};
 
 /* Private */
 static inline bool xa_is_node(const void *entry)
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 299d4bdba109..8a568cea1237 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -260,11 +260,11 @@ static void dump_node(struct radix_tree_node *node, unsigned long index)
 {
 	unsigned long i;
 
-	pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d exceptional %d\n",
+	pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d nr_values %d\n",
 		node, node->offset, index, index | node_maxindex(node),
 		node->parent,
 		node->tags[0][0], node->tags[1][0], node->tags[2][0],
-		node->shift, node->count, node->exceptional);
+		node->shift, node->count, node->nr_values);
 
 	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
 		unsigned long first = index | (i << node->shift);
@@ -354,7 +354,7 @@ static struct radix_tree_node *
 radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
 			struct radix_tree_root *root,
 			unsigned int shift, unsigned int offset,
-			unsigned int count, unsigned int exceptional)
+			unsigned int count, unsigned int nr_values)
 {
 	struct radix_tree_node *ret = NULL;
 
@@ -401,9 +401,9 @@ out:
 		ret->shift = shift;
 		ret->offset = offset;
 		ret->count = count;
-		ret->exceptional = exceptional;
+		ret->nr_values = nr_values;
 		ret->parent = parent;
-		ret->root = root;
+		ret->array = root;
 	}
 	return ret;
 }
@@ -633,8 +633,8 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
 		if (radix_tree_is_internal_node(entry)) {
 			entry_to_node(entry)->parent = node;
 		} else if (xa_is_value(entry)) {
-			/* Moving an exceptional root->xa_head to a node */
-			node->exceptional = 1;
+			/* Moving a value entry root->xa_head to a node */
+			node->nr_values = 1;
 		}
 		/*
 		 * entry was already in the radix tree, so we do not need
@@ -928,12 +928,12 @@ static inline int insert_entries(struct radix_tree_node *node,
 		if (xa_is_node(old))
 			radix_tree_free_nodes(old);
 		if (xa_is_value(old))
-			node->exceptional--;
+			node->nr_values--;
 	}
 	if (node) {
 		node->count += n;
 		if (xa_is_value(item))
-			node->exceptional += n;
+			node->nr_values += n;
 	}
 	return n;
 }
@@ -947,7 +947,7 @@ static inline int insert_entries(struct radix_tree_node *node,
 	if (node) {
 		node->count++;
 		if (xa_is_value(item))
-			node->exceptional++;
+			node->nr_values++;
 	}
 	return 1;
 }
@@ -1083,7 +1083,7 @@ void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
 EXPORT_SYMBOL(radix_tree_lookup);
 
 static inline void replace_sibling_entries(struct radix_tree_node *node,
-				void __rcu **slot, int count, int exceptional)
+				void __rcu **slot, int count, int values)
 {
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
 	unsigned offset = get_slot_offset(node, slot);
@@ -1096,18 +1096,18 @@ static inline void replace_sibling_entries(struct radix_tree_node *node,
 			node->slots[offset] = NULL;
 			node->count--;
 		}
-		node->exceptional += exceptional;
+		node->nr_values += values;
 	}
 #endif
 }
 
 static void replace_slot(void __rcu **slot, void *item,
-		struct radix_tree_node *node, int count, int exceptional)
+		struct radix_tree_node *node, int count, int values)
 {
-	if (node && (count || exceptional)) {
+	if (node && (count || values)) {
 		node->count += count;
-		node->exceptional += exceptional;
-		replace_sibling_entries(node, slot, count, exceptional);
+		node->nr_values += values;
+		replace_sibling_entries(node, slot, count, values);
 	}
 
 	rcu_assign_pointer(*slot, item);
@@ -1161,17 +1161,17 @@ void __radix_tree_replace(struct radix_tree_root *root,
 			  radix_tree_update_node_t update_node)
 {
 	void *old = rcu_dereference_raw(*slot);
-	int exceptional = !!xa_is_value(item) - !!xa_is_value(old);
+	int values = !!xa_is_value(item) - !!xa_is_value(old);
 	int count = calculate_count(root, node, slot, item, old);
 
 	/*
-	 * This function supports replacing exceptional entries and
+	 * This function supports replacing value entries and
 	 * deleting entries, but that needs accounting against the
 	 * node unless the slot is root->xa_head.
 	 */
 	WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) &&
-			(count || exceptional));
-	replace_slot(slot, item, node, count, exceptional);
+			(count || values));
+	replace_slot(slot, item, node, count, values);
 
 	if (!node)
 		return;
@@ -1193,7 +1193,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
  * across slot lookup and replacement.
  *
  * NOTE: This cannot be used to switch between non-entries (empty slots),
- * regular entries, and exceptional entries, as that requires accounting
+ * regular entries, and value entries, as that requires accounting
  * inside the radix tree node. When switching from one type of entry or
  * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
  * radix_tree_iter_replace().
@@ -1301,7 +1301,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index,
 		rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY);
 	}
 	rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY);
-	parent->exceptional -= (end - offset);
+	parent->nr_values -= (end - offset);
 
 	if (order == parent->shift)
 		return 0;
@@ -1961,7 +1961,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root,
 				struct radix_tree_node *node, void __rcu **slot)
 {
 	void *old = rcu_dereference_raw(*slot);
-	int exceptional = xa_is_value(old) ? -1 : 0;
+	int values = xa_is_value(old) ? -1 : 0;
 	unsigned offset = get_slot_offset(node, slot);
 	int tag;
 
@@ -1971,7 +1971,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root,
 		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
 			node_tag_clear(root, node, tag, offset);
 
-	replace_slot(slot, NULL, node, -1, exceptional);
+	replace_slot(slot, NULL, node, -1, values);
 	return node && delete_node(root, node, NULL);
 }
 
diff --git a/mm/workingset.c b/mm/workingset.c
index bb109b3afac2..c201cbb8c00f 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -349,7 +349,7 @@ void workingset_update_node(struct radix_tree_node *node)
 	 * already where they should be. The list_empty() test is safe
 	 * as node->private_list is protected by the i_pages lock.
 	 */
-	if (node->count && node->count == node->exceptional) {
+	if (node->count && node->count == node->nr_values) {
 		if (list_empty(&node->private_list))
 			list_lru_add(&shadow_nodes, &node->private_list);
 	} else {
@@ -428,8 +428,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 	 * to reclaim, take the node off-LRU, and drop the lru_lock.
 	 */
 
-	node = container_of(item, struct radix_tree_node, private_list);
-	mapping = container_of(node->root, struct address_space, i_pages);
+	node = container_of(item, struct xa_node, private_list);
+	mapping = container_of(node->array, struct address_space, i_pages);
 
 	/* Coming from the list, invert the lock order */
 	if (!xa_trylock(&mapping->i_pages)) {
@@ -446,25 +446,25 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 	 * no pages, so we expect to be able to remove them all and
 	 * delete and free the empty node afterwards.
 	 */
-	if (WARN_ON_ONCE(!node->exceptional))
+	if (WARN_ON_ONCE(!node->nr_values))
 		goto out_invalid;
-	if (WARN_ON_ONCE(node->count != node->exceptional))
+	if (WARN_ON_ONCE(node->count != node->nr_values))
 		goto out_invalid;
 	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
 		if (node->slots[i]) {
 			if (WARN_ON_ONCE(!xa_is_value(node->slots[i])))
 				goto out_invalid;
-			if (WARN_ON_ONCE(!node->exceptional))
+			if (WARN_ON_ONCE(!node->nr_values))
 				goto out_invalid;
 			if (WARN_ON_ONCE(!mapping->nrexceptional))
 				goto out_invalid;
 			node->slots[i] = NULL;
-			node->exceptional--;
+			node->nr_values--;
 			node->count--;
 			mapping->nrexceptional--;
 		}
 	}
-	if (WARN_ON_ONCE(node->exceptional))
+	if (WARN_ON_ONCE(node->nr_values))
 		goto out_invalid;
 	inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM);
 	__radix_tree_delete_node(&mapping->i_pages, node,
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 080aea450430..60786fa55302 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -393,7 +393,7 @@ static void multiorder_join2(unsigned order1, unsigned order2)
 	radix_tree_insert(&tree, 1 << order2, xa_mk_value(5));
 	item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
 	assert(item2 == xa_mk_value(5));
-	assert(node->exceptional == 1);
+	assert(node->nr_values == 1);
 
 	item2 = radix_tree_lookup(&tree, 0);
 	free(item2);
@@ -401,7 +401,7 @@ static void multiorder_join2(unsigned order1, unsigned order2)
 	radix_tree_join(&tree, 0, order1, item1);
 	item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
 	assert(item2 == item1);
-	assert(node->exceptional == 0);
+	assert(node->nr_values == 0);
 	item_kill_tree(&tree);
 }
 
@@ -409,7 +409,7 @@ static void multiorder_join2(unsigned order1, unsigned order2)
  * This test revealed an accounting bug for value entries at one point.
  * Nodes were being freed back into the pool with an elevated exception count
  * by radix_tree_join() and then radix_tree_split() was failing to zero the
- * count of exceptional entries.
+ * count of value entries.
  */
 static void multiorder_join3(unsigned int order)
 {
@@ -433,7 +433,7 @@ static void multiorder_join3(unsigned int order)
 	}
 
 	__radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(node->exceptional == node->count);
+	assert(node->nr_values == node->count);
 
 	item_kill_tree(&tree);
 }
@@ -520,7 +520,7 @@ static void __multiorder_split2(int old_order, int new_order)
 
 	item = __radix_tree_lookup(&tree, 0, &node, NULL);
 	assert(item == xa_mk_value(5));
-	assert(node->exceptional > 0);
+	assert(node->nr_values > 0);
 
 	radix_tree_split(&tree, 0, new_order);
 	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
@@ -530,7 +530,7 @@ static void __multiorder_split2(int old_order, int new_order)
 
 	item = __radix_tree_lookup(&tree, 0, &node, NULL);
 	assert(item != xa_mk_value(5));
-	assert(node->exceptional == 0);
+	assert(node->nr_values == 0);
 
 	item_kill_tree(&tree);
 }
@@ -547,7 +547,7 @@ static void __multiorder_split3(int old_order, int new_order)
 
 	item = __radix_tree_lookup(&tree, 0, &node, NULL);
 	assert(item == xa_mk_value(5));
-	assert(node->exceptional > 0);
+	assert(node->nr_values > 0);
 
 	radix_tree_split(&tree, 0, new_order);
 	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
@@ -556,7 +556,7 @@ static void __multiorder_split3(int old_order, int new_order)
 
 	item = __radix_tree_lookup(&tree, 0, &node, NULL);
 	assert(item == xa_mk_value(7));
-	assert(node->exceptional > 0);
+	assert(node->nr_values > 0);
 
 	item_kill_tree(&tree);
 
@@ -564,7 +564,7 @@ static void __multiorder_split3(int old_order, int new_order)
 
 	item = __radix_tree_lookup(&tree, 0, &node, NULL);
 	assert(item == xa_mk_value(5));
-	assert(node->exceptional > 0);
+	assert(node->nr_values > 0);
 
 	radix_tree_split(&tree, 0, new_order);
 	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
@@ -577,13 +577,13 @@ static void __multiorder_split3(int old_order, int new_order)
 
 	item = __radix_tree_lookup(&tree, 1 << new_order, &node, NULL);
 	assert(item == xa_mk_value(7));
-	assert(node->count == node->exceptional);
+	assert(node->count == node->nr_values);
 	do {
 		node = node->parent;
 		if (!node)
 			break;
 		assert(node->count == 1);
-		assert(node->exceptional == 0);
+		assert(node->nr_values == 0);
 	} while (1);
 
 	item_kill_tree(&tree);
@@ -611,15 +611,15 @@ static void multiorder_account(void)
 
 	__radix_tree_insert(&tree, 1 << 5, 5, xa_mk_value(5));
 	__radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(node->count == node->exceptional * 2);
+	assert(node->count == node->nr_values * 2);
 	radix_tree_delete(&tree, 1 << 5);
-	assert(node->exceptional == 0);
+	assert(node->nr_values == 0);
 
 	__radix_tree_insert(&tree, 1 << 5, 5, xa_mk_value(5));
 	__radix_tree_lookup(&tree, 1 << 5, &node, &slot);
-	assert(node->count == node->exceptional * 2);
+	assert(node->count == node->nr_values * 2);
 	__radix_tree_replace(&tree, node, slot, NULL, NULL);
-	assert(node->exceptional == 0);
+	assert(node->nr_values == 0);
 
 	item_kill_tree(&tree);
 }
-- 
cgit v1.2.3


From ad3d6c7263e368abdc151e1cc13dc78aa39cc7a7 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 7 Nov 2017 14:57:46 -0500
Subject: xarray: Add XArray load operation

The xa_load function brings with it a lot of infrastructure; xa_empty(),
xa_is_err(), and large chunks of the XArray advanced API that are used
to implement xa_load.

As the test-suite demonstrates, it is possible to use the XArray functions
on a radix tree.  The radix tree functions depend on the GFP flags being
stored in the root of the tree, so it's not possible to use the radix
tree functions on an XArray.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h                    | 336 ++++++++++++++++++++++++++++++
 lib/Kconfig.debug                         |   3 +
 lib/Makefile                              |   1 +
 lib/radix-tree.c                          |  43 ----
 lib/test_xarray.c                         |  87 ++++++++
 lib/xarray.c                              | 195 +++++++++++++++++
 tools/include/linux/kernel.h              |   1 +
 tools/testing/radix-tree/.gitignore       |   1 +
 tools/testing/radix-tree/Makefile         |   6 +-
 tools/testing/radix-tree/linux/kernel.h   |   1 +
 tools/testing/radix-tree/linux/rcupdate.h |   2 +
 tools/testing/radix-tree/main.c           |   1 +
 tools/testing/radix-tree/test.h           |   1 +
 tools/testing/radix-tree/xarray.c         |  28 +++
 14 files changed, 661 insertions(+), 45 deletions(-)
 create mode 100644 lib/test_xarray.c

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 52141dfc5a90..a0df8217068c 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -12,6 +12,8 @@
 #include <linux/bug.h>
 #include <linux/compiler.h>
 #include <linux/kconfig.h>
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
@@ -30,6 +32,10 @@
  *
  * 0-62: Sibling entries
  * 256: Retry entry
+ *
+ * Errors are also represented as internal entries, but use the negative
+ * space (-4094 to -2).  They're never stored in the slots array; only
+ * returned by the normal API.
  */
 
 #define BITS_PER_XA_VALUE	(BITS_PER_LONG - 1)
@@ -155,6 +161,42 @@ static inline bool xa_is_internal(const void *entry)
 	return ((unsigned long)entry & 3) == 2;
 }
 
+/**
+ * xa_is_err() - Report whether an XArray operation returned an error
+ * @entry: Result from calling an XArray function
+ *
+ * If an XArray operation cannot complete an operation, it will return
+ * a special value indicating an error.  This function tells you
+ * whether an error occurred; xa_err() tells you which error occurred.
+ *
+ * Context: Any context.
+ * Return: %true if the entry indicates an error.
+ */
+static inline bool xa_is_err(const void *entry)
+{
+	return unlikely(xa_is_internal(entry));
+}
+
+/**
+ * xa_err() - Turn an XArray result into an errno.
+ * @entry: Result from calling an XArray function.
+ *
+ * If an XArray operation cannot complete an operation, it will return
+ * a special pointer value which encodes an errno.  This function extracts
+ * the errno from the pointer value, or returns 0 if the pointer does not
+ * represent an errno.
+ *
+ * Context: Any context.
+ * Return: A negative errno or 0.
+ */
+static inline int xa_err(void *entry)
+{
+	/* xa_to_internal() would not do sign extension. */
+	if (xa_is_err(entry))
+		return (long)entry >> 2;
+	return 0;
+}
+
 /**
  * struct xarray - The anchor of the XArray.
  * @xa_lock: Lock that protects the contents of the XArray.
@@ -209,6 +251,7 @@ struct xarray {
 #define DEFINE_XARRAY(name) DEFINE_XARRAY_FLAGS(name, 0)
 
 void xa_init_flags(struct xarray *, gfp_t flags);
+void *xa_load(struct xarray *, unsigned long index);
 
 /**
  * xa_init() - Initialise an empty XArray.
@@ -223,6 +266,18 @@ static inline void xa_init(struct xarray *xa)
 	xa_init_flags(xa, 0);
 }
 
+/**
+ * xa_empty() - Determine if an array has any present entries.
+ * @xa: XArray.
+ *
+ * Context: Any context.
+ * Return: %true if the array contains only NULL pointers.
+ */
+static inline bool xa_empty(const struct xarray *xa)
+{
+	return xa->xa_head == NULL;
+}
+
 #define xa_trylock(xa)		spin_trylock(&(xa)->xa_lock)
 #define xa_lock(xa)		spin_lock(&(xa)->xa_lock)
 #define xa_unlock(xa)		spin_unlock(&(xa)->xa_lock)
@@ -280,6 +335,65 @@ struct xa_node {
 	};
 };
 
+void xa_dump(const struct xarray *);
+void xa_dump_node(const struct xa_node *);
+
+#ifdef XA_DEBUG
+#define XA_BUG_ON(xa, x) do {					\
+		if (x) {					\
+			xa_dump(xa);				\
+			BUG();					\
+		}						\
+	} while (0)
+#define XA_NODE_BUG_ON(node, x) do {				\
+		if (x) {					\
+			if (node) xa_dump_node(node);		\
+			BUG();					\
+		}						\
+	} while (0)
+#else
+#define XA_BUG_ON(xa, x)	do { } while (0)
+#define XA_NODE_BUG_ON(node, x)	do { } while (0)
+#endif
+
+/* Private */
+static inline void *xa_head(const struct xarray *xa)
+{
+	return rcu_dereference_check(xa->xa_head,
+						lockdep_is_held(&xa->xa_lock));
+}
+
+/* Private */
+static inline void *xa_head_locked(const struct xarray *xa)
+{
+	return rcu_dereference_protected(xa->xa_head,
+						lockdep_is_held(&xa->xa_lock));
+}
+
+/* Private */
+static inline void *xa_entry(const struct xarray *xa,
+				const struct xa_node *node, unsigned int offset)
+{
+	XA_NODE_BUG_ON(node, offset >= XA_CHUNK_SIZE);
+	return rcu_dereference_check(node->slots[offset],
+						lockdep_is_held(&xa->xa_lock));
+}
+
+/* Private */
+static inline void *xa_entry_locked(const struct xarray *xa,
+				const struct xa_node *node, unsigned int offset)
+{
+	XA_NODE_BUG_ON(node, offset >= XA_CHUNK_SIZE);
+	return rcu_dereference_protected(node->slots[offset],
+						lockdep_is_held(&xa->xa_lock));
+}
+
+/* Private */
+static inline struct xa_node *xa_to_node(const void *entry)
+{
+	return (struct xa_node *)((unsigned long)entry - 2);
+}
+
 /* Private */
 static inline bool xa_is_node(const void *entry)
 {
@@ -312,4 +426,226 @@ static inline bool xa_is_sibling(const void *entry)
 
 #define XA_RETRY_ENTRY		xa_mk_internal(256)
 
+/**
+ * xa_is_retry() - Is the entry a retry entry?
+ * @entry: Entry retrieved from the XArray
+ *
+ * Return: %true if the entry is a retry entry.
+ */
+static inline bool xa_is_retry(const void *entry)
+{
+	return unlikely(entry == XA_RETRY_ENTRY);
+}
+
+/**
+ * typedef xa_update_node_t - A callback function from the XArray.
+ * @node: The node which is being processed
+ *
+ * This function is called every time the XArray updates the count of
+ * present and value entries in a node.  It allows advanced users to
+ * maintain the private_list in the node.
+ *
+ * Context: The xa_lock is held and interrupts may be disabled.
+ *	    Implementations should not drop the xa_lock, nor re-enable
+ *	    interrupts.
+ */
+typedef void (*xa_update_node_t)(struct xa_node *node);
+
+/*
+ * The xa_state is opaque to its users.  It contains various different pieces
+ * of state involved in the current operation on the XArray.  It should be
+ * declared on the stack and passed between the various internal routines.
+ * The various elements in it should not be accessed directly, but only
+ * through the provided accessor functions.  The below documentation is for
+ * the benefit of those working on the code, not for users of the XArray.
+ *
+ * @xa_node usually points to the xa_node containing the slot we're operating
+ * on (and @xa_offset is the offset in the slots array).  If there is a
+ * single entry in the array at index 0, there are no allocated xa_nodes to
+ * point to, and so we store %NULL in @xa_node.  @xa_node is set to
+ * the value %XAS_RESTART if the xa_state is not walked to the correct
+ * position in the tree of nodes for this operation.  If an error occurs
+ * during an operation, it is set to an %XAS_ERROR value.  If we run off the
+ * end of the allocated nodes, it is set to %XAS_BOUNDS.
+ */
+struct xa_state {
+	struct xarray *xa;
+	unsigned long xa_index;
+	unsigned char xa_shift;
+	unsigned char xa_sibs;
+	unsigned char xa_offset;
+	unsigned char xa_pad;		/* Helps gcc generate better code */
+	struct xa_node *xa_node;
+	struct xa_node *xa_alloc;
+	xa_update_node_t xa_update;
+};
+
+/*
+ * We encode errnos in the xas->xa_node.  If an error has happened, we need to
+ * drop the lock to fix it, and once we've done so the xa_state is invalid.
+ */
+#define XA_ERROR(errno) ((struct xa_node *)(((unsigned long)errno << 2) | 2UL))
+#define XAS_BOUNDS	((struct xa_node *)1UL)
+#define XAS_RESTART	((struct xa_node *)3UL)
+
+#define __XA_STATE(array, index, shift, sibs)  {	\
+	.xa = array,					\
+	.xa_index = index,				\
+	.xa_shift = shift,				\
+	.xa_sibs = sibs,				\
+	.xa_offset = 0,					\
+	.xa_pad = 0,					\
+	.xa_node = XAS_RESTART,				\
+	.xa_alloc = NULL,				\
+	.xa_update = NULL				\
+}
+
+/**
+ * XA_STATE() - Declare an XArray operation state.
+ * @name: Name of this operation state (usually xas).
+ * @array: Array to operate on.
+ * @index: Initial index of interest.
+ *
+ * Declare and initialise an xa_state on the stack.
+ */
+#define XA_STATE(name, array, index)				\
+	struct xa_state name = __XA_STATE(array, index, 0, 0)
+
+/**
+ * XA_STATE_ORDER() - Declare an XArray operation state.
+ * @name: Name of this operation state (usually xas).
+ * @array: Array to operate on.
+ * @index: Initial index of interest.
+ * @order: Order of entry.
+ *
+ * Declare and initialise an xa_state on the stack.  This variant of
+ * XA_STATE() allows you to specify the 'order' of the element you
+ * want to operate on.`
+ */
+#define XA_STATE_ORDER(name, array, index, order)		\
+	struct xa_state name = __XA_STATE(array,		\
+			(index >> order) << order,		\
+			order - (order % XA_CHUNK_SHIFT),	\
+			(1U << (order % XA_CHUNK_SHIFT)) - 1)
+
+#define xas_marked(xas, mark)	xa_marked((xas)->xa, (mark))
+#define xas_trylock(xas)	xa_trylock((xas)->xa)
+#define xas_lock(xas)		xa_lock((xas)->xa)
+#define xas_unlock(xas)		xa_unlock((xas)->xa)
+#define xas_lock_bh(xas)	xa_lock_bh((xas)->xa)
+#define xas_unlock_bh(xas)	xa_unlock_bh((xas)->xa)
+#define xas_lock_irq(xas)	xa_lock_irq((xas)->xa)
+#define xas_unlock_irq(xas)	xa_unlock_irq((xas)->xa)
+#define xas_lock_irqsave(xas, flags) \
+				xa_lock_irqsave((xas)->xa, flags)
+#define xas_unlock_irqrestore(xas, flags) \
+				xa_unlock_irqrestore((xas)->xa, flags)
+
+/**
+ * xas_error() - Return an errno stored in the xa_state.
+ * @xas: XArray operation state.
+ *
+ * Return: 0 if no error has been noted.  A negative errno if one has.
+ */
+static inline int xas_error(const struct xa_state *xas)
+{
+	return xa_err(xas->xa_node);
+}
+
+/**
+ * xas_set_err() - Note an error in the xa_state.
+ * @xas: XArray operation state.
+ * @err: Negative error number.
+ *
+ * Only call this function with a negative @err; zero or positive errors
+ * will probably not behave the way you think they should.  If you want
+ * to clear the error from an xa_state, use xas_reset().
+ */
+static inline void xas_set_err(struct xa_state *xas, long err)
+{
+	xas->xa_node = XA_ERROR(err);
+}
+
+/**
+ * xas_invalid() - Is the xas in a retry or error state?
+ * @xas: XArray operation state.
+ *
+ * Return: %true if the xas cannot be used for operations.
+ */
+static inline bool xas_invalid(const struct xa_state *xas)
+{
+	return (unsigned long)xas->xa_node & 3;
+}
+
+/**
+ * xas_valid() - Is the xas a valid cursor into the array?
+ * @xas: XArray operation state.
+ *
+ * Return: %true if the xas can be used for operations.
+ */
+static inline bool xas_valid(const struct xa_state *xas)
+{
+	return !xas_invalid(xas);
+}
+
+/**
+ * xas_reset() - Reset an XArray operation state.
+ * @xas: XArray operation state.
+ *
+ * Resets the error or walk state of the @xas so future walks of the
+ * array will start from the root.  Use this if you have dropped the
+ * xarray lock and want to reuse the xa_state.
+ *
+ * Context: Any context.
+ */
+static inline void xas_reset(struct xa_state *xas)
+{
+	xas->xa_node = XAS_RESTART;
+}
+
+/**
+ * xas_retry() - Retry the operation if appropriate.
+ * @xas: XArray operation state.
+ * @entry: Entry from xarray.
+ *
+ * The advanced functions may sometimes return an internal entry, such as
+ * a retry entry or a zero entry.  This function sets up the @xas to restart
+ * the walk from the head of the array if needed.
+ *
+ * Context: Any context.
+ * Return: true if the operation needs to be retried.
+ */
+static inline bool xas_retry(struct xa_state *xas, const void *entry)
+{
+	if (!xa_is_retry(entry))
+		return false;
+	xas_reset(xas);
+	return true;
+}
+
+void *xas_load(struct xa_state *);
+
+/**
+ * xas_reload() - Refetch an entry from the xarray.
+ * @xas: XArray operation state.
+ *
+ * Use this function to check that a previously loaded entry still has
+ * the same value.  This is useful for the lockless pagecache lookup where
+ * we walk the array with only the RCU lock to protect us, lock the page,
+ * then check that the page hasn't moved since we looked it up.
+ *
+ * The caller guarantees that @xas is still valid.  If it may be in an
+ * error or restart state, call xas_load() instead.
+ *
+ * Return: The entry at this location in the xarray.
+ */
+static inline void *xas_reload(struct xa_state *xas)
+{
+	struct xa_node *node = xas->xa_node;
+
+	if (node)
+		return xa_entry(xas->xa, node, xas->xa_offset);
+	return xa_head(xas->xa);
+}
+
 #endif /* _LINUX_XARRAY_H */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 4966c4fbe7f7..091155e12422 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1813,6 +1813,9 @@ config TEST_BITFIELD
 config TEST_UUID
 	tristate "Test functions located in the uuid module at runtime"
 
+config TEST_XARRAY
+	tristate "Test the XArray code at runtime"
+
 config TEST_OVERFLOW
 	tristate "Test check_*_overflow() functions at runtime"
 
diff --git a/lib/Makefile b/lib/Makefile
index 057385f1f145..e6f809556af5 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -68,6 +68,7 @@ obj-$(CONFIG_TEST_PRINTF) += test_printf.o
 obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
 obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o
 obj-$(CONFIG_TEST_UUID) += test_uuid.o
+obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
 obj-$(CONFIG_TEST_PARMAN) += test_parman.o
 obj-$(CONFIG_TEST_KMOD) += test_kmod.o
 obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 8a568cea1237..b8e961428484 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -256,49 +256,6 @@ static unsigned long next_index(unsigned long index,
 }
 
 #ifndef __KERNEL__
-static void dump_node(struct radix_tree_node *node, unsigned long index)
-{
-	unsigned long i;
-
-	pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d nr_values %d\n",
-		node, node->offset, index, index | node_maxindex(node),
-		node->parent,
-		node->tags[0][0], node->tags[1][0], node->tags[2][0],
-		node->shift, node->count, node->nr_values);
-
-	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
-		unsigned long first = index | (i << node->shift);
-		unsigned long last = first | ((1UL << node->shift) - 1);
-		void *entry = node->slots[i];
-		if (!entry)
-			continue;
-		if (entry == RADIX_TREE_RETRY) {
-			pr_debug("radix retry offset %ld indices %lu-%lu parent %p\n",
-					i, first, last, node);
-		} else if (!radix_tree_is_internal_node(entry)) {
-			pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n",
-					entry, i, first, last, node);
-		} else if (xa_is_sibling(entry)) {
-			pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n",
-					entry, i, first, last, node,
-					node->slots[xa_to_sibling(entry)]);
-		} else {
-			dump_node(entry_to_node(entry), first);
-		}
-	}
-}
-
-/* For debug */
-static void radix_tree_dump(struct radix_tree_root *root)
-{
-	pr_debug("radix root: %p xa_head %p tags %x\n",
-			root, root->xa_head,
-			root->xa_flags >> ROOT_TAG_SHIFT);
-	if (!radix_tree_is_internal_node(root->xa_head))
-		return;
-	dump_node(entry_to_node(root->xa_head), 0);
-}
-
 static void dump_ida_node(void *entry, unsigned long index)
 {
 	unsigned long i;
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
new file mode 100644
index 000000000000..a7248b87617f
--- /dev/null
+++ b/lib/test_xarray.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * test_xarray.c: Test the XArray API
+ * Copyright (c) 2017-2018 Microsoft Corporation
+ * Author: Matthew Wilcox <willy@infradead.org>
+ */
+
+#include <linux/xarray.h>
+#include <linux/module.h>
+
+static unsigned int tests_run;
+static unsigned int tests_passed;
+
+#ifndef XA_DEBUG
+# ifdef __KERNEL__
+void xa_dump(const struct xarray *xa) { }
+# endif
+#undef XA_BUG_ON
+#define XA_BUG_ON(xa, x) do {					\
+	tests_run++;						\
+	if (x) {						\
+		printk("BUG at %s:%d\n", __func__, __LINE__);	\
+		xa_dump(xa);					\
+		dump_stack();					\
+	} else {						\
+		tests_passed++;					\
+	}							\
+} while (0)
+#endif
+
+static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+	radix_tree_insert(xa, index, xa_mk_value(index));
+	return NULL;
+}
+
+static void xa_erase_index(struct xarray *xa, unsigned long index)
+{
+	radix_tree_delete(xa, index);
+}
+
+static noinline void check_xa_load(struct xarray *xa)
+{
+	unsigned long i, j;
+
+	for (i = 0; i < 1024; i++) {
+		for (j = 0; j < 1024; j++) {
+			void *entry = xa_load(xa, j);
+			if (j < i)
+				XA_BUG_ON(xa, xa_to_value(entry) != j);
+			else
+				XA_BUG_ON(xa, entry);
+		}
+		XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL);
+	}
+
+	for (i = 0; i < 1024; i++) {
+		for (j = 0; j < 1024; j++) {
+			void *entry = xa_load(xa, j);
+			if (j >= i)
+				XA_BUG_ON(xa, xa_to_value(entry) != j);
+			else
+				XA_BUG_ON(xa, entry);
+		}
+		xa_erase_index(xa, i);
+	}
+	XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static RADIX_TREE(array, GFP_KERNEL);
+
+static int xarray_checks(void)
+{
+	check_xa_load(&array);
+
+	printk("XArray: %u of %u tests passed\n", tests_passed, tests_run);
+	return (tests_run == tests_passed) ? 0 : -EINVAL;
+}
+
+static void xarray_exit(void)
+{
+}
+
+module_init(xarray_checks);
+module_exit(xarray_exit);
+MODULE_AUTHOR("Matthew Wilcox <willy@infradead.org>");
+MODULE_LICENSE("GPL");
diff --git a/lib/xarray.c b/lib/xarray.c
index 862f4c64c754..19cfcbc69a68 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -24,6 +24,100 @@
  * @entry refers to something stored in a slot in the xarray
  */
 
+/* extracts the offset within this node from the index */
+static unsigned int get_offset(unsigned long index, struct xa_node *node)
+{
+	return (index >> node->shift) & XA_CHUNK_MASK;
+}
+
+/* move the index either forwards (find) or backwards (sibling slot) */
+static void xas_move_index(struct xa_state *xas, unsigned long offset)
+{
+	unsigned int shift = xas->xa_node->shift;
+	xas->xa_index &= ~XA_CHUNK_MASK << shift;
+	xas->xa_index += offset << shift;
+}
+
+static void *set_bounds(struct xa_state *xas)
+{
+	xas->xa_node = XAS_BOUNDS;
+	return NULL;
+}
+
+/*
+ * Starts a walk.  If the @xas is already valid, we assume that it's on
+ * the right path and just return where we've got to.  If we're in an
+ * error state, return NULL.  If the index is outside the current scope
+ * of the xarray, return NULL without changing @xas->xa_node.  Otherwise
+ * set @xas->xa_node to NULL and return the current head of the array.
+ */
+static void *xas_start(struct xa_state *xas)
+{
+	void *entry;
+
+	if (xas_valid(xas))
+		return xas_reload(xas);
+	if (xas_error(xas))
+		return NULL;
+
+	entry = xa_head(xas->xa);
+	if (!xa_is_node(entry)) {
+		if (xas->xa_index)
+			return set_bounds(xas);
+	} else {
+		if ((xas->xa_index >> xa_to_node(entry)->shift) > XA_CHUNK_MASK)
+			return set_bounds(xas);
+	}
+
+	xas->xa_node = NULL;
+	return entry;
+}
+
+static void *xas_descend(struct xa_state *xas, struct xa_node *node)
+{
+	unsigned int offset = get_offset(xas->xa_index, node);
+	void *entry = xa_entry(xas->xa, node, offset);
+
+	xas->xa_node = node;
+	if (xa_is_sibling(entry)) {
+		offset = xa_to_sibling(entry);
+		entry = xa_entry(xas->xa, node, offset);
+	}
+
+	xas->xa_offset = offset;
+	return entry;
+}
+
+/**
+ * xas_load() - Load an entry from the XArray (advanced).
+ * @xas: XArray operation state.
+ *
+ * Usually walks the @xas to the appropriate state to load the entry
+ * stored at xa_index.  However, it will do nothing and return %NULL if
+ * @xas is in an error state.  xas_load() will never expand the tree.
+ *
+ * If the xa_state is set up to operate on a multi-index entry, xas_load()
+ * may return %NULL or an internal entry, even if there are entries
+ * present within the range specified by @xas.
+ *
+ * Context: Any context.  The caller should hold the xa_lock or the RCU lock.
+ * Return: Usually an entry in the XArray, but see description for exceptions.
+ */
+void *xas_load(struct xa_state *xas)
+{
+	void *entry = xas_start(xas);
+
+	while (xa_is_node(entry)) {
+		struct xa_node *node = xa_to_node(entry);
+
+		if (xas->xa_shift > node->shift)
+			break;
+		entry = xas_descend(xas, node);
+	}
+	return entry;
+}
+EXPORT_SYMBOL_GPL(xas_load);
+
 /**
  * xa_init_flags() - Initialise an empty XArray with flags.
  * @xa: XArray.
@@ -42,3 +136,104 @@ void xa_init_flags(struct xarray *xa, gfp_t flags)
 	xa->xa_head = NULL;
 }
 EXPORT_SYMBOL(xa_init_flags);
+
+/**
+ * xa_load() - Load an entry from an XArray.
+ * @xa: XArray.
+ * @index: index into array.
+ *
+ * Context: Any context.  Takes and releases the RCU lock.
+ * Return: The entry at @index in @xa.
+ */
+void *xa_load(struct xarray *xa, unsigned long index)
+{
+	XA_STATE(xas, xa, index);
+	void *entry;
+
+	rcu_read_lock();
+	do {
+		entry = xas_load(&xas);
+	} while (xas_retry(&xas, entry));
+	rcu_read_unlock();
+
+	return entry;
+}
+EXPORT_SYMBOL(xa_load);
+
+#ifdef XA_DEBUG
+void xa_dump_node(const struct xa_node *node)
+{
+	unsigned i, j;
+
+	if (!node)
+		return;
+	if ((unsigned long)node & 3) {
+		pr_cont("node %px\n", node);
+		return;
+	}
+
+	pr_cont("node %px %s %d parent %px shift %d count %d values %d "
+		"array %px list %px %px marks",
+		node, node->parent ? "offset" : "max", node->offset,
+		node->parent, node->shift, node->count, node->nr_values,
+		node->array, node->private_list.prev, node->private_list.next);
+	for (i = 0; i < XA_MAX_MARKS; i++)
+		for (j = 0; j < XA_MARK_LONGS; j++)
+			pr_cont(" %lx", node->marks[i][j]);
+	pr_cont("\n");
+}
+
+void xa_dump_index(unsigned long index, unsigned int shift)
+{
+	if (!shift)
+		pr_info("%lu: ", index);
+	else if (shift >= BITS_PER_LONG)
+		pr_info("0-%lu: ", ~0UL);
+	else
+		pr_info("%lu-%lu: ", index, index | ((1UL << shift) - 1));
+}
+
+void xa_dump_entry(const void *entry, unsigned long index, unsigned long shift)
+{
+	if (!entry)
+		return;
+
+	xa_dump_index(index, shift);
+
+	if (xa_is_node(entry)) {
+		if (shift == 0) {
+			pr_cont("%px\n", entry);
+		} else {
+			unsigned long i;
+			struct xa_node *node = xa_to_node(entry);
+			xa_dump_node(node);
+			for (i = 0; i < XA_CHUNK_SIZE; i++)
+				xa_dump_entry(node->slots[i],
+				      index + (i << node->shift), node->shift);
+		}
+	} else if (xa_is_value(entry))
+		pr_cont("value %ld (0x%lx) [%px]\n", xa_to_value(entry),
+						xa_to_value(entry), entry);
+	else if (!xa_is_internal(entry))
+		pr_cont("%px\n", entry);
+	else if (xa_is_retry(entry))
+		pr_cont("retry (%ld)\n", xa_to_internal(entry));
+	else if (xa_is_sibling(entry))
+		pr_cont("sibling (slot %ld)\n", xa_to_sibling(entry));
+	else
+		pr_cont("UNKNOWN ENTRY (%px)\n", entry);
+}
+
+void xa_dump(const struct xarray *xa)
+{
+	void *entry = xa->xa_head;
+	unsigned int shift = 0;
+
+	pr_info("xarray: %px head %px flags %x marks %d %d %d\n", xa, entry,
+			xa->xa_flags, radix_tree_tagged(xa, 0),
+			radix_tree_tagged(xa, 1), radix_tree_tagged(xa, 2));
+	if (xa_is_node(entry))
+		shift = xa_to_node(entry)->shift + XA_CHUNK_SHIFT;
+	xa_dump_entry(entry, 0, shift);
+}
+#endif
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index 0ad884452c5c..6935ef94e77a 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -70,6 +70,7 @@
 #define BUG_ON(cond) assert(!(cond))
 #endif
 #endif
+#define BUG()	BUG_ON(1)
 
 #if __BYTE_ORDER == __BIG_ENDIAN
 #define cpu_to_le16 bswap_16
diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore
index d4706c0ffceb..3834899b6693 100644
--- a/tools/testing/radix-tree/.gitignore
+++ b/tools/testing/radix-tree/.gitignore
@@ -4,3 +4,4 @@ idr-test
 main
 multiorder
 radix-tree.c
+xarray
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index c0cf1c79efd5..1379f1d78d0b 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -4,7 +4,7 @@ CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \
 	  -fsanitize=undefined
 LDFLAGS += -fsanitize=address -fsanitize=undefined
 LDLIBS+= -lpthread -lurcu
-TARGETS = main idr-test multiorder
+TARGETS = main idr-test multiorder xarray
 CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o
 OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
 	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
@@ -25,6 +25,8 @@ main:	$(OFILES)
 idr-test.o: ../../../lib/test_ida.c
 idr-test: idr-test.o $(CORE_OFILES)
 
+xarray: $(CORE_OFILES)
+
 multiorder: multiorder.o $(CORE_OFILES)
 
 clean:
@@ -45,7 +47,7 @@ radix-tree.c: ../../../lib/radix-tree.c
 idr.c: ../../../lib/idr.c
 	sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
 
-xarray.o: ../../../lib/xarray.c
+xarray.o: ../../../lib/xarray.c ../../../lib/test_xarray.c
 
 generated/map-shift.h:
 	@if ! grep -qws $(SHIFT) generated/map-shift.h; then		\
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index 426f32f28547..5d06ac75a14d 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -14,6 +14,7 @@
 #include "../../../include/linux/kconfig.h"
 
 #define printk printf
+#define pr_info printk
 #define pr_debug printk
 #define pr_cont printk
 
diff --git a/tools/testing/radix-tree/linux/rcupdate.h b/tools/testing/radix-tree/linux/rcupdate.h
index 73ed33658203..fd280b070fdb 100644
--- a/tools/testing/radix-tree/linux/rcupdate.h
+++ b/tools/testing/radix-tree/linux/rcupdate.h
@@ -6,5 +6,7 @@
 
 #define rcu_dereference_raw(p) rcu_dereference(p)
 #define rcu_dereference_protected(p, cond) rcu_dereference(p)
+#define rcu_dereference_check(p, cond) rcu_dereference(p)
+#define RCU_INIT_POINTER(p, v)	(p) = (v)
 
 #endif
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index b741686e53d6..09deaf4f0959 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -365,6 +365,7 @@ int main(int argc, char **argv)
 	rcu_register_thread();
 	radix_tree_init();
 
+	xarray_tests();
 	regression1_test();
 	regression2_test();
 	regression3_test();
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 92d901eacf49..e3dc7a16f09b 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -34,6 +34,7 @@ int tag_tagged_items(struct radix_tree_root *, pthread_mutex_t *,
 			unsigned iftag, unsigned thentag);
 unsigned long find_item(struct radix_tree_root *, void *item);
 
+void xarray_tests(void);
 void tag_check(void);
 void multiorder_checks(void);
 void iteration_test(unsigned order, unsigned duration);
diff --git a/tools/testing/radix-tree/xarray.c b/tools/testing/radix-tree/xarray.c
index 9bbd667172a7..e61e43efe463 100644
--- a/tools/testing/radix-tree/xarray.c
+++ b/tools/testing/radix-tree/xarray.c
@@ -4,4 +4,32 @@
  * Copyright (c) 2018 Matthew Wilcox <willy@infradead.org>
  */
 
+#define XA_DEBUG
+#include "test.h"
+
+#define module_init(x)
+#define module_exit(x)
+#define MODULE_AUTHOR(x)
+#define MODULE_LICENSE(x)
+#define dump_stack()	assert(0)
+
 #include "../../../lib/xarray.c"
+#undef XA_DEBUG
+#include "../../../lib/test_xarray.c"
+
+void xarray_tests(void)
+{
+	xarray_checks();
+	xarray_exit();
+}
+
+int __weak main(void)
+{
+	radix_tree_init();
+	xarray_tests();
+	radix_tree_cpu_dead(1);
+	rcu_barrier();
+	if (nr_allocated)
+		printf("nr_allocated = %d\n", nr_allocated);
+	return 0;
+}
-- 
cgit v1.2.3


From 9b89a0355144685a787b0dc5bcf7bdd6f2d02968 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 10 Nov 2017 09:34:31 -0500
Subject: xarray: Add XArray marks

XArray marks are like the radix tree tags, only slightly more strongly
typed.  They are renamed in order to distinguish them from tagged
pointers.  This commit adds the basic get/set/clear operations.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h                        |  63 +++++++
 lib/test_xarray.c                             |  34 ++++
 lib/xarray.c                                  | 232 +++++++++++++++++++++++++-
 tools/include/asm-generic/bitops.h            |   1 +
 tools/include/asm-generic/bitops/atomic.h     |   9 -
 tools/include/asm-generic/bitops/non-atomic.h | 109 ++++++++++++
 tools/include/linux/spinlock.h                |  10 +-
 7 files changed, 445 insertions(+), 13 deletions(-)
 create mode 100644 tools/include/asm-generic/bitops/non-atomic.h

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index a0df8217068c..2de504ae9ba4 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -11,6 +11,7 @@
 
 #include <linux/bug.h>
 #include <linux/compiler.h>
+#include <linux/gfp.h>
 #include <linux/kconfig.h>
 #include <linux/kernel.h>
 #include <linux/rcupdate.h>
@@ -197,6 +198,20 @@ static inline int xa_err(void *entry)
 	return 0;
 }
 
+typedef unsigned __bitwise xa_mark_t;
+#define XA_MARK_0		((__force xa_mark_t)0U)
+#define XA_MARK_1		((__force xa_mark_t)1U)
+#define XA_MARK_2		((__force xa_mark_t)2U)
+#define XA_PRESENT		((__force xa_mark_t)8U)
+#define XA_MARK_MAX		XA_MARK_2
+
+/*
+ * Values for xa_flags.  The radix tree stores its GFP flags in the xa_flags,
+ * and we remain compatible with that.
+ */
+#define XA_FLAGS_MARK(mark)	((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \
+						(__force unsigned)(mark)))
+
 /**
  * struct xarray - The anchor of the XArray.
  * @xa_lock: Lock that protects the contents of the XArray.
@@ -252,6 +267,9 @@ struct xarray {
 
 void xa_init_flags(struct xarray *, gfp_t flags);
 void *xa_load(struct xarray *, unsigned long index);
+bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t);
+void xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
+void xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
 
 /**
  * xa_init() - Initialise an empty XArray.
@@ -278,6 +296,19 @@ static inline bool xa_empty(const struct xarray *xa)
 	return xa->xa_head == NULL;
 }
 
+/**
+ * xa_marked() - Inquire whether any entry in this array has a mark set
+ * @xa: Array
+ * @mark: Mark value
+ *
+ * Context: Any context.
+ * Return: %true if any entry has this mark set.
+ */
+static inline bool xa_marked(const struct xarray *xa, xa_mark_t mark)
+{
+	return xa->xa_flags & XA_FLAGS_MARK(mark);
+}
+
 #define xa_trylock(xa)		spin_trylock(&(xa)->xa_lock)
 #define xa_lock(xa)		spin_lock(&(xa)->xa_lock)
 #define xa_unlock(xa)		spin_unlock(&(xa)->xa_lock)
@@ -290,6 +321,12 @@ static inline bool xa_empty(const struct xarray *xa)
 #define xa_unlock_irqrestore(xa, flags) \
 				spin_unlock_irqrestore(&(xa)->xa_lock, flags)
 
+/*
+ * Versions of the normal API which require the caller to hold the xa_lock.
+ */
+void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
+void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
+
 /* Everything below here is the Advanced API.  Proceed with caution. */
 
 /*
@@ -388,6 +425,22 @@ static inline void *xa_entry_locked(const struct xarray *xa,
 						lockdep_is_held(&xa->xa_lock));
 }
 
+/* Private */
+static inline struct xa_node *xa_parent(const struct xarray *xa,
+					const struct xa_node *node)
+{
+	return rcu_dereference_check(node->parent,
+						lockdep_is_held(&xa->xa_lock));
+}
+
+/* Private */
+static inline struct xa_node *xa_parent_locked(const struct xarray *xa,
+					const struct xa_node *node)
+{
+	return rcu_dereference_protected(node->parent,
+						lockdep_is_held(&xa->xa_lock));
+}
+
 /* Private */
 static inline struct xa_node *xa_to_node(const void *entry)
 {
@@ -588,6 +641,12 @@ static inline bool xas_valid(const struct xa_state *xas)
 	return !xas_invalid(xas);
 }
 
+/* True if the pointer is something other than a node */
+static inline bool xas_not_node(struct xa_node *node)
+{
+	return ((unsigned long)node & 3) || !node;
+}
+
 /**
  * xas_reset() - Reset an XArray operation state.
  * @xas: XArray operation state.
@@ -625,6 +684,10 @@ static inline bool xas_retry(struct xa_state *xas, const void *entry)
 
 void *xas_load(struct xa_state *);
 
+bool xas_get_mark(const struct xa_state *, xa_mark_t);
+void xas_set_mark(const struct xa_state *, xa_mark_t);
+void xas_clear_mark(const struct xa_state *, xa_mark_t);
+
 /**
  * xas_reload() - Refetch an entry from the xarray.
  * @xas: XArray operation state.
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index a7248b87617f..f8b0e9ba80a4 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -67,11 +67,45 @@ static noinline void check_xa_load(struct xarray *xa)
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
+static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index)
+{
+	/* NULL elements have no marks set */
+	XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
+	xa_set_mark(xa, index, XA_MARK_0);
+	XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
+
+	/* Storing a pointer will not make a mark appear */
+	XA_BUG_ON(xa, xa_store_index(xa, index, GFP_KERNEL) != NULL);
+	XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
+	xa_set_mark(xa, index, XA_MARK_0);
+	XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0));
+
+	/* Setting one mark will not set another mark */
+	XA_BUG_ON(xa, xa_get_mark(xa, index + 1, XA_MARK_0));
+	XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_1));
+
+	/* Storing NULL clears marks, and they can't be set again */
+	xa_erase_index(xa, index);
+	XA_BUG_ON(xa, !xa_empty(xa));
+	XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
+	xa_set_mark(xa, index, XA_MARK_0);
+	XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
+}
+
+static noinline void check_xa_mark(struct xarray *xa)
+{
+	unsigned long index;
+
+	for (index = 0; index < 16384; index += 4)
+		check_xa_mark_1(xa, index);
+}
+
 static RADIX_TREE(array, GFP_KERNEL);
 
 static int xarray_checks(void)
 {
 	check_xa_load(&array);
+	check_xa_mark(&array);
 
 	printk("XArray: %u of %u tests passed\n", tests_passed, tests_run);
 	return (tests_run == tests_passed) ? 0 : -EINVAL;
diff --git a/lib/xarray.c b/lib/xarray.c
index 19cfcbc69a68..aa86c47e532f 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -5,6 +5,7 @@
  * Author: Matthew Wilcox <willy@infradead.org>
  */
 
+#include <linux/bitmap.h>
 #include <linux/export.h>
 #include <linux/xarray.h>
 
@@ -24,6 +25,48 @@
  * @entry refers to something stored in a slot in the xarray
  */
 
+static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark)
+{
+	if (!(xa->xa_flags & XA_FLAGS_MARK(mark)))
+		xa->xa_flags |= XA_FLAGS_MARK(mark);
+}
+
+static inline void xa_mark_clear(struct xarray *xa, xa_mark_t mark)
+{
+	if (xa->xa_flags & XA_FLAGS_MARK(mark))
+		xa->xa_flags &= ~(XA_FLAGS_MARK(mark));
+}
+
+static inline unsigned long *node_marks(struct xa_node *node, xa_mark_t mark)
+{
+	return node->marks[(__force unsigned)mark];
+}
+
+static inline bool node_get_mark(struct xa_node *node,
+		unsigned int offset, xa_mark_t mark)
+{
+	return test_bit(offset, node_marks(node, mark));
+}
+
+/* returns true if the bit was set */
+static inline bool node_set_mark(struct xa_node *node, unsigned int offset,
+				xa_mark_t mark)
+{
+	return __test_and_set_bit(offset, node_marks(node, mark));
+}
+
+/* returns true if the bit was set */
+static inline bool node_clear_mark(struct xa_node *node, unsigned int offset,
+				xa_mark_t mark)
+{
+	return __test_and_clear_bit(offset, node_marks(node, mark));
+}
+
+static inline bool node_any_mark(struct xa_node *node, xa_mark_t mark)
+{
+	return !bitmap_empty(node_marks(node, mark), XA_CHUNK_SIZE);
+}
+
 /* extracts the offset within this node from the index */
 static unsigned int get_offset(unsigned long index, struct xa_node *node)
 {
@@ -118,6 +161,85 @@ void *xas_load(struct xa_state *xas)
 }
 EXPORT_SYMBOL_GPL(xas_load);
 
+/**
+ * xas_get_mark() - Returns the state of this mark.
+ * @xas: XArray operation state.
+ * @mark: Mark number.
+ *
+ * Return: true if the mark is set, false if the mark is clear or @xas
+ * is in an error state.
+ */
+bool xas_get_mark(const struct xa_state *xas, xa_mark_t mark)
+{
+	if (xas_invalid(xas))
+		return false;
+	if (!xas->xa_node)
+		return xa_marked(xas->xa, mark);
+	return node_get_mark(xas->xa_node, xas->xa_offset, mark);
+}
+EXPORT_SYMBOL_GPL(xas_get_mark);
+
+/**
+ * xas_set_mark() - Sets the mark on this entry and its parents.
+ * @xas: XArray operation state.
+ * @mark: Mark number.
+ *
+ * Sets the specified mark on this entry, and walks up the tree setting it
+ * on all the ancestor entries.  Does nothing if @xas has not been walked to
+ * an entry, or is in an error state.
+ */
+void xas_set_mark(const struct xa_state *xas, xa_mark_t mark)
+{
+	struct xa_node *node = xas->xa_node;
+	unsigned int offset = xas->xa_offset;
+
+	if (xas_invalid(xas))
+		return;
+
+	while (node) {
+		if (node_set_mark(node, offset, mark))
+			return;
+		offset = node->offset;
+		node = xa_parent_locked(xas->xa, node);
+	}
+
+	if (!xa_marked(xas->xa, mark))
+		xa_mark_set(xas->xa, mark);
+}
+EXPORT_SYMBOL_GPL(xas_set_mark);
+
+/**
+ * xas_clear_mark() - Clears the mark on this entry and its parents.
+ * @xas: XArray operation state.
+ * @mark: Mark number.
+ *
+ * Clears the specified mark on this entry, and walks back to the head
+ * attempting to clear it on all the ancestor entries.  Does nothing if
+ * @xas has not been walked to an entry, or is in an error state.
+ */
+void xas_clear_mark(const struct xa_state *xas, xa_mark_t mark)
+{
+	struct xa_node *node = xas->xa_node;
+	unsigned int offset = xas->xa_offset;
+
+	if (xas_invalid(xas))
+		return;
+
+	while (node) {
+		if (!node_clear_mark(node, offset, mark))
+			return;
+		if (node_any_mark(node, mark))
+			return;
+
+		offset = node->offset;
+		node = xa_parent_locked(xas->xa, node);
+	}
+
+	if (xa_marked(xas->xa, mark))
+		xa_mark_clear(xas->xa, mark);
+}
+EXPORT_SYMBOL_GPL(xas_clear_mark);
+
 /**
  * xa_init_flags() - Initialise an empty XArray with flags.
  * @xa: XArray.
@@ -160,6 +282,112 @@ void *xa_load(struct xarray *xa, unsigned long index)
 }
 EXPORT_SYMBOL(xa_load);
 
+/**
+ * __xa_set_mark() - Set this mark on this entry while locked.
+ * @xa: XArray.
+ * @index: Index of entry.
+ * @mark: Mark number.
+ *
+ * Attempting to set a mark on a NULL entry does not succeed.
+ *
+ * Context: Any context.  Expects xa_lock to be held on entry.
+ */
+void __xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
+{
+	XA_STATE(xas, xa, index);
+	void *entry = xas_load(&xas);
+
+	if (entry)
+		xas_set_mark(&xas, mark);
+}
+EXPORT_SYMBOL_GPL(__xa_set_mark);
+
+/**
+ * __xa_clear_mark() - Clear this mark on this entry while locked.
+ * @xa: XArray.
+ * @index: Index of entry.
+ * @mark: Mark number.
+ *
+ * Context: Any context.  Expects xa_lock to be held on entry.
+ */
+void __xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
+{
+	XA_STATE(xas, xa, index);
+	void *entry = xas_load(&xas);
+
+	if (entry)
+		xas_clear_mark(&xas, mark);
+}
+EXPORT_SYMBOL_GPL(__xa_clear_mark);
+
+/**
+ * xa_get_mark() - Inquire whether this mark is set on this entry.
+ * @xa: XArray.
+ * @index: Index of entry.
+ * @mark: Mark number.
+ *
+ * This function uses the RCU read lock, so the result may be out of date
+ * by the time it returns.  If you need the result to be stable, use a lock.
+ *
+ * Context: Any context.  Takes and releases the RCU lock.
+ * Return: True if the entry at @index has this mark set, false if it doesn't.
+ */
+bool xa_get_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
+{
+	XA_STATE(xas, xa, index);
+	void *entry;
+
+	rcu_read_lock();
+	entry = xas_start(&xas);
+	while (xas_get_mark(&xas, mark)) {
+		if (!xa_is_node(entry))
+			goto found;
+		entry = xas_descend(&xas, xa_to_node(entry));
+	}
+	rcu_read_unlock();
+	return false;
+ found:
+	rcu_read_unlock();
+	return true;
+}
+EXPORT_SYMBOL(xa_get_mark);
+
+/**
+ * xa_set_mark() - Set this mark on this entry.
+ * @xa: XArray.
+ * @index: Index of entry.
+ * @mark: Mark number.
+ *
+ * Attempting to set a mark on a NULL entry does not succeed.
+ *
+ * Context: Process context.  Takes and releases the xa_lock.
+ */
+void xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
+{
+	xa_lock(xa);
+	__xa_set_mark(xa, index, mark);
+	xa_unlock(xa);
+}
+EXPORT_SYMBOL(xa_set_mark);
+
+/**
+ * xa_clear_mark() - Clear this mark on this entry.
+ * @xa: XArray.
+ * @index: Index of entry.
+ * @mark: Mark number.
+ *
+ * Clearing a mark always succeeds.
+ *
+ * Context: Process context.  Takes and releases the xa_lock.
+ */
+void xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
+{
+	xa_lock(xa);
+	__xa_clear_mark(xa, index, mark);
+	xa_unlock(xa);
+}
+EXPORT_SYMBOL(xa_clear_mark);
+
 #ifdef XA_DEBUG
 void xa_dump_node(const struct xa_node *node)
 {
@@ -230,8 +458,8 @@ void xa_dump(const struct xarray *xa)
 	unsigned int shift = 0;
 
 	pr_info("xarray: %px head %px flags %x marks %d %d %d\n", xa, entry,
-			xa->xa_flags, radix_tree_tagged(xa, 0),
-			radix_tree_tagged(xa, 1), radix_tree_tagged(xa, 2));
+			xa->xa_flags, xa_marked(xa, XA_MARK_0),
+			xa_marked(xa, XA_MARK_1), xa_marked(xa, XA_MARK_2));
 	if (xa_is_node(entry))
 		shift = xa_to_node(entry)->shift + XA_CHUNK_SHIFT;
 	xa_dump_entry(entry, 0, shift);
diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h
index 9bce3b56b5e7..5d2ab38965cc 100644
--- a/tools/include/asm-generic/bitops.h
+++ b/tools/include/asm-generic/bitops.h
@@ -27,5 +27,6 @@
 #include <asm-generic/bitops/hweight.h>
 
 #include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/non-atomic.h>
 
 #endif /* __TOOLS_ASM_GENERIC_BITOPS_H */
diff --git a/tools/include/asm-generic/bitops/atomic.h b/tools/include/asm-generic/bitops/atomic.h
index 21c41ccd1266..2f6ea28764a7 100644
--- a/tools/include/asm-generic/bitops/atomic.h
+++ b/tools/include/asm-generic/bitops/atomic.h
@@ -15,13 +15,4 @@ static inline void clear_bit(int nr, unsigned long *addr)
 	addr[nr / __BITS_PER_LONG] &= ~(1UL << (nr % __BITS_PER_LONG));
 }
 
-static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
-{
-	return ((1UL << (nr % __BITS_PER_LONG)) &
-		(((unsigned long *)addr)[nr / __BITS_PER_LONG])) != 0;
-}
-
-#define __set_bit(nr, addr)	set_bit(nr, addr)
-#define __clear_bit(nr, addr)	clear_bit(nr, addr)
-
 #endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */
diff --git a/tools/include/asm-generic/bitops/non-atomic.h b/tools/include/asm-generic/bitops/non-atomic.h
new file mode 100644
index 000000000000..7e10c4b50c5d
--- /dev/null
+++ b/tools/include/asm-generic/bitops/non-atomic.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
+#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
+
+#include <asm/types.h>
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __set_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+	*p  |= mask;
+}
+
+static inline void __clear_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+	*p &= ~mask;
+}
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to change
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __change_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+	*p ^= mask;
+}
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long old = *p;
+
+	*p = old | mask;
+	return (old & mask) != 0;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail.  You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long old = *p;
+
+	*p = old & ~mask;
+	return (old & mask) != 0;
+}
+
+/* WARNING: non atomic and it can be reordered! */
+static inline int __test_and_change_bit(int nr,
+					    volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	unsigned long old = *p;
+
+	*p = old ^ mask;
+	return (old & mask) != 0;
+}
+
+/**
+ * test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static inline int test_bit(int nr, const volatile unsigned long *addr)
+{
+	return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+}
+
+#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
index 1738c0391da4..622266b197d0 100644
--- a/tools/include/linux/spinlock.h
+++ b/tools/include/linux/spinlock.h
@@ -8,8 +8,14 @@
 #define spinlock_t		pthread_mutex_t
 #define DEFINE_SPINLOCK(x)	pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER
 #define __SPIN_LOCK_UNLOCKED(x)	(pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER
-#define spin_lock_init(x)      pthread_mutex_init(x, NULL)
-
+#define spin_lock_init(x)	pthread_mutex_init(x, NULL)
+
+#define spin_lock(x)			pthread_mutex_lock(x)
+#define spin_unlock(x)			pthread_mutex_unlock(x)
+#define spin_lock_bh(x)			pthread_mutex_lock(x)
+#define spin_unlock_bh(x)		pthread_mutex_unlock(x)
+#define spin_lock_irq(x)		pthread_mutex_lock(x)
+#define spin_unlock_irq(x)		pthread_mutex_unlock(x)
 #define spin_lock_irqsave(x, f)		(void)f, pthread_mutex_lock(x)
 #define spin_unlock_irqrestore(x, f)	(void)f, pthread_mutex_unlock(x)
 
-- 
cgit v1.2.3


From 58d6ea3085f2e53714810a513c61629f6d2be0a6 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 10 Nov 2017 15:15:08 -0500
Subject: xarray: Add XArray unconditional store operations

xa_store() differs from radix_tree_insert() in that it will overwrite an
existing element in the array rather than returning an error.  This is
the behaviour which most users want, and those that want more complex
behaviour generally want to use the xas family of routines anyway.

For memory allocation, xa_store() will first attempt to request memory
from the slab allocator; if memory is not immediately available, it will
drop the xa_lock and allocate memory, keeping a pointer in the xa_state.
It does not use the per-CPU cache, although those will continue to exist
until all radix tree users are converted to the xarray.

This patch also includes xa_erase() and __xa_erase() for a streamlined
way to store NULL.  Since there is no need to allocate memory in order
to store a NULL in the XArray, we do not need to trouble the user with
deciding what memory allocation flags to use.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h                   | 147 ++++++-
 lib/radix-tree.c                         |   4 +-
 lib/test_xarray.c                        | 177 +++++++-
 lib/xarray.c                             | 693 +++++++++++++++++++++++++++++++
 tools/include/linux/bitmap.h             |   1 +
 tools/include/linux/spinlock.h           |   2 +
 tools/testing/radix-tree/Makefile        |   2 +-
 tools/testing/radix-tree/bitmap.c        |  23 +
 tools/testing/radix-tree/linux/kernel.h  |   4 +
 tools/testing/radix-tree/linux/lockdep.h |  11 +
 10 files changed, 1055 insertions(+), 9 deletions(-)
 create mode 100644 tools/testing/radix-tree/bitmap.c
 create mode 100644 tools/testing/radix-tree/linux/lockdep.h

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 2de504ae9ba4..15eab63286ed 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -205,10 +205,17 @@ typedef unsigned __bitwise xa_mark_t;
 #define XA_PRESENT		((__force xa_mark_t)8U)
 #define XA_MARK_MAX		XA_MARK_2
 
+enum xa_lock_type {
+	XA_LOCK_IRQ = 1,
+	XA_LOCK_BH = 2,
+};
+
 /*
  * Values for xa_flags.  The radix tree stores its GFP flags in the xa_flags,
  * and we remain compatible with that.
  */
+#define XA_FLAGS_LOCK_IRQ	((__force gfp_t)XA_LOCK_IRQ)
+#define XA_FLAGS_LOCK_BH	((__force gfp_t)XA_LOCK_BH)
 #define XA_FLAGS_MARK(mark)	((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \
 						(__force unsigned)(mark)))
 
@@ -267,6 +274,7 @@ struct xarray {
 
 void xa_init_flags(struct xarray *, gfp_t flags);
 void *xa_load(struct xarray *, unsigned long index);
+void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
 bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t);
 void xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
 void xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
@@ -309,6 +317,23 @@ static inline bool xa_marked(const struct xarray *xa, xa_mark_t mark)
 	return xa->xa_flags & XA_FLAGS_MARK(mark);
 }
 
+/**
+ * xa_erase() - Erase this entry from the XArray.
+ * @xa: XArray.
+ * @index: Index of entry.
+ *
+ * This function is the equivalent of calling xa_store() with %NULL as
+ * the third argument.  The XArray does not need to allocate memory, so
+ * the user does not need to provide GFP flags.
+ *
+ * Context: Process context.  Takes and releases the xa_lock.
+ * Return: The entry which used to be at this index.
+ */
+static inline void *xa_erase(struct xarray *xa, unsigned long index)
+{
+	return xa_store(xa, index, NULL, 0);
+}
+
 #define xa_trylock(xa)		spin_trylock(&(xa)->xa_lock)
 #define xa_lock(xa)		spin_lock(&(xa)->xa_lock)
 #define xa_unlock(xa)		spin_unlock(&(xa)->xa_lock)
@@ -322,11 +347,65 @@ static inline bool xa_marked(const struct xarray *xa, xa_mark_t mark)
 				spin_unlock_irqrestore(&(xa)->xa_lock, flags)
 
 /*
- * Versions of the normal API which require the caller to hold the xa_lock.
- */
+ * Versions of the normal API which require the caller to hold the
+ * xa_lock.  If the GFP flags allow it, they will drop the lock to
+ * allocate memory, then reacquire it afterwards.  These functions
+ * may also re-enable interrupts if the XArray flags indicate the
+ * locking should be interrupt safe.
+ */
+void *__xa_erase(struct xarray *, unsigned long index);
+void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
 void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
 void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
 
+/**
+ * xa_erase_bh() - Erase this entry from the XArray.
+ * @xa: XArray.
+ * @index: Index of entry.
+ *
+ * This function is the equivalent of calling xa_store() with %NULL as
+ * the third argument.  The XArray does not need to allocate memory, so
+ * the user does not need to provide GFP flags.
+ *
+ * Context: Process context.  Takes and releases the xa_lock while
+ * disabling softirqs.
+ * Return: The entry which used to be at this index.
+ */
+static inline void *xa_erase_bh(struct xarray *xa, unsigned long index)
+{
+	void *entry;
+
+	xa_lock_bh(xa);
+	entry = __xa_erase(xa, index);
+	xa_unlock_bh(xa);
+
+	return entry;
+}
+
+/**
+ * xa_erase_irq() - Erase this entry from the XArray.
+ * @xa: XArray.
+ * @index: Index of entry.
+ *
+ * This function is the equivalent of calling xa_store() with %NULL as
+ * the third argument.  The XArray does not need to allocate memory, so
+ * the user does not need to provide GFP flags.
+ *
+ * Context: Process context.  Takes and releases the xa_lock while
+ * disabling interrupts.
+ * Return: The entry which used to be at this index.
+ */
+static inline void *xa_erase_irq(struct xarray *xa, unsigned long index)
+{
+	void *entry;
+
+	xa_lock_irq(xa);
+	entry = __xa_erase(xa, index);
+	xa_unlock_irq(xa);
+
+	return entry;
+}
+
 /* Everything below here is the Advanced API.  Proceed with caution. */
 
 /*
@@ -441,6 +520,12 @@ static inline struct xa_node *xa_parent_locked(const struct xarray *xa,
 						lockdep_is_held(&xa->xa_lock));
 }
 
+/* Private */
+static inline void *xa_mk_node(const struct xa_node *node)
+{
+	return (void *)((unsigned long)node | 2);
+}
+
 /* Private */
 static inline struct xa_node *xa_to_node(const void *entry)
 {
@@ -647,6 +732,12 @@ static inline bool xas_not_node(struct xa_node *node)
 	return ((unsigned long)node & 3) || !node;
 }
 
+/* True if the node represents head-of-tree, RESTART or BOUNDS */
+static inline bool xas_top(struct xa_node *node)
+{
+	return node <= XAS_RESTART;
+}
+
 /**
  * xas_reset() - Reset an XArray operation state.
  * @xas: XArray operation state.
@@ -683,10 +774,14 @@ static inline bool xas_retry(struct xa_state *xas, const void *entry)
 }
 
 void *xas_load(struct xa_state *);
+void *xas_store(struct xa_state *, void *entry);
 
 bool xas_get_mark(const struct xa_state *, xa_mark_t);
 void xas_set_mark(const struct xa_state *, xa_mark_t);
 void xas_clear_mark(const struct xa_state *, xa_mark_t);
+void xas_init_marks(const struct xa_state *);
+
+bool xas_nomem(struct xa_state *, gfp_t);
 
 /**
  * xas_reload() - Refetch an entry from the xarray.
@@ -711,4 +806,52 @@ static inline void *xas_reload(struct xa_state *xas)
 	return xa_head(xas->xa);
 }
 
+/**
+ * xas_set() - Set up XArray operation state for a different index.
+ * @xas: XArray operation state.
+ * @index: New index into the XArray.
+ *
+ * Move the operation state to refer to a different index.  This will
+ * have the effect of starting a walk from the top; see xas_next()
+ * to move to an adjacent index.
+ */
+static inline void xas_set(struct xa_state *xas, unsigned long index)
+{
+	xas->xa_index = index;
+	xas->xa_node = XAS_RESTART;
+}
+
+/**
+ * xas_set_order() - Set up XArray operation state for a multislot entry.
+ * @xas: XArray operation state.
+ * @index: Target of the operation.
+ * @order: Entry occupies 2^@order indices.
+ */
+static inline void xas_set_order(struct xa_state *xas, unsigned long index,
+					unsigned int order)
+{
+#ifdef CONFIG_XARRAY_MULTI
+	xas->xa_index = order < BITS_PER_LONG ? (index >> order) << order : 0;
+	xas->xa_shift = order - (order % XA_CHUNK_SHIFT);
+	xas->xa_sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
+	xas->xa_node = XAS_RESTART;
+#else
+	BUG_ON(order > 0);
+	xas_set(xas, index);
+#endif
+}
+
+/**
+ * xas_set_update() - Set up XArray operation state for a callback.
+ * @xas: XArray operation state.
+ * @update: Function to call when updating a node.
+ *
+ * The XArray can notify a caller after it has updated an xa_node.
+ * This is advanced functionality and is only needed by the page cache.
+ */
+static inline void xas_set_update(struct xa_state *xas, xa_update_node_t update)
+{
+	xas->xa_update = update;
+}
+
 #endif /* _LINUX_XARRAY_H */
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index b8e961428484..3479d93c32b9 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -47,7 +47,7 @@ static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;
 /*
  * Radix tree node cache.
  */
-static struct kmem_cache *radix_tree_node_cachep;
+struct kmem_cache *radix_tree_node_cachep;
 
 /*
  * The radix tree is variable-height, so an insert operation not only has
@@ -365,7 +365,7 @@ out:
 	return ret;
 }
 
-static void radix_tree_node_rcu_free(struct rcu_head *head)
+void radix_tree_node_rcu_free(struct rcu_head *head)
 {
 	struct radix_tree_node *node =
 			container_of(head, struct radix_tree_node, rcu_head);
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index f8b0e9ba80a4..b711030fbc32 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -30,13 +30,49 @@ void xa_dump(const struct xarray *xa) { }
 
 static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp)
 {
-	radix_tree_insert(xa, index, xa_mk_value(index));
-	return NULL;
+	return xa_store(xa, index, xa_mk_value(index & LONG_MAX), gfp);
 }
 
 static void xa_erase_index(struct xarray *xa, unsigned long index)
 {
-	radix_tree_delete(xa, index);
+	XA_BUG_ON(xa, xa_erase(xa, index) != xa_mk_value(index & LONG_MAX));
+	XA_BUG_ON(xa, xa_load(xa, index) != NULL);
+}
+
+/*
+ * If anyone needs this, please move it to xarray.c.  We have no current
+ * users outside the test suite because all current multislot users want
+ * to use the advanced API.
+ */
+static void *xa_store_order(struct xarray *xa, unsigned long index,
+		unsigned order, void *entry, gfp_t gfp)
+{
+	XA_STATE_ORDER(xas, xa, index, order);
+	void *curr;
+
+	do {
+		xas_lock(&xas);
+		curr = xas_store(&xas, entry);
+		xas_unlock(&xas);
+	} while (xas_nomem(&xas, gfp));
+
+	return curr;
+}
+
+static noinline void check_xa_err(struct xarray *xa)
+{
+	XA_BUG_ON(xa, xa_err(xa_store_index(xa, 0, GFP_NOWAIT)) != 0);
+	XA_BUG_ON(xa, xa_err(xa_erase(xa, 0)) != 0);
+#ifndef __KERNEL__
+	/* The kernel does not fail GFP_NOWAIT allocations */
+	XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_NOWAIT)) != -ENOMEM);
+	XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_NOWAIT)) != -ENOMEM);
+#endif
+	XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_KERNEL)) != 0);
+	XA_BUG_ON(xa, xa_err(xa_store(xa, 1, xa_mk_value(0), GFP_KERNEL)) != 0);
+	XA_BUG_ON(xa, xa_err(xa_erase(xa, 1)) != 0);
+// kills the test-suite :-(
+//	XA_BUG_ON(xa, xa_err(xa_store(xa, 0, xa_mk_internal(0), 0)) != -EINVAL);
 }
 
 static noinline void check_xa_load(struct xarray *xa)
@@ -69,6 +105,9 @@ static noinline void check_xa_load(struct xarray *xa)
 
 static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index)
 {
+	unsigned int order;
+	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 8 : 1;
+
 	/* NULL elements have no marks set */
 	XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
 	xa_set_mark(xa, index, XA_MARK_0);
@@ -90,6 +129,37 @@ static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index)
 	XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
 	xa_set_mark(xa, index, XA_MARK_0);
 	XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
+
+	/*
+	 * Storing a multi-index entry over entries with marks gives the
+	 * entire entry the union of the marks
+	 */
+	BUG_ON((index % 4) != 0);
+	for (order = 2; order < max_order; order++) {
+		unsigned long base = round_down(index, 1UL << order);
+		unsigned long next = base + (1UL << order);
+		unsigned long i;
+
+		XA_BUG_ON(xa, xa_store_index(xa, index + 1, GFP_KERNEL));
+		xa_set_mark(xa, index + 1, XA_MARK_0);
+		XA_BUG_ON(xa, xa_store_index(xa, index + 2, GFP_KERNEL));
+		xa_set_mark(xa, index + 2, XA_MARK_1);
+		XA_BUG_ON(xa, xa_store_index(xa, next, GFP_KERNEL));
+		xa_store_order(xa, index, order, xa_mk_value(index),
+				GFP_KERNEL);
+		for (i = base; i < next; i++) {
+			XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0));
+			XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_1));
+			XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_2));
+		}
+		XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_0));
+		XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_1));
+		XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_2));
+		xa_erase_index(xa, index);
+		xa_erase_index(xa, next);
+		XA_BUG_ON(xa, !xa_empty(xa));
+	}
+	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
 static noinline void check_xa_mark(struct xarray *xa)
@@ -100,12 +170,111 @@ static noinline void check_xa_mark(struct xarray *xa)
 		check_xa_mark_1(xa, index);
 }
 
-static RADIX_TREE(array, GFP_KERNEL);
+static noinline void check_xa_shrink(struct xarray *xa)
+{
+	XA_STATE(xas, xa, 1);
+	struct xa_node *node;
+
+	XA_BUG_ON(xa, !xa_empty(xa));
+	XA_BUG_ON(xa, xa_store_index(xa, 0, GFP_KERNEL) != NULL);
+	XA_BUG_ON(xa, xa_store_index(xa, 1, GFP_KERNEL) != NULL);
+
+	/*
+	 * Check that erasing the entry at 1 shrinks the tree and properly
+	 * marks the node as being deleted.
+	 */
+	xas_lock(&xas);
+	XA_BUG_ON(xa, xas_load(&xas) != xa_mk_value(1));
+	node = xas.xa_node;
+	XA_BUG_ON(xa, xa_entry_locked(xa, node, 0) != xa_mk_value(0));
+	XA_BUG_ON(xa, xas_store(&xas, NULL) != xa_mk_value(1));
+	XA_BUG_ON(xa, xa_load(xa, 1) != NULL);
+	XA_BUG_ON(xa, xas.xa_node != XAS_BOUNDS);
+	XA_BUG_ON(xa, xa_entry_locked(xa, node, 0) != XA_RETRY_ENTRY);
+	XA_BUG_ON(xa, xas_load(&xas) != NULL);
+	xas_unlock(&xas);
+	XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0));
+	xa_erase_index(xa, 0);
+	XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_multi_store(struct xarray *xa)
+{
+#ifdef CONFIG_XARRAY_MULTI
+	unsigned long i, j, k;
+	unsigned int max_order = (sizeof(long) == 4) ? 30 : 60;
+
+	/* Loading from any position returns the same value */
+	xa_store_order(xa, 0, 1, xa_mk_value(0), GFP_KERNEL);
+	XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0));
+	XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(0));
+	XA_BUG_ON(xa, xa_load(xa, 2) != NULL);
+	rcu_read_lock();
+	XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 2);
+	XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 2);
+	rcu_read_unlock();
+
+	/* Storing adjacent to the value does not alter the value */
+	xa_store(xa, 3, xa, GFP_KERNEL);
+	XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0));
+	XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(0));
+	XA_BUG_ON(xa, xa_load(xa, 2) != NULL);
+	rcu_read_lock();
+	XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 3);
+	XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 2);
+	rcu_read_unlock();
+
+	/* Overwriting multiple indexes works */
+	xa_store_order(xa, 0, 2, xa_mk_value(1), GFP_KERNEL);
+	XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(1));
+	XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(1));
+	XA_BUG_ON(xa, xa_load(xa, 2) != xa_mk_value(1));
+	XA_BUG_ON(xa, xa_load(xa, 3) != xa_mk_value(1));
+	XA_BUG_ON(xa, xa_load(xa, 4) != NULL);
+	rcu_read_lock();
+	XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 4);
+	XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 4);
+	rcu_read_unlock();
+
+	/* We can erase multiple values with a single store */
+	xa_store_order(xa, 0, 63, NULL, GFP_KERNEL);
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	/* Even when the first slot is empty but the others aren't */
+	xa_store_index(xa, 1, GFP_KERNEL);
+	xa_store_index(xa, 2, GFP_KERNEL);
+	xa_store_order(xa, 0, 2, NULL, GFP_KERNEL);
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	for (i = 0; i < max_order; i++) {
+		for (j = 0; j < max_order; j++) {
+			xa_store_order(xa, 0, i, xa_mk_value(i), GFP_KERNEL);
+			xa_store_order(xa, 0, j, xa_mk_value(j), GFP_KERNEL);
+
+			for (k = 0; k < max_order; k++) {
+				void *entry = xa_load(xa, (1UL << k) - 1);
+				if ((i < k) && (j < k))
+					XA_BUG_ON(xa, entry != NULL);
+				else
+					XA_BUG_ON(xa, entry != xa_mk_value(j));
+			}
+
+			xa_erase(xa, 0);
+			XA_BUG_ON(xa, !xa_empty(xa));
+		}
+	}
+#endif
+}
+
+static DEFINE_XARRAY(array);
 
 static int xarray_checks(void)
 {
+	check_xa_err(&array);
 	check_xa_load(&array);
 	check_xa_mark(&array);
+	check_xa_shrink(&array);
+	check_multi_store(&array);
 
 	printk("XArray: %u of %u tests passed\n", tests_passed, tests_run);
 	return (tests_run == tests_passed) ? 0 : -EINVAL;
diff --git a/lib/xarray.c b/lib/xarray.c
index aa86c47e532f..4596a95ed9cd 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -7,6 +7,8 @@
 
 #include <linux/bitmap.h>
 #include <linux/export.h>
+#include <linux/list.h>
+#include <linux/slab.h>
 #include <linux/xarray.h>
 
 /*
@@ -25,6 +27,31 @@
  * @entry refers to something stored in a slot in the xarray
  */
 
+static inline unsigned int xa_lock_type(const struct xarray *xa)
+{
+	return (__force unsigned int)xa->xa_flags & 3;
+}
+
+static inline void xas_lock_type(struct xa_state *xas, unsigned int lock_type)
+{
+	if (lock_type == XA_LOCK_IRQ)
+		xas_lock_irq(xas);
+	else if (lock_type == XA_LOCK_BH)
+		xas_lock_bh(xas);
+	else
+		xas_lock(xas);
+}
+
+static inline void xas_unlock_type(struct xa_state *xas, unsigned int lock_type)
+{
+	if (lock_type == XA_LOCK_IRQ)
+		xas_unlock_irq(xas);
+	else if (lock_type == XA_LOCK_BH)
+		xas_unlock_bh(xas);
+	else
+		xas_unlock(xas);
+}
+
 static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark)
 {
 	if (!(xa->xa_flags & XA_FLAGS_MARK(mark)))
@@ -67,6 +94,34 @@ static inline bool node_any_mark(struct xa_node *node, xa_mark_t mark)
 	return !bitmap_empty(node_marks(node, mark), XA_CHUNK_SIZE);
 }
 
+#define mark_inc(mark) do { \
+	mark = (__force xa_mark_t)((__force unsigned)(mark) + 1); \
+} while (0)
+
+/*
+ * xas_squash_marks() - Merge all marks to the first entry
+ * @xas: Array operation state.
+ *
+ * Set a mark on the first entry if any entry has it set.  Clear marks on
+ * all sibling entries.
+ */
+static void xas_squash_marks(const struct xa_state *xas)
+{
+	unsigned int mark = 0;
+	unsigned int limit = xas->xa_offset + xas->xa_sibs + 1;
+
+	if (!xas->xa_sibs)
+		return;
+
+	do {
+		unsigned long *marks = xas->xa_node->marks[mark];
+		if (find_next_bit(marks, limit, xas->xa_offset + 1) == limit)
+			continue;
+		__set_bit(xas->xa_offset, marks);
+		bitmap_clear(marks, xas->xa_offset + 1, xas->xa_sibs);
+	} while (mark++ != (__force unsigned)XA_MARK_MAX);
+}
+
 /* extracts the offset within this node from the index */
 static unsigned int get_offset(unsigned long index, struct xa_node *node)
 {
@@ -161,6 +216,516 @@ void *xas_load(struct xa_state *xas)
 }
 EXPORT_SYMBOL_GPL(xas_load);
 
+/* Move the radix tree node cache here */
+extern struct kmem_cache *radix_tree_node_cachep;
+extern void radix_tree_node_rcu_free(struct rcu_head *head);
+
+#define XA_RCU_FREE	((struct xarray *)1)
+
+static void xa_node_free(struct xa_node *node)
+{
+	XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
+	node->array = XA_RCU_FREE;
+	call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
+}
+
+/*
+ * xas_destroy() - Free any resources allocated during the XArray operation.
+ * @xas: XArray operation state.
+ *
+ * This function is now internal-only.
+ */
+static void xas_destroy(struct xa_state *xas)
+{
+	struct xa_node *node = xas->xa_alloc;
+
+	if (!node)
+		return;
+	XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
+	kmem_cache_free(radix_tree_node_cachep, node);
+	xas->xa_alloc = NULL;
+}
+
+/**
+ * xas_nomem() - Allocate memory if needed.
+ * @xas: XArray operation state.
+ * @gfp: Memory allocation flags.
+ *
+ * If we need to add new nodes to the XArray, we try to allocate memory
+ * with GFP_NOWAIT while holding the lock, which will usually succeed.
+ * If it fails, @xas is flagged as needing memory to continue.  The caller
+ * should drop the lock and call xas_nomem().  If xas_nomem() succeeds,
+ * the caller should retry the operation.
+ *
+ * Forward progress is guaranteed as one node is allocated here and
+ * stored in the xa_state where it will be found by xas_alloc().  More
+ * nodes will likely be found in the slab allocator, but we do not tie
+ * them up here.
+ *
+ * Return: true if memory was needed, and was successfully allocated.
+ */
+bool xas_nomem(struct xa_state *xas, gfp_t gfp)
+{
+	if (xas->xa_node != XA_ERROR(-ENOMEM)) {
+		xas_destroy(xas);
+		return false;
+	}
+	xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+	if (!xas->xa_alloc)
+		return false;
+	XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
+	xas->xa_node = XAS_RESTART;
+	return true;
+}
+EXPORT_SYMBOL_GPL(xas_nomem);
+
+/*
+ * __xas_nomem() - Drop locks and allocate memory if needed.
+ * @xas: XArray operation state.
+ * @gfp: Memory allocation flags.
+ *
+ * Internal variant of xas_nomem().
+ *
+ * Return: true if memory was needed, and was successfully allocated.
+ */
+static bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
+	__must_hold(xas->xa->xa_lock)
+{
+	unsigned int lock_type = xa_lock_type(xas->xa);
+
+	if (xas->xa_node != XA_ERROR(-ENOMEM)) {
+		xas_destroy(xas);
+		return false;
+	}
+	if (gfpflags_allow_blocking(gfp)) {
+		xas_unlock_type(xas, lock_type);
+		xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+		xas_lock_type(xas, lock_type);
+	} else {
+		xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+	}
+	if (!xas->xa_alloc)
+		return false;
+	XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
+	xas->xa_node = XAS_RESTART;
+	return true;
+}
+
+static void xas_update(struct xa_state *xas, struct xa_node *node)
+{
+	if (xas->xa_update)
+		xas->xa_update(node);
+	else
+		XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
+}
+
+static void *xas_alloc(struct xa_state *xas, unsigned int shift)
+{
+	struct xa_node *parent = xas->xa_node;
+	struct xa_node *node = xas->xa_alloc;
+
+	if (xas_invalid(xas))
+		return NULL;
+
+	if (node) {
+		xas->xa_alloc = NULL;
+	} else {
+		node = kmem_cache_alloc(radix_tree_node_cachep,
+					GFP_NOWAIT | __GFP_NOWARN);
+		if (!node) {
+			xas_set_err(xas, -ENOMEM);
+			return NULL;
+		}
+	}
+
+	if (parent) {
+		node->offset = xas->xa_offset;
+		parent->count++;
+		XA_NODE_BUG_ON(node, parent->count > XA_CHUNK_SIZE);
+		xas_update(xas, parent);
+	}
+	XA_NODE_BUG_ON(node, shift > BITS_PER_LONG);
+	XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
+	node->shift = shift;
+	node->count = 0;
+	node->nr_values = 0;
+	RCU_INIT_POINTER(node->parent, xas->xa_node);
+	node->array = xas->xa;
+
+	return node;
+}
+
+/*
+ * Use this to calculate the maximum index that will need to be created
+ * in order to add the entry described by @xas.  Because we cannot store a
+ * multiple-index entry at index 0, the calculation is a little more complex
+ * than you might expect.
+ */
+static unsigned long xas_max(struct xa_state *xas)
+{
+	unsigned long max = xas->xa_index;
+
+#ifdef CONFIG_XARRAY_MULTI
+	if (xas->xa_shift || xas->xa_sibs) {
+		unsigned long mask;
+		mask = (((xas->xa_sibs + 1UL) << xas->xa_shift) - 1);
+		max |= mask;
+		if (mask == max)
+			max++;
+	}
+#endif
+
+	return max;
+}
+
+/* The maximum index that can be contained in the array without expanding it */
+static unsigned long max_index(void *entry)
+{
+	if (!xa_is_node(entry))
+		return 0;
+	return (XA_CHUNK_SIZE << xa_to_node(entry)->shift) - 1;
+}
+
+static void xas_shrink(struct xa_state *xas)
+{
+	struct xarray *xa = xas->xa;
+	struct xa_node *node = xas->xa_node;
+
+	for (;;) {
+		void *entry;
+
+		XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
+		if (node->count != 1)
+			break;
+		entry = xa_entry_locked(xa, node, 0);
+		if (!entry)
+			break;
+		if (!xa_is_node(entry) && node->shift)
+			break;
+		xas->xa_node = XAS_BOUNDS;
+
+		RCU_INIT_POINTER(xa->xa_head, entry);
+
+		node->count = 0;
+		node->nr_values = 0;
+		if (!xa_is_node(entry))
+			RCU_INIT_POINTER(node->slots[0], XA_RETRY_ENTRY);
+		xas_update(xas, node);
+		xa_node_free(node);
+		if (!xa_is_node(entry))
+			break;
+		node = xa_to_node(entry);
+		node->parent = NULL;
+	}
+}
+
+/*
+ * xas_delete_node() - Attempt to delete an xa_node
+ * @xas: Array operation state.
+ *
+ * Attempts to delete the @xas->xa_node.  This will fail if xa->node has
+ * a non-zero reference count.
+ */
+static void xas_delete_node(struct xa_state *xas)
+{
+	struct xa_node *node = xas->xa_node;
+
+	for (;;) {
+		struct xa_node *parent;
+
+		XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
+		if (node->count)
+			break;
+
+		parent = xa_parent_locked(xas->xa, node);
+		xas->xa_node = parent;
+		xas->xa_offset = node->offset;
+		xa_node_free(node);
+
+		if (!parent) {
+			xas->xa->xa_head = NULL;
+			xas->xa_node = XAS_BOUNDS;
+			return;
+		}
+
+		parent->slots[xas->xa_offset] = NULL;
+		parent->count--;
+		XA_NODE_BUG_ON(parent, parent->count > XA_CHUNK_SIZE);
+		node = parent;
+		xas_update(xas, node);
+	}
+
+	if (!node->parent)
+		xas_shrink(xas);
+}
+
+/**
+ * xas_free_nodes() - Free this node and all nodes that it references
+ * @xas: Array operation state.
+ * @top: Node to free
+ *
+ * This node has been removed from the tree.  We must now free it and all
+ * of its subnodes.  There may be RCU walkers with references into the tree,
+ * so we must replace all entries with retry markers.
+ */
+static void xas_free_nodes(struct xa_state *xas, struct xa_node *top)
+{
+	unsigned int offset = 0;
+	struct xa_node *node = top;
+
+	for (;;) {
+		void *entry = xa_entry_locked(xas->xa, node, offset);
+
+		if (xa_is_node(entry)) {
+			node = xa_to_node(entry);
+			offset = 0;
+			continue;
+		}
+		if (entry)
+			RCU_INIT_POINTER(node->slots[offset], XA_RETRY_ENTRY);
+		offset++;
+		while (offset == XA_CHUNK_SIZE) {
+			struct xa_node *parent;
+
+			parent = xa_parent_locked(xas->xa, node);
+			offset = node->offset + 1;
+			node->count = 0;
+			node->nr_values = 0;
+			xas_update(xas, node);
+			xa_node_free(node);
+			if (node == top)
+				return;
+			node = parent;
+		}
+	}
+}
+
+/*
+ * xas_expand adds nodes to the head of the tree until it has reached
+ * sufficient height to be able to contain @xas->xa_index
+ */
+static int xas_expand(struct xa_state *xas, void *head)
+{
+	struct xarray *xa = xas->xa;
+	struct xa_node *node = NULL;
+	unsigned int shift = 0;
+	unsigned long max = xas_max(xas);
+
+	if (!head) {
+		if (max == 0)
+			return 0;
+		while ((max >> shift) >= XA_CHUNK_SIZE)
+			shift += XA_CHUNK_SHIFT;
+		return shift + XA_CHUNK_SHIFT;
+	} else if (xa_is_node(head)) {
+		node = xa_to_node(head);
+		shift = node->shift + XA_CHUNK_SHIFT;
+	}
+	xas->xa_node = NULL;
+
+	while (max > max_index(head)) {
+		xa_mark_t mark = 0;
+
+		XA_NODE_BUG_ON(node, shift > BITS_PER_LONG);
+		node = xas_alloc(xas, shift);
+		if (!node)
+			return -ENOMEM;
+
+		node->count = 1;
+		if (xa_is_value(head))
+			node->nr_values = 1;
+		RCU_INIT_POINTER(node->slots[0], head);
+
+		/* Propagate the aggregated mark info to the new child */
+		for (;;) {
+			if (xa_marked(xa, mark))
+				node_set_mark(node, 0, mark);
+			if (mark == XA_MARK_MAX)
+				break;
+			mark_inc(mark);
+		}
+
+		/*
+		 * Now that the new node is fully initialised, we can add
+		 * it to the tree
+		 */
+		if (xa_is_node(head)) {
+			xa_to_node(head)->offset = 0;
+			rcu_assign_pointer(xa_to_node(head)->parent, node);
+		}
+		head = xa_mk_node(node);
+		rcu_assign_pointer(xa->xa_head, head);
+		xas_update(xas, node);
+
+		shift += XA_CHUNK_SHIFT;
+	}
+
+	xas->xa_node = node;
+	return shift;
+}
+
+/*
+ * xas_create() - Create a slot to store an entry in.
+ * @xas: XArray operation state.
+ *
+ * Most users will not need to call this function directly, as it is called
+ * by xas_store().  It is useful for doing conditional store operations
+ * (see the xa_cmpxchg() implementation for an example).
+ *
+ * Return: If the slot already existed, returns the contents of this slot.
+ * If the slot was newly created, returns NULL.  If it failed to create the
+ * slot, returns NULL and indicates the error in @xas.
+ */
+static void *xas_create(struct xa_state *xas)
+{
+	struct xarray *xa = xas->xa;
+	void *entry;
+	void __rcu **slot;
+	struct xa_node *node = xas->xa_node;
+	int shift;
+	unsigned int order = xas->xa_shift;
+
+	if (xas_top(node)) {
+		entry = xa_head_locked(xa);
+		xas->xa_node = NULL;
+		shift = xas_expand(xas, entry);
+		if (shift < 0)
+			return NULL;
+		entry = xa_head_locked(xa);
+		slot = &xa->xa_head;
+	} else if (xas_error(xas)) {
+		return NULL;
+	} else if (node) {
+		unsigned int offset = xas->xa_offset;
+
+		shift = node->shift;
+		entry = xa_entry_locked(xa, node, offset);
+		slot = &node->slots[offset];
+	} else {
+		shift = 0;
+		entry = xa_head_locked(xa);
+		slot = &xa->xa_head;
+	}
+
+	while (shift > order) {
+		shift -= XA_CHUNK_SHIFT;
+		if (!entry) {
+			node = xas_alloc(xas, shift);
+			if (!node)
+				break;
+			rcu_assign_pointer(*slot, xa_mk_node(node));
+		} else if (xa_is_node(entry)) {
+			node = xa_to_node(entry);
+		} else {
+			break;
+		}
+		entry = xas_descend(xas, node);
+		slot = &node->slots[xas->xa_offset];
+	}
+
+	return entry;
+}
+
+static void update_node(struct xa_state *xas, struct xa_node *node,
+		int count, int values)
+{
+	if (!node || (!count && !values))
+		return;
+
+	node->count += count;
+	node->nr_values += values;
+	XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
+	XA_NODE_BUG_ON(node, node->nr_values > XA_CHUNK_SIZE);
+	xas_update(xas, node);
+	if (count < 0)
+		xas_delete_node(xas);
+}
+
+/**
+ * xas_store() - Store this entry in the XArray.
+ * @xas: XArray operation state.
+ * @entry: New entry.
+ *
+ * If @xas is operating on a multi-index entry, the entry returned by this
+ * function is essentially meaningless (it may be an internal entry or it
+ * may be %NULL, even if there are non-NULL entries at some of the indices
+ * covered by the range).  This is not a problem for any current users,
+ * and can be changed if needed.
+ *
+ * Return: The old entry at this index.
+ */
+void *xas_store(struct xa_state *xas, void *entry)
+{
+	struct xa_node *node;
+	void __rcu **slot = &xas->xa->xa_head;
+	unsigned int offset, max;
+	int count = 0;
+	int values = 0;
+	void *first, *next;
+	bool value = xa_is_value(entry);
+
+	if (entry)
+		first = xas_create(xas);
+	else
+		first = xas_load(xas);
+
+	if (xas_invalid(xas))
+		return first;
+	node = xas->xa_node;
+	if (node && (xas->xa_shift < node->shift))
+		xas->xa_sibs = 0;
+	if ((first == entry) && !xas->xa_sibs)
+		return first;
+
+	next = first;
+	offset = xas->xa_offset;
+	max = xas->xa_offset + xas->xa_sibs;
+	if (node) {
+		slot = &node->slots[offset];
+		if (xas->xa_sibs)
+			xas_squash_marks(xas);
+	}
+	if (!entry)
+		xas_init_marks(xas);
+
+	for (;;) {
+		/*
+		 * Must clear the marks before setting the entry to NULL,
+		 * otherwise xas_for_each_marked may find a NULL entry and
+		 * stop early.  rcu_assign_pointer contains a release barrier
+		 * so the mark clearing will appear to happen before the
+		 * entry is set to NULL.
+		 */
+		rcu_assign_pointer(*slot, entry);
+		if (xa_is_node(next))
+			xas_free_nodes(xas, xa_to_node(next));
+		if (!node)
+			break;
+		count += !next - !entry;
+		values += !xa_is_value(first) - !value;
+		if (entry) {
+			if (offset == max)
+				break;
+			if (!xa_is_sibling(entry))
+				entry = xa_mk_sibling(xas->xa_offset);
+		} else {
+			if (offset == XA_CHUNK_MASK)
+				break;
+		}
+		next = xa_entry_locked(xas->xa, node, ++offset);
+		if (!xa_is_sibling(next)) {
+			if (!entry && (offset > max))
+				break;
+			first = next;
+		}
+		slot++;
+	}
+
+	update_node(xas, node, count, values);
+	return first;
+}
+EXPORT_SYMBOL_GPL(xas_store);
+
 /**
  * xas_get_mark() - Returns the state of this mark.
  * @xas: XArray operation state.
@@ -240,6 +805,30 @@ void xas_clear_mark(const struct xa_state *xas, xa_mark_t mark)
 }
 EXPORT_SYMBOL_GPL(xas_clear_mark);
 
+/**
+ * xas_init_marks() - Initialise all marks for the entry
+ * @xas: Array operations state.
+ *
+ * Initialise all marks for the entry specified by @xas.  If we're tracking
+ * free entries with a mark, we need to set it on all entries.  All other
+ * marks are cleared.
+ *
+ * This implementation is not as efficient as it could be; we may walk
+ * up the tree multiple times.
+ */
+void xas_init_marks(const struct xa_state *xas)
+{
+	xa_mark_t mark = 0;
+
+	for (;;) {
+		xas_clear_mark(xas, mark);
+		if (mark == XA_MARK_MAX)
+			break;
+		mark_inc(mark);
+	}
+}
+EXPORT_SYMBOL_GPL(xas_init_marks);
+
 /**
  * xa_init_flags() - Initialise an empty XArray with flags.
  * @xa: XArray.
@@ -253,9 +842,19 @@ EXPORT_SYMBOL_GPL(xas_clear_mark);
  */
 void xa_init_flags(struct xarray *xa, gfp_t flags)
 {
+	unsigned int lock_type;
+	static struct lock_class_key xa_lock_irq;
+	static struct lock_class_key xa_lock_bh;
+
 	spin_lock_init(&xa->xa_lock);
 	xa->xa_flags = flags;
 	xa->xa_head = NULL;
+
+	lock_type = xa_lock_type(xa);
+	if (lock_type == XA_LOCK_IRQ)
+		lockdep_set_class(&xa->xa_lock, &xa_lock_irq);
+	else if (lock_type == XA_LOCK_BH)
+		lockdep_set_class(&xa->xa_lock, &xa_lock_bh);
 }
 EXPORT_SYMBOL(xa_init_flags);
 
@@ -282,6 +881,100 @@ void *xa_load(struct xarray *xa, unsigned long index)
 }
 EXPORT_SYMBOL(xa_load);
 
+static void *xas_result(struct xa_state *xas, void *curr)
+{
+	XA_NODE_BUG_ON(xas->xa_node, xa_is_internal(curr));
+	if (xas_error(xas))
+		curr = xas->xa_node;
+	return curr;
+}
+
+/**
+ * __xa_erase() - Erase this entry from the XArray while locked.
+ * @xa: XArray.
+ * @index: Index into array.
+ *
+ * If the entry at this index is a multi-index entry then all indices will
+ * be erased, and the entry will no longer be a multi-index entry.
+ * This function expects the xa_lock to be held on entry.
+ *
+ * Context: Any context.  Expects xa_lock to be held on entry.  May
+ * release and reacquire xa_lock if @gfp flags permit.
+ * Return: The old entry at this index.
+ */
+void *__xa_erase(struct xarray *xa, unsigned long index)
+{
+	XA_STATE(xas, xa, index);
+	return xas_result(&xas, xas_store(&xas, NULL));
+}
+EXPORT_SYMBOL_GPL(__xa_erase);
+
+/**
+ * xa_store() - Store this entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * After this function returns, loads from this index will return @entry.
+ * Storing into an existing multislot entry updates the entry of every index.
+ * The marks associated with @index are unaffected unless @entry is %NULL.
+ *
+ * Context: Process context.  Takes and releases the xa_lock.  May sleep
+ * if the @gfp flags permit.
+ * Return: The old entry at this index on success, xa_err(-EINVAL) if @entry
+ * cannot be stored in an XArray, or xa_err(-ENOMEM) if memory allocation
+ * failed.
+ */
+void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
+{
+	XA_STATE(xas, xa, index);
+	void *curr;
+
+	if (WARN_ON_ONCE(xa_is_internal(entry)))
+		return XA_ERROR(-EINVAL);
+
+	do {
+		xas_lock(&xas);
+		curr = xas_store(&xas, entry);
+		xas_unlock(&xas);
+	} while (xas_nomem(&xas, gfp));
+
+	return xas_result(&xas, curr);
+}
+EXPORT_SYMBOL(xa_store);
+
+/**
+ * __xa_store() - Store this entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * You must already be holding the xa_lock when calling this function.
+ * It will drop the lock if needed to allocate memory, and then reacquire
+ * it afterwards.
+ *
+ * Context: Any context.  Expects xa_lock to be held on entry.  May
+ * release and reacquire xa_lock if @gfp flags permit.
+ * Return: The old entry at this index or xa_err() if an error happened.
+ */
+void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
+{
+	XA_STATE(xas, xa, index);
+	void *curr;
+
+	if (WARN_ON_ONCE(xa_is_internal(entry)))
+		return XA_ERROR(-EINVAL);
+
+	do {
+		curr = xas_store(&xas, entry);
+	} while (__xas_nomem(&xas, gfp));
+
+	return xas_result(&xas, curr);
+}
+EXPORT_SYMBOL(__xa_store);
+
 /**
  * __xa_set_mark() - Set this mark on this entry while locked.
  * @xa: XArray.
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index e63662db131b..05dca5c203f3 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -15,6 +15,7 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
 		 const unsigned long *bitmap2, int bits);
 int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
 		 const unsigned long *bitmap2, unsigned int bits);
+void bitmap_clear(unsigned long *map, unsigned int start, int len);
 
 #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
 
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
index 622266b197d0..c934572d935c 100644
--- a/tools/include/linux/spinlock.h
+++ b/tools/include/linux/spinlock.h
@@ -37,4 +37,6 @@ static inline bool arch_spin_is_locked(arch_spinlock_t *mutex)
 	return true;
 }
 
+#include <linux/lockdep.h>
+
 #endif
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 1379f1d78d0b..acf1afa01c5b 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -5,7 +5,7 @@ CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \
 LDFLAGS += -fsanitize=address -fsanitize=undefined
 LDLIBS+= -lpthread -lurcu
 TARGETS = main idr-test multiorder xarray
-CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o
+CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o
 OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
 	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
 
diff --git a/tools/testing/radix-tree/bitmap.c b/tools/testing/radix-tree/bitmap.c
new file mode 100644
index 000000000000..66ec4a24a203
--- /dev/null
+++ b/tools/testing/radix-tree/bitmap.c
@@ -0,0 +1,23 @@
+/* lib/bitmap.c pulls in at least two other files. */
+
+#include <linux/bitmap.h>
+
+void bitmap_clear(unsigned long *map, unsigned int start, int len)
+{
+	unsigned long *p = map + BIT_WORD(start);
+	const unsigned int size = start + len;
+	int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+	unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+	while (len - bits_to_clear >= 0) {
+		*p &= ~mask_to_clear;
+		len -= bits_to_clear;
+		bits_to_clear = BITS_PER_LONG;
+		mask_to_clear = ~0UL;
+		p++;
+	}
+	if (len) {
+		mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+		*p &= ~mask_to_clear;
+	}
+}
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index 5d06ac75a14d..4568248222ae 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -18,4 +18,8 @@
 #define pr_debug printk
 #define pr_cont printk
 
+#define __acquires(x)
+#define __releases(x)
+#define __must_hold(x)
+
 #endif /* _KERNEL_H */
diff --git a/tools/testing/radix-tree/linux/lockdep.h b/tools/testing/radix-tree/linux/lockdep.h
new file mode 100644
index 000000000000..565fccdfe6e9
--- /dev/null
+++ b/tools/testing/radix-tree/linux/lockdep.h
@@ -0,0 +1,11 @@
+#ifndef _LINUX_LOCKDEP_H
+#define _LINUX_LOCKDEP_H
+struct lock_class_key {
+	unsigned int a;
+};
+
+static inline void lockdep_set_class(spinlock_t *lock,
+					struct lock_class_key *key)
+{
+}
+#endif /* _LINUX_LOCKDEP_H */
-- 
cgit v1.2.3


From 41aec91f55985e7f14ee75fe2f6e7bcfff0d0fae Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 10 Nov 2017 15:34:55 -0500
Subject: xarray: Add XArray conditional store operations

Like cmpxchg(), xa_cmpxchg will only store to the index if the current
entry matches the old entry.  It returns the current entry, which is
usually more useful than the errno returned by radix_tree_insert().
For the users who really only want the errno, the xa_insert() wrapper
provides a more convenient calling convention.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 60 ++++++++++++++++++++++++++++++++++++++++++
 lib/test_xarray.c      | 20 ++++++++++++++
 lib/xarray.c           | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 151 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 15eab63286ed..7f740f675b96 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -275,6 +275,8 @@ struct xarray {
 void xa_init_flags(struct xarray *, gfp_t flags);
 void *xa_load(struct xarray *, unsigned long index);
 void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
+void *xa_cmpxchg(struct xarray *, unsigned long index,
+			void *old, void *entry, gfp_t);
 bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t);
 void xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
 void xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
@@ -334,6 +336,34 @@ static inline void *xa_erase(struct xarray *xa, unsigned long index)
 	return xa_store(xa, index, NULL, 0);
 }
 
+/**
+ * xa_insert() - Store this entry in the XArray unless another entry is
+ *			already present.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * If you would rather see the existing entry in the array, use xa_cmpxchg().
+ * This function is for users who don't care what the entry is, only that
+ * one is present.
+ *
+ * Context: Process context.  Takes and releases the xa_lock.
+ *	    May sleep if the @gfp flags permit.
+ * Return: 0 if the store succeeded.  -EEXIST if another entry was present.
+ * -ENOMEM if memory could not be allocated.
+ */
+static inline int xa_insert(struct xarray *xa, unsigned long index,
+		void *entry, gfp_t gfp)
+{
+	void *curr = xa_cmpxchg(xa, index, NULL, entry, gfp);
+	if (!curr)
+		return 0;
+	if (xa_is_err(curr))
+		return xa_err(curr);
+	return -EEXIST;
+}
+
 #define xa_trylock(xa)		spin_trylock(&(xa)->xa_lock)
 #define xa_lock(xa)		spin_lock(&(xa)->xa_lock)
 #define xa_unlock(xa)		spin_unlock(&(xa)->xa_lock)
@@ -355,9 +385,39 @@ static inline void *xa_erase(struct xarray *xa, unsigned long index)
  */
 void *__xa_erase(struct xarray *, unsigned long index);
 void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
+void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old,
+		void *entry, gfp_t);
 void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
 void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
 
+/**
+ * __xa_insert() - Store this entry in the XArray unless another entry is
+ *			already present.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * If you would rather see the existing entry in the array, use __xa_cmpxchg().
+ * This function is for users who don't care what the entry is, only that
+ * one is present.
+ *
+ * Context: Any context.  Expects xa_lock to be held on entry.  May
+ *	    release and reacquire xa_lock if the @gfp flags permit.
+ * Return: 0 if the store succeeded.  -EEXIST if another entry was present.
+ * -ENOMEM if memory could not be allocated.
+ */
+static inline int __xa_insert(struct xarray *xa, unsigned long index,
+		void *entry, gfp_t gfp)
+{
+	void *curr = __xa_cmpxchg(xa, index, NULL, entry, gfp);
+	if (!curr)
+		return 0;
+	if (xa_is_err(curr))
+		return xa_err(curr);
+	return -EEXIST;
+}
+
 /**
  * xa_erase_bh() - Erase this entry from the XArray.
  * @xa: XArray.
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index b711030fbc32..fb472258b639 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -198,6 +198,25 @@ static noinline void check_xa_shrink(struct xarray *xa)
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
+static noinline void check_cmpxchg(struct xarray *xa)
+{
+	void *FIVE = xa_mk_value(5);
+	void *SIX = xa_mk_value(6);
+	void *LOTS = xa_mk_value(12345678);
+
+	XA_BUG_ON(xa, !xa_empty(xa));
+	XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_KERNEL) != NULL);
+	XA_BUG_ON(xa, xa_insert(xa, 12345678, xa, GFP_KERNEL) != -EEXIST);
+	XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, SIX, FIVE, GFP_KERNEL) != LOTS);
+	XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, LOTS, FIVE, GFP_KERNEL) != LOTS);
+	XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, FIVE, LOTS, GFP_KERNEL) != FIVE);
+	XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != NULL);
+	XA_BUG_ON(xa, xa_cmpxchg(xa, 5, NULL, FIVE, GFP_KERNEL) != NULL);
+	xa_erase_index(xa, 12345678);
+	xa_erase_index(xa, 5);
+	XA_BUG_ON(xa, !xa_empty(xa));
+}
+
 static noinline void check_multi_store(struct xarray *xa)
 {
 #ifdef CONFIG_XARRAY_MULTI
@@ -274,6 +293,7 @@ static int xarray_checks(void)
 	check_xa_load(&array);
 	check_xa_mark(&array);
 	check_xa_shrink(&array);
+	check_cmpxchg(&array);
 	check_multi_store(&array);
 
 	printk("XArray: %u of %u tests passed\n", tests_passed, tests_run);
diff --git a/lib/xarray.c b/lib/xarray.c
index 4596a95ed9cd..2ba5a98ec618 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -975,6 +975,77 @@ void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
 }
 EXPORT_SYMBOL(__xa_store);
 
+/**
+ * xa_cmpxchg() - Conditionally replace an entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @old: Old value to test against.
+ * @entry: New value to place in array.
+ * @gfp: Memory allocation flags.
+ *
+ * If the entry at @index is the same as @old, replace it with @entry.
+ * If the return value is equal to @old, then the exchange was successful.
+ *
+ * Context: Process context.  Takes and releases the xa_lock.  May sleep
+ * if the @gfp flags permit.
+ * Return: The old value at this index or xa_err() if an error happened.
+ */
+void *xa_cmpxchg(struct xarray *xa, unsigned long index,
+			void *old, void *entry, gfp_t gfp)
+{
+	XA_STATE(xas, xa, index);
+	void *curr;
+
+	if (WARN_ON_ONCE(xa_is_internal(entry)))
+		return XA_ERROR(-EINVAL);
+
+	do {
+		xas_lock(&xas);
+		curr = xas_load(&xas);
+		if (curr == old)
+			xas_store(&xas, entry);
+		xas_unlock(&xas);
+	} while (xas_nomem(&xas, gfp));
+
+	return xas_result(&xas, curr);
+}
+EXPORT_SYMBOL(xa_cmpxchg);
+
+/**
+ * __xa_cmpxchg() - Store this entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @old: Old value to test against.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * You must already be holding the xa_lock when calling this function.
+ * It will drop the lock if needed to allocate memory, and then reacquire
+ * it afterwards.
+ *
+ * Context: Any context.  Expects xa_lock to be held on entry.  May
+ * release and reacquire xa_lock if @gfp flags permit.
+ * Return: The old entry at this index or xa_err() if an error happened.
+ */
+void *__xa_cmpxchg(struct xarray *xa, unsigned long index,
+			void *old, void *entry, gfp_t gfp)
+{
+	XA_STATE(xas, xa, index);
+	void *curr;
+
+	if (WARN_ON_ONCE(xa_is_internal(entry)))
+		return XA_ERROR(-EINVAL);
+
+	do {
+		curr = xas_load(&xas);
+		if (curr == old)
+			xas_store(&xas, entry);
+	} while (__xas_nomem(&xas, gfp));
+
+	return xas_result(&xas, curr);
+}
+EXPORT_SYMBOL(__xa_cmpxchg);
+
 /**
  * __xa_set_mark() - Set this mark on this entry while locked.
  * @xa: XArray.
-- 
cgit v1.2.3


From b803b42823d0d9e8b6deccf01ffc2aba5d0738df Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 14 Nov 2017 08:30:11 -0500
Subject: xarray: Add XArray iterators

The xa_for_each iterator allows the user to efficiently walk a range
of the array, executing the loop body once for each entry in that
range that matches the filter.  This commit also includes xa_find()
and xa_find_after() which are helper functions for xa_for_each() but
may also be useful in their own right.

In the xas family of functions, we have xas_for_each(), xas_find(),
xas_next_entry(), xas_for_each_tagged(), xas_find_tagged(),
xas_next_tagged() and xas_pause().

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 165 ++++++++++++++++++++++++++++
 lib/test_xarray.c      | 183 +++++++++++++++++++++++++++++++
 lib/xarray.c           | 292 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 640 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 7f740f675b96..66b10efa5a50 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -280,6 +280,10 @@ void *xa_cmpxchg(struct xarray *, unsigned long index,
 bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t);
 void xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
 void xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
+void *xa_find(struct xarray *xa, unsigned long *index,
+		unsigned long max, xa_mark_t) __attribute__((nonnull(2)));
+void *xa_find_after(struct xarray *xa, unsigned long *index,
+		unsigned long max, xa_mark_t) __attribute__((nonnull(2)));
 
 /**
  * xa_init() - Initialise an empty XArray.
@@ -364,6 +368,35 @@ static inline int xa_insert(struct xarray *xa, unsigned long index,
 	return -EEXIST;
 }
 
+/**
+ * xa_for_each() - Iterate over a portion of an XArray.
+ * @xa: XArray.
+ * @entry: Entry retrieved from array.
+ * @index: Index of @entry.
+ * @max: Maximum index to retrieve from array.
+ * @filter: Selection criterion.
+ *
+ * Initialise @index to the lowest index you want to retrieve from the
+ * array.  During the iteration, @entry will have the value of the entry
+ * stored in @xa at @index.  The iteration will skip all entries in the
+ * array which do not match @filter.  You may modify @index during the
+ * iteration if you want to skip or reprocess indices.  It is safe to modify
+ * the array during the iteration.  At the end of the iteration, @entry will
+ * be set to NULL and @index will have a value less than or equal to max.
+ *
+ * xa_for_each() is O(n.log(n)) while xas_for_each() is O(n).  You have
+ * to handle your own locking with xas_for_each(), and if you have to unlock
+ * after each iteration, it will also end up being O(n.log(n)).  xa_for_each()
+ * will spin if it hits a retry entry; if you intend to see retry entries,
+ * you should use the xas_for_each() iterator instead.  The xas_for_each()
+ * iterator will expand into more inline code than xa_for_each().
+ *
+ * Context: Any context.  Takes and releases the RCU lock.
+ */
+#define xa_for_each(xa, entry, index, max, filter) \
+	for (entry = xa_find(xa, &index, max, filter); entry; \
+	     entry = xa_find_after(xa, &index, max, filter))
+
 #define xa_trylock(xa)		spin_trylock(&(xa)->xa_lock)
 #define xa_lock(xa)		spin_lock(&(xa)->xa_lock)
 #define xa_unlock(xa)		spin_unlock(&(xa)->xa_lock)
@@ -835,13 +868,16 @@ static inline bool xas_retry(struct xa_state *xas, const void *entry)
 
 void *xas_load(struct xa_state *);
 void *xas_store(struct xa_state *, void *entry);
+void *xas_find(struct xa_state *, unsigned long max);
 
 bool xas_get_mark(const struct xa_state *, xa_mark_t);
 void xas_set_mark(const struct xa_state *, xa_mark_t);
 void xas_clear_mark(const struct xa_state *, xa_mark_t);
+void *xas_find_marked(struct xa_state *, unsigned long max, xa_mark_t);
 void xas_init_marks(const struct xa_state *);
 
 bool xas_nomem(struct xa_state *, gfp_t);
+void xas_pause(struct xa_state *);
 
 /**
  * xas_reload() - Refetch an entry from the xarray.
@@ -914,4 +950,133 @@ static inline void xas_set_update(struct xa_state *xas, xa_update_node_t update)
 	xas->xa_update = update;
 }
 
+/**
+ * xas_next_entry() - Advance iterator to next present entry.
+ * @xas: XArray operation state.
+ * @max: Highest index to return.
+ *
+ * xas_next_entry() is an inline function to optimise xarray traversal for
+ * speed.  It is equivalent to calling xas_find(), and will call xas_find()
+ * for all the hard cases.
+ *
+ * Return: The next present entry after the one currently referred to by @xas.
+ */
+static inline void *xas_next_entry(struct xa_state *xas, unsigned long max)
+{
+	struct xa_node *node = xas->xa_node;
+	void *entry;
+
+	if (unlikely(xas_not_node(node) || node->shift ||
+			xas->xa_offset != (xas->xa_index & XA_CHUNK_MASK)))
+		return xas_find(xas, max);
+
+	do {
+		if (unlikely(xas->xa_index >= max))
+			return xas_find(xas, max);
+		if (unlikely(xas->xa_offset == XA_CHUNK_MASK))
+			return xas_find(xas, max);
+		entry = xa_entry(xas->xa, node, xas->xa_offset + 1);
+		if (unlikely(xa_is_internal(entry)))
+			return xas_find(xas, max);
+		xas->xa_offset++;
+		xas->xa_index++;
+	} while (!entry);
+
+	return entry;
+}
+
+/* Private */
+static inline unsigned int xas_find_chunk(struct xa_state *xas, bool advance,
+		xa_mark_t mark)
+{
+	unsigned long *addr = xas->xa_node->marks[(__force unsigned)mark];
+	unsigned int offset = xas->xa_offset;
+
+	if (advance)
+		offset++;
+	if (XA_CHUNK_SIZE == BITS_PER_LONG) {
+		if (offset < XA_CHUNK_SIZE) {
+			unsigned long data = *addr & (~0UL << offset);
+			if (data)
+				return __ffs(data);
+		}
+		return XA_CHUNK_SIZE;
+	}
+
+	return find_next_bit(addr, XA_CHUNK_SIZE, offset);
+}
+
+/**
+ * xas_next_marked() - Advance iterator to next marked entry.
+ * @xas: XArray operation state.
+ * @max: Highest index to return.
+ * @mark: Mark to search for.
+ *
+ * xas_next_marked() is an inline function to optimise xarray traversal for
+ * speed.  It is equivalent to calling xas_find_marked(), and will call
+ * xas_find_marked() for all the hard cases.
+ *
+ * Return: The next marked entry after the one currently referred to by @xas.
+ */
+static inline void *xas_next_marked(struct xa_state *xas, unsigned long max,
+								xa_mark_t mark)
+{
+	struct xa_node *node = xas->xa_node;
+	unsigned int offset;
+
+	if (unlikely(xas_not_node(node) || node->shift))
+		return xas_find_marked(xas, max, mark);
+	offset = xas_find_chunk(xas, true, mark);
+	xas->xa_offset = offset;
+	xas->xa_index = (xas->xa_index & ~XA_CHUNK_MASK) + offset;
+	if (xas->xa_index > max)
+		return NULL;
+	if (offset == XA_CHUNK_SIZE)
+		return xas_find_marked(xas, max, mark);
+	return xa_entry(xas->xa, node, offset);
+}
+
+/*
+ * If iterating while holding a lock, drop the lock and reschedule
+ * every %XA_CHECK_SCHED loops.
+ */
+enum {
+	XA_CHECK_SCHED = 4096,
+};
+
+/**
+ * xas_for_each() - Iterate over a range of an XArray.
+ * @xas: XArray operation state.
+ * @entry: Entry retrieved from the array.
+ * @max: Maximum index to retrieve from array.
+ *
+ * The loop body will be executed for each entry present in the xarray
+ * between the current xas position and @max.  @entry will be set to
+ * the entry retrieved from the xarray.  It is safe to delete entries
+ * from the array in the loop body.  You should hold either the RCU lock
+ * or the xa_lock while iterating.  If you need to drop the lock, call
+ * xas_pause() first.
+ */
+#define xas_for_each(xas, entry, max) \
+	for (entry = xas_find(xas, max); entry; \
+	     entry = xas_next_entry(xas, max))
+
+/**
+ * xas_for_each_marked() - Iterate over a range of an XArray.
+ * @xas: XArray operation state.
+ * @entry: Entry retrieved from the array.
+ * @max: Maximum index to retrieve from array.
+ * @mark: Mark to search for.
+ *
+ * The loop body will be executed for each marked entry in the xarray
+ * between the current xas position and @max.  @entry will be set to
+ * the entry retrieved from the xarray.  It is safe to delete entries
+ * from the array in the loop body.  You should hold either the RCU lock
+ * or the xa_lock while iterating.  If you need to drop the lock, call
+ * xas_pause() first.
+ */
+#define xas_for_each_marked(xas, entry, max, mark) \
+	for (entry = xas_find_marked(xas, max, mark); entry; \
+	     entry = xas_next_marked(xas, max, mark))
+
 #endif /* _LINUX_XARRAY_H */
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index fb472258b639..e3c2d4d00b15 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -75,6 +75,48 @@ static noinline void check_xa_err(struct xarray *xa)
 //	XA_BUG_ON(xa, xa_err(xa_store(xa, 0, xa_mk_internal(0), 0)) != -EINVAL);
 }
 
+static noinline void check_xas_retry(struct xarray *xa)
+{
+	XA_STATE(xas, xa, 0);
+	void *entry;
+
+	xa_store_index(xa, 0, GFP_KERNEL);
+	xa_store_index(xa, 1, GFP_KERNEL);
+
+	rcu_read_lock();
+	XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != xa_mk_value(0));
+	xa_erase_index(xa, 1);
+	XA_BUG_ON(xa, !xa_is_retry(xas_reload(&xas)));
+	XA_BUG_ON(xa, xas_retry(&xas, NULL));
+	XA_BUG_ON(xa, xas_retry(&xas, xa_mk_value(0)));
+	xas_reset(&xas);
+	XA_BUG_ON(xa, xas.xa_node != XAS_RESTART);
+	XA_BUG_ON(xa, xas_next_entry(&xas, ULONG_MAX) != xa_mk_value(0));
+	XA_BUG_ON(xa, xas.xa_node != NULL);
+
+	XA_BUG_ON(xa, xa_store_index(xa, 1, GFP_KERNEL) != NULL);
+	XA_BUG_ON(xa, !xa_is_internal(xas_reload(&xas)));
+	xas.xa_node = XAS_RESTART;
+	XA_BUG_ON(xa, xas_next_entry(&xas, ULONG_MAX) != xa_mk_value(0));
+	rcu_read_unlock();
+
+	/* Make sure we can iterate through retry entries */
+	xas_lock(&xas);
+	xas_set(&xas, 0);
+	xas_store(&xas, XA_RETRY_ENTRY);
+	xas_set(&xas, 1);
+	xas_store(&xas, XA_RETRY_ENTRY);
+
+	xas_set(&xas, 0);
+	xas_for_each(&xas, entry, ULONG_MAX) {
+		xas_store(&xas, xa_mk_value(xas.xa_index));
+	}
+	xas_unlock(&xas);
+
+	xa_erase_index(xa, 0);
+	xa_erase_index(xa, 1);
+}
+
 static noinline void check_xa_load(struct xarray *xa)
 {
 	unsigned long i, j;
@@ -217,6 +259,44 @@ static noinline void check_cmpxchg(struct xarray *xa)
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
+static noinline void check_xas_erase(struct xarray *xa)
+{
+	XA_STATE(xas, xa, 0);
+	void *entry;
+	unsigned long i, j;
+
+	for (i = 0; i < 200; i++) {
+		for (j = i; j < 2 * i + 17; j++) {
+			xas_set(&xas, j);
+			do {
+				xas_lock(&xas);
+				xas_store(&xas, xa_mk_value(j));
+				xas_unlock(&xas);
+			} while (xas_nomem(&xas, GFP_KERNEL));
+		}
+
+		xas_set(&xas, ULONG_MAX);
+		do {
+			xas_lock(&xas);
+			xas_store(&xas, xa_mk_value(0));
+			xas_unlock(&xas);
+		} while (xas_nomem(&xas, GFP_KERNEL));
+
+		xas_lock(&xas);
+		xas_store(&xas, NULL);
+
+		xas_set(&xas, 0);
+		j = i;
+		xas_for_each(&xas, entry, ULONG_MAX) {
+			XA_BUG_ON(xa, entry != xa_mk_value(j));
+			xas_store(&xas, NULL);
+			j++;
+		}
+		xas_unlock(&xas);
+		XA_BUG_ON(xa, !xa_empty(xa));
+	}
+}
+
 static noinline void check_multi_store(struct xarray *xa)
 {
 #ifdef CONFIG_XARRAY_MULTI
@@ -285,16 +365,119 @@ static noinline void check_multi_store(struct xarray *xa)
 #endif
 }
 
+static noinline void check_multi_find(struct xarray *xa)
+{
+#ifdef CONFIG_XARRAY_MULTI
+	unsigned long index;
+
+	xa_store_order(xa, 12, 2, xa_mk_value(12), GFP_KERNEL);
+	XA_BUG_ON(xa, xa_store_index(xa, 16, GFP_KERNEL) != NULL);
+
+	index = 0;
+	XA_BUG_ON(xa, xa_find(xa, &index, ULONG_MAX, XA_PRESENT) !=
+			xa_mk_value(12));
+	XA_BUG_ON(xa, index != 12);
+	index = 13;
+	XA_BUG_ON(xa, xa_find(xa, &index, ULONG_MAX, XA_PRESENT) !=
+			xa_mk_value(12));
+	XA_BUG_ON(xa, (index < 12) || (index >= 16));
+	XA_BUG_ON(xa, xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT) !=
+			xa_mk_value(16));
+	XA_BUG_ON(xa, index != 16);
+
+	xa_erase_index(xa, 12);
+	xa_erase_index(xa, 16);
+	XA_BUG_ON(xa, !xa_empty(xa));
+#endif
+}
+
+static noinline void check_multi_find_2(struct xarray *xa)
+{
+	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 10 : 1;
+	unsigned int i, j;
+	void *entry;
+
+	for (i = 0; i < max_order; i++) {
+		unsigned long index = 1UL << i;
+		for (j = 0; j < index; j++) {
+			XA_STATE(xas, xa, j + index);
+			xa_store_index(xa, index - 1, GFP_KERNEL);
+			xa_store_order(xa, index, i, xa_mk_value(index),
+					GFP_KERNEL);
+			rcu_read_lock();
+			xas_for_each(&xas, entry, ULONG_MAX) {
+				xa_erase_index(xa, index);
+			}
+			rcu_read_unlock();
+			xa_erase_index(xa, index - 1);
+			XA_BUG_ON(xa, !xa_empty(xa));
+		}
+	}
+}
+
+static noinline void check_find(struct xarray *xa)
+{
+	unsigned long i, j, k;
+
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	/*
+	 * Check xa_find with all pairs between 0 and 99 inclusive,
+	 * starting at every index between 0 and 99
+	 */
+	for (i = 0; i < 100; i++) {
+		XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL);
+		xa_set_mark(xa, i, XA_MARK_0);
+		for (j = 0; j < i; j++) {
+			XA_BUG_ON(xa, xa_store_index(xa, j, GFP_KERNEL) !=
+					NULL);
+			xa_set_mark(xa, j, XA_MARK_0);
+			for (k = 0; k < 100; k++) {
+				unsigned long index = k;
+				void *entry = xa_find(xa, &index, ULONG_MAX,
+								XA_PRESENT);
+				if (k <= j)
+					XA_BUG_ON(xa, index != j);
+				else if (k <= i)
+					XA_BUG_ON(xa, index != i);
+				else
+					XA_BUG_ON(xa, entry != NULL);
+
+				index = k;
+				entry = xa_find(xa, &index, ULONG_MAX,
+								XA_MARK_0);
+				if (k <= j)
+					XA_BUG_ON(xa, index != j);
+				else if (k <= i)
+					XA_BUG_ON(xa, index != i);
+				else
+					XA_BUG_ON(xa, entry != NULL);
+			}
+			xa_erase_index(xa, j);
+			XA_BUG_ON(xa, xa_get_mark(xa, j, XA_MARK_0));
+			XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0));
+		}
+		xa_erase_index(xa, i);
+		XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_0));
+	}
+	XA_BUG_ON(xa, !xa_empty(xa));
+	check_multi_find(xa);
+	check_multi_find_2(xa);
+}
+
 static DEFINE_XARRAY(array);
 
 static int xarray_checks(void)
 {
 	check_xa_err(&array);
+	check_xas_retry(&array);
 	check_xa_load(&array);
 	check_xa_mark(&array);
 	check_xa_shrink(&array);
+	check_xas_erase(&array);
 	check_cmpxchg(&array);
 	check_multi_store(&array);
+	check_find(&array);
 
 	printk("XArray: %u of %u tests passed\n", tests_passed, tests_run);
 	return (tests_run == tests_passed) ? 0 : -EINVAL;
diff --git a/lib/xarray.c b/lib/xarray.c
index 2ba5a98ec618..24494f42daa6 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -128,6 +128,11 @@ static unsigned int get_offset(unsigned long index, struct xa_node *node)
 	return (index >> node->shift) & XA_CHUNK_MASK;
 }
 
+static void xas_set_offset(struct xa_state *xas)
+{
+	xas->xa_offset = get_offset(xas->xa_index, xas->xa_node);
+}
+
 /* move the index either forwards (find) or backwards (sibling slot) */
 static void xas_move_index(struct xa_state *xas, unsigned long offset)
 {
@@ -136,6 +141,12 @@ static void xas_move_index(struct xa_state *xas, unsigned long offset)
 	xas->xa_index += offset << shift;
 }
 
+static void xas_advance(struct xa_state *xas)
+{
+	xas->xa_offset++;
+	xas_move_index(xas, xas->xa_offset);
+}
+
 static void *set_bounds(struct xa_state *xas)
 {
 	xas->xa_node = XAS_BOUNDS;
@@ -829,6 +840,202 @@ void xas_init_marks(const struct xa_state *xas)
 }
 EXPORT_SYMBOL_GPL(xas_init_marks);
 
+/**
+ * xas_pause() - Pause a walk to drop a lock.
+ * @xas: XArray operation state.
+ *
+ * Some users need to pause a walk and drop the lock they're holding in
+ * order to yield to a higher priority thread or carry out an operation
+ * on an entry.  Those users should call this function before they drop
+ * the lock.  It resets the @xas to be suitable for the next iteration
+ * of the loop after the user has reacquired the lock.  If most entries
+ * found during a walk require you to call xas_pause(), the xa_for_each()
+ * iterator may be more appropriate.
+ *
+ * Note that xas_pause() only works for forward iteration.  If a user needs
+ * to pause a reverse iteration, we will need a xas_pause_rev().
+ */
+void xas_pause(struct xa_state *xas)
+{
+	struct xa_node *node = xas->xa_node;
+
+	if (xas_invalid(xas))
+		return;
+
+	if (node) {
+		unsigned int offset = xas->xa_offset;
+		while (++offset < XA_CHUNK_SIZE) {
+			if (!xa_is_sibling(xa_entry(xas->xa, node, offset)))
+				break;
+		}
+		xas->xa_index += (offset - xas->xa_offset) << node->shift;
+	} else {
+		xas->xa_index++;
+	}
+	xas->xa_node = XAS_RESTART;
+}
+EXPORT_SYMBOL_GPL(xas_pause);
+
+/**
+ * xas_find() - Find the next present entry in the XArray.
+ * @xas: XArray operation state.
+ * @max: Highest index to return.
+ *
+ * If the @xas has not yet been walked to an entry, return the entry
+ * which has an index >= xas.xa_index.  If it has been walked, the entry
+ * currently being pointed at has been processed, and so we move to the
+ * next entry.
+ *
+ * If no entry is found and the array is smaller than @max, the iterator
+ * is set to the smallest index not yet in the array.  This allows @xas
+ * to be immediately passed to xas_store().
+ *
+ * Return: The entry, if found, otherwise %NULL.
+ */
+void *xas_find(struct xa_state *xas, unsigned long max)
+{
+	void *entry;
+
+	if (xas_error(xas))
+		return NULL;
+
+	if (!xas->xa_node) {
+		xas->xa_index = 1;
+		return set_bounds(xas);
+	} else if (xas_top(xas->xa_node)) {
+		entry = xas_load(xas);
+		if (entry || xas_not_node(xas->xa_node))
+			return entry;
+	} else if (!xas->xa_node->shift &&
+		    xas->xa_offset != (xas->xa_index & XA_CHUNK_MASK)) {
+		xas->xa_offset = ((xas->xa_index - 1) & XA_CHUNK_MASK) + 1;
+	}
+
+	xas_advance(xas);
+
+	while (xas->xa_node && (xas->xa_index <= max)) {
+		if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) {
+			xas->xa_offset = xas->xa_node->offset + 1;
+			xas->xa_node = xa_parent(xas->xa, xas->xa_node);
+			continue;
+		}
+
+		entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
+		if (xa_is_node(entry)) {
+			xas->xa_node = xa_to_node(entry);
+			xas->xa_offset = 0;
+			continue;
+		}
+		if (entry && !xa_is_sibling(entry))
+			return entry;
+
+		xas_advance(xas);
+	}
+
+	if (!xas->xa_node)
+		xas->xa_node = XAS_BOUNDS;
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(xas_find);
+
+/**
+ * xas_find_marked() - Find the next marked entry in the XArray.
+ * @xas: XArray operation state.
+ * @max: Highest index to return.
+ * @mark: Mark number to search for.
+ *
+ * If the @xas has not yet been walked to an entry, return the marked entry
+ * which has an index >= xas.xa_index.  If it has been walked, the entry
+ * currently being pointed at has been processed, and so we return the
+ * first marked entry with an index > xas.xa_index.
+ *
+ * If no marked entry is found and the array is smaller than @max, @xas is
+ * set to the bounds state and xas->xa_index is set to the smallest index
+ * not yet in the array.  This allows @xas to be immediately passed to
+ * xas_store().
+ *
+ * If no entry is found before @max is reached, @xas is set to the restart
+ * state.
+ *
+ * Return: The entry, if found, otherwise %NULL.
+ */
+void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark)
+{
+	bool advance = true;
+	unsigned int offset;
+	void *entry;
+
+	if (xas_error(xas))
+		return NULL;
+
+	if (!xas->xa_node) {
+		xas->xa_index = 1;
+		goto out;
+	} else if (xas_top(xas->xa_node)) {
+		advance = false;
+		entry = xa_head(xas->xa);
+		xas->xa_node = NULL;
+		if (xas->xa_index > max_index(entry))
+			goto bounds;
+		if (!xa_is_node(entry)) {
+			if (xa_marked(xas->xa, mark))
+				return entry;
+			xas->xa_index = 1;
+			goto out;
+		}
+		xas->xa_node = xa_to_node(entry);
+		xas->xa_offset = xas->xa_index >> xas->xa_node->shift;
+	}
+
+	while (xas->xa_index <= max) {
+		if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) {
+			xas->xa_offset = xas->xa_node->offset + 1;
+			xas->xa_node = xa_parent(xas->xa, xas->xa_node);
+			if (!xas->xa_node)
+				break;
+			advance = false;
+			continue;
+		}
+
+		if (!advance) {
+			entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
+			if (xa_is_sibling(entry)) {
+				xas->xa_offset = xa_to_sibling(entry);
+				xas_move_index(xas, xas->xa_offset);
+			}
+		}
+
+		offset = xas_find_chunk(xas, advance, mark);
+		if (offset > xas->xa_offset) {
+			advance = false;
+			xas_move_index(xas, offset);
+			/* Mind the wrap */
+			if ((xas->xa_index - 1) >= max)
+				goto max;
+			xas->xa_offset = offset;
+			if (offset == XA_CHUNK_SIZE)
+				continue;
+		}
+
+		entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
+		if (!xa_is_node(entry))
+			return entry;
+		xas->xa_node = xa_to_node(entry);
+		xas_set_offset(xas);
+	}
+
+out:
+	if (!max)
+		goto max;
+bounds:
+	xas->xa_node = XAS_BOUNDS;
+	return NULL;
+max:
+	xas->xa_node = XAS_RESTART;
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(xas_find_marked);
+
 /**
  * xa_init_flags() - Initialise an empty XArray with flags.
  * @xa: XArray.
@@ -1152,6 +1359,91 @@ void xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
 }
 EXPORT_SYMBOL(xa_clear_mark);
 
+/**
+ * xa_find() - Search the XArray for an entry.
+ * @xa: XArray.
+ * @indexp: Pointer to an index.
+ * @max: Maximum index to search to.
+ * @filter: Selection criterion.
+ *
+ * Finds the entry in @xa which matches the @filter, and has the lowest
+ * index that is at least @indexp and no more than @max.
+ * If an entry is found, @indexp is updated to be the index of the entry.
+ * This function is protected by the RCU read lock, so it may not find
+ * entries which are being simultaneously added.  It will not return an
+ * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find().
+ *
+ * Context: Any context.  Takes and releases the RCU lock.
+ * Return: The entry, if found, otherwise %NULL.
+ */
+void *xa_find(struct xarray *xa, unsigned long *indexp,
+			unsigned long max, xa_mark_t filter)
+{
+	XA_STATE(xas, xa, *indexp);
+	void *entry;
+
+	rcu_read_lock();
+	do {
+		if ((__force unsigned int)filter < XA_MAX_MARKS)
+			entry = xas_find_marked(&xas, max, filter);
+		else
+			entry = xas_find(&xas, max);
+	} while (xas_retry(&xas, entry));
+	rcu_read_unlock();
+
+	if (entry)
+		*indexp = xas.xa_index;
+	return entry;
+}
+EXPORT_SYMBOL(xa_find);
+
+/**
+ * xa_find_after() - Search the XArray for a present entry.
+ * @xa: XArray.
+ * @indexp: Pointer to an index.
+ * @max: Maximum index to search to.
+ * @filter: Selection criterion.
+ *
+ * Finds the entry in @xa which matches the @filter and has the lowest
+ * index that is above @indexp and no more than @max.
+ * If an entry is found, @indexp is updated to be the index of the entry.
+ * This function is protected by the RCU read lock, so it may miss entries
+ * which are being simultaneously added.  It will not return an
+ * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find().
+ *
+ * Context: Any context.  Takes and releases the RCU lock.
+ * Return: The pointer, if found, otherwise %NULL.
+ */
+void *xa_find_after(struct xarray *xa, unsigned long *indexp,
+			unsigned long max, xa_mark_t filter)
+{
+	XA_STATE(xas, xa, *indexp + 1);
+	void *entry;
+
+	rcu_read_lock();
+	for (;;) {
+		if ((__force unsigned int)filter < XA_MAX_MARKS)
+			entry = xas_find_marked(&xas, max, filter);
+		else
+			entry = xas_find(&xas, max);
+		if (xas.xa_shift) {
+			if (xas.xa_index & ((1UL << xas.xa_shift) - 1))
+				continue;
+		} else {
+			if (xas.xa_offset < (xas.xa_index & XA_CHUNK_MASK))
+				continue;
+		}
+		if (!xas_retry(&xas, entry))
+			break;
+	}
+	rcu_read_unlock();
+
+	if (entry)
+		*indexp = xas.xa_index;
+	return entry;
+}
+EXPORT_SYMBOL(xa_find_after);
+
 #ifdef XA_DEBUG
 void xa_dump_node(const struct xa_node *node)
 {
-- 
cgit v1.2.3


From 80a0a1a9a3cde9b23851e8eb7160e2786549306a Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 14 Nov 2017 16:42:22 -0500
Subject: xarray: Extract entries from an XArray

The xa_extract function combines the functionality of
radix_tree_gang_lookup() and radix_tree_gang_lookup_tagged().
It extracts entries matching the specified filter into a normal array.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h |  2 ++
 lib/xarray.c           | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 66b10efa5a50..82ceed981924 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -284,6 +284,8 @@ void *xa_find(struct xarray *xa, unsigned long *index,
 		unsigned long max, xa_mark_t) __attribute__((nonnull(2)));
 void *xa_find_after(struct xarray *xa, unsigned long *index,
 		unsigned long max, xa_mark_t) __attribute__((nonnull(2)));
+unsigned int xa_extract(struct xarray *, void **dst, unsigned long start,
+		unsigned long max, unsigned int n, xa_mark_t);
 
 /**
  * xa_init() - Initialise an empty XArray.
diff --git a/lib/xarray.c b/lib/xarray.c
index 24494f42daa6..70e04810c8c2 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1444,6 +1444,86 @@ void *xa_find_after(struct xarray *xa, unsigned long *indexp,
 }
 EXPORT_SYMBOL(xa_find_after);
 
+static unsigned int xas_extract_present(struct xa_state *xas, void **dst,
+			unsigned long max, unsigned int n)
+{
+	void *entry;
+	unsigned int i = 0;
+
+	rcu_read_lock();
+	xas_for_each(xas, entry, max) {
+		if (xas_retry(xas, entry))
+			continue;
+		dst[i++] = entry;
+		if (i == n)
+			break;
+	}
+	rcu_read_unlock();
+
+	return i;
+}
+
+static unsigned int xas_extract_marked(struct xa_state *xas, void **dst,
+			unsigned long max, unsigned int n, xa_mark_t mark)
+{
+	void *entry;
+	unsigned int i = 0;
+
+	rcu_read_lock();
+	xas_for_each_marked(xas, entry, max, mark) {
+		if (xas_retry(xas, entry))
+			continue;
+		dst[i++] = entry;
+		if (i == n)
+			break;
+	}
+	rcu_read_unlock();
+
+	return i;
+}
+
+/**
+ * xa_extract() - Copy selected entries from the XArray into a normal array.
+ * @xa: The source XArray to copy from.
+ * @dst: The buffer to copy entries into.
+ * @start: The first index in the XArray eligible to be selected.
+ * @max: The last index in the XArray eligible to be selected.
+ * @n: The maximum number of entries to copy.
+ * @filter: Selection criterion.
+ *
+ * Copies up to @n entries that match @filter from the XArray.  The
+ * copied entries will have indices between @start and @max, inclusive.
+ *
+ * The @filter may be an XArray mark value, in which case entries which are
+ * marked with that mark will be copied.  It may also be %XA_PRESENT, in
+ * which case all entries which are not NULL will be copied.
+ *
+ * The entries returned may not represent a snapshot of the XArray at a
+ * moment in time.  For example, if another thread stores to index 5, then
+ * index 10, calling xa_extract() may return the old contents of index 5
+ * and the new contents of index 10.  Indices not modified while this
+ * function is running will not be skipped.
+ *
+ * If you need stronger guarantees, holding the xa_lock across calls to this
+ * function will prevent concurrent modification.
+ *
+ * Context: Any context.  Takes and releases the RCU lock.
+ * Return: The number of entries copied.
+ */
+unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start,
+			unsigned long max, unsigned int n, xa_mark_t filter)
+{
+	XA_STATE(xas, xa, start);
+
+	if (!n)
+		return 0;
+
+	if ((__force unsigned int)filter < XA_MAX_MARKS)
+		return xas_extract_marked(&xas, dst, max, n, filter);
+	return xas_extract_present(&xas, dst, max, n);
+}
+EXPORT_SYMBOL(xa_extract);
+
 #ifdef XA_DEBUG
 void xa_dump_node(const struct xa_node *node)
 {
-- 
cgit v1.2.3


From 687149fca1f37c447e5d161e0a4a04cb2c880cb6 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 17 Nov 2017 08:16:34 -0500
Subject: xarray: Destroy an XArray

This function frees all the internal memory allocated to the xarray
and reinitialises it to be empty.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h |  1 +
 lib/test_xarray.c      | 34 ++++++++++++++++++++++++++++++++++
 lib/xarray.c           | 28 ++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 82ceed981924..0a758fa3ed2c 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -286,6 +286,7 @@ void *xa_find_after(struct xarray *xa, unsigned long *index,
 		unsigned long max, xa_mark_t) __attribute__((nonnull(2)));
 unsigned int xa_extract(struct xarray *, void **dst, unsigned long start,
 		unsigned long max, unsigned int n, xa_mark_t);
+void xa_destroy(struct xarray *);
 
 /**
  * xa_init() - Initialise an empty XArray.
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index e3c2d4d00b15..a96f67caa1c2 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -465,6 +465,39 @@ static noinline void check_find(struct xarray *xa)
 	check_multi_find_2(xa);
 }
 
+static noinline void check_destroy(struct xarray *xa)
+{
+	unsigned long index;
+
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	/* Destroying an empty array is a no-op */
+	xa_destroy(xa);
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	/* Destroying an array with a single entry */
+	for (index = 0; index < 1000; index++) {
+		xa_store_index(xa, index, GFP_KERNEL);
+		XA_BUG_ON(xa, xa_empty(xa));
+		xa_destroy(xa);
+		XA_BUG_ON(xa, !xa_empty(xa));
+	}
+
+	/* Destroying an array with a single entry at ULONG_MAX */
+	xa_store(xa, ULONG_MAX, xa, GFP_KERNEL);
+	XA_BUG_ON(xa, xa_empty(xa));
+	xa_destroy(xa);
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+#ifdef CONFIG_XARRAY_MULTI
+	/* Destroying an array with a multi-index entry */
+	xa_store_order(xa, 1 << 11, 11, xa, GFP_KERNEL);
+	XA_BUG_ON(xa, xa_empty(xa));
+	xa_destroy(xa);
+	XA_BUG_ON(xa, !xa_empty(xa));
+#endif
+}
+
 static DEFINE_XARRAY(array);
 
 static int xarray_checks(void)
@@ -478,6 +511,7 @@ static int xarray_checks(void)
 	check_cmpxchg(&array);
 	check_multi_store(&array);
 	check_find(&array);
+	check_destroy(&array);
 
 	printk("XArray: %u of %u tests passed\n", tests_passed, tests_run);
 	return (tests_run == tests_passed) ? 0 : -EINVAL;
diff --git a/lib/xarray.c b/lib/xarray.c
index 70e04810c8c2..057dbe0d3bf6 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1524,6 +1524,34 @@ unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start,
 }
 EXPORT_SYMBOL(xa_extract);
 
+/**
+ * xa_destroy() - Free all internal data structures.
+ * @xa: XArray.
+ *
+ * After calling this function, the XArray is empty and has freed all memory
+ * allocated for its internal data structures.  You are responsible for
+ * freeing the objects referenced by the XArray.
+ *
+ * Context: Any context.  Takes and releases the xa_lock, interrupt-safe.
+ */
+void xa_destroy(struct xarray *xa)
+{
+	XA_STATE(xas, xa, 0);
+	unsigned long flags;
+	void *entry;
+
+	xas.xa_node = NULL;
+	xas_lock_irqsave(&xas, flags);
+	entry = xa_head_locked(xa);
+	RCU_INIT_POINTER(xa->xa_head, NULL);
+	xas_init_marks(&xas);
+	/* lockdep checks we're still holding the lock in xas_free_nodes() */
+	if (xa_is_node(entry))
+		xas_free_nodes(&xas, xa_to_node(entry));
+	xas_unlock_irqrestore(&xas, flags);
+}
+EXPORT_SYMBOL(xa_destroy);
+
 #ifdef XA_DEBUG
 void xa_dump_node(const struct xa_node *node)
 {
-- 
cgit v1.2.3


From 64d3e9a9e0cc51957d243dd2b0adc5d74ff5e128 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 1 Dec 2017 00:06:52 -0500
Subject: xarray: Step through an XArray

The xas_next and xas_prev functions move the xas index by one position,
and adjust the rest of the iterator state to match it.  This is more
efficient than calling xas_set() as it keeps the iterator at the leaves
of the tree instead of walking the iterator from the root each time.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h |  67 ++++++++++++++++++++++++++++
 lib/test_xarray.c      | 115 +++++++++++++++++++++++++++++++++++++++++++++++++
 lib/xarray.c           |  74 +++++++++++++++++++++++++++++++
 3 files changed, 256 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 0a758fa3ed2c..381f94a66762 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -828,6 +828,12 @@ static inline bool xas_not_node(struct xa_node *node)
 	return ((unsigned long)node & 3) || !node;
 }
 
+/* True if the node represents RESTART or an error */
+static inline bool xas_frozen(struct xa_node *node)
+{
+	return (unsigned long)node & 2;
+}
+
 /* True if the node represents head-of-tree, RESTART or BOUNDS */
 static inline bool xas_top(struct xa_node *node)
 {
@@ -1082,4 +1088,65 @@ enum {
 	for (entry = xas_find_marked(xas, max, mark); entry; \
 	     entry = xas_next_marked(xas, max, mark))
 
+void *__xas_next(struct xa_state *);
+void *__xas_prev(struct xa_state *);
+
+/**
+ * xas_prev() - Move iterator to previous index.
+ * @xas: XArray operation state.
+ *
+ * If the @xas was in an error state, it will remain in an error state
+ * and this function will return %NULL.  If the @xas has never been walked,
+ * it will have the effect of calling xas_load().  Otherwise one will be
+ * subtracted from the index and the state will be walked to the correct
+ * location in the array for the next operation.
+ *
+ * If the iterator was referencing index 0, this function wraps
+ * around to %ULONG_MAX.
+ *
+ * Return: The entry at the new index.  This may be %NULL or an internal
+ * entry.
+ */
+static inline void *xas_prev(struct xa_state *xas)
+{
+	struct xa_node *node = xas->xa_node;
+
+	if (unlikely(xas_not_node(node) || node->shift ||
+				xas->xa_offset == 0))
+		return __xas_prev(xas);
+
+	xas->xa_index--;
+	xas->xa_offset--;
+	return xa_entry(xas->xa, node, xas->xa_offset);
+}
+
+/**
+ * xas_next() - Move state to next index.
+ * @xas: XArray operation state.
+ *
+ * If the @xas was in an error state, it will remain in an error state
+ * and this function will return %NULL.  If the @xas has never been walked,
+ * it will have the effect of calling xas_load().  Otherwise one will be
+ * added to the index and the state will be walked to the correct
+ * location in the array for the next operation.
+ *
+ * If the iterator was referencing index %ULONG_MAX, this function wraps
+ * around to 0.
+ *
+ * Return: The entry at the new index.  This may be %NULL or an internal
+ * entry.
+ */
+static inline void *xas_next(struct xa_state *xas)
+{
+	struct xa_node *node = xas->xa_node;
+
+	if (unlikely(xas_not_node(node) || node->shift ||
+				xas->xa_offset == XA_CHUNK_MASK))
+		return __xas_next(xas);
+
+	xas->xa_index++;
+	xas->xa_offset++;
+	return xa_entry(xas->xa, node, xas->xa_offset);
+}
+
 #endif /* _LINUX_XARRAY_H */
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index a96f67caa1c2..85ef1882dd3c 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -465,6 +465,120 @@ static noinline void check_find(struct xarray *xa)
 	check_multi_find_2(xa);
 }
 
+static noinline void check_move_small(struct xarray *xa, unsigned long idx)
+{
+	XA_STATE(xas, xa, 0);
+	unsigned long i;
+
+	xa_store_index(xa, 0, GFP_KERNEL);
+	xa_store_index(xa, idx, GFP_KERNEL);
+
+	rcu_read_lock();
+	for (i = 0; i < idx * 4; i++) {
+		void *entry = xas_next(&xas);
+		if (i <= idx)
+			XA_BUG_ON(xa, xas.xa_node == XAS_RESTART);
+		XA_BUG_ON(xa, xas.xa_index != i);
+		if (i == 0 || i == idx)
+			XA_BUG_ON(xa, entry != xa_mk_value(i));
+		else
+			XA_BUG_ON(xa, entry != NULL);
+	}
+	xas_next(&xas);
+	XA_BUG_ON(xa, xas.xa_index != i);
+
+	do {
+		void *entry = xas_prev(&xas);
+		i--;
+		if (i <= idx)
+			XA_BUG_ON(xa, xas.xa_node == XAS_RESTART);
+		XA_BUG_ON(xa, xas.xa_index != i);
+		if (i == 0 || i == idx)
+			XA_BUG_ON(xa, entry != xa_mk_value(i));
+		else
+			XA_BUG_ON(xa, entry != NULL);
+	} while (i > 0);
+
+	xas_set(&xas, ULONG_MAX);
+	XA_BUG_ON(xa, xas_next(&xas) != NULL);
+	XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
+	XA_BUG_ON(xa, xas_next(&xas) != xa_mk_value(0));
+	XA_BUG_ON(xa, xas.xa_index != 0);
+	XA_BUG_ON(xa, xas_prev(&xas) != NULL);
+	XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
+	rcu_read_unlock();
+
+	xa_erase_index(xa, 0);
+	xa_erase_index(xa, idx);
+	XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_move(struct xarray *xa)
+{
+	XA_STATE(xas, xa, (1 << 16) - 1);
+	unsigned long i;
+
+	for (i = 0; i < (1 << 16); i++)
+		XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL);
+
+	rcu_read_lock();
+	do {
+		void *entry = xas_prev(&xas);
+		i--;
+		XA_BUG_ON(xa, entry != xa_mk_value(i));
+		XA_BUG_ON(xa, i != xas.xa_index);
+	} while (i != 0);
+
+	XA_BUG_ON(xa, xas_prev(&xas) != NULL);
+	XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
+
+	do {
+		void *entry = xas_next(&xas);
+		XA_BUG_ON(xa, entry != xa_mk_value(i));
+		XA_BUG_ON(xa, i != xas.xa_index);
+		i++;
+	} while (i < (1 << 16));
+	rcu_read_unlock();
+
+	for (i = (1 << 8); i < (1 << 15); i++)
+		xa_erase_index(xa, i);
+
+	i = xas.xa_index;
+
+	rcu_read_lock();
+	do {
+		void *entry = xas_prev(&xas);
+		i--;
+		if ((i < (1 << 8)) || (i >= (1 << 15)))
+			XA_BUG_ON(xa, entry != xa_mk_value(i));
+		else
+			XA_BUG_ON(xa, entry != NULL);
+		XA_BUG_ON(xa, i != xas.xa_index);
+	} while (i != 0);
+
+	XA_BUG_ON(xa, xas_prev(&xas) != NULL);
+	XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
+
+	do {
+		void *entry = xas_next(&xas);
+		if ((i < (1 << 8)) || (i >= (1 << 15)))
+			XA_BUG_ON(xa, entry != xa_mk_value(i));
+		else
+			XA_BUG_ON(xa, entry != NULL);
+		XA_BUG_ON(xa, i != xas.xa_index);
+		i++;
+	} while (i < (1 << 16));
+	rcu_read_unlock();
+
+	xa_destroy(xa);
+
+	for (i = 0; i < 16; i++)
+		check_move_small(xa, 1UL << i);
+
+	for (i = 2; i < 16; i++)
+		check_move_small(xa, (1UL << i) - 1);
+}
+
 static noinline void check_destroy(struct xarray *xa)
 {
 	unsigned long index;
@@ -512,6 +626,7 @@ static int xarray_checks(void)
 	check_multi_store(&array);
 	check_find(&array);
 	check_destroy(&array);
+	check_move(&array);
 
 	printk("XArray: %u of %u tests passed\n", tests_passed, tests_run);
 	return (tests_run == tests_passed) ? 0 : -EINVAL;
diff --git a/lib/xarray.c b/lib/xarray.c
index 057dbe0d3bf6..303c46579598 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -876,6 +876,80 @@ void xas_pause(struct xa_state *xas)
 }
 EXPORT_SYMBOL_GPL(xas_pause);
 
+/*
+ * __xas_prev() - Find the previous entry in the XArray.
+ * @xas: XArray operation state.
+ *
+ * Helper function for xas_prev() which handles all the complex cases
+ * out of line.
+ */
+void *__xas_prev(struct xa_state *xas)
+{
+	void *entry;
+
+	if (!xas_frozen(xas->xa_node))
+		xas->xa_index--;
+	if (xas_not_node(xas->xa_node))
+		return xas_load(xas);
+
+	if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node))
+		xas->xa_offset--;
+
+	while (xas->xa_offset == 255) {
+		xas->xa_offset = xas->xa_node->offset - 1;
+		xas->xa_node = xa_parent(xas->xa, xas->xa_node);
+		if (!xas->xa_node)
+			return set_bounds(xas);
+	}
+
+	for (;;) {
+		entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
+		if (!xa_is_node(entry))
+			return entry;
+
+		xas->xa_node = xa_to_node(entry);
+		xas_set_offset(xas);
+	}
+}
+EXPORT_SYMBOL_GPL(__xas_prev);
+
+/*
+ * __xas_next() - Find the next entry in the XArray.
+ * @xas: XArray operation state.
+ *
+ * Helper function for xas_next() which handles all the complex cases
+ * out of line.
+ */
+void *__xas_next(struct xa_state *xas)
+{
+	void *entry;
+
+	if (!xas_frozen(xas->xa_node))
+		xas->xa_index++;
+	if (xas_not_node(xas->xa_node))
+		return xas_load(xas);
+
+	if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node))
+		xas->xa_offset++;
+
+	while (xas->xa_offset == XA_CHUNK_SIZE) {
+		xas->xa_offset = xas->xa_node->offset + 1;
+		xas->xa_node = xa_parent(xas->xa, xas->xa_node);
+		if (!xas->xa_node)
+			return set_bounds(xas);
+	}
+
+	for (;;) {
+		entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
+		if (!xa_is_node(entry))
+			return entry;
+
+		xas->xa_node = xa_to_node(entry);
+		xas_set_offset(xas);
+	}
+}
+EXPORT_SYMBOL_GPL(__xas_next);
+
 /**
  * xas_find() - Find the next present entry in the XArray.
  * @xas: XArray operation state.
-- 
cgit v1.2.3


From 4e99d4e9579d3b950bf4b38d0d64eb1b9be78761 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 1 Jun 2018 22:46:02 -0400
Subject: xarray: Add xas_for_each_conflict

This iterator iterates over each entry that is stored in the index or
indices specified by the xa_state.  This is intended for use for a
conditional store of a multiindex entry, or to allow entries which are
about to be removed from the xarray to be disposed of properly.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h | 17 +++++++++++++
 lib/test_xarray.c      | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/xarray.c           | 61 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 146 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 381f94a66762..2664e718dbd3 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -878,6 +878,7 @@ static inline bool xas_retry(struct xa_state *xas, const void *entry)
 void *xas_load(struct xa_state *);
 void *xas_store(struct xa_state *, void *entry);
 void *xas_find(struct xa_state *, unsigned long max);
+void *xas_find_conflict(struct xa_state *);
 
 bool xas_get_mark(const struct xa_state *, xa_mark_t);
 void xas_set_mark(const struct xa_state *, xa_mark_t);
@@ -1088,6 +1089,22 @@ enum {
 	for (entry = xas_find_marked(xas, max, mark); entry; \
 	     entry = xas_next_marked(xas, max, mark))
 
+/**
+ * xas_for_each_conflict() - Iterate over a range of an XArray.
+ * @xas: XArray operation state.
+ * @entry: Entry retrieved from the array.
+ *
+ * The loop body will be executed for each entry in the XArray that lies
+ * within the range specified by @xas.  If the loop completes successfully,
+ * any entries that lie in this range will be replaced by @entry.  The caller
+ * may break out of the loop; if they do so, the contents of the XArray will
+ * be unchanged.  The operation may fail due to an out of memory condition.
+ * The caller may also call xa_set_err() to exit the loop while setting an
+ * error to record the reason.
+ */
+#define xas_for_each_conflict(xas, entry) \
+	while ((entry = xas_find_conflict(xas)))
+
 void *__xas_next(struct xa_state *);
 void *__xas_prev(struct xa_state *);
 
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 85ef1882dd3c..8eba3de1baea 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -365,6 +365,73 @@ static noinline void check_multi_store(struct xarray *xa)
 #endif
 }
 
+static noinline void __check_store_iter(struct xarray *xa, unsigned long start,
+			unsigned int order, unsigned int present)
+{
+	XA_STATE_ORDER(xas, xa, start, order);
+	void *entry;
+	unsigned int count = 0;
+
+retry:
+	xas_lock(&xas);
+	xas_for_each_conflict(&xas, entry) {
+		XA_BUG_ON(xa, !xa_is_value(entry));
+		XA_BUG_ON(xa, entry < xa_mk_value(start));
+		XA_BUG_ON(xa, entry > xa_mk_value(start + (1UL << order) - 1));
+		count++;
+	}
+	xas_store(&xas, xa_mk_value(start));
+	xas_unlock(&xas);
+	if (xas_nomem(&xas, GFP_KERNEL)) {
+		count = 0;
+		goto retry;
+	}
+	XA_BUG_ON(xa, xas_error(&xas));
+	XA_BUG_ON(xa, count != present);
+	XA_BUG_ON(xa, xa_load(xa, start) != xa_mk_value(start));
+	XA_BUG_ON(xa, xa_load(xa, start + (1UL << order) - 1) !=
+			xa_mk_value(start));
+	xa_erase_index(xa, start);
+}
+
+static noinline void check_store_iter(struct xarray *xa)
+{
+	unsigned int i, j;
+	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
+
+	for (i = 0; i < max_order; i++) {
+		unsigned int min = 1 << i;
+		unsigned int max = (2 << i) - 1;
+		__check_store_iter(xa, 0, i, 0);
+		XA_BUG_ON(xa, !xa_empty(xa));
+		__check_store_iter(xa, min, i, 0);
+		XA_BUG_ON(xa, !xa_empty(xa));
+
+		xa_store_index(xa, min, GFP_KERNEL);
+		__check_store_iter(xa, min, i, 1);
+		XA_BUG_ON(xa, !xa_empty(xa));
+		xa_store_index(xa, max, GFP_KERNEL);
+		__check_store_iter(xa, min, i, 1);
+		XA_BUG_ON(xa, !xa_empty(xa));
+
+		for (j = 0; j < min; j++)
+			xa_store_index(xa, j, GFP_KERNEL);
+		__check_store_iter(xa, 0, i, min);
+		XA_BUG_ON(xa, !xa_empty(xa));
+		for (j = 0; j < min; j++)
+			xa_store_index(xa, min + j, GFP_KERNEL);
+		__check_store_iter(xa, min, i, min);
+		XA_BUG_ON(xa, !xa_empty(xa));
+	}
+#ifdef CONFIG_XARRAY_MULTI
+	xa_store_index(xa, 63, GFP_KERNEL);
+	xa_store_index(xa, 65, GFP_KERNEL);
+	__check_store_iter(xa, 64, 2, 1);
+	xa_erase_index(xa, 63);
+#endif
+	XA_BUG_ON(xa, !xa_empty(xa));
+}
+
 static noinline void check_multi_find(struct xarray *xa)
 {
 #ifdef CONFIG_XARRAY_MULTI
@@ -627,6 +694,7 @@ static int xarray_checks(void)
 	check_find(&array);
 	check_destroy(&array);
 	check_move(&array);
+	check_store_iter(&array);
 
 	printk("XArray: %u of %u tests passed\n", tests_passed, tests_run);
 	return (tests_run == tests_passed) ? 0 : -EINVAL;
diff --git a/lib/xarray.c b/lib/xarray.c
index 303c46579598..41f8ebc651f5 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1110,6 +1110,67 @@ max:
 }
 EXPORT_SYMBOL_GPL(xas_find_marked);
 
+/**
+ * xas_find_conflict() - Find the next present entry in a range.
+ * @xas: XArray operation state.
+ *
+ * The @xas describes both a range and a position within that range.
+ *
+ * Context: Any context.  Expects xa_lock to be held.
+ * Return: The next entry in the range covered by @xas or %NULL.
+ */
+void *xas_find_conflict(struct xa_state *xas)
+{
+	void *curr;
+
+	if (xas_error(xas))
+		return NULL;
+
+	if (!xas->xa_node)
+		return NULL;
+
+	if (xas_top(xas->xa_node)) {
+		curr = xas_start(xas);
+		if (!curr)
+			return NULL;
+		while (xa_is_node(curr)) {
+			struct xa_node *node = xa_to_node(curr);
+			curr = xas_descend(xas, node);
+		}
+		if (curr)
+			return curr;
+	}
+
+	if (xas->xa_node->shift > xas->xa_shift)
+		return NULL;
+
+	for (;;) {
+		if (xas->xa_node->shift == xas->xa_shift) {
+			if ((xas->xa_offset & xas->xa_sibs) == xas->xa_sibs)
+				break;
+		} else if (xas->xa_offset == XA_CHUNK_MASK) {
+			xas->xa_offset = xas->xa_node->offset;
+			xas->xa_node = xa_parent_locked(xas->xa, xas->xa_node);
+			if (!xas->xa_node)
+				break;
+			continue;
+		}
+		curr = xa_entry_locked(xas->xa, xas->xa_node, ++xas->xa_offset);
+		if (xa_is_sibling(curr))
+			continue;
+		while (xa_is_node(curr)) {
+			xas->xa_node = xa_to_node(curr);
+			xas->xa_offset = 0;
+			curr = xa_entry_locked(xas->xa, xas->xa_node, 0);
+		}
+		if (curr)
+			return curr;
+	}
+	xas->xa_offset -= xas->xa_sibs;
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(xas_find_conflict);
+
 /**
  * xa_init_flags() - Initialise an empty XArray with flags.
  * @xa: XArray.
-- 
cgit v1.2.3


From 2264f5132fe45571139727ebdeb78696b35d1506 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 4 Dec 2017 00:11:48 -0500
Subject: xarray: Add xas_create_range

This hopefully temporary function is useful for users who have not yet
been converted to multi-index entries.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/xarray.h |  13 ++++++
 lib/test_xarray.c      | 119 +++++++++++++++++++++++++++++++++++++++++++++++++
 lib/xarray.c           |  50 +++++++++++++++++++++
 3 files changed, 182 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 2664e718dbd3..deae17aa0ed7 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -822,6 +822,17 @@ static inline bool xas_valid(const struct xa_state *xas)
 	return !xas_invalid(xas);
 }
 
+/**
+ * xas_is_node() - Does the xas point to a node?
+ * @xas: XArray operation state.
+ *
+ * Return: %true if the xas currently references a node.
+ */
+static inline bool xas_is_node(const struct xa_state *xas)
+{
+	return xas_valid(xas) && xas->xa_node;
+}
+
 /* True if the pointer is something other than a node */
 static inline bool xas_not_node(struct xa_node *node)
 {
@@ -889,6 +900,8 @@ void xas_init_marks(const struct xa_state *);
 bool xas_nomem(struct xa_state *, gfp_t);
 void xas_pause(struct xa_state *);
 
+void xas_create_range(struct xa_state *);
+
 /**
  * xas_reload() - Refetch an entry from the xarray.
  * @xas: XArray operation state.
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 8eba3de1baea..703370015d10 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -646,6 +646,124 @@ static noinline void check_move(struct xarray *xa)
 		check_move_small(xa, (1UL << i) - 1);
 }
 
+static noinline void xa_store_many_order(struct xarray *xa,
+		unsigned long index, unsigned order)
+{
+	XA_STATE_ORDER(xas, xa, index, order);
+	unsigned int i = 0;
+
+	do {
+		xas_lock(&xas);
+		XA_BUG_ON(xa, xas_find_conflict(&xas));
+		xas_create_range(&xas);
+		if (xas_error(&xas))
+			goto unlock;
+		for (i = 0; i < (1U << order); i++) {
+			XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(index + i)));
+			xas_next(&xas);
+		}
+unlock:
+		xas_unlock(&xas);
+	} while (xas_nomem(&xas, GFP_KERNEL));
+
+	XA_BUG_ON(xa, xas_error(&xas));
+}
+
+static noinline void check_create_range_1(struct xarray *xa,
+		unsigned long index, unsigned order)
+{
+	unsigned long i;
+
+	xa_store_many_order(xa, index, order);
+	for (i = index; i < index + (1UL << order); i++)
+		xa_erase_index(xa, i);
+	XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_create_range_2(struct xarray *xa, unsigned order)
+{
+	unsigned long i;
+	unsigned long nr = 1UL << order;
+
+	for (i = 0; i < nr * nr; i += nr)
+		xa_store_many_order(xa, i, order);
+	for (i = 0; i < nr * nr; i++)
+		xa_erase_index(xa, i);
+	XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_create_range_3(void)
+{
+	XA_STATE(xas, NULL, 0);
+	xas_set_err(&xas, -EEXIST);
+	xas_create_range(&xas);
+	XA_BUG_ON(NULL, xas_error(&xas) != -EEXIST);
+}
+
+static noinline void check_create_range_4(struct xarray *xa,
+		unsigned long index, unsigned order)
+{
+	XA_STATE_ORDER(xas, xa, index, order);
+	unsigned long base = xas.xa_index;
+	unsigned long i = 0;
+
+	xa_store_index(xa, index, GFP_KERNEL);
+	do {
+		xas_lock(&xas);
+		xas_create_range(&xas);
+		if (xas_error(&xas))
+			goto unlock;
+		for (i = 0; i < (1UL << order); i++) {
+			void *old = xas_store(&xas, xa_mk_value(base + i));
+			if (xas.xa_index == index)
+				XA_BUG_ON(xa, old != xa_mk_value(base + i));
+			else
+				XA_BUG_ON(xa, old != NULL);
+			xas_next(&xas);
+		}
+unlock:
+		xas_unlock(&xas);
+	} while (xas_nomem(&xas, GFP_KERNEL));
+
+	XA_BUG_ON(xa, xas_error(&xas));
+
+	for (i = base; i < base + (1UL << order); i++)
+		xa_erase_index(xa, i);
+	XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_create_range(struct xarray *xa)
+{
+	unsigned int order;
+	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 12 : 1;
+
+	for (order = 0; order < max_order; order++) {
+		check_create_range_1(xa, 0, order);
+		check_create_range_1(xa, 1U << order, order);
+		check_create_range_1(xa, 2U << order, order);
+		check_create_range_1(xa, 3U << order, order);
+		check_create_range_1(xa, 1U << 24, order);
+		if (order < 10)
+			check_create_range_2(xa, order);
+
+		check_create_range_4(xa, 0, order);
+		check_create_range_4(xa, 1U << order, order);
+		check_create_range_4(xa, 2U << order, order);
+		check_create_range_4(xa, 3U << order, order);
+		check_create_range_4(xa, 1U << 24, order);
+
+		check_create_range_4(xa, 1, order);
+		check_create_range_4(xa, (1U << order) + 1, order);
+		check_create_range_4(xa, (2U << order) + 1, order);
+		check_create_range_4(xa, (2U << order) - 1, order);
+		check_create_range_4(xa, (3U << order) + 1, order);
+		check_create_range_4(xa, (3U << order) - 1, order);
+		check_create_range_4(xa, (1U << 24) + 1, order);
+	}
+
+	check_create_range_3();
+}
+
 static noinline void check_destroy(struct xarray *xa)
 {
 	unsigned long index;
@@ -694,6 +812,7 @@ static int xarray_checks(void)
 	check_find(&array);
 	check_destroy(&array);
 	check_move(&array);
+	check_create_range(&array);
 	check_store_iter(&array);
 
 	printk("XArray: %u of %u tests passed\n", tests_passed, tests_run);
diff --git a/lib/xarray.c b/lib/xarray.c
index 41f8ebc651f5..ff37516fe832 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -637,6 +637,56 @@ static void *xas_create(struct xa_state *xas)
 	return entry;
 }
 
+/**
+ * xas_create_range() - Ensure that stores to this range will succeed
+ * @xas: XArray operation state.
+ *
+ * Creates all of the slots in the range covered by @xas.  Sets @xas to
+ * create single-index entries and positions it at the beginning of the
+ * range.  This is for the benefit of users which have not yet been
+ * converted to use multi-index entries.
+ */
+void xas_create_range(struct xa_state *xas)
+{
+	unsigned long index = xas->xa_index;
+	unsigned char shift = xas->xa_shift;
+	unsigned char sibs = xas->xa_sibs;
+
+	xas->xa_index |= ((sibs + 1) << shift) - 1;
+	if (xas_is_node(xas) && xas->xa_node->shift == xas->xa_shift)
+		xas->xa_offset |= sibs;
+	xas->xa_shift = 0;
+	xas->xa_sibs = 0;
+
+	for (;;) {
+		xas_create(xas);
+		if (xas_error(xas))
+			goto restore;
+		if (xas->xa_index <= (index | XA_CHUNK_MASK))
+			goto success;
+		xas->xa_index -= XA_CHUNK_SIZE;
+
+		for (;;) {
+			struct xa_node *node = xas->xa_node;
+			xas->xa_node = xa_parent_locked(xas->xa, node);
+			xas->xa_offset = node->offset - 1;
+			if (node->offset != 0)
+				break;
+		}
+	}
+
+restore:
+	xas->xa_shift = shift;
+	xas->xa_sibs = sibs;
+	xas->xa_index = index;
+	return;
+success:
+	xas->xa_index = index;
+	if (xas->xa_node)
+		xas_set_offset(xas);
+}
+EXPORT_SYMBOL_GPL(xas_create_range);
+
 static void update_node(struct xa_state *xas, struct xa_node *node,
 		int count, int values)
 {
-- 
cgit v1.2.3


From 9f14d4f1f1045f161fd4db8a8e194b7825c2874a Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 1 Oct 2018 14:54:59 -0400
Subject: xarray: Add xa_reserve and xa_release

This function reserves a slot in the XArray for users which need
to acquire multiple locks before storing their entry in the tree and
so cannot use a plain xa_store().

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 Documentation/core-api/xarray.rst |  6 +++++
 include/linux/xarray.h            | 34 ++++++++++++++++++++++++++--
 lib/test_xarray.c                 | 40 +++++++++++++++++++++++++++++++++
 lib/xarray.c                      | 47 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 125 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index 4b101b4f3aa1..2b397da84bcd 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -293,6 +293,12 @@ to :c:func:`xas_retry`, and retry the operation if it returns ``true``.
        of this RCU period.  You should restart the lookup from the head
        of the array.
 
+   * - Zero
+     - :c:func:`xa_is_zero`
+     - Zero entries appear as ``NULL`` through the Normal API, but occupy
+       an entry in the XArray which can be used to reserve the index for
+       future use.
+
 Other internal entries may be added in the future.  As far as possible, they
 will be handled by :c:func:`xas_retry`.
 
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index deae17aa0ed7..bed63d3cfea8 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -32,7 +32,8 @@
  * The following internal entries have a special meaning:
  *
  * 0-62: Sibling entries
- * 256: Retry entry
+ * 256: Zero entry
+ * 257: Retry entry
  *
  * Errors are also represented as internal entries, but use the negative
  * space (-4094 to -2).  They're never stored in the slots array; only
@@ -277,6 +278,7 @@ void *xa_load(struct xarray *, unsigned long index);
 void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
 void *xa_cmpxchg(struct xarray *, unsigned long index,
 			void *old, void *entry, gfp_t);
+int xa_reserve(struct xarray *, unsigned long index, gfp_t);
 bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t);
 void xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
 void xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
@@ -371,6 +373,20 @@ static inline int xa_insert(struct xarray *xa, unsigned long index,
 	return -EEXIST;
 }
 
+/**
+ * xa_release() - Release a reserved entry.
+ * @xa: XArray.
+ * @index: Index of entry.
+ *
+ * After calling xa_reserve(), you can call this function to release the
+ * reservation.  If the entry at @index has been stored to, this function
+ * will do nothing.
+ */
+static inline void xa_release(struct xarray *xa, unsigned long index)
+{
+	xa_cmpxchg(xa, index, NULL, NULL, 0);
+}
+
 /**
  * xa_for_each() - Iterate over a portion of an XArray.
  * @xa: XArray.
@@ -658,7 +674,19 @@ static inline bool xa_is_sibling(const void *entry)
 		(entry < xa_mk_sibling(XA_CHUNK_SIZE - 1));
 }
 
-#define XA_RETRY_ENTRY		xa_mk_internal(256)
+#define XA_ZERO_ENTRY		xa_mk_internal(256)
+#define XA_RETRY_ENTRY		xa_mk_internal(257)
+
+/**
+ * xa_is_zero() - Is the entry a zero entry?
+ * @entry: Entry retrieved from the XArray
+ *
+ * Return: %true if the entry is a zero entry.
+ */
+static inline bool xa_is_zero(const void *entry)
+{
+	return unlikely(entry == XA_ZERO_ENTRY);
+}
 
 /**
  * xa_is_retry() - Is the entry a retry entry?
@@ -880,6 +908,8 @@ static inline void xas_reset(struct xa_state *xas)
  */
 static inline bool xas_retry(struct xa_state *xas, const void *entry)
 {
+	if (xa_is_zero(entry))
+		return true;
 	if (!xa_is_retry(entry))
 		return false;
 	xas_reset(xas);
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 703370015d10..6aafd411a5c3 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -259,6 +259,45 @@ static noinline void check_cmpxchg(struct xarray *xa)
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
+static noinline void check_reserve(struct xarray *xa)
+{
+	void *entry;
+	unsigned long index = 0;
+
+	/* An array with a reserved entry is not empty */
+	XA_BUG_ON(xa, !xa_empty(xa));
+	xa_reserve(xa, 12345678, GFP_KERNEL);
+	XA_BUG_ON(xa, xa_empty(xa));
+	XA_BUG_ON(xa, xa_load(xa, 12345678));
+	xa_release(xa, 12345678);
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	/* Releasing a used entry does nothing */
+	xa_reserve(xa, 12345678, GFP_KERNEL);
+	XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_NOWAIT) != NULL);
+	xa_release(xa, 12345678);
+	xa_erase_index(xa, 12345678);
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	/* cmpxchg sees a reserved entry as NULL */
+	xa_reserve(xa, 12345678, GFP_KERNEL);
+	XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, NULL, xa_mk_value(12345678),
+				GFP_NOWAIT) != NULL);
+	xa_release(xa, 12345678);
+	xa_erase_index(xa, 12345678);
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	/* Can iterate through a reserved entry */
+	xa_store_index(xa, 5, GFP_KERNEL);
+	xa_reserve(xa, 6, GFP_KERNEL);
+	xa_store_index(xa, 7, GFP_KERNEL);
+
+	xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) {
+		XA_BUG_ON(xa, index != 5 && index != 7);
+	}
+	xa_destroy(xa);
+}
+
 static noinline void check_xas_erase(struct xarray *xa)
 {
 	XA_STATE(xas, xa, 0);
@@ -808,6 +847,7 @@ static int xarray_checks(void)
 	check_xa_shrink(&array);
 	check_xas_erase(&array);
 	check_cmpxchg(&array);
+	check_reserve(&array);
 	check_multi_store(&array);
 	check_find(&array);
 	check_destroy(&array);
diff --git a/lib/xarray.c b/lib/xarray.c
index ff37516fe832..546461914282 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1266,6 +1266,8 @@ void *xa_load(struct xarray *xa, unsigned long index)
 	rcu_read_lock();
 	do {
 		entry = xas_load(&xas);
+		if (xa_is_zero(entry))
+			entry = NULL;
 	} while (xas_retry(&xas, entry));
 	rcu_read_unlock();
 
@@ -1275,6 +1277,8 @@ EXPORT_SYMBOL(xa_load);
 
 static void *xas_result(struct xa_state *xas, void *curr)
 {
+	if (xa_is_zero(curr))
+		return NULL;
 	XA_NODE_BUG_ON(xas->xa_node, xa_is_internal(curr));
 	if (xas_error(xas))
 		curr = xas->xa_node;
@@ -1394,6 +1398,8 @@ void *xa_cmpxchg(struct xarray *xa, unsigned long index,
 	do {
 		xas_lock(&xas);
 		curr = xas_load(&xas);
+		if (curr == XA_ZERO_ENTRY)
+			curr = NULL;
 		if (curr == old)
 			xas_store(&xas, entry);
 		xas_unlock(&xas);
@@ -1430,6 +1436,8 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index,
 
 	do {
 		curr = xas_load(&xas);
+		if (curr == XA_ZERO_ENTRY)
+			curr = NULL;
 		if (curr == old)
 			xas_store(&xas, entry);
 	} while (__xas_nomem(&xas, gfp));
@@ -1438,6 +1446,43 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index,
 }
 EXPORT_SYMBOL(__xa_cmpxchg);
 
+/**
+ * xa_reserve() - Reserve this index in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @gfp: Memory allocation flags.
+ *
+ * Ensures there is somewhere to store an entry at @index in the array.
+ * If there is already something stored at @index, this function does
+ * nothing.  If there was nothing there, the entry is marked as reserved.
+ * Loads from @index will continue to see a %NULL pointer until a
+ * subsequent store to @index.
+ *
+ * If you do not use the entry that you have reserved, call xa_release()
+ * or xa_erase() to free any unnecessary memory.
+ *
+ * Context: Process context.  Takes and releases the xa_lock, IRQ or BH safe
+ * if specified in XArray flags.  May sleep if the @gfp flags permit.
+ * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
+ */
+int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+	XA_STATE(xas, xa, index);
+	unsigned int lock_type = xa_lock_type(xa);
+	void *curr;
+
+	do {
+		xas_lock_type(&xas, lock_type);
+		curr = xas_load(&xas);
+		if (!curr)
+			xas_store(&xas, XA_ZERO_ENTRY);
+		xas_unlock_type(&xas, lock_type);
+	} while (xas_nomem(&xas, gfp));
+
+	return xas_error(&xas);
+}
+EXPORT_SYMBOL(xa_reserve);
+
 /**
  * __xa_set_mark() - Set this mark on this entry while locked.
  * @xa: XArray.
@@ -1797,6 +1842,8 @@ void xa_dump_entry(const void *entry, unsigned long index, unsigned long shift)
 		pr_cont("retry (%ld)\n", xa_to_internal(entry));
 	else if (xa_is_sibling(entry))
 		pr_cont("sibling (slot %ld)\n", xa_to_sibling(entry));
+	else if (xa_is_zero(entry))
+		pr_cont("zero (%ld)\n", xa_to_internal(entry));
 	else
 		pr_cont("UNKNOWN ENTRY (%px)\n", entry);
 }
-- 
cgit v1.2.3


From 371c752dc66948714ee3b66c3306f3ff1ff71d2e Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 4 Jul 2018 10:50:12 -0400
Subject: xarray: Track free entries in an XArray

Add the optional ability to track which entries in an XArray are free
and provide xa_alloc() to replace most of the functionality of the IDR.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 Documentation/core-api/xarray.rst |  23 +++++++--
 include/linux/xarray.h            | 101 ++++++++++++++++++++++++++++++++++++++
 lib/test_xarray.c                 |  61 +++++++++++++++++++++++
 lib/xarray.c                      |  88 +++++++++++++++++++++++++++++++--
 4 files changed, 266 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index 2b397da84bcd..463e4c798ec1 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -103,12 +103,25 @@ Finally, you can remove all entries from an XArray by calling
 to free the entries first.  You can do this by iterating over all present
 entries in the XArray using the :c:func:`xa_for_each` iterator.
 
+ID assignment
+-------------
+
+You can call :c:func:`xa_alloc` to store the entry at any unused index
+in the XArray.  If you need to modify the array from interrupt context,
+you can use :c:func:`xa_alloc_bh` or :c:func:`xa_alloc_irq` to disable
+interrupts while allocating the ID.  Unlike :c:func:`xa_store`, allocating
+a ``NULL`` pointer does not delete an entry.  Instead it reserves an
+entry like :c:func:`xa_reserve` and you can release it using either
+:c:func:`xa_erase` or :c:func:`xa_release`.  To use ID assignment, the
+XArray must be defined with :c:func:`DEFINE_XARRAY_ALLOC`, or initialised
+by passing ``XA_FLAGS_ALLOC`` to :c:func:`xa_init_flags`,
+
 Memory allocation
 -----------------
 
-The :c:func:`xa_store`, :c:func:`xa_cmpxchg`, :c:func:`xa_reserve`
-and :c:func:`xa_insert` functions take a gfp_t parameter in case
-the XArray needs to allocate memory to store this entry.
+The :c:func:`xa_store`, :c:func:`xa_cmpxchg`, :c:func:`xa_alloc`,
+:c:func:`xa_reserve` and :c:func:`xa_insert` functions take a gfp_t
+parameter in case the XArray needs to allocate memory to store this entry.
 If the entry is being deleted, no memory allocation needs to be performed,
 and the GFP flags specified will be ignored.
 
@@ -143,6 +156,9 @@ Takes xa_lock internally:
  * :c:func:`xa_erase_bh`
  * :c:func:`xa_erase_irq`
  * :c:func:`xa_cmpxchg`
+ * :c:func:`xa_alloc`
+ * :c:func:`xa_alloc_bh`
+ * :c:func:`xa_alloc_irq`
  * :c:func:`xa_destroy`
  * :c:func:`xa_set_mark`
  * :c:func:`xa_clear_mark`
@@ -152,6 +168,7 @@ Assumes xa_lock held on entry:
  * :c:func:`__xa_insert`
  * :c:func:`__xa_erase`
  * :c:func:`__xa_cmpxchg`
+ * :c:func:`__xa_alloc`
  * :c:func:`__xa_set_mark`
  * :c:func:`__xa_clear_mark`
 
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index bed63d3cfea8..e0b57af52e9b 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -205,6 +205,7 @@ typedef unsigned __bitwise xa_mark_t;
 #define XA_MARK_2		((__force xa_mark_t)2U)
 #define XA_PRESENT		((__force xa_mark_t)8U)
 #define XA_MARK_MAX		XA_MARK_2
+#define XA_FREE_MARK		XA_MARK_0
 
 enum xa_lock_type {
 	XA_LOCK_IRQ = 1,
@@ -217,9 +218,12 @@ enum xa_lock_type {
  */
 #define XA_FLAGS_LOCK_IRQ	((__force gfp_t)XA_LOCK_IRQ)
 #define XA_FLAGS_LOCK_BH	((__force gfp_t)XA_LOCK_BH)
+#define XA_FLAGS_TRACK_FREE	((__force gfp_t)4U)
 #define XA_FLAGS_MARK(mark)	((__force gfp_t)((1U << __GFP_BITS_SHIFT) << \
 						(__force unsigned)(mark)))
 
+#define XA_FLAGS_ALLOC	(XA_FLAGS_TRACK_FREE | XA_FLAGS_MARK(XA_FREE_MARK))
+
 /**
  * struct xarray - The anchor of the XArray.
  * @xa_lock: Lock that protects the contents of the XArray.
@@ -273,6 +277,15 @@ struct xarray {
  */
 #define DEFINE_XARRAY(name) DEFINE_XARRAY_FLAGS(name, 0)
 
+/**
+ * DEFINE_XARRAY_ALLOC() - Define an XArray which can allocate IDs.
+ * @name: A string that names your XArray.
+ *
+ * This is intended for file scope definitions of allocating XArrays.
+ * See also DEFINE_XARRAY().
+ */
+#define DEFINE_XARRAY_ALLOC(name) DEFINE_XARRAY_FLAGS(name, XA_FLAGS_ALLOC)
+
 void xa_init_flags(struct xarray *, gfp_t flags);
 void *xa_load(struct xarray *, unsigned long index);
 void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
@@ -439,6 +452,7 @@ void *__xa_erase(struct xarray *, unsigned long index);
 void *__xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
 void *__xa_cmpxchg(struct xarray *, unsigned long index, void *old,
 		void *entry, gfp_t);
+int __xa_alloc(struct xarray *, u32 *id, u32 max, void *entry, gfp_t);
 void __xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
 void __xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
 
@@ -518,6 +532,93 @@ static inline void *xa_erase_irq(struct xarray *xa, unsigned long index)
 	return entry;
 }
 
+/**
+ * xa_alloc() - Find somewhere to store this entry in the XArray.
+ * @xa: XArray.
+ * @id: Pointer to ID.
+ * @max: Maximum ID to allocate (inclusive).
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocates an unused ID in the range specified by @id and @max.
+ * Updates the @id pointer with the index, then stores the entry at that
+ * index.  A concurrent lookup will not see an uninitialised @id.
+ *
+ * Context: Process context.  Takes and releases the xa_lock.  May sleep if
+ * the @gfp flags permit.
+ * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if
+ * there is no more space in the XArray.
+ */
+static inline int xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry,
+		gfp_t gfp)
+{
+	int err;
+
+	xa_lock(xa);
+	err = __xa_alloc(xa, id, max, entry, gfp);
+	xa_unlock(xa);
+
+	return err;
+}
+
+/**
+ * xa_alloc_bh() - Find somewhere to store this entry in the XArray.
+ * @xa: XArray.
+ * @id: Pointer to ID.
+ * @max: Maximum ID to allocate (inclusive).
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocates an unused ID in the range specified by @id and @max.
+ * Updates the @id pointer with the index, then stores the entry at that
+ * index.  A concurrent lookup will not see an uninitialised @id.
+ *
+ * Context: Process context.  Takes and releases the xa_lock while
+ * disabling softirqs.  May sleep if the @gfp flags permit.
+ * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if
+ * there is no more space in the XArray.
+ */
+static inline int xa_alloc_bh(struct xarray *xa, u32 *id, u32 max, void *entry,
+		gfp_t gfp)
+{
+	int err;
+
+	xa_lock_bh(xa);
+	err = __xa_alloc(xa, id, max, entry, gfp);
+	xa_unlock_bh(xa);
+
+	return err;
+}
+
+/**
+ * xa_alloc_irq() - Find somewhere to store this entry in the XArray.
+ * @xa: XArray.
+ * @id: Pointer to ID.
+ * @max: Maximum ID to allocate (inclusive).
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocates an unused ID in the range specified by @id and @max.
+ * Updates the @id pointer with the index, then stores the entry at that
+ * index.  A concurrent lookup will not see an uninitialised @id.
+ *
+ * Context: Process context.  Takes and releases the xa_lock while
+ * disabling interrupts.  May sleep if the @gfp flags permit.
+ * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if
+ * there is no more space in the XArray.
+ */
+static inline int xa_alloc_irq(struct xarray *xa, u32 *id, u32 max, void *entry,
+		gfp_t gfp)
+{
+	int err;
+
+	xa_lock_irq(xa);
+	err = __xa_alloc(xa, id, max, entry, gfp);
+	xa_unlock_irq(xa);
+
+	return err;
+}
+
 /* Everything below here is the Advanced API.  Proceed with caution. */
 
 /*
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 6aafd411a5c3..a752e6a37e6f 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -33,6 +33,15 @@ static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp)
 	return xa_store(xa, index, xa_mk_value(index & LONG_MAX), gfp);
 }
 
+static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+	u32 id = 0;
+
+	XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_value(index & LONG_MAX),
+				gfp) != 0);
+	XA_BUG_ON(xa, id != index);
+}
+
 static void xa_erase_index(struct xarray *xa, unsigned long index)
 {
 	XA_BUG_ON(xa, xa_erase(xa, index) != xa_mk_value(index & LONG_MAX));
@@ -404,6 +413,57 @@ static noinline void check_multi_store(struct xarray *xa)
 #endif
 }
 
+static DEFINE_XARRAY_ALLOC(xa0);
+
+static noinline void check_xa_alloc(void)
+{
+	int i;
+	u32 id;
+
+	/* An empty array should assign 0 to the first alloc */
+	xa_alloc_index(&xa0, 0, GFP_KERNEL);
+
+	/* Erasing it should make the array empty again */
+	xa_erase_index(&xa0, 0);
+	XA_BUG_ON(&xa0, !xa_empty(&xa0));
+
+	/* And it should assign 0 again */
+	xa_alloc_index(&xa0, 0, GFP_KERNEL);
+
+	/* The next assigned ID should be 1 */
+	xa_alloc_index(&xa0, 1, GFP_KERNEL);
+	xa_erase_index(&xa0, 1);
+
+	/* Storing a value should mark it used */
+	xa_store_index(&xa0, 1, GFP_KERNEL);
+	xa_alloc_index(&xa0, 2, GFP_KERNEL);
+
+	/* If we then erase 0, it should be free */
+	xa_erase_index(&xa0, 0);
+	xa_alloc_index(&xa0, 0, GFP_KERNEL);
+
+	xa_erase_index(&xa0, 1);
+	xa_erase_index(&xa0, 2);
+
+	for (i = 1; i < 5000; i++) {
+		xa_alloc_index(&xa0, i, GFP_KERNEL);
+	}
+
+	xa_destroy(&xa0);
+
+	id = 0xfffffffeU;
+	XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0),
+				GFP_KERNEL) != 0);
+	XA_BUG_ON(&xa0, id != 0xfffffffeU);
+	XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0),
+				GFP_KERNEL) != 0);
+	XA_BUG_ON(&xa0, id != 0xffffffffU);
+	XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0),
+				GFP_KERNEL) != -ENOSPC);
+	XA_BUG_ON(&xa0, id != 0xffffffffU);
+	xa_destroy(&xa0);
+}
+
 static noinline void __check_store_iter(struct xarray *xa, unsigned long start,
 			unsigned int order, unsigned int present)
 {
@@ -849,6 +909,7 @@ static int xarray_checks(void)
 	check_cmpxchg(&array);
 	check_reserve(&array);
 	check_multi_store(&array);
+	check_xa_alloc();
 	check_find(&array);
 	check_destroy(&array);
 	check_move(&array);
diff --git a/lib/xarray.c b/lib/xarray.c
index 546461914282..9a0d49d4b5f0 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -52,6 +52,11 @@ static inline void xas_unlock_type(struct xa_state *xas, unsigned int lock_type)
 		xas_unlock(xas);
 }
 
+static inline bool xa_track_free(const struct xarray *xa)
+{
+	return xa->xa_flags & XA_FLAGS_TRACK_FREE;
+}
+
 static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark)
 {
 	if (!(xa->xa_flags & XA_FLAGS_MARK(mark)))
@@ -94,6 +99,11 @@ static inline bool node_any_mark(struct xa_node *node, xa_mark_t mark)
 	return !bitmap_empty(node_marks(node, mark), XA_CHUNK_SIZE);
 }
 
+static inline void node_mark_all(struct xa_node *node, xa_mark_t mark)
+{
+	bitmap_fill(node_marks(node, mark), XA_CHUNK_SIZE);
+}
+
 #define mark_inc(mark) do { \
 	mark = (__force xa_mark_t)((__force unsigned)(mark) + 1); \
 } while (0)
@@ -416,6 +426,8 @@ static void xas_shrink(struct xa_state *xas)
 		xas->xa_node = XAS_BOUNDS;
 
 		RCU_INIT_POINTER(xa->xa_head, entry);
+		if (xa_track_free(xa) && !node_get_mark(node, 0, XA_FREE_MARK))
+			xa_mark_clear(xa, XA_FREE_MARK);
 
 		node->count = 0;
 		node->nr_values = 0;
@@ -549,8 +561,15 @@ static int xas_expand(struct xa_state *xas, void *head)
 
 		/* Propagate the aggregated mark info to the new child */
 		for (;;) {
-			if (xa_marked(xa, mark))
+			if (xa_track_free(xa) && mark == XA_FREE_MARK) {
+				node_mark_all(node, XA_FREE_MARK);
+				if (!xa_marked(xa, XA_FREE_MARK)) {
+					node_clear_mark(node, 0, XA_FREE_MARK);
+					xa_mark_set(xa, XA_FREE_MARK);
+				}
+			} else if (xa_marked(xa, mark)) {
 				node_set_mark(node, 0, mark);
+			}
 			if (mark == XA_MARK_MAX)
 				break;
 			mark_inc(mark);
@@ -624,6 +643,8 @@ static void *xas_create(struct xa_state *xas)
 			node = xas_alloc(xas, shift);
 			if (!node)
 				break;
+			if (xa_track_free(xa))
+				node_mark_all(node, XA_FREE_MARK);
 			rcu_assign_pointer(*slot, xa_mk_node(node));
 		} else if (xa_is_node(entry)) {
 			node = xa_to_node(entry);
@@ -882,7 +903,10 @@ void xas_init_marks(const struct xa_state *xas)
 	xa_mark_t mark = 0;
 
 	for (;;) {
-		xas_clear_mark(xas, mark);
+		if (xa_track_free(xas->xa) && mark == XA_FREE_MARK)
+			xas_set_mark(xas, mark);
+		else
+			xas_clear_mark(xas, mark);
 		if (mark == XA_MARK_MAX)
 			break;
 		mark_inc(mark);
@@ -1333,6 +1357,8 @@ void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
 	do {
 		xas_lock(&xas);
 		curr = xas_store(&xas, entry);
+		if (xa_track_free(xa) && entry)
+			xas_clear_mark(&xas, XA_FREE_MARK);
 		xas_unlock(&xas);
 	} while (xas_nomem(&xas, gfp));
 
@@ -1365,6 +1391,8 @@ void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
 
 	do {
 		curr = xas_store(&xas, entry);
+		if (xa_track_free(xa) && entry)
+			xas_clear_mark(&xas, XA_FREE_MARK);
 	} while (__xas_nomem(&xas, gfp));
 
 	return xas_result(&xas, curr);
@@ -1400,8 +1428,11 @@ void *xa_cmpxchg(struct xarray *xa, unsigned long index,
 		curr = xas_load(&xas);
 		if (curr == XA_ZERO_ENTRY)
 			curr = NULL;
-		if (curr == old)
+		if (curr == old) {
 			xas_store(&xas, entry);
+			if (xa_track_free(xa) && entry)
+				xas_clear_mark(&xas, XA_FREE_MARK);
+		}
 		xas_unlock(&xas);
 	} while (xas_nomem(&xas, gfp));
 
@@ -1438,8 +1469,11 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index,
 		curr = xas_load(&xas);
 		if (curr == XA_ZERO_ENTRY)
 			curr = NULL;
-		if (curr == old)
+		if (curr == old) {
 			xas_store(&xas, entry);
+			if (xa_track_free(xa) && entry)
+				xas_clear_mark(&xas, XA_FREE_MARK);
+		}
 	} while (__xas_nomem(&xas, gfp));
 
 	return xas_result(&xas, curr);
@@ -1483,6 +1517,52 @@ int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
 }
 EXPORT_SYMBOL(xa_reserve);
 
+/**
+ * __xa_alloc() - Find somewhere to store this entry in the XArray.
+ * @xa: XArray.
+ * @id: Pointer to ID.
+ * @max: Maximum ID to allocate (inclusive).
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocates an unused ID in the range specified by @id and @max.
+ * Updates the @id pointer with the index, then stores the entry at that
+ * index.  A concurrent lookup will not see an uninitialised @id.
+ *
+ * Context: Any context.  Expects xa_lock to be held on entry.  May
+ * release and reacquire xa_lock if @gfp flags permit.
+ * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if
+ * there is no more space in the XArray.
+ */
+int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp)
+{
+	XA_STATE(xas, xa, 0);
+	int err;
+
+	if (WARN_ON_ONCE(xa_is_internal(entry)))
+		return -EINVAL;
+	if (WARN_ON_ONCE(!xa_track_free(xa)))
+		return -EINVAL;
+
+	if (!entry)
+		entry = XA_ZERO_ENTRY;
+
+	do {
+		xas.xa_index = *id;
+		xas_find_marked(&xas, max, XA_FREE_MARK);
+		if (xas.xa_node == XAS_RESTART)
+			xas_set_err(&xas, -ENOSPC);
+		xas_store(&xas, entry);
+		xas_clear_mark(&xas, XA_FREE_MARK);
+	} while (__xas_nomem(&xas, gfp));
+
+	err = xas_error(&xas);
+	if (!err)
+		*id = xas.xa_index;
+	return err;
+}
+EXPORT_SYMBOL(__xa_alloc);
+
 /**
  * __xa_set_mark() - Set this mark on this entry while locked.
  * @xa: XArray.
-- 
cgit v1.2.3


From f32f004cddf86d63a9c42994bbce9f4e2f07c9fa Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 4 Jul 2018 15:42:46 -0400
Subject: ida: Convert to XArray

Use the XA_TRACK_FREE ability to track which entries have a free bit,
similarly to how it uses the radix tree's IDR_FREE tag.  This eliminates
the per-cpu ida_bitmap preload, and fixes the memory consumption
regression I introduced when making the IDR able to store any pointer.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/idr.h                 |  18 +-
 lib/idr.c                           | 379 +++++++++++++++++++-----------------
 lib/radix-tree.c                    |  71 -------
 tools/testing/radix-tree/idr-test.c |   8 +-
 4 files changed, 213 insertions(+), 263 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 3ec8628ce17f..60daf34b625d 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -214,8 +214,7 @@ static inline void idr_preload_end(void)
 	     ++id, (entry) = idr_get_next((idr), &(id)))
 
 /*
- * IDA - IDR based id allocator, use when translation from id to
- * pointer isn't necessary.
+ * IDA - ID Allocator, use when translation from id to pointer isn't necessary.
  */
 #define IDA_CHUNK_SIZE		128	/* 128 bytes per chunk */
 #define IDA_BITMAP_LONGS	(IDA_CHUNK_SIZE / sizeof(long))
@@ -225,14 +224,14 @@ struct ida_bitmap {
 	unsigned long		bitmap[IDA_BITMAP_LONGS];
 };
 
-DECLARE_PER_CPU(struct ida_bitmap *, ida_bitmap);
-
 struct ida {
-	struct radix_tree_root	ida_rt;
+	struct xarray xa;
 };
 
+#define IDA_INIT_FLAGS	(XA_FLAGS_LOCK_IRQ | XA_FLAGS_ALLOC)
+
 #define IDA_INIT(name)	{						\
-	.ida_rt = RADIX_TREE_INIT(name, IDR_RT_MARKER | GFP_NOWAIT),	\
+	.xa = XARRAY_INIT(name, IDA_INIT_FLAGS)				\
 }
 #define DEFINE_IDA(name)	struct ida name = IDA_INIT(name)
 
@@ -292,7 +291,7 @@ static inline int ida_alloc_max(struct ida *ida, unsigned int max, gfp_t gfp)
 
 static inline void ida_init(struct ida *ida)
 {
-	INIT_RADIX_TREE(&ida->ida_rt, IDR_RT_MARKER | GFP_NOWAIT);
+	xa_init_flags(&ida->xa, IDA_INIT_FLAGS);
 }
 
 #define ida_simple_get(ida, start, end, gfp)	\
@@ -301,9 +300,6 @@ static inline void ida_init(struct ida *ida)
 
 static inline bool ida_is_empty(const struct ida *ida)
 {
-	return radix_tree_empty(&ida->ida_rt);
+	return xa_empty(&ida->xa);
 }
-
-/* in lib/radix-tree.c */
-int ida_pre_get(struct ida *ida, gfp_t gfp_mask);
 #endif /* __IDR_H__ */
diff --git a/lib/idr.c b/lib/idr.c
index 9a366b5be2c2..3c20ad9b0463 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -6,8 +6,6 @@
 #include <linux/spinlock.h>
 #include <linux/xarray.h>
 
-DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap);
-
 /**
  * idr_alloc_u32() - Allocate an ID.
  * @idr: IDR handle.
@@ -320,6 +318,9 @@ EXPORT_SYMBOL(idr_replace);
  * free the individual IDs in it.  You can use ida_is_empty() to find
  * out whether the IDA has any IDs currently allocated.
  *
+ * The IDA handles its own locking.  It is safe to call any of the IDA
+ * functions without synchronisation in your code.
+ *
  * IDs are currently limited to the range [0-INT_MAX].  If this is an awkward
  * limitation, it should be quite straightforward to raise the maximum.
  */
@@ -327,151 +328,197 @@ EXPORT_SYMBOL(idr_replace);
 /*
  * Developer's notes:
  *
- * The IDA uses the functionality provided by the IDR & radix tree to store
- * bitmaps in each entry.  The IDR_FREE tag means there is at least one bit
- * free, unlike the IDR where it means at least one entry is free.
+ * The IDA uses the functionality provided by the XArray to store bitmaps in
+ * each entry.  The XA_FREE_MARK is only cleared when all bits in the bitmap
+ * have been set.
  *
- * I considered telling the radix tree that each slot is an order-10 node
- * and storing the bit numbers in the radix tree, but the radix tree can't
- * allow a single multiorder entry at index 0, which would significantly
- * increase memory consumption for the IDA.  So instead we divide the index
- * by the number of bits in the leaf bitmap before doing a radix tree lookup.
+ * I considered telling the XArray that each slot is an order-10 node
+ * and indexing by bit number, but the XArray can't allow a single multi-index
+ * entry in the head, which would significantly increase memory consumption
+ * for the IDA.  So instead we divide the index by the number of bits in the
+ * leaf bitmap before doing a radix tree lookup.
  *
  * As an optimisation, if there are only a few low bits set in any given
  * leaf, instead of allocating a 128-byte bitmap, we store the bits
- * directly in the entry.
- *
- * We allow the radix tree 'exceptional' count to get out of date.  Nothing
- * in the IDA nor the radix tree code checks it.  If it becomes important
- * to maintain an accurate exceptional count, switch the rcu_assign_pointer()
- * calls to radix_tree_iter_replace() which will correct the exceptional
- * count.
- *
- * The IDA always requires a lock to alloc/free.  If we add a 'test_bit'
+ * as a value entry.  Value entries never have the XA_FREE_MARK cleared
+ * because we can always convert them into a bitmap entry.
+ *
+ * It would be possible to optimise further; once we've run out of a
+ * single 128-byte bitmap, we currently switch to a 576-byte node, put
+ * the 128-byte bitmap in the first entry and then start allocating extra
+ * 128-byte entries.  We could instead use the 512 bytes of the node's
+ * data as a bitmap before moving to that scheme.  I do not believe this
+ * is a worthwhile optimisation; Rasmus Villemoes surveyed the current
+ * users of the IDA and almost none of them use more than 1024 entries.
+ * Those that do use more than the 8192 IDs that the 512 bytes would
+ * provide.
+ *
+ * The IDA always uses a lock to alloc/free.  If we add a 'test_bit'
  * equivalent, it will still need locking.  Going to RCU lookup would require
  * using RCU to free bitmaps, and that's not trivial without embedding an
  * RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte
  * bitmap, which is excessive.
  */
 
-#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS - 1)
-
-static int ida_get_new_above(struct ida *ida, int start)
+/**
+ * ida_alloc_range() - Allocate an unused ID.
+ * @ida: IDA handle.
+ * @min: Lowest ID to allocate.
+ * @max: Highest ID to allocate.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocate an ID between @min and @max, inclusive.  The allocated ID will
+ * not exceed %INT_MAX, even if @max is larger.
+ *
+ * Context: Any context.
+ * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
+ * or %-ENOSPC if there are no free IDs.
+ */
+int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max,
+			gfp_t gfp)
 {
-	struct radix_tree_root *root = &ida->ida_rt;
-	void __rcu **slot;
-	struct radix_tree_iter iter;
-	struct ida_bitmap *bitmap;
-	unsigned long index;
-	unsigned bit;
-	int new;
-
-	index = start / IDA_BITMAP_BITS;
-	bit = start % IDA_BITMAP_BITS;
-
-	slot = radix_tree_iter_init(&iter, index);
-	for (;;) {
-		if (slot)
-			slot = radix_tree_next_slot(slot, &iter,
-						RADIX_TREE_ITER_TAGGED);
-		if (!slot) {
-			slot = idr_get_free(root, &iter, GFP_NOWAIT, IDA_MAX);
-			if (IS_ERR(slot)) {
-				if (slot == ERR_PTR(-ENOMEM))
-					return -EAGAIN;
-				return PTR_ERR(slot);
+	XA_STATE(xas, &ida->xa, min / IDA_BITMAP_BITS);
+	unsigned bit = min % IDA_BITMAP_BITS;
+	unsigned long flags;
+	struct ida_bitmap *bitmap, *alloc = NULL;
+
+	if ((int)min < 0)
+		return -ENOSPC;
+
+	if ((int)max < 0)
+		max = INT_MAX;
+
+retry:
+	xas_lock_irqsave(&xas, flags);
+next:
+	bitmap = xas_find_marked(&xas, max / IDA_BITMAP_BITS, XA_FREE_MARK);
+	if (xas.xa_index > min / IDA_BITMAP_BITS)
+		bit = 0;
+	if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
+		goto nospc;
+
+	if (xa_is_value(bitmap)) {
+		unsigned long tmp = xa_to_value(bitmap);
+
+		if (bit < BITS_PER_XA_VALUE) {
+			bit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, bit);
+			if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
+				goto nospc;
+			if (bit < BITS_PER_XA_VALUE) {
+				tmp |= 1UL << bit;
+				xas_store(&xas, xa_mk_value(tmp));
+				goto out;
 			}
 		}
-		if (iter.index > index)
-			bit = 0;
-		new = iter.index * IDA_BITMAP_BITS;
-		bitmap = rcu_dereference_raw(*slot);
-		if (xa_is_value(bitmap)) {
-			unsigned long tmp = xa_to_value(bitmap);
-			int vbit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE,
-							bit);
-			if (vbit < BITS_PER_XA_VALUE) {
-				tmp |= 1UL << vbit;
-				rcu_assign_pointer(*slot, xa_mk_value(tmp));
-				return new + vbit;
-			}
-			bitmap = this_cpu_xchg(ida_bitmap, NULL);
-			if (!bitmap)
-				return -EAGAIN;
-			bitmap->bitmap[0] = tmp;
-			rcu_assign_pointer(*slot, bitmap);
+		bitmap = alloc;
+		if (!bitmap)
+			bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT);
+		if (!bitmap)
+			goto alloc;
+		bitmap->bitmap[0] = tmp;
+		xas_store(&xas, bitmap);
+		if (xas_error(&xas)) {
+			bitmap->bitmap[0] = 0;
+			goto out;
 		}
+	}
 
-		if (bitmap) {
-			bit = find_next_zero_bit(bitmap->bitmap,
-							IDA_BITMAP_BITS, bit);
-			new += bit;
-			if (new < 0)
-				return -ENOSPC;
-			if (bit == IDA_BITMAP_BITS)
-				continue;
+	if (bitmap) {
+		bit = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, bit);
+		if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
+			goto nospc;
+		if (bit == IDA_BITMAP_BITS)
+			goto next;
 
-			__set_bit(bit, bitmap->bitmap);
-			if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
-				radix_tree_iter_tag_clear(root, &iter,
-								IDR_FREE);
+		__set_bit(bit, bitmap->bitmap);
+		if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
+			xas_clear_mark(&xas, XA_FREE_MARK);
+	} else {
+		if (bit < BITS_PER_XA_VALUE) {
+			bitmap = xa_mk_value(1UL << bit);
 		} else {
-			new += bit;
-			if (new < 0)
-				return -ENOSPC;
-			if (bit < BITS_PER_XA_VALUE) {
-				bitmap = xa_mk_value(1UL << bit);
-			} else {
-				bitmap = this_cpu_xchg(ida_bitmap, NULL);
-				if (!bitmap)
-					return -EAGAIN;
-				__set_bit(bit, bitmap->bitmap);
-			}
-			radix_tree_iter_replace(root, &iter, slot, bitmap);
+			bitmap = alloc;
+			if (!bitmap)
+				bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT);
+			if (!bitmap)
+				goto alloc;
+			__set_bit(bit, bitmap->bitmap);
 		}
-
-		return new;
+		xas_store(&xas, bitmap);
+	}
+out:
+	xas_unlock_irqrestore(&xas, flags);
+	if (xas_nomem(&xas, gfp)) {
+		xas.xa_index = min / IDA_BITMAP_BITS;
+		bit = min % IDA_BITMAP_BITS;
+		goto retry;
 	}
+	if (bitmap != alloc)
+		kfree(alloc);
+	if (xas_error(&xas))
+		return xas_error(&xas);
+	return xas.xa_index * IDA_BITMAP_BITS + bit;
+alloc:
+	xas_unlock_irqrestore(&xas, flags);
+	alloc = kzalloc(sizeof(*bitmap), gfp);
+	if (!alloc)
+		return -ENOMEM;
+	xas_set(&xas, min / IDA_BITMAP_BITS);
+	bit = min % IDA_BITMAP_BITS;
+	goto retry;
+nospc:
+	xas_unlock_irqrestore(&xas, flags);
+	return -ENOSPC;
 }
+EXPORT_SYMBOL(ida_alloc_range);
 
-static void ida_remove(struct ida *ida, int id)
+/**
+ * ida_free() - Release an allocated ID.
+ * @ida: IDA handle.
+ * @id: Previously allocated ID.
+ *
+ * Context: Any context.
+ */
+void ida_free(struct ida *ida, unsigned int id)
 {
-	unsigned long index = id / IDA_BITMAP_BITS;
-	unsigned offset = id % IDA_BITMAP_BITS;
+	XA_STATE(xas, &ida->xa, id / IDA_BITMAP_BITS);
+	unsigned bit = id % IDA_BITMAP_BITS;
 	struct ida_bitmap *bitmap;
-	unsigned long *btmp;
-	struct radix_tree_iter iter;
-	void __rcu **slot;
+	unsigned long flags;
 
-	slot = radix_tree_iter_lookup(&ida->ida_rt, &iter, index);
-	if (!slot)
-		goto err;
+	BUG_ON((int)id < 0);
+
+	xas_lock_irqsave(&xas, flags);
+	bitmap = xas_load(&xas);
 
-	bitmap = rcu_dereference_raw(*slot);
 	if (xa_is_value(bitmap)) {
-		btmp = (unsigned long *)slot;
-		offset += 1; /* Intimate knowledge of the value encoding */
-		if (offset >= BITS_PER_LONG)
+		unsigned long v = xa_to_value(bitmap);
+		if (bit >= BITS_PER_XA_VALUE)
+			goto err;
+		if (!(v & (1UL << bit)))
 			goto err;
+		v &= ~(1UL << bit);
+		if (!v)
+			goto delete;
+		xas_store(&xas, xa_mk_value(v));
 	} else {
-		btmp = bitmap->bitmap;
-	}
-	if (!test_bit(offset, btmp))
-		goto err;
-
-	__clear_bit(offset, btmp);
-	radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE);
-	if (xa_is_value(bitmap)) {
-		if (xa_to_value(rcu_dereference_raw(*slot)) == 0)
-			radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
-	} else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) {
-		kfree(bitmap);
-		radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
+		if (!test_bit(bit, bitmap->bitmap))
+			goto err;
+		__clear_bit(bit, bitmap->bitmap);
+		xas_set_mark(&xas, XA_FREE_MARK);
+		if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) {
+			kfree(bitmap);
+delete:
+			xas_store(&xas, NULL);
+		}
 	}
+	xas_unlock_irqrestore(&xas, flags);
 	return;
  err:
+	xas_unlock_irqrestore(&xas, flags);
 	WARN(1, "ida_free called for id=%d which is not allocated.\n", id);
 }
+EXPORT_SYMBOL(ida_free);
 
 /**
  * ida_destroy() - Free all IDs.
@@ -486,80 +533,60 @@ static void ida_remove(struct ida *ida, int id)
  */
 void ida_destroy(struct ida *ida)
 {
+	XA_STATE(xas, &ida->xa, 0);
+	struct ida_bitmap *bitmap;
 	unsigned long flags;
-	struct radix_tree_iter iter;
-	void __rcu **slot;
 
-	xa_lock_irqsave(&ida->ida_rt, flags);
-	radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) {
-		struct ida_bitmap *bitmap = rcu_dereference_raw(*slot);
+	xas_lock_irqsave(&xas, flags);
+	xas_for_each(&xas, bitmap, ULONG_MAX) {
 		if (!xa_is_value(bitmap))
 			kfree(bitmap);
-		radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
+		xas_store(&xas, NULL);
 	}
-	xa_unlock_irqrestore(&ida->ida_rt, flags);
+	xas_unlock_irqrestore(&xas, flags);
 }
 EXPORT_SYMBOL(ida_destroy);
 
-/**
- * ida_alloc_range() - Allocate an unused ID.
- * @ida: IDA handle.
- * @min: Lowest ID to allocate.
- * @max: Highest ID to allocate.
- * @gfp: Memory allocation flags.
- *
- * Allocate an ID between @min and @max, inclusive.  The allocated ID will
- * not exceed %INT_MAX, even if @max is larger.
- *
- * Context: Any context.
- * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
- * or %-ENOSPC if there are no free IDs.
- */
-int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max,
-			gfp_t gfp)
-{
-	int id = 0;
-	unsigned long flags;
-
-	if ((int)min < 0)
-		return -ENOSPC;
+#ifndef __KERNEL__
+extern void xa_dump_index(unsigned long index, unsigned int shift);
+#define IDA_CHUNK_SHIFT		ilog2(IDA_BITMAP_BITS)
 
-	if ((int)max < 0)
-		max = INT_MAX;
-
-again:
-	xa_lock_irqsave(&ida->ida_rt, flags);
-	id = ida_get_new_above(ida, min);
-	if (id > (int)max) {
-		ida_remove(ida, id);
-		id = -ENOSPC;
-	}
-	xa_unlock_irqrestore(&ida->ida_rt, flags);
+static void ida_dump_entry(void *entry, unsigned long index)
+{
+	unsigned long i;
+
+	if (!entry)
+		return;
+
+	if (xa_is_node(entry)) {
+		struct xa_node *node = xa_to_node(entry);
+		unsigned int shift = node->shift + IDA_CHUNK_SHIFT +
+			XA_CHUNK_SHIFT;
+
+		xa_dump_index(index * IDA_BITMAP_BITS, shift);
+		xa_dump_node(node);
+		for (i = 0; i < XA_CHUNK_SIZE; i++)
+			ida_dump_entry(node->slots[i],
+					index | (i << node->shift));
+	} else if (xa_is_value(entry)) {
+		xa_dump_index(index * IDA_BITMAP_BITS, ilog2(BITS_PER_LONG));
+		pr_cont("value: data %lx [%px]\n", xa_to_value(entry), entry);
+	} else {
+		struct ida_bitmap *bitmap = entry;
 
-	if (unlikely(id == -EAGAIN)) {
-		if (!ida_pre_get(ida, gfp))
-			return -ENOMEM;
-		goto again;
+		xa_dump_index(index * IDA_BITMAP_BITS, IDA_CHUNK_SHIFT);
+		pr_cont("bitmap: %p data", bitmap);
+		for (i = 0; i < IDA_BITMAP_LONGS; i++)
+			pr_cont(" %lx", bitmap->bitmap[i]);
+		pr_cont("\n");
 	}
-
-	return id;
 }
-EXPORT_SYMBOL(ida_alloc_range);
 
-/**
- * ida_free() - Release an allocated ID.
- * @ida: IDA handle.
- * @id: Previously allocated ID.
- *
- * Context: Any context.
- */
-void ida_free(struct ida *ida, unsigned int id)
+static void ida_dump(struct ida *ida)
 {
-	unsigned long flags;
-
-	BUG_ON((int)id < 0);
-	xa_lock_irqsave(&ida->ida_rt, flags);
-	ida_remove(ida, id);
-	xa_unlock_irqrestore(&ida->ida_rt, flags);
+	struct xarray *xa = &ida->xa;
+	pr_debug("ida: %p node %p free %d\n", ida, xa->xa_head,
+				xa->xa_flags >> ROOT_TAG_SHIFT);
+	ida_dump_entry(xa->xa_head, 0);
 }
-EXPORT_SYMBOL(ida_free);
+#endif
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 3479d93c32b9..68702061464f 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -255,54 +255,6 @@ static unsigned long next_index(unsigned long index,
 	return (index & ~node_maxindex(node)) + (offset << node->shift);
 }
 
-#ifndef __KERNEL__
-static void dump_ida_node(void *entry, unsigned long index)
-{
-	unsigned long i;
-
-	if (!entry)
-		return;
-
-	if (radix_tree_is_internal_node(entry)) {
-		struct radix_tree_node *node = entry_to_node(entry);
-
-		pr_debug("ida node: %p offset %d indices %lu-%lu parent %p free %lx shift %d count %d\n",
-			node, node->offset, index * IDA_BITMAP_BITS,
-			((index | node_maxindex(node)) + 1) *
-				IDA_BITMAP_BITS - 1,
-			node->parent, node->tags[0][0], node->shift,
-			node->count);
-		for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
-			dump_ida_node(node->slots[i],
-					index | (i << node->shift));
-	} else if (xa_is_value(entry)) {
-		pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n",
-				entry, (int)(index & RADIX_TREE_MAP_MASK),
-				index * IDA_BITMAP_BITS,
-				index * IDA_BITMAP_BITS + BITS_PER_XA_VALUE,
-				xa_to_value(entry));
-	} else {
-		struct ida_bitmap *bitmap = entry;
-
-		pr_debug("ida btmp: %p offset %d indices %lu-%lu data", bitmap,
-				(int)(index & RADIX_TREE_MAP_MASK),
-				index * IDA_BITMAP_BITS,
-				(index + 1) * IDA_BITMAP_BITS - 1);
-		for (i = 0; i < IDA_BITMAP_LONGS; i++)
-			pr_cont(" %lx", bitmap->bitmap[i]);
-		pr_cont("\n");
-	}
-}
-
-static void ida_dump(struct ida *ida)
-{
-	struct radix_tree_root *root = &ida->ida_rt;
-	pr_debug("ida: %p node %p free %d\n", ida, root->xa_head,
-				root->xa_flags >> ROOT_TAG_SHIFT);
-	dump_ida_node(root->xa_head, 0);
-}
-#endif
-
 /*
  * This assumes that the caller has performed appropriate preallocation, and
  * that the caller has pinned this thread of control to the current CPU.
@@ -2039,27 +1991,6 @@ void idr_preload(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(idr_preload);
 
-int ida_pre_get(struct ida *ida, gfp_t gfp)
-{
-	/*
-	 * The IDA API has no preload_end() equivalent.  Instead,
-	 * ida_get_new() can return -EAGAIN, prompting the caller
-	 * to return to the ida_pre_get() step.
-	 */
-	if (!__radix_tree_preload(gfp, IDA_PRELOAD_SIZE))
-		preempt_enable();
-
-	if (!this_cpu_read(ida_bitmap)) {
-		struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp);
-		if (!bitmap)
-			return 0;
-		if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap))
-			kfree(bitmap);
-	}
-
-	return 1;
-}
-
 void __rcu **idr_get_free(struct radix_tree_root *root,
 			      struct radix_tree_iter *iter, gfp_t gfp,
 			      unsigned long max)
@@ -2201,8 +2132,6 @@ static int radix_tree_cpu_dead(unsigned int cpu)
 		kmem_cache_free(radix_tree_node_cachep, node);
 		rtp->nr--;
 	}
-	kfree(per_cpu(ida_bitmap, cpu));
-	per_cpu(ida_bitmap, cpu) = NULL;
 	return 0;
 }
 
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index a5a4494922a6..1b63bdb7688f 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -402,16 +402,15 @@ void ida_check_nomem(void)
  */
 void ida_check_conv_user(void)
 {
-#if 0
 	DEFINE_IDA(ida);
 	unsigned long i;
 
-	radix_tree_cpu_dead(1);
 	for (i = 0; i < 1000000; i++) {
 		int id = ida_alloc(&ida, GFP_NOWAIT);
 		if (id == -ENOMEM) {
-			IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) !=
-					BITS_PER_XA_VALUE);
+			IDA_BUG_ON(&ida, ((i % IDA_BITMAP_BITS) !=
+					  BITS_PER_XA_VALUE) &&
+					 ((i % IDA_BITMAP_BITS) != 0));
 			id = ida_alloc(&ida, GFP_KERNEL);
 		} else {
 			IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) ==
@@ -420,7 +419,6 @@ void ida_check_conv_user(void)
 		IDA_BUG_ON(&ida, id != i);
 	}
 	ida_destroy(&ida);
-#endif
 }
 
 void ida_check_random(void)
-- 
cgit v1.2.3


From eb797a8ee0ab4cd03df556980ce7bf167cadaa50 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 5 Mar 2018 22:46:03 -0500
Subject: page cache: Rearrange address_space

Change i_pages from a radix_tree_root to an xarray, convert the
documentation into kernel-doc format and change the order of the elements
to pack them better on 64-bit systems.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/fs.h | 46 +++++++++++++++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6c0b4a1c22ff..d126cad0f621 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -403,24 +403,40 @@ int pagecache_write_end(struct file *, struct address_space *mapping,
 				loff_t pos, unsigned len, unsigned copied,
 				struct page *page, void *fsdata);
 
+/**
+ * struct address_space - Contents of a cacheable, mappable object.
+ * @host: Owner, either the inode or the block_device.
+ * @i_pages: Cached pages.
+ * @gfp_mask: Memory allocation flags to use for allocating pages.
+ * @i_mmap_writable: Number of VM_SHARED mappings.
+ * @i_mmap: Tree of private and shared mappings.
+ * @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
+ * @nrpages: Number of page entries, protected by the i_pages lock.
+ * @nrexceptional: Shadow or DAX entries, protected by the i_pages lock.
+ * @writeback_index: Writeback starts here.
+ * @a_ops: Methods.
+ * @flags: Error bits and flags (AS_*).
+ * @wb_err: The most recent error which has occurred.
+ * @private_lock: For use by the owner of the address_space.
+ * @private_list: For use by the owner of the address_space.
+ * @private_data: For use by the owner of the address_space.
+ */
 struct address_space {
-	struct inode		*host;		/* owner: inode, block_device */
-	struct radix_tree_root	i_pages;	/* cached pages */
-	atomic_t		i_mmap_writable;/* count VM_SHARED mappings */
-	struct rb_root_cached	i_mmap;		/* tree of private and shared mappings */
-	struct rw_semaphore	i_mmap_rwsem;	/* protect tree, count, list */
-	/* Protected by the i_pages lock */
-	unsigned long		nrpages;	/* number of total pages */
-	/* number of shadow or DAX exceptional entries */
+	struct inode		*host;
+	struct xarray		i_pages;
+	gfp_t			gfp_mask;
+	atomic_t		i_mmap_writable;
+	struct rb_root_cached	i_mmap;
+	struct rw_semaphore	i_mmap_rwsem;
+	unsigned long		nrpages;
 	unsigned long		nrexceptional;
-	pgoff_t			writeback_index;/* writeback starts here */
-	const struct address_space_operations *a_ops;	/* methods */
-	unsigned long		flags;		/* error bits */
-	spinlock_t		private_lock;	/* for use by the address_space */
-	gfp_t			gfp_mask;	/* implicit gfp mask for allocations */
-	struct list_head	private_list;	/* for use by the address_space */
-	void			*private_data;	/* ditto */
+	pgoff_t			writeback_index;
+	const struct address_space_operations *a_ops;
+	unsigned long		flags;
 	errseq_t		wb_err;
+	spinlock_t		private_lock;
+	struct list_head	private_list;
+	void			*private_data;
 } __attribute__((aligned(sizeof(long)))) __randomize_layout;
 	/*
 	 * On most architectures that alignment is already the case; but
-- 
cgit v1.2.3


From 0d3f92966629e536b0c5c2355c1ada8e21c245f6 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 21 Nov 2017 14:07:06 -0500
Subject: page cache: Convert hole search to XArray

The page cache offers the ability to search for a miss in the previous or
next N locations.  Rather than teach the XArray about the page cache's
definition of a miss, use xas_prev() and xas_next() to search the page
array.  This should be more efficient as it does not have to start the
lookup from the top for each index.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 fs/nfs/blocklayout/blocklayout.c |   2 +-
 include/linux/pagemap.h          |   4 +-
 mm/filemap.c                     | 110 ++++++++++++++++++---------------------
 mm/readahead.c                   |   4 +-
 4 files changed, 55 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 06cb0c1d9aee..d3781cd983f6 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -896,7 +896,7 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx)
 	end = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
 	if (end != inode->i_mapping->nrpages) {
 		rcu_read_lock();
-		end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX);
+		end = page_cache_next_miss(mapping, idx + 1, ULONG_MAX);
 		rcu_read_unlock();
 	}
 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index b1bd2186e6d2..cf9ad413eee9 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -241,9 +241,9 @@ static inline gfp_t readahead_gfp_mask(struct address_space *x)
 
 typedef int filler_t(void *, struct page *);
 
-pgoff_t page_cache_next_hole(struct address_space *mapping,
+pgoff_t page_cache_next_miss(struct address_space *mapping,
 			     pgoff_t index, unsigned long max_scan);
-pgoff_t page_cache_prev_hole(struct address_space *mapping,
+pgoff_t page_cache_prev_miss(struct address_space *mapping,
 			     pgoff_t index, unsigned long max_scan);
 
 #define FGP_ACCESSED		0x00000001
diff --git a/mm/filemap.c b/mm/filemap.c
index 4de14e75c4ec..714d3d0f60f5 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1326,86 +1326,76 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
 }
 
 /**
- * page_cache_next_hole - find the next hole (not-present entry)
- * @mapping: mapping
- * @index: index
- * @max_scan: maximum range to search
- *
- * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the
- * lowest indexed hole.
- *
- * Returns: the index of the hole if found, otherwise returns an index
- * outside of the set specified (in which case 'return - index >=
- * max_scan' will be true). In rare cases of index wrap-around, 0 will
- * be returned.
- *
- * page_cache_next_hole may be called under rcu_read_lock. However,
- * like radix_tree_gang_lookup, this will not atomically search a
- * snapshot of the tree at a single point in time. For example, if a
- * hole is created at index 5, then subsequently a hole is created at
- * index 10, page_cache_next_hole covering both indexes may return 10
- * if called under rcu_read_lock.
+ * page_cache_next_miss() - Find the next gap in the page cache.
+ * @mapping: Mapping.
+ * @index: Index.
+ * @max_scan: Maximum range to search.
+ *
+ * Search the range [index, min(index + max_scan - 1, ULONG_MAX)] for the
+ * gap with the lowest index.
+ *
+ * This function may be called under the rcu_read_lock.  However, this will
+ * not atomically search a snapshot of the cache at a single point in time.
+ * For example, if a gap is created at index 5, then subsequently a gap is
+ * created at index 10, page_cache_next_miss covering both indices may
+ * return 10 if called under the rcu_read_lock.
+ *
+ * Return: The index of the gap if found, otherwise an index outside the
+ * range specified (in which case 'return - index >= max_scan' will be true).
+ * In the rare case of index wrap-around, 0 will be returned.
  */
-pgoff_t page_cache_next_hole(struct address_space *mapping,
+pgoff_t page_cache_next_miss(struct address_space *mapping,
 			     pgoff_t index, unsigned long max_scan)
 {
-	unsigned long i;
+	XA_STATE(xas, &mapping->i_pages, index);
 
-	for (i = 0; i < max_scan; i++) {
-		struct page *page;
-
-		page = radix_tree_lookup(&mapping->i_pages, index);
-		if (!page || xa_is_value(page))
+	while (max_scan--) {
+		void *entry = xas_next(&xas);
+		if (!entry || xa_is_value(entry))
 			break;
-		index++;
-		if (index == 0)
+		if (xas.xa_index == 0)
 			break;
 	}
 
-	return index;
+	return xas.xa_index;
 }
-EXPORT_SYMBOL(page_cache_next_hole);
+EXPORT_SYMBOL(page_cache_next_miss);
 
 /**
- * page_cache_prev_hole - find the prev hole (not-present entry)
- * @mapping: mapping
- * @index: index
- * @max_scan: maximum range to search
- *
- * Search backwards in the range [max(index-max_scan+1, 0), index] for
- * the first hole.
- *
- * Returns: the index of the hole if found, otherwise returns an index
- * outside of the set specified (in which case 'index - return >=
- * max_scan' will be true). In rare cases of wrap-around, ULONG_MAX
- * will be returned.
- *
- * page_cache_prev_hole may be called under rcu_read_lock. However,
- * like radix_tree_gang_lookup, this will not atomically search a
- * snapshot of the tree at a single point in time. For example, if a
- * hole is created at index 10, then subsequently a hole is created at
- * index 5, page_cache_prev_hole covering both indexes may return 5 if
- * called under rcu_read_lock.
+ * page_cache_prev_miss() - Find the next gap in the page cache.
+ * @mapping: Mapping.
+ * @index: Index.
+ * @max_scan: Maximum range to search.
+ *
+ * Search the range [max(index - max_scan + 1, 0), index] for the
+ * gap with the highest index.
+ *
+ * This function may be called under the rcu_read_lock.  However, this will
+ * not atomically search a snapshot of the cache at a single point in time.
+ * For example, if a gap is created at index 10, then subsequently a gap is
+ * created at index 5, page_cache_prev_miss() covering both indices may
+ * return 5 if called under the rcu_read_lock.
+ *
+ * Return: The index of the gap if found, otherwise an index outside the
+ * range specified (in which case 'index - return >= max_scan' will be true).
+ * In the rare case of wrap-around, ULONG_MAX will be returned.
  */
-pgoff_t page_cache_prev_hole(struct address_space *mapping,
+pgoff_t page_cache_prev_miss(struct address_space *mapping,
 			     pgoff_t index, unsigned long max_scan)
 {
-	unsigned long i;
-
-	for (i = 0; i < max_scan; i++) {
-		struct page *page;
+	XA_STATE(xas, &mapping->i_pages, index);
 
-		page = radix_tree_lookup(&mapping->i_pages, index);
-		if (!page || xa_is_value(page))
+	while (max_scan--) {
+		void *entry = xas_prev(&xas);
+		if (!entry || xa_is_value(entry))
 			break;
-		index--;
-		if (index == ULONG_MAX)
+		if (xas.xa_index == ULONG_MAX)
 			break;
 	}
 
-	return index;
+	return xas.xa_index;
 }
-EXPORT_SYMBOL(page_cache_prev_hole);
+EXPORT_SYMBOL(page_cache_prev_miss);
 
 /**
  * find_get_entry - find and get a page cache entry
diff --git a/mm/readahead.c b/mm/readahead.c
index de657077d41d..fc4dd364b37a 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -336,7 +336,7 @@ static pgoff_t count_history_pages(struct address_space *mapping,
 	pgoff_t head;
 
 	rcu_read_lock();
-	head = page_cache_prev_hole(mapping, offset - 1, max);
+	head = page_cache_prev_miss(mapping, offset - 1, max);
 	rcu_read_unlock();
 
 	return offset - 1 - head;
@@ -425,7 +425,7 @@ ondemand_readahead(struct address_space *mapping,
 		pgoff_t start;
 
 		rcu_read_lock();
-		start = page_cache_next_hole(mapping, offset + 1, max_pages);
+		start = page_cache_next_miss(mapping, offset + 1, max_pages);
 		rcu_read_unlock();
 
 		if (!start || start - offset > max_pages)
-- 
cgit v1.2.3


From 74d609585d8bd6083bd9d75bc1fd2c0d3851bcc5 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 17 Nov 2017 10:01:45 -0500
Subject: page cache: Add and replace pages using the XArray

Use the XArray APIs to add and replace pages in the page cache.  This
removes two uses of the radix tree preload API and is significantly
shorter code.  It also removes the last user of __radix_tree_create()
outside radix-tree.c itself, so make it static.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/radix-tree.h |   3 -
 include/linux/swap.h       |   8 ++-
 lib/radix-tree.c           |   6 +-
 mm/filemap.c               | 139 +++++++++++++++++++--------------------------
 4 files changed, 66 insertions(+), 90 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 15388b7e38b9..d55de428a589 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -231,9 +231,6 @@ static inline int radix_tree_exception(void *arg)
 	return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK);
 }
 
-int __radix_tree_create(struct radix_tree_root *, unsigned long index,
-			unsigned order, struct radix_tree_node **nodep,
-			void __rcu ***slotp);
 int __radix_tree_insert(struct radix_tree_root *, unsigned long index,
 			unsigned order, void *);
 static inline int radix_tree_insert(struct radix_tree_root *root,
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 8e2c11e692ba..6ea50cf41b4c 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -299,8 +299,12 @@ void *workingset_eviction(struct address_space *mapping, struct page *page);
 bool workingset_refault(void *shadow);
 void workingset_activation(struct page *page);
 
-/* Do not use directly, use workingset_lookup_update */
-void workingset_update_node(struct radix_tree_node *node);
+/* Only track the nodes of mappings with shadow entries */
+void workingset_update_node(struct xa_node *node);
+#define mapping_set_update(xas, mapping) do {				\
+	if (!dax_mapping(mapping) && !shmem_mapping(mapping))		\
+		xas_set_update(xas, workingset_update_node);		\
+} while (0)
 
 /* Returns workingset_update_node() if the mapping has shadow entries. */
 #define workingset_lookup_update(mapping)				\
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 68702061464f..8a58051eb5b3 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -700,9 +700,9 @@ static bool delete_node(struct radix_tree_root *root,
  *
  *	Returns -ENOMEM, or 0 for success.
  */
-int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
-			unsigned order, struct radix_tree_node **nodep,
-			void __rcu ***slotp)
+static int __radix_tree_create(struct radix_tree_root *root,
+		unsigned long index, unsigned order,
+		struct radix_tree_node **nodep, void __rcu ***slotp)
 {
 	struct radix_tree_node *node = NULL, *child;
 	void __rcu **slot = (void __rcu **)&root->xa_head;
diff --git a/mm/filemap.c b/mm/filemap.c
index 714d3d0f60f5..b63caebd1367 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -111,35 +111,6 @@
  *   ->tasklist_lock            (memory_failure, collect_procs_ao)
  */
 
-static int page_cache_tree_insert(struct address_space *mapping,
-				  struct page *page, void **shadowp)
-{
-	struct radix_tree_node *node;
-	void **slot;
-	int error;
-
-	error = __radix_tree_create(&mapping->i_pages, page->index, 0,
-				    &node, &slot);
-	if (error)
-		return error;
-	if (*slot) {
-		void *p;
-
-		p = radix_tree_deref_slot_protected(slot,
-						    &mapping->i_pages.xa_lock);
-		if (!xa_is_value(p))
-			return -EEXIST;
-
-		mapping->nrexceptional--;
-		if (shadowp)
-			*shadowp = p;
-	}
-	__radix_tree_replace(&mapping->i_pages, node, slot, page,
-			     workingset_lookup_update(mapping));
-	mapping->nrpages++;
-	return 0;
-}
-
 static void page_cache_tree_delete(struct address_space *mapping,
 				   struct page *page, void *shadow)
 {
@@ -775,51 +746,44 @@ EXPORT_SYMBOL(file_write_and_wait_range);
  * locked.  This function does not add the new page to the LRU, the
  * caller must do that.
  *
- * The remove + add is atomic.  The only way this function can fail is
- * memory allocation failure.
+ * The remove + add is atomic.  This function cannot fail.
  */
 int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 {
-	int error;
+	struct address_space *mapping = old->mapping;
+	void (*freepage)(struct page *) = mapping->a_ops->freepage;
+	pgoff_t offset = old->index;
+	XA_STATE(xas, &mapping->i_pages, offset);
+	unsigned long flags;
 
 	VM_BUG_ON_PAGE(!PageLocked(old), old);
 	VM_BUG_ON_PAGE(!PageLocked(new), new);
 	VM_BUG_ON_PAGE(new->mapping, new);
 
-	error = radix_tree_preload(gfp_mask & GFP_RECLAIM_MASK);
-	if (!error) {
-		struct address_space *mapping = old->mapping;
-		void (*freepage)(struct page *);
-		unsigned long flags;
-
-		pgoff_t offset = old->index;
-		freepage = mapping->a_ops->freepage;
+	get_page(new);
+	new->mapping = mapping;
+	new->index = offset;
 
-		get_page(new);
-		new->mapping = mapping;
-		new->index = offset;
+	xas_lock_irqsave(&xas, flags);
+	xas_store(&xas, new);
 
-		xa_lock_irqsave(&mapping->i_pages, flags);
-		__delete_from_page_cache(old, NULL);
-		error = page_cache_tree_insert(mapping, new, NULL);
-		BUG_ON(error);
-
-		/*
-		 * hugetlb pages do not participate in page cache accounting.
-		 */
-		if (!PageHuge(new))
-			__inc_node_page_state(new, NR_FILE_PAGES);
-		if (PageSwapBacked(new))
-			__inc_node_page_state(new, NR_SHMEM);
-		xa_unlock_irqrestore(&mapping->i_pages, flags);
-		mem_cgroup_migrate(old, new);
-		radix_tree_preload_end();
-		if (freepage)
-			freepage(old);
-		put_page(old);
-	}
+	old->mapping = NULL;
+	/* hugetlb pages do not participate in page cache accounting. */
+	if (!PageHuge(old))
+		__dec_node_page_state(new, NR_FILE_PAGES);
+	if (!PageHuge(new))
+		__inc_node_page_state(new, NR_FILE_PAGES);
+	if (PageSwapBacked(old))
+		__dec_node_page_state(new, NR_SHMEM);
+	if (PageSwapBacked(new))
+		__inc_node_page_state(new, NR_SHMEM);
+	xas_unlock_irqrestore(&xas, flags);
+	mem_cgroup_migrate(old, new);
+	if (freepage)
+		freepage(old);
+	put_page(old);
 
-	return error;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(replace_page_cache_page);
 
@@ -828,12 +792,15 @@ static int __add_to_page_cache_locked(struct page *page,
 				      pgoff_t offset, gfp_t gfp_mask,
 				      void **shadowp)
 {
+	XA_STATE(xas, &mapping->i_pages, offset);
 	int huge = PageHuge(page);
 	struct mem_cgroup *memcg;
 	int error;
+	void *old;
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(PageSwapBacked(page), page);
+	mapping_set_update(&xas, mapping);
 
 	if (!huge) {
 		error = mem_cgroup_try_charge(page, current->mm,
@@ -842,39 +809,47 @@ static int __add_to_page_cache_locked(struct page *page,
 			return error;
 	}
 
-	error = radix_tree_maybe_preload(gfp_mask & GFP_RECLAIM_MASK);
-	if (error) {
-		if (!huge)
-			mem_cgroup_cancel_charge(page, memcg, false);
-		return error;
-	}
-
 	get_page(page);
 	page->mapping = mapping;
 	page->index = offset;
 
-	xa_lock_irq(&mapping->i_pages);
-	error = page_cache_tree_insert(mapping, page, shadowp);
-	radix_tree_preload_end();
-	if (unlikely(error))
-		goto err_insert;
+	do {
+		xas_lock_irq(&xas);
+		old = xas_load(&xas);
+		if (old && !xa_is_value(old))
+			xas_set_err(&xas, -EEXIST);
+		xas_store(&xas, page);
+		if (xas_error(&xas))
+			goto unlock;
+
+		if (xa_is_value(old)) {
+			mapping->nrexceptional--;
+			if (shadowp)
+				*shadowp = old;
+		}
+		mapping->nrpages++;
+
+		/* hugetlb pages do not participate in page cache accounting */
+		if (!huge)
+			__inc_node_page_state(page, NR_FILE_PAGES);
+unlock:
+		xas_unlock_irq(&xas);
+	} while (xas_nomem(&xas, gfp_mask & GFP_RECLAIM_MASK));
+
+	if (xas_error(&xas))
+		goto error;
 
-	/* hugetlb pages do not participate in page cache accounting. */
-	if (!huge)
-		__inc_node_page_state(page, NR_FILE_PAGES);
-	xa_unlock_irq(&mapping->i_pages);
 	if (!huge)
 		mem_cgroup_commit_charge(page, memcg, false, false);
 	trace_mm_filemap_add_to_page_cache(page);
 	return 0;
-err_insert:
+error:
 	page->mapping = NULL;
 	/* Leave page->index set: truncation relies upon it */
-	xa_unlock_irq(&mapping->i_pages);
 	if (!huge)
 		mem_cgroup_cancel_charge(page, memcg, false);
 	put_page(page);
-	return error;
+	return xas_error(&xas);
 }
 
 /**
-- 
cgit v1.2.3


From 3ece58a270cd1e5026282abe778bd50db7a11d08 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 16 May 2018 18:00:33 -0400
Subject: page cache: Convert find_get_pages_contig to XArray

There's no direct replacement for radix_tree_for_each_contig()
in the XArray API as it's an unusual thing to do.  Instead,
open-code a loop using xas_next().  This removes the only user of
radix_tree_for_each_contig() so delete the iterator from the API and
the test suite code for it.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 .clang-format                          |  1 -
 include/linux/radix-tree.h             | 17 -----------
 mm/filemap.c                           | 53 ++++++++++++++--------------------
 tools/testing/radix-tree/regression3.c | 23 ---------------
 4 files changed, 22 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/.clang-format b/.clang-format
index 1d5da22e0ba5..e6080f5834a3 100644
--- a/.clang-format
+++ b/.clang-format
@@ -323,7 +323,6 @@ ForEachMacros:
   - 'protocol_for_each_card'
   - 'protocol_for_each_dev'
   - 'queue_for_each_hw_ctx'
-  - 'radix_tree_for_each_contig'
   - 'radix_tree_for_each_slot'
   - 'radix_tree_for_each_tagged'
   - 'rbtree_postorder_for_each_entry_safe'
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index d55de428a589..023e888e1163 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -522,23 +522,6 @@ static __always_inline void __rcu **radix_tree_next_slot(void __rcu **slot,
 	     slot || (slot = radix_tree_next_chunk(root, iter, 0)) ;	\
 	     slot = radix_tree_next_slot(slot, iter, 0))
 
-/**
- * radix_tree_for_each_contig - iterate over contiguous slots
- *
- * @slot:	the void** variable for pointer to slot
- * @root:	the struct radix_tree_root pointer
- * @iter:	the struct radix_tree_iter pointer
- * @start:	iteration starting index
- *
- * @slot points to radix tree slot, @iter->index contains its index.
- */
-#define radix_tree_for_each_contig(slot, root, iter, start)		\
-	for (slot = radix_tree_iter_init(iter, start) ;			\
-	     slot || (slot = radix_tree_next_chunk(root, iter,		\
-				RADIX_TREE_ITER_CONTIG)) ;		\
-	     slot = radix_tree_next_slot(slot, iter,			\
-				RADIX_TREE_ITER_CONTIG))
-
 /**
  * radix_tree_for_each_tagged - iterate over tagged slots
  *
diff --git a/mm/filemap.c b/mm/filemap.c
index b72c39fe61c2..089b67598100 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1721,57 +1721,43 @@ out:
 unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index,
 			       unsigned int nr_pages, struct page **pages)
 {
-	struct radix_tree_iter iter;
-	void **slot;
+	XA_STATE(xas, &mapping->i_pages, index);
+	struct page *page;
 	unsigned int ret = 0;
 
 	if (unlikely(!nr_pages))
 		return 0;
 
 	rcu_read_lock();
-	radix_tree_for_each_contig(slot, &mapping->i_pages, &iter, index) {
-		struct page *head, *page;
-repeat:
-		page = radix_tree_deref_slot(slot);
-		/* The hole, there no reason to continue */
-		if (unlikely(!page))
-			break;
-
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
-			/*
-			 * A shadow entry of a recently evicted page,
-			 * or a swap entry from shmem/tmpfs.  Stop
-			 * looking for contiguous pages.
-			 */
+	for (page = xas_load(&xas); page; page = xas_next(&xas)) {
+		struct page *head;
+		if (xas_retry(&xas, page))
+			continue;
+		/*
+		 * If the entry has been swapped out, we can stop looking.
+		 * No current caller is looking for DAX entries.
+		 */
+		if (xa_is_value(page))
 			break;
-		}
 
 		head = compound_head(page);
 		if (!page_cache_get_speculative(head))
-			goto repeat;
+			goto retry;
 
 		/* The page was split under us? */
-		if (compound_head(page) != head) {
-			put_page(head);
-			goto repeat;
-		}
+		if (compound_head(page) != head)
+			goto put_page;
 
 		/* Has the page moved? */
-		if (unlikely(page != *slot)) {
-			put_page(head);
-			goto repeat;
-		}
+		if (unlikely(page != xas_reload(&xas)))
+			goto put_page;
 
 		/*
 		 * must check mapping and index after taking the ref.
 		 * otherwise we can get both false positives and false
 		 * negatives, which is just confusing to the caller.
 		 */
-		if (page->mapping == NULL || page_to_pgoff(page) != iter.index) {
+		if (!page->mapping || page_to_pgoff(page) != xas.xa_index) {
 			put_page(page);
 			break;
 		}
@@ -1779,6 +1765,11 @@ repeat:
 		pages[ret] = page;
 		if (++ret == nr_pages)
 			break;
+		continue;
+put_page:
+		put_page(head);
+retry:
+		xas_reset(&xas);
 	}
 	rcu_read_unlock();
 	return ret;
diff --git a/tools/testing/radix-tree/regression3.c b/tools/testing/radix-tree/regression3.c
index ace2543c3eda..9f9a3b280f56 100644
--- a/tools/testing/radix-tree/regression3.c
+++ b/tools/testing/radix-tree/regression3.c
@@ -69,21 +69,6 @@ void regression3_test(void)
 			continue;
 		}
 	}
-	radix_tree_delete(&root, 1);
-
-	first = true;
-	radix_tree_for_each_contig(slot, &root, &iter, 0) {
-		printv(2, "contig %ld %p\n", iter.index, *slot);
-		if (first) {
-			radix_tree_insert(&root, 1, ptr);
-			first = false;
-		}
-		if (radix_tree_deref_retry(*slot)) {
-			printv(2, "retry at %ld\n", iter.index);
-			slot = radix_tree_iter_retry(&iter);
-			continue;
-		}
-	}
 
 	radix_tree_for_each_slot(slot, &root, &iter, 0) {
 		printv(2, "slot %ld %p\n", iter.index, *slot);
@@ -93,14 +78,6 @@ void regression3_test(void)
 		}
 	}
 
-	radix_tree_for_each_contig(slot, &root, &iter, 0) {
-		printv(2, "contig %ld %p\n", iter.index, *slot);
-		if (!iter.index) {
-			printv(2, "next at %ld\n", iter.index);
-			slot = radix_tree_iter_resume(slot, &iter);
-		}
-	}
-
 	radix_tree_tag_set(&root, 0, 0);
 	radix_tree_tag_set(&root, 1, 0);
 	radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) {
-- 
cgit v1.2.3


From a6906972fe67fb6f73ba04088f7897227fd1cd8f Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 16 May 2018 18:12:54 -0400
Subject: page cache; Convert find_get_pages_range_tag to XArray

The 'end' parameter of the xas_for_each iterator avoids a useless
iteration at the end of the range.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/pagemap.h |  4 +--
 mm/filemap.c            | 68 +++++++++++++++++++------------------------------
 2 files changed, 28 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index cf9ad413eee9..c9bbb9a05764 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -363,10 +363,10 @@ static inline unsigned find_get_pages(struct address_space *mapping,
 unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start,
 			       unsigned int nr_pages, struct page **pages);
 unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
-			pgoff_t end, int tag, unsigned int nr_pages,
+			pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
 			struct page **pages);
 static inline unsigned find_get_pages_tag(struct address_space *mapping,
-			pgoff_t *index, int tag, unsigned int nr_pages,
+			pgoff_t *index, xa_mark_t tag, unsigned int nr_pages,
 			struct page **pages)
 {
 	return find_get_pages_range_tag(mapping, index, (pgoff_t)-1, tag,
diff --git a/mm/filemap.c b/mm/filemap.c
index 089b67598100..0df28aa6411c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1789,74 +1789,58 @@ EXPORT_SYMBOL(find_get_pages_contig);
  * @tag.   We update @index to index the next page for the traversal.
  */
 unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
-			pgoff_t end, int tag, unsigned int nr_pages,
+			pgoff_t end, xa_mark_t tag, unsigned int nr_pages,
 			struct page **pages)
 {
-	struct radix_tree_iter iter;
-	void **slot;
+	XA_STATE(xas, &mapping->i_pages, *index);
+	struct page *page;
 	unsigned ret = 0;
 
 	if (unlikely(!nr_pages))
 		return 0;
 
 	rcu_read_lock();
-	radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, *index, tag) {
-		struct page *head, *page;
-
-		if (iter.index > end)
-			break;
-repeat:
-		page = radix_tree_deref_slot(slot);
-		if (unlikely(!page))
+	xas_for_each_marked(&xas, page, end, tag) {
+		struct page *head;
+		if (xas_retry(&xas, page))
 			continue;
-
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
-			/*
-			 * A shadow entry of a recently evicted page.
-			 *
-			 * Those entries should never be tagged, but
-			 * this tree walk is lockless and the tags are
-			 * looked up in bulk, one radix tree node at a
-			 * time, so there is a sizable window for page
-			 * reclaim to evict a page we saw tagged.
-			 *
-			 * Skip over it.
-			 */
+		/*
+		 * Shadow entries should never be tagged, but this iteration
+		 * is lockless so there is a window for page reclaim to evict
+		 * a page we saw tagged.  Skip over it.
+		 */
+		if (xa_is_value(page))
 			continue;
-		}
 
 		head = compound_head(page);
 		if (!page_cache_get_speculative(head))
-			goto repeat;
+			goto retry;
 
 		/* The page was split under us? */
-		if (compound_head(page) != head) {
-			put_page(head);
-			goto repeat;
-		}
+		if (compound_head(page) != head)
+			goto put_page;
 
 		/* Has the page moved? */
-		if (unlikely(page != *slot)) {
-			put_page(head);
-			goto repeat;
-		}
+		if (unlikely(page != xas_reload(&xas)))
+			goto put_page;
 
 		pages[ret] = page;
 		if (++ret == nr_pages) {
-			*index = pages[ret - 1]->index + 1;
+			*index = page->index + 1;
 			goto out;
 		}
+		continue;
+put_page:
+		put_page(head);
+retry:
+		xas_reset(&xas);
 	}
 
 	/*
-	 * We come here when we got at @end. We take care to not overflow the
+	 * We come here when we got to @end. We take care to not overflow the
 	 * index @index as it confuses some of the callers. This breaks the
-	 * iteration when there is page at index -1 but that is already broken
-	 * anyway.
+	 * iteration when there is a page at index -1 but that is already
+	 * broken anyway.
 	 */
 	if (end == (pgoff_t)-1)
 		*index = (pgoff_t)-1;
-- 
cgit v1.2.3


From c1901cd33cf407d77a181f8dd4ffff98041ef480 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 16 May 2018 23:56:04 -0400
Subject: page cache: Convert find_get_entries_tag to XArray

Slightly shorter and simpler code.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/pagemap.h |  2 +-
 mm/filemap.c            | 54 ++++++++++++++++++++++---------------------------
 2 files changed, 25 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index c9bbb9a05764..226f96f0dee0 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -373,7 +373,7 @@ static inline unsigned find_get_pages_tag(struct address_space *mapping,
 					nr_pages, pages);
 }
 unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
-			int tag, unsigned int nr_entries,
+			xa_mark_t tag, unsigned int nr_entries,
 			struct page **entries, pgoff_t *indices);
 
 struct page *grab_cache_page_write_begin(struct address_space *mapping,
diff --git a/mm/filemap.c b/mm/filemap.c
index 0df28aa6411c..35ae011971e1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1866,57 +1866,51 @@ EXPORT_SYMBOL(find_get_pages_range_tag);
  * @tag.
  */
 unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start,
-			int tag, unsigned int nr_entries,
+			xa_mark_t tag, unsigned int nr_entries,
 			struct page **entries, pgoff_t *indices)
 {
-	void **slot;
+	XA_STATE(xas, &mapping->i_pages, start);
+	struct page *page;
 	unsigned int ret = 0;
-	struct radix_tree_iter iter;
 
 	if (!nr_entries)
 		return 0;
 
 	rcu_read_lock();
-	radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, start, tag) {
-		struct page *head, *page;
-repeat:
-		page = radix_tree_deref_slot(slot);
-		if (unlikely(!page))
+	xas_for_each_marked(&xas, page, ULONG_MAX, tag) {
+		struct page *head;
+		if (xas_retry(&xas, page))
 			continue;
-		if (radix_tree_exception(page)) {
-			if (radix_tree_deref_retry(page)) {
-				slot = radix_tree_iter_retry(&iter);
-				continue;
-			}
-
-			/*
-			 * A shadow entry of a recently evicted page, a swap
-			 * entry from shmem/tmpfs or a DAX entry.  Return it
-			 * without attempting to raise page count.
-			 */
+		/*
+		 * A shadow entry of a recently evicted page, a swap
+		 * entry from shmem/tmpfs or a DAX entry.  Return it
+		 * without attempting to raise page count.
+		 */
+		if (xa_is_value(page))
 			goto export;
-		}
 
 		head = compound_head(page);
 		if (!page_cache_get_speculative(head))
-			goto repeat;
+			goto retry;
 
 		/* The page was split under us? */
-		if (compound_head(page) != head) {
-			put_page(head);
-			goto repeat;
-		}
+		if (compound_head(page) != head)
+			goto put_page;
 
 		/* Has the page moved? */
-		if (unlikely(page != *slot)) {
-			put_page(head);
-			goto repeat;
-		}
+		if (unlikely(page != xas_reload(&xas)))
+			goto put_page;
+
 export:
-		indices[ret] = iter.index;
+		indices[ret] = xas.xa_index;
 		entries[ret] = page;
 		if (++ret == nr_entries)
 			break;
+		continue;
+put_page:
+		put_page(head);
+retry:
+		xas_reset(&xas);
 	}
 	rcu_read_unlock();
 	return ret;
-- 
cgit v1.2.3


From ff9c745b81ff1e482167fd73558450e66ad43a33 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 22 Nov 2017 11:41:23 -0500
Subject: mm: Convert page-writeback to XArray

Includes moving mapping_tagged() to fs.h as a static inline, and
changing it to return bool.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/fs.h  | 17 +++++++------
 mm/page-writeback.c | 72 +++++++++++++++++++----------------------------------
 2 files changed, 36 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index d126cad0f621..e10278e4db66 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -483,15 +483,18 @@ struct block_device {
 	struct mutex		bd_fsfreeze_mutex;
 } __randomize_layout;
 
+/* XArray tags, for tagging dirty and writeback pages in the pagecache. */
+#define PAGECACHE_TAG_DIRTY	XA_MARK_0
+#define PAGECACHE_TAG_WRITEBACK	XA_MARK_1
+#define PAGECACHE_TAG_TOWRITE	XA_MARK_2
+
 /*
- * Radix-tree tags, for tagging dirty and writeback pages within the pagecache
- * radix trees
+ * Returns true if any of the pages in the mapping are marked with the tag.
  */
-#define PAGECACHE_TAG_DIRTY	0
-#define PAGECACHE_TAG_WRITEBACK	1
-#define PAGECACHE_TAG_TOWRITE	2
-
-int mapping_tagged(struct address_space *mapping, int tag);
+static inline bool mapping_tagged(struct address_space *mapping, xa_mark_t tag)
+{
+	return xa_marked(&mapping->i_pages, tag);
+}
 
 static inline void i_mmap_lock_write(struct address_space *mapping)
 {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 84ae9bf5858a..fc6e5743b0bf 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2097,34 +2097,25 @@ void __init page_writeback_init(void)
  * dirty pages in the file (thus it is important for this function to be quick
  * so that it can tag pages faster than a dirtying process can create them).
  */
-/*
- * We tag pages in batches of WRITEBACK_TAG_BATCH to reduce the i_pages lock
- * latency.
- */
 void tag_pages_for_writeback(struct address_space *mapping,
 			     pgoff_t start, pgoff_t end)
 {
-#define WRITEBACK_TAG_BATCH 4096
-	unsigned long tagged = 0;
-	struct radix_tree_iter iter;
-	void **slot;
+	XA_STATE(xas, &mapping->i_pages, start);
+	unsigned int tagged = 0;
+	void *page;
 
-	xa_lock_irq(&mapping->i_pages);
-	radix_tree_for_each_tagged(slot, &mapping->i_pages, &iter, start,
-							PAGECACHE_TAG_DIRTY) {
-		if (iter.index > end)
-			break;
-		radix_tree_iter_tag_set(&mapping->i_pages, &iter,
-							PAGECACHE_TAG_TOWRITE);
-		tagged++;
-		if ((tagged % WRITEBACK_TAG_BATCH) != 0)
+	xas_lock_irq(&xas);
+	xas_for_each_marked(&xas, page, end, PAGECACHE_TAG_DIRTY) {
+		xas_set_mark(&xas, PAGECACHE_TAG_TOWRITE);
+		if (++tagged % XA_CHECK_SCHED)
 			continue;
-		slot = radix_tree_iter_resume(slot, &iter);
-		xa_unlock_irq(&mapping->i_pages);
+
+		xas_pause(&xas);
+		xas_unlock_irq(&xas);
 		cond_resched();
-		xa_lock_irq(&mapping->i_pages);
+		xas_lock_irq(&xas);
 	}
-	xa_unlock_irq(&mapping->i_pages);
+	xas_unlock_irq(&xas);
 }
 EXPORT_SYMBOL(tag_pages_for_writeback);
 
@@ -2164,7 +2155,7 @@ int write_cache_pages(struct address_space *mapping,
 	pgoff_t done_index;
 	int cycled;
 	int range_whole = 0;
-	int tag;
+	xa_mark_t tag;
 
 	pagevec_init(&pvec);
 	if (wbc->range_cyclic) {
@@ -2445,7 +2436,7 @@ void account_page_cleaned(struct page *page, struct address_space *mapping,
 
 /*
  * For address_spaces which do not use buffers.  Just tag the page as dirty in
- * its radix tree.
+ * the xarray.
  *
  * This is also used when a single buffer is being dirtied: we want to set the
  * page dirty in that case, but not all the buffers.  This is a "bottom-up"
@@ -2471,7 +2462,7 @@ int __set_page_dirty_nobuffers(struct page *page)
 		BUG_ON(page_mapping(page) != mapping);
 		WARN_ON_ONCE(!PagePrivate(page) && !PageUptodate(page));
 		account_page_dirtied(page, mapping);
-		radix_tree_tag_set(&mapping->i_pages, page_index(page),
+		__xa_set_mark(&mapping->i_pages, page_index(page),
 				   PAGECACHE_TAG_DIRTY);
 		xa_unlock_irqrestore(&mapping->i_pages, flags);
 		unlock_page_memcg(page);
@@ -2634,13 +2625,13 @@ EXPORT_SYMBOL(__cancel_dirty_page);
  * Returns true if the page was previously dirty.
  *
  * This is for preparing to put the page under writeout.  We leave the page
- * tagged as dirty in the radix tree so that a concurrent write-for-sync
+ * tagged as dirty in the xarray so that a concurrent write-for-sync
  * can discover it via a PAGECACHE_TAG_DIRTY walk.  The ->writepage
  * implementation will run either set_page_writeback() or set_page_dirty(),
- * at which stage we bring the page's dirty flag and radix-tree dirty tag
+ * at which stage we bring the page's dirty flag and xarray dirty tag
  * back into sync.
  *
- * This incoherency between the page's dirty flag and radix-tree tag is
+ * This incoherency between the page's dirty flag and xarray tag is
  * unfortunate, but it only exists while the page is locked.
  */
 int clear_page_dirty_for_io(struct page *page)
@@ -2721,7 +2712,7 @@ int test_clear_page_writeback(struct page *page)
 		xa_lock_irqsave(&mapping->i_pages, flags);
 		ret = TestClearPageWriteback(page);
 		if (ret) {
-			radix_tree_tag_clear(&mapping->i_pages, page_index(page),
+			__xa_clear_mark(&mapping->i_pages, page_index(page),
 						PAGECACHE_TAG_WRITEBACK);
 			if (bdi_cap_account_writeback(bdi)) {
 				struct bdi_writeback *wb = inode_to_wb(inode);
@@ -2761,11 +2752,13 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 
 	lock_page_memcg(page);
 	if (mapping && mapping_use_writeback_tags(mapping)) {
+		XA_STATE(xas, &mapping->i_pages, page_index(page));
 		struct inode *inode = mapping->host;
 		struct backing_dev_info *bdi = inode_to_bdi(inode);
 		unsigned long flags;
 
-		xa_lock_irqsave(&mapping->i_pages, flags);
+		xas_lock_irqsave(&xas, flags);
+		xas_load(&xas);
 		ret = TestSetPageWriteback(page);
 		if (!ret) {
 			bool on_wblist;
@@ -2773,8 +2766,7 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 			on_wblist = mapping_tagged(mapping,
 						   PAGECACHE_TAG_WRITEBACK);
 
-			radix_tree_tag_set(&mapping->i_pages, page_index(page),
-						PAGECACHE_TAG_WRITEBACK);
+			xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
 			if (bdi_cap_account_writeback(bdi))
 				inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
 
@@ -2787,12 +2779,10 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 				sb_mark_inode_writeback(mapping->host);
 		}
 		if (!PageDirty(page))
-			radix_tree_tag_clear(&mapping->i_pages, page_index(page),
-						PAGECACHE_TAG_DIRTY);
+			xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
 		if (!keep_write)
-			radix_tree_tag_clear(&mapping->i_pages, page_index(page),
-						PAGECACHE_TAG_TOWRITE);
-		xa_unlock_irqrestore(&mapping->i_pages, flags);
+			xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
+		xas_unlock_irqrestore(&xas, flags);
 	} else {
 		ret = TestSetPageWriteback(page);
 	}
@@ -2806,16 +2796,6 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
 }
 EXPORT_SYMBOL(__test_set_page_writeback);
 
-/*
- * Return true if any of the pages in the mapping are marked with the
- * passed tag.
- */
-int mapping_tagged(struct address_space *mapping, int tag)
-{
-	return radix_tree_tagged(&mapping->i_pages, tag);
-}
-EXPORT_SYMBOL(mapping_tagged);
-
 /**
  * wait_for_stable_page() - wait for writeback to finish, if necessary.
  * @page:	The page to wait on.
-- 
cgit v1.2.3


From a97e7904c0806309fd77103005bb7820c3f1c5e4 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 24 Nov 2017 14:24:59 -0500
Subject: mm: Convert workingset to XArray

We construct an XA_STATE and use it to delete the node with
xas_store() rather than adding a special function for this unique
use case.  Includes a test that simulates this usage for the
test suite.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/swap.h |  9 --------
 lib/test_xarray.c    | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/workingset.c      | 51 +++++++++++++++++------------------------
 3 files changed, 86 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 6ea50cf41b4c..112cebcf0402 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -306,15 +306,6 @@ void workingset_update_node(struct xa_node *node);
 		xas_set_update(xas, workingset_update_node);		\
 } while (0)
 
-/* Returns workingset_update_node() if the mapping has shadow entries. */
-#define workingset_lookup_update(mapping)				\
-({									\
-	radix_tree_update_node_t __helper = workingset_update_node;	\
-	if (dax_mapping(mapping) || shmem_mapping(mapping))		\
-		__helper = NULL;					\
-	__helper;							\
-})
-
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
 extern unsigned long totalreserve_pages;
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index a752e6a37e6f..128c6489082f 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -863,6 +863,67 @@ static noinline void check_create_range(struct xarray *xa)
 	check_create_range_3();
 }
 
+static LIST_HEAD(shadow_nodes);
+
+static void test_update_node(struct xa_node *node)
+{
+	if (node->count && node->count == node->nr_values) {
+		if (list_empty(&node->private_list))
+			list_add(&shadow_nodes, &node->private_list);
+	} else {
+		if (!list_empty(&node->private_list))
+			list_del_init(&node->private_list);
+	}
+}
+
+static noinline void shadow_remove(struct xarray *xa)
+{
+	struct xa_node *node;
+
+	xa_lock(xa);
+	while ((node = list_first_entry_or_null(&shadow_nodes,
+					struct xa_node, private_list))) {
+		XA_STATE(xas, node->array, 0);
+		XA_BUG_ON(xa, node->array != xa);
+		list_del_init(&node->private_list);
+		xas.xa_node = xa_parent_locked(node->array, node);
+		xas.xa_offset = node->offset;
+		xas.xa_shift = node->shift + XA_CHUNK_SHIFT;
+		xas_set_update(&xas, test_update_node);
+		xas_store(&xas, NULL);
+	}
+	xa_unlock(xa);
+}
+
+static noinline void check_workingset(struct xarray *xa, unsigned long index)
+{
+	XA_STATE(xas, xa, index);
+	xas_set_update(&xas, test_update_node);
+
+	do {
+		xas_lock(&xas);
+		xas_store(&xas, xa_mk_value(0));
+		xas_next(&xas);
+		xas_store(&xas, xa_mk_value(1));
+		xas_unlock(&xas);
+	} while (xas_nomem(&xas, GFP_KERNEL));
+
+	XA_BUG_ON(xa, list_empty(&shadow_nodes));
+
+	xas_lock(&xas);
+	xas_next(&xas);
+	xas_store(&xas, &xas);
+	XA_BUG_ON(xa, !list_empty(&shadow_nodes));
+
+	xas_store(&xas, xa_mk_value(2));
+	xas_unlock(&xas);
+	XA_BUG_ON(xa, list_empty(&shadow_nodes));
+
+	shadow_remove(xa);
+	XA_BUG_ON(xa, !list_empty(&shadow_nodes));
+	XA_BUG_ON(xa, !xa_empty(xa));
+}
+
 static noinline void check_destroy(struct xarray *xa)
 {
 	unsigned long index;
@@ -916,6 +977,10 @@ static int xarray_checks(void)
 	check_create_range(&array);
 	check_store_iter(&array);
 
+	check_workingset(&array, 0);
+	check_workingset(&array, 64);
+	check_workingset(&array, 4096);
+
 	printk("XArray: %u of %u tests passed\n", tests_passed, tests_run);
 	return (tests_run == tests_passed) ? 0 : -EINVAL;
 }
diff --git a/mm/workingset.c b/mm/workingset.c
index c201cbb8c00f..5cfb29ec3fd9 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -148,7 +148,7 @@
  * and activations is maintained (node->inactive_age).
  *
  * On eviction, a snapshot of this counter (along with some bits to
- * identify the node) is stored in the now empty page cache radix tree
+ * identify the node) is stored in the now empty page cache
  * slot of the evicted page.  This is called a shadow entry.
  *
  * On cache misses for which there are shadow entries, an eligible
@@ -162,7 +162,7 @@
 
 /*
  * Eviction timestamps need to be able to cover the full range of
- * actionable refaults. However, bits are tight in the radix tree
+ * actionable refaults. However, bits are tight in the xarray
  * entry, and after storing the identifier for the lruvec there might
  * not be enough left to represent every single actionable refault. In
  * that case, we have to sacrifice granularity for distance, and group
@@ -339,7 +339,7 @@ out:
 
 static struct list_lru shadow_nodes;
 
-void workingset_update_node(struct radix_tree_node *node)
+void workingset_update_node(struct xa_node *node)
 {
 	/*
 	 * Track non-empty nodes that contain only shadow entries;
@@ -368,7 +368,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 	nodes = list_lru_shrink_count(&shadow_nodes, sc);
 
 	/*
-	 * Approximate a reasonable limit for the radix tree nodes
+	 * Approximate a reasonable limit for the nodes
 	 * containing shadow entries. We don't need to keep more
 	 * shadow entries than possible pages on the active list,
 	 * since refault distances bigger than that are dismissed.
@@ -383,11 +383,11 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 	 * worst-case density of 1/8th. Below that, not all eligible
 	 * refaults can be detected anymore.
 	 *
-	 * On 64-bit with 7 radix_tree_nodes per page and 64 slots
+	 * On 64-bit with 7 xa_nodes per page and 64 slots
 	 * each, this will reclaim shadow entries when they consume
 	 * ~1.8% of available memory:
 	 *
-	 * PAGE_SIZE / radix_tree_nodes / node_entries * 8 / PAGE_SIZE
+	 * PAGE_SIZE / xa_nodes / node_entries * 8 / PAGE_SIZE
 	 */
 	if (sc->memcg) {
 		cache = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid,
@@ -396,7 +396,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 		cache = node_page_state(NODE_DATA(sc->nid), NR_ACTIVE_FILE) +
 			node_page_state(NODE_DATA(sc->nid), NR_INACTIVE_FILE);
 	}
-	max_nodes = cache >> (RADIX_TREE_MAP_SHIFT - 3);
+	max_nodes = cache >> (XA_CHUNK_SHIFT - 3);
 
 	if (!nodes)
 		return SHRINK_EMPTY;
@@ -409,11 +409,11 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 static enum lru_status shadow_lru_isolate(struct list_head *item,
 					  struct list_lru_one *lru,
 					  spinlock_t *lru_lock,
-					  void *arg)
+					  void *arg) __must_hold(lru_lock)
 {
+	struct xa_node *node = container_of(item, struct xa_node, private_list);
+	XA_STATE(xas, node->array, 0);
 	struct address_space *mapping;
-	struct radix_tree_node *node;
-	unsigned int i;
 	int ret;
 
 	/*
@@ -421,14 +421,13 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 	 * the shadow node LRU under the i_pages lock and the
 	 * lru_lock.  Because the page cache tree is emptied before
 	 * the inode can be destroyed, holding the lru_lock pins any
-	 * address_space that has radix tree nodes on the LRU.
+	 * address_space that has nodes on the LRU.
 	 *
 	 * We can then safely transition to the i_pages lock to
 	 * pin only the address_space of the particular node we want
 	 * to reclaim, take the node off-LRU, and drop the lru_lock.
 	 */
 
-	node = container_of(item, struct xa_node, private_list);
 	mapping = container_of(node->array, struct address_space, i_pages);
 
 	/* Coming from the list, invert the lock order */
@@ -450,25 +449,17 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 		goto out_invalid;
 	if (WARN_ON_ONCE(node->count != node->nr_values))
 		goto out_invalid;
-	for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
-		if (node->slots[i]) {
-			if (WARN_ON_ONCE(!xa_is_value(node->slots[i])))
-				goto out_invalid;
-			if (WARN_ON_ONCE(!node->nr_values))
-				goto out_invalid;
-			if (WARN_ON_ONCE(!mapping->nrexceptional))
-				goto out_invalid;
-			node->slots[i] = NULL;
-			node->nr_values--;
-			node->count--;
-			mapping->nrexceptional--;
-		}
-	}
-	if (WARN_ON_ONCE(node->nr_values))
-		goto out_invalid;
+	mapping->nrexceptional -= node->nr_values;
+	xas.xa_node = xa_parent_locked(&mapping->i_pages, node);
+	xas.xa_offset = node->offset;
+	xas.xa_shift = node->shift + XA_CHUNK_SHIFT;
+	xas_set_update(&xas, workingset_update_node);
+	/*
+	 * We could store a shadow entry here which was the minimum of the
+	 * shadow entries we were tracking ...
+	 */
+	xas_store(&xas, NULL);
 	inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM);
-	__radix_tree_delete_node(&mapping->i_pages, node,
-				 workingset_lookup_update(mapping));
 
 out_invalid:
 	xa_unlock_irq(&mapping->i_pages);
-- 
cgit v1.2.3


From 4e17ec250fce0eba9b70a91c9622da2748a3ec50 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 29 Nov 2017 08:32:39 -0500
Subject: mm: Convert delete_from_swap_cache to XArray

Both callers of __delete_from_swap_cache have the swp_entry_t already,
so pass that in to make constructing the XA_STATE easier.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/swap.h |  5 +++--
 mm/swap_state.c      | 24 ++++++++++--------------
 mm/vmscan.c          |  2 +-
 3 files changed, 14 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 112cebcf0402..cb479bf5842e 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -403,7 +403,7 @@ extern void show_swap_cache_info(void);
 extern int add_to_swap(struct page *page);
 extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
 extern int __add_to_swap_cache(struct page *page, swp_entry_t entry);
-extern void __delete_from_swap_cache(struct page *);
+extern void __delete_from_swap_cache(struct page *, swp_entry_t entry);
 extern void delete_from_swap_cache(struct page *);
 extern void free_page_and_swap_cache(struct page *);
 extern void free_pages_and_swap_cache(struct page **, int);
@@ -557,7 +557,8 @@ static inline int add_to_swap_cache(struct page *page, swp_entry_t entry,
 	return -1;
 }
 
-static inline void __delete_from_swap_cache(struct page *page)
+static inline void __delete_from_swap_cache(struct page *page,
+							swp_entry_t entry)
 {
 }
 
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 850314c2c3ab..f393c994cc60 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -154,23 +154,22 @@ unlock:
  * This must be called only on pages that have
  * been verified to be in the swap cache.
  */
-void __delete_from_swap_cache(struct page *page)
+void __delete_from_swap_cache(struct page *page, swp_entry_t entry)
 {
-	struct address_space *address_space;
+	struct address_space *address_space = swap_address_space(entry);
 	int i, nr = hpage_nr_pages(page);
-	swp_entry_t entry;
-	pgoff_t idx;
+	pgoff_t idx = swp_offset(entry);
+	XA_STATE(xas, &address_space->i_pages, idx);
 
 	VM_BUG_ON_PAGE(!PageLocked(page), page);
 	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
 	VM_BUG_ON_PAGE(PageWriteback(page), page);
 
-	entry.val = page_private(page);
-	address_space = swap_address_space(entry);
-	idx = swp_offset(entry);
 	for (i = 0; i < nr; i++) {
-		radix_tree_delete(&address_space->i_pages, idx + i);
+		void *entry = xas_store(&xas, NULL);
+		VM_BUG_ON_PAGE(entry != page + i, entry);
 		set_page_private(page + i, 0);
+		xas_next(&xas);
 	}
 	ClearPageSwapCache(page);
 	address_space->nrpages -= nr;
@@ -243,14 +242,11 @@ fail:
  */
 void delete_from_swap_cache(struct page *page)
 {
-	swp_entry_t entry;
-	struct address_space *address_space;
-
-	entry.val = page_private(page);
+	swp_entry_t entry = { .val = page_private(page) };
+	struct address_space *address_space = swap_address_space(entry);
 
-	address_space = swap_address_space(entry);
 	xa_lock_irq(&address_space->i_pages);
-	__delete_from_swap_cache(page);
+	__delete_from_swap_cache(page, entry);
 	xa_unlock_irq(&address_space->i_pages);
 
 	put_swap_page(page, entry);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c7ce2c161225..80f731cf974e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -923,7 +923,7 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
 	if (PageSwapCache(page)) {
 		swp_entry_t swap = { .val = page_private(page) };
 		mem_cgroup_swapout(page, swap);
-		__delete_from_swap_cache(page);
+		__delete_from_swap_cache(page, swap);
 		xa_unlock_irqrestore(&mapping->i_pages, flags);
 		put_swap_page(page, swap);
 	} else {
-- 
cgit v1.2.3


From 10bbd235859bf483f9a8a4ebe95463d700bae394 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Tue, 5 Dec 2017 17:30:38 -0500
Subject: pagevec: Use xa_mark_t

Removes sparse warnings.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 fs/btrfs/extent_io.c    | 4 ++--
 fs/ext4/inode.c         | 2 +-
 fs/f2fs/data.c          | 2 +-
 fs/gfs2/aops.c          | 2 +-
 include/linux/pagevec.h | 8 +++++---
 mm/swap.c               | 4 ++--
 6 files changed, 12 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4dd6faab02bb..fc7ca7d991ad 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3778,7 +3778,7 @@ int btree_write_cache_pages(struct address_space *mapping,
 	pgoff_t index;
 	pgoff_t end;		/* Inclusive */
 	int scanned = 0;
-	int tag;
+	xa_mark_t tag;
 
 	pagevec_init(&pvec);
 	if (wbc->range_cyclic) {
@@ -3903,7 +3903,7 @@ static int extent_write_cache_pages(struct address_space *mapping,
 	pgoff_t done_index;
 	int range_whole = 0;
 	int scanned = 0;
-	int tag;
+	xa_mark_t tag;
 
 	/*
 	 * We have to hold onto the inode so that ordered extents can do their
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d767e993591d..57bad3edfbed 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2613,7 +2613,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
 	long left = mpd->wbc->nr_to_write;
 	pgoff_t index = mpd->first_page;
 	pgoff_t end = mpd->last_page;
-	int tag;
+	xa_mark_t tag;
 	int i, err = 0;
 	int blkbits = mpd->inode->i_blkbits;
 	ext4_lblk_t lblk;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 382c1ef9a9e4..5b760809eecc 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2003,7 +2003,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
 	pgoff_t last_idx = ULONG_MAX;
 	int cycled;
 	int range_whole = 0;
-	int tag;
+	xa_mark_t tag;
 
 	pagevec_init(&pvec);
 
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 31e8270d0b26..8afbb35559b9 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -366,7 +366,7 @@ static int gfs2_write_cache_jdata(struct address_space *mapping,
 	pgoff_t done_index;
 	int cycled;
 	int range_whole = 0;
-	int tag;
+	xa_mark_t tag;
 
 	pagevec_init(&pvec);
 	if (wbc->range_cyclic) {
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 6dc456ac6136..081d934eda64 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -9,6 +9,8 @@
 #ifndef _LINUX_PAGEVEC_H
 #define _LINUX_PAGEVEC_H
 
+#include <linux/xarray.h>
+
 /* 15 pointers + header align the pagevec structure to a power of two */
 #define PAGEVEC_SIZE	15
 
@@ -40,12 +42,12 @@ static inline unsigned pagevec_lookup(struct pagevec *pvec,
 
 unsigned pagevec_lookup_range_tag(struct pagevec *pvec,
 		struct address_space *mapping, pgoff_t *index, pgoff_t end,
-		int tag);
+		xa_mark_t tag);
 unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec,
 		struct address_space *mapping, pgoff_t *index, pgoff_t end,
-		int tag, unsigned max_pages);
+		xa_mark_t tag, unsigned max_pages);
 static inline unsigned pagevec_lookup_tag(struct pagevec *pvec,
-		struct address_space *mapping, pgoff_t *index, int tag)
+		struct address_space *mapping, pgoff_t *index, xa_mark_t tag)
 {
 	return pagevec_lookup_range_tag(pvec, mapping, index, (pgoff_t)-1, tag);
 }
diff --git a/mm/swap.c b/mm/swap.c
index 4c5c7fcc6e46..6861f3140a13 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -1002,7 +1002,7 @@ EXPORT_SYMBOL(pagevec_lookup_range);
 
 unsigned pagevec_lookup_range_tag(struct pagevec *pvec,
 		struct address_space *mapping, pgoff_t *index, pgoff_t end,
-		int tag)
+		xa_mark_t tag)
 {
 	pvec->nr = find_get_pages_range_tag(mapping, index, end, tag,
 					PAGEVEC_SIZE, pvec->pages);
@@ -1012,7 +1012,7 @@ EXPORT_SYMBOL(pagevec_lookup_range_tag);
 
 unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec,
 		struct address_space *mapping, pgoff_t *index, pgoff_t end,
-		int tag, unsigned max_pages)
+		xa_mark_t tag, unsigned max_pages)
 {
 	pvec->nr = find_get_pages_range_tag(mapping, index, end, tag,
 		min_t(unsigned int, max_pages, PAGEVEC_SIZE), pvec->pages);
-- 
cgit v1.2.3


From 7b8d046fba91d62beb8a8f78244aaa3c23a60cdd Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 1 Dec 2017 22:13:06 -0500
Subject: shmem: Convert shmem_alloc_hugepage to XArray

xa_find() is a slightly easier API to use than
radix_tree_gang_lookup_slot() because it contains its own RCU locking.
This commit removes the last user of radix_tree_gang_lookup_slot()
so remove the function too.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/radix-tree.h |  6 +-----
 lib/radix-tree.c           | 44 +-------------------------------------------
 mm/shmem.c                 | 14 ++++----------
 3 files changed, 6 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 023e888e1163..8a4280bc350f 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -147,12 +147,11 @@ static inline unsigned int iter_shift(const struct radix_tree_iter *iter)
  * radix_tree_lookup_slot
  * radix_tree_tag_get
  * radix_tree_gang_lookup
- * radix_tree_gang_lookup_slot
  * radix_tree_gang_lookup_tag
  * radix_tree_gang_lookup_tag_slot
  * radix_tree_tagged
  *
- * The first 8 functions are able to be called locklessly, using RCU. The
+ * The first 7 functions are able to be called locklessly, using RCU. The
  * caller must ensure calls to these functions are made within rcu_read_lock()
  * regions. Other readers (lock-free or otherwise) and modifications may be
  * running concurrently.
@@ -263,9 +262,6 @@ void radix_tree_clear_tags(struct radix_tree_root *, struct radix_tree_node *,
 unsigned int radix_tree_gang_lookup(const struct radix_tree_root *,
 			void **results, unsigned long first_index,
 			unsigned int max_items);
-unsigned int radix_tree_gang_lookup_slot(const struct radix_tree_root *,
-			void __rcu ***results, unsigned long *indices,
-			unsigned long first_index, unsigned int max_items);
 int radix_tree_preload(gfp_t gfp_mask);
 int radix_tree_maybe_preload(gfp_t gfp_mask);
 int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 8a58051eb5b3..2f9c0e45eeb7 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1097,7 +1097,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
  * @slot:	pointer to slot
  * @item:	new item to store in the slot.
  *
- * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(),
+ * For use with radix_tree_lookup_slot() and
  * radix_tree_gang_lookup_tag_slot().  Caller must hold tree write locked
  * across slot lookup and replacement.
  *
@@ -1731,48 +1731,6 @@ radix_tree_gang_lookup(const struct radix_tree_root *root, void **results,
 }
 EXPORT_SYMBOL(radix_tree_gang_lookup);
 
-/**
- *	radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
- *	@root:		radix tree root
- *	@results:	where the results of the lookup are placed
- *	@indices:	where their indices should be placed (but usually NULL)
- *	@first_index:	start the lookup from this key
- *	@max_items:	place up to this many items at *results
- *
- *	Performs an index-ascending scan of the tree for present items.  Places
- *	their slots at *@results and returns the number of items which were
- *	placed at *@results.
- *
- *	The implementation is naive.
- *
- *	Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must
- *	be dereferenced with radix_tree_deref_slot, and if using only RCU
- *	protection, radix_tree_deref_slot may fail requiring a retry.
- */
-unsigned int
-radix_tree_gang_lookup_slot(const struct radix_tree_root *root,
-			void __rcu ***results, unsigned long *indices,
-			unsigned long first_index, unsigned int max_items)
-{
-	struct radix_tree_iter iter;
-	void __rcu **slot;
-	unsigned int ret = 0;
-
-	if (unlikely(!max_items))
-		return 0;
-
-	radix_tree_for_each_slot(slot, root, &iter, first_index) {
-		results[ret] = slot;
-		if (indices)
-			indices[ret] = iter.index;
-		if (++ret == max_items)
-			break;
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
-
 /**
  *	radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
  *	                             based on a tag
diff --git a/mm/shmem.c b/mm/shmem.c
index 8633bd3dc433..608c9252248f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1431,23 +1431,17 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 		struct shmem_inode_info *info, pgoff_t index)
 {
 	struct vm_area_struct pvma;
-	struct inode *inode = &info->vfs_inode;
-	struct address_space *mapping = inode->i_mapping;
-	pgoff_t idx, hindex;
-	void __rcu **results;
+	struct address_space *mapping = info->vfs_inode.i_mapping;
+	pgoff_t hindex;
 	struct page *page;
 
 	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGE_PAGECACHE))
 		return NULL;
 
 	hindex = round_down(index, HPAGE_PMD_NR);
-	rcu_read_lock();
-	if (radix_tree_gang_lookup_slot(&mapping->i_pages, &results, &idx,
-				hindex, 1) && idx < hindex + HPAGE_PMD_NR) {
-		rcu_read_unlock();
+	if (xa_find(&mapping->i_pages, &hindex, hindex + HPAGE_PMD_NR - 1,
+								XA_PRESENT))
 		return NULL;
-	}
-	rcu_read_unlock();
 
 	shmem_pseudo_vma_init(&pvma, info, hindex);
 	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-- 
cgit v1.2.3


From 1cf56f9d670b88b2e947a7ccdb8ba32e6477915d Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 9 Apr 2018 16:24:45 -0400
Subject: radix tree: Remove radix_tree_update_node_t

The only user of this functionality was the workingset code, and it's
now been converted to the XArray.  Remove __radix_tree_delete_node()
entirely as it was also only used by the workingset code.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/radix-tree.h            |  7 +-----
 lib/idr.c                             |  2 +-
 lib/radix-tree.c                      | 42 +++++++----------------------------
 tools/testing/radix-tree/multiorder.c |  2 +-
 4 files changed, 11 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 8a4280bc350f..7513d0df984b 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -242,17 +242,12 @@ void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index,
 void *radix_tree_lookup(const struct radix_tree_root *, unsigned long);
 void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *,
 					unsigned long index);
-typedef void (*radix_tree_update_node_t)(struct radix_tree_node *);
 void __radix_tree_replace(struct radix_tree_root *, struct radix_tree_node *,
-			  void __rcu **slot, void *entry,
-			  radix_tree_update_node_t update_node);
+			  void __rcu **slot, void *entry);
 void radix_tree_iter_replace(struct radix_tree_root *,
 		const struct radix_tree_iter *, void __rcu **slot, void *entry);
 void radix_tree_replace_slot(struct radix_tree_root *,
 			     void __rcu **slot, void *entry);
-void __radix_tree_delete_node(struct radix_tree_root *,
-			      struct radix_tree_node *,
-			      radix_tree_update_node_t update_node);
 void radix_tree_iter_delete(struct radix_tree_root *,
 			struct radix_tree_iter *iter, void __rcu **slot);
 void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
diff --git a/lib/idr.c b/lib/idr.c
index 3c20ad9b0463..cb1db9b8d3f6 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -297,7 +297,7 @@ void *idr_replace(struct idr *idr, void *ptr, unsigned long id)
 	if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE))
 		return ERR_PTR(-ENOENT);
 
-	__radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL);
+	__radix_tree_replace(&idr->idr_rt, node, slot, ptr);
 
 	return entry;
 }
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 2f9c0e45eeb7..c4c252185734 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -562,8 +562,7 @@ out:
  *	radix_tree_shrink    -    shrink radix tree to minimum height
  *	@root		radix tree root
  */
-static inline bool radix_tree_shrink(struct radix_tree_root *root,
-				     radix_tree_update_node_t update_node)
+static inline bool radix_tree_shrink(struct radix_tree_root *root)
 {
 	bool shrunk = false;
 
@@ -631,8 +630,6 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
 		node->count = 0;
 		if (!radix_tree_is_internal_node(child)) {
 			node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
-			if (update_node)
-				update_node(node);
 		}
 
 		WARN_ON_ONCE(!list_empty(&node->private_list));
@@ -644,8 +641,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
 }
 
 static bool delete_node(struct radix_tree_root *root,
-			struct radix_tree_node *node,
-			radix_tree_update_node_t update_node)
+			struct radix_tree_node *node)
 {
 	bool deleted = false;
 
@@ -655,7 +651,7 @@ static bool delete_node(struct radix_tree_root *root,
 		if (node->count) {
 			if (node_to_entry(node) ==
 					rcu_dereference_raw(root->xa_head))
-				deleted |= radix_tree_shrink(root, update_node);
+				deleted |= radix_tree_shrink(root);
 			return deleted;
 		}
 
@@ -1059,15 +1055,13 @@ static int calculate_count(struct radix_tree_root *root,
  * @node:		pointer to tree node
  * @slot:		pointer to slot in @node
  * @item:		new item to store in the slot.
- * @update_node:	callback for changing leaf nodes
  *
  * For use with __radix_tree_lookup().  Caller must hold tree write locked
  * across slot lookup and replacement.
  */
 void __radix_tree_replace(struct radix_tree_root *root,
 			  struct radix_tree_node *node,
-			  void __rcu **slot, void *item,
-			  radix_tree_update_node_t update_node)
+			  void __rcu **slot, void *item)
 {
 	void *old = rcu_dereference_raw(*slot);
 	int values = !!xa_is_value(item) - !!xa_is_value(old);
@@ -1085,10 +1079,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
 	if (!node)
 		return;
 
-	if (update_node)
-		update_node(node);
-
-	delete_node(root, node, update_node);
+	delete_node(root, node);
 }
 
 /**
@@ -1110,7 +1101,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
 void radix_tree_replace_slot(struct radix_tree_root *root,
 			     void __rcu **slot, void *item)
 {
-	__radix_tree_replace(root, NULL, slot, item, NULL);
+	__radix_tree_replace(root, NULL, slot, item);
 }
 EXPORT_SYMBOL(radix_tree_replace_slot);
 
@@ -1127,7 +1118,7 @@ void radix_tree_iter_replace(struct radix_tree_root *root,
 				const struct radix_tree_iter *iter,
 				void __rcu **slot, void *item)
 {
-	__radix_tree_replace(root, iter->node, slot, item, NULL);
+	__radix_tree_replace(root, iter->node, slot, item);
 }
 
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
@@ -1807,23 +1798,6 @@ radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root,
 }
 EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
 
-/**
- *	__radix_tree_delete_node    -    try to free node after clearing a slot
- *	@root:		radix tree root
- *	@node:		node containing @index
- *	@update_node:	callback for changing leaf nodes
- *
- *	After clearing the slot at @index in @node from radix tree
- *	rooted at @root, call this function to attempt freeing the
- *	node and shrinking the tree.
- */
-void __radix_tree_delete_node(struct radix_tree_root *root,
-			      struct radix_tree_node *node,
-			      radix_tree_update_node_t update_node)
-{
-	delete_node(root, node, update_node);
-}
-
 static bool __radix_tree_delete(struct radix_tree_root *root,
 				struct radix_tree_node *node, void __rcu **slot)
 {
@@ -1839,7 +1813,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root,
 			node_tag_clear(root, node, tag, offset);
 
 	replace_slot(slot, NULL, node, -1, values);
-	return node && delete_node(root, node, NULL);
+	return node && delete_node(root, node);
 }
 
 /**
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 60786fa55302..0e0ff26c9bcb 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -618,7 +618,7 @@ static void multiorder_account(void)
 	__radix_tree_insert(&tree, 1 << 5, 5, xa_mk_value(5));
 	__radix_tree_lookup(&tree, 1 << 5, &node, &slot);
 	assert(node->count == node->nr_values * 2);
-	__radix_tree_replace(&tree, node, slot, NULL, NULL);
+	__radix_tree_replace(&tree, node, slot, NULL);
 	assert(node->nr_values == 0);
 
 	item_kill_tree(&tree);
-- 
cgit v1.2.3


From 2956c6644bfd9aab9f6b21a12e1bd75876d9dd73 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Sat, 19 May 2018 16:47:47 -0400
Subject: radix tree: Remove split/join code

radix_tree_split and radix_tree_join were never used upstream.  Remove
them; if they're needed in future they will be replaced by XArray
equivalents.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/radix-tree.h            |   6 -
 lib/radix-tree.c                      | 171 +----------------------
 tools/testing/radix-tree/benchmark.c  |  91 -------------
 tools/testing/radix-tree/multiorder.c | 247 ----------------------------------
 4 files changed, 2 insertions(+), 513 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 7513d0df984b..44f9abdcb5ab 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -284,12 +284,6 @@ static inline void radix_tree_preload_end(void)
 	preempt_enable();
 }
 
-int radix_tree_split_preload(unsigned old_order, unsigned new_order, gfp_t);
-int radix_tree_split(struct radix_tree_root *, unsigned long index,
-			unsigned new_order);
-int radix_tree_join(struct radix_tree_root *, unsigned long index,
-			unsigned new_order, void *);
-
 void __rcu **idr_get_free(struct radix_tree_root *root,
 			      struct radix_tree_iter *iter, gfp_t gfp,
 			      unsigned long max);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index c4c252185734..c52e0bef5264 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -415,28 +415,6 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(radix_tree_maybe_preload);
 
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-/*
- * Preload with enough objects to ensure that we can split a single entry
- * of order @old_order into many entries of size @new_order
- */
-int radix_tree_split_preload(unsigned int old_order, unsigned int new_order,
-							gfp_t gfp_mask)
-{
-	unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT);
-	unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) -
-				(new_order / RADIX_TREE_MAP_SHIFT);
-	unsigned nr = 0;
-
-	WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
-	BUG_ON(new_order >= old_order);
-
-	while (layers--)
-		nr = nr * RADIX_TREE_MAP_SIZE + 1;
-	return __radix_tree_preload(gfp_mask, top * nr);
-}
-#endif
-
 /*
  * The same as function above, but preload number of nodes required to insert
  * (1 << order) continuous naturally-aligned elements.
@@ -1111,8 +1089,8 @@ EXPORT_SYMBOL(radix_tree_replace_slot);
  * @slot:	pointer to slot
  * @item:	new item to store in the slot.
  *
- * For use with radix_tree_split() and radix_tree_for_each_slot().
- * Caller must hold tree write locked across split and replacement.
+ * For use with radix_tree_for_each_slot().
+ * Caller must hold tree write locked.
  */
 void radix_tree_iter_replace(struct radix_tree_root *root,
 				const struct radix_tree_iter *iter,
@@ -1121,151 +1099,6 @@ void radix_tree_iter_replace(struct radix_tree_root *root,
 	__radix_tree_replace(root, iter->node, slot, item);
 }
 
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-/**
- * radix_tree_join - replace multiple entries with one multiorder entry
- * @root: radix tree root
- * @index: an index inside the new entry
- * @order: order of the new entry
- * @item: new entry
- *
- * Call this function to replace several entries with one larger entry.
- * The existing entries are presumed to not need freeing as a result of
- * this call.
- *
- * The replacement entry will have all the tags set on it that were set
- * on any of the entries it is replacing.
- */
-int radix_tree_join(struct radix_tree_root *root, unsigned long index,
-			unsigned order, void *item)
-{
-	struct radix_tree_node *node;
-	void __rcu **slot;
-	int error;
-
-	BUG_ON(radix_tree_is_internal_node(item));
-
-	error = __radix_tree_create(root, index, order, &node, &slot);
-	if (!error)
-		error = insert_entries(node, slot, item, order, true);
-	if (error > 0)
-		error = 0;
-
-	return error;
-}
-
-/**
- * radix_tree_split - Split an entry into smaller entries
- * @root: radix tree root
- * @index: An index within the large entry
- * @order: Order of new entries
- *
- * Call this function as the first step in replacing a multiorder entry
- * with several entries of lower order.  After this function returns,
- * loop over the relevant portion of the tree using radix_tree_for_each_slot()
- * and call radix_tree_iter_replace() to set up each new entry.
- *
- * The tags from this entry are replicated to all the new entries.
- *
- * The radix tree should be locked against modification during the entire
- * replacement operation.  Lock-free lookups will see RADIX_TREE_RETRY which
- * should prompt RCU walkers to restart the lookup from the root.
- */
-int radix_tree_split(struct radix_tree_root *root, unsigned long index,
-				unsigned order)
-{
-	struct radix_tree_node *parent, *node, *child;
-	void __rcu **slot;
-	unsigned int offset, end;
-	unsigned n, tag, tags = 0;
-	gfp_t gfp = root_gfp_mask(root);
-
-	if (!__radix_tree_lookup(root, index, &parent, &slot))
-		return -ENOENT;
-	if (!parent)
-		return -ENOENT;
-
-	offset = get_slot_offset(parent, slot);
-
-	for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
-		if (tag_get(parent, tag, offset))
-			tags |= 1 << tag;
-
-	for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
-		if (!xa_is_sibling(rcu_dereference_raw(parent->slots[end])))
-			break;
-		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
-			if (tags & (1 << tag))
-				tag_set(parent, tag, end);
-		/* rcu_assign_pointer ensures tags are set before RETRY */
-		rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY);
-	}
-	rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY);
-	parent->nr_values -= (end - offset);
-
-	if (order == parent->shift)
-		return 0;
-	if (order > parent->shift) {
-		while (offset < end)
-			offset += insert_entries(parent, &parent->slots[offset],
-					RADIX_TREE_RETRY, order, true);
-		return 0;
-	}
-
-	node = parent;
-
-	for (;;) {
-		if (node->shift > order) {
-			child = radix_tree_node_alloc(gfp, node, root,
-					node->shift - RADIX_TREE_MAP_SHIFT,
-					offset, 0, 0);
-			if (!child)
-				goto nomem;
-			if (node != parent) {
-				node->count++;
-				rcu_assign_pointer(node->slots[offset],
-							node_to_entry(child));
-				for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
-					if (tags & (1 << tag))
-						tag_set(node, tag, offset);
-			}
-
-			node = child;
-			offset = 0;
-			continue;
-		}
-
-		n = insert_entries(node, &node->slots[offset],
-					RADIX_TREE_RETRY, order, false);
-		BUG_ON(n > RADIX_TREE_MAP_SIZE);
-
-		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
-			if (tags & (1 << tag))
-				tag_set(node, tag, offset);
-		offset += n;
-
-		while (offset == RADIX_TREE_MAP_SIZE) {
-			if (node == parent)
-				break;
-			offset = node->offset;
-			child = node;
-			node = node->parent;
-			rcu_assign_pointer(node->slots[offset],
-						node_to_entry(child));
-			offset++;
-		}
-		if ((node == parent) && (offset == end))
-			return 0;
-	}
-
- nomem:
-	/* Shouldn't happen; did user forget to preload? */
-	/* TODO: free all the allocated nodes */
-	WARN_ON(1);
-	return -ENOMEM;
-}
-#endif
-
 static void node_tag_set(struct radix_tree_root *root,
 				struct radix_tree_node *node,
 				unsigned int tag, unsigned int offset)
diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c
index 99c40f3ed133..35741b9c2a4a 100644
--- a/tools/testing/radix-tree/benchmark.c
+++ b/tools/testing/radix-tree/benchmark.c
@@ -146,90 +146,6 @@ static void benchmark_size(unsigned long size, unsigned long step, int order)
 	rcu_barrier();
 }
 
-static long long  __benchmark_split(unsigned long index,
-				    int old_order, int new_order)
-{
-	struct timespec start, finish;
-	long long nsec;
-	RADIX_TREE(tree, GFP_ATOMIC);
-
-	item_insert_order(&tree, index, old_order);
-
-	clock_gettime(CLOCK_MONOTONIC, &start);
-	radix_tree_split(&tree, index, new_order);
-	clock_gettime(CLOCK_MONOTONIC, &finish);
-	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
-	       (finish.tv_nsec - start.tv_nsec);
-
-	item_kill_tree(&tree);
-
-	return nsec;
-
-}
-
-static void benchmark_split(unsigned long size, unsigned long step)
-{
-	int i, j, idx;
-	long long nsec = 0;
-
-
-	for (idx = 0; idx < size; idx += step) {
-		for (i = 3; i < 11; i++) {
-			for (j = 0; j < i; j++) {
-				nsec += __benchmark_split(idx, i, j);
-			}
-		}
-	}
-
-	printv(2, "Size %8ld, step %8ld, split time %10lld ns\n",
-			size, step, nsec);
-
-}
-
-static long long  __benchmark_join(unsigned long index,
-			     unsigned order1, unsigned order2)
-{
-	unsigned long loc;
-	struct timespec start, finish;
-	long long nsec;
-	void *item, *item2 = item_create(index + 1, order1);
-	RADIX_TREE(tree, GFP_KERNEL);
-
-	item_insert_order(&tree, index, order2);
-	item = radix_tree_lookup(&tree, index);
-
-	clock_gettime(CLOCK_MONOTONIC, &start);
-	radix_tree_join(&tree, index + 1, order1, item2);
-	clock_gettime(CLOCK_MONOTONIC, &finish);
-	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
-		(finish.tv_nsec - start.tv_nsec);
-
-	loc = find_item(&tree, item);
-	if (loc == -1)
-		free(item);
-
-	item_kill_tree(&tree);
-
-	return nsec;
-}
-
-static void benchmark_join(unsigned long step)
-{
-	int i, j, idx;
-	long long nsec = 0;
-
-	for (idx = 0; idx < 1 << 10; idx += step) {
-		for (i = 1; i < 15; i++) {
-			for (j = 0; j < i; j++) {
-				nsec += __benchmark_join(idx, i, j);
-			}
-		}
-	}
-
-	printv(2, "Size %8d, step %8ld, join time %10lld ns\n",
-			1 << 10, step, nsec);
-}
-
 void benchmark(void)
 {
 	unsigned long size[] = {1 << 10, 1 << 20, 0};
@@ -247,11 +163,4 @@ void benchmark(void)
 	for (c = 0; size[c]; c++)
 		for (s = 0; step[s]; s++)
 			benchmark_size(size[c], step[s] << 9, 9);
-
-	for (c = 0; size[c]; c++)
-		for (s = 0; step[s]; s++)
-			benchmark_split(size[c], step[s]);
-
-	for (s = 0; step[s]; s++)
-		benchmark_join(step[s]);
 }
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 0e0ff26c9bcb..221e042d1b89 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -356,251 +356,6 @@ void multiorder_tagged_iteration(void)
 	item_kill_tree(&tree);
 }
 
-/*
- * Basic join checks: make sure we can't find an entry in the tree after
- * a larger entry has replaced it
- */
-static void multiorder_join1(unsigned long index,
-				unsigned order1, unsigned order2)
-{
-	unsigned long loc;
-	void *item, *item2 = item_create(index + 1, order1);
-	RADIX_TREE(tree, GFP_KERNEL);
-
-	item_insert_order(&tree, index, order2);
-	item = radix_tree_lookup(&tree, index);
-	radix_tree_join(&tree, index + 1, order1, item2);
-	loc = find_item(&tree, item);
-	if (loc == -1)
-		free(item);
-	item = radix_tree_lookup(&tree, index + 1);
-	assert(item == item2);
-	item_kill_tree(&tree);
-}
-
-/*
- * Check that the accounting of value entries is handled correctly
- * by joining a value entry to a normal pointer.
- */
-static void multiorder_join2(unsigned order1, unsigned order2)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	struct radix_tree_node *node;
-	void *item1 = item_create(0, order1);
-	void *item2;
-
-	item_insert_order(&tree, 0, order2);
-	radix_tree_insert(&tree, 1 << order2, xa_mk_value(5));
-	item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
-	assert(item2 == xa_mk_value(5));
-	assert(node->nr_values == 1);
-
-	item2 = radix_tree_lookup(&tree, 0);
-	free(item2);
-
-	radix_tree_join(&tree, 0, order1, item1);
-	item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
-	assert(item2 == item1);
-	assert(node->nr_values == 0);
-	item_kill_tree(&tree);
-}
-
-/*
- * This test revealed an accounting bug for value entries at one point.
- * Nodes were being freed back into the pool with an elevated exception count
- * by radix_tree_join() and then radix_tree_split() was failing to zero the
- * count of value entries.
- */
-static void multiorder_join3(unsigned int order)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	struct radix_tree_node *node;
-	void **slot;
-	struct radix_tree_iter iter;
-	unsigned long i;
-
-	for (i = 0; i < (1 << order); i++) {
-		radix_tree_insert(&tree, i, xa_mk_value(5));
-	}
-
-	radix_tree_join(&tree, 0, order, xa_mk_value(7));
-	rcu_barrier();
-
-	radix_tree_split(&tree, 0, 0);
-
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		radix_tree_iter_replace(&tree, &iter, slot, xa_mk_value(5));
-	}
-
-	__radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(node->nr_values == node->count);
-
-	item_kill_tree(&tree);
-}
-
-static void multiorder_join(void)
-{
-	int i, j, idx;
-
-	for (idx = 0; idx < 1024; idx = idx * 2 + 3) {
-		for (i = 1; i < 15; i++) {
-			for (j = 0; j < i; j++) {
-				multiorder_join1(idx, i, j);
-			}
-		}
-	}
-
-	for (i = 1; i < 15; i++) {
-		for (j = 0; j < i; j++) {
-			multiorder_join2(i, j);
-		}
-	}
-
-	for (i = 3; i < 10; i++) {
-		multiorder_join3(i);
-	}
-}
-
-static void check_mem(unsigned old_order, unsigned new_order, unsigned alloc)
-{
-	struct radix_tree_preload *rtp = &radix_tree_preloads;
-	if (rtp->nr != 0)
-		printv(2, "split(%u %u) remaining %u\n", old_order, new_order,
-							rtp->nr);
-	/*
-	 * Can't check for equality here as some nodes may have been
-	 * RCU-freed while we ran.  But we should never finish with more
-	 * nodes allocated since they should have all been preloaded.
-	 */
-	if (nr_allocated > alloc)
-		printv(2, "split(%u %u) allocated %u %u\n", old_order, new_order,
-							alloc, nr_allocated);
-}
-
-static void __multiorder_split(int old_order, int new_order)
-{
-	RADIX_TREE(tree, GFP_ATOMIC);
-	void **slot;
-	struct radix_tree_iter iter;
-	unsigned alloc;
-	struct item *item;
-
-	radix_tree_preload(GFP_KERNEL);
-	assert(item_insert_order(&tree, 0, old_order) == 0);
-	radix_tree_preload_end();
-
-	/* Wipe out the preloaded cache or it'll confuse check_mem() */
-	radix_tree_cpu_dead(0);
-
-	item = radix_tree_tag_set(&tree, 0, 2);
-
-	radix_tree_split_preload(old_order, new_order, GFP_KERNEL);
-	alloc = nr_allocated;
-	radix_tree_split(&tree, 0, new_order);
-	check_mem(old_order, new_order, alloc);
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		radix_tree_iter_replace(&tree, &iter, slot,
-					item_create(iter.index, new_order));
-	}
-	radix_tree_preload_end();
-
-	item_kill_tree(&tree);
-	free(item);
-}
-
-static void __multiorder_split2(int old_order, int new_order)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	void **slot;
-	struct radix_tree_iter iter;
-	struct radix_tree_node *node;
-	void *item;
-
-	__radix_tree_insert(&tree, 0, old_order, xa_mk_value(5));
-
-	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item == xa_mk_value(5));
-	assert(node->nr_values > 0);
-
-	radix_tree_split(&tree, 0, new_order);
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		radix_tree_iter_replace(&tree, &iter, slot,
-					item_create(iter.index, new_order));
-	}
-
-	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item != xa_mk_value(5));
-	assert(node->nr_values == 0);
-
-	item_kill_tree(&tree);
-}
-
-static void __multiorder_split3(int old_order, int new_order)
-{
-	RADIX_TREE(tree, GFP_KERNEL);
-	void **slot;
-	struct radix_tree_iter iter;
-	struct radix_tree_node *node;
-	void *item;
-
-	__radix_tree_insert(&tree, 0, old_order, xa_mk_value(5));
-
-	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item == xa_mk_value(5));
-	assert(node->nr_values > 0);
-
-	radix_tree_split(&tree, 0, new_order);
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		radix_tree_iter_replace(&tree, &iter, slot, xa_mk_value(7));
-	}
-
-	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item == xa_mk_value(7));
-	assert(node->nr_values > 0);
-
-	item_kill_tree(&tree);
-
-	__radix_tree_insert(&tree, 0, old_order, xa_mk_value(5));
-
-	item = __radix_tree_lookup(&tree, 0, &node, NULL);
-	assert(item == xa_mk_value(5));
-	assert(node->nr_values > 0);
-
-	radix_tree_split(&tree, 0, new_order);
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		if (iter.index == (1 << new_order))
-			radix_tree_iter_replace(&tree, &iter, slot,
-						xa_mk_value(7));
-		else
-			radix_tree_iter_replace(&tree, &iter, slot, NULL);
-	}
-
-	item = __radix_tree_lookup(&tree, 1 << new_order, &node, NULL);
-	assert(item == xa_mk_value(7));
-	assert(node->count == node->nr_values);
-	do {
-		node = node->parent;
-		if (!node)
-			break;
-		assert(node->count == 1);
-		assert(node->nr_values == 0);
-	} while (1);
-
-	item_kill_tree(&tree);
-}
-
-static void multiorder_split(void)
-{
-	int i, j;
-
-	for (i = 3; i < 11; i++)
-		for (j = 0; j < i; j++) {
-			__multiorder_split(i, j);
-			__multiorder_split2(i, j);
-			__multiorder_split3(i, j);
-		}
-}
-
 static void multiorder_account(void)
 {
 	RADIX_TREE(tree, GFP_KERNEL);
@@ -702,8 +457,6 @@ void multiorder_checks(void)
 	multiorder_tag_tests();
 	multiorder_iteration();
 	multiorder_tagged_iteration();
-	multiorder_join();
-	multiorder_split();
 	multiorder_account();
 	multiorder_iteration_race();
 
-- 
cgit v1.2.3


From 8cf2f98411e3a0865026a1061af637161b16d32b Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Sat, 19 May 2018 16:47:47 -0400
Subject: radix tree: Remove radix_tree_maybe_preload_order

This function was only used by the page cache which is now converted
to the XArray.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/radix-tree.h |  1 -
 lib/radix-tree.c           | 74 ----------------------------------------------
 2 files changed, 75 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 44f9abdcb5ab..664d50c5c2ff 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -259,7 +259,6 @@ unsigned int radix_tree_gang_lookup(const struct radix_tree_root *,
 			unsigned int max_items);
 int radix_tree_preload(gfp_t gfp_mask);
 int radix_tree_maybe_preload(gfp_t gfp_mask);
-int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order);
 void radix_tree_init(void);
 void *radix_tree_tag_set(struct radix_tree_root *,
 			unsigned long index, unsigned int tag);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index c52e0bef5264..b57ddc3dbbbd 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -41,9 +41,6 @@
 #include <linux/xarray.h>
 
 
-/* Number of nodes in fully populated tree of given height */
-static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;
-
 /*
  * Radix tree node cache.
  */
@@ -415,51 +412,6 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(radix_tree_maybe_preload);
 
-/*
- * The same as function above, but preload number of nodes required to insert
- * (1 << order) continuous naturally-aligned elements.
- */
-int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
-{
-	unsigned long nr_subtrees;
-	int nr_nodes, subtree_height;
-
-	/* Preloading doesn't help anything with this gfp mask, skip it */
-	if (!gfpflags_allow_blocking(gfp_mask)) {
-		preempt_disable();
-		return 0;
-	}
-
-	/*
-	 * Calculate number and height of fully populated subtrees it takes to
-	 * store (1 << order) elements.
-	 */
-	nr_subtrees = 1 << order;
-	for (subtree_height = 0; nr_subtrees > RADIX_TREE_MAP_SIZE;
-			subtree_height++)
-		nr_subtrees >>= RADIX_TREE_MAP_SHIFT;
-
-	/*
-	 * The worst case is zero height tree with a single item at index 0 and
-	 * then inserting items starting at ULONG_MAX - (1 << order).
-	 *
-	 * This requires RADIX_TREE_MAX_PATH nodes to build branch from root to
-	 * 0-index item.
-	 */
-	nr_nodes = RADIX_TREE_MAX_PATH;
-
-	/* Plus branch to fully populated subtrees. */
-	nr_nodes += RADIX_TREE_MAX_PATH - subtree_height;
-
-	/* Root node is shared. */
-	nr_nodes--;
-
-	/* Plus nodes required to build subtrees. */
-	nr_nodes += nr_subtrees * height_to_maxnodes[subtree_height];
-
-	return __radix_tree_preload(gfp_mask, nr_nodes);
-}
-
 static unsigned radix_tree_load_root(const struct radix_tree_root *root,
 		struct radix_tree_node **nodep, unsigned long *maxindex)
 {
@@ -1859,31 +1811,6 @@ radix_tree_node_ctor(void *arg)
 	INIT_LIST_HEAD(&node->private_list);
 }
 
-static __init unsigned long __maxindex(unsigned int height)
-{
-	unsigned int width = height * RADIX_TREE_MAP_SHIFT;
-	int shift = RADIX_TREE_INDEX_BITS - width;
-
-	if (shift < 0)
-		return ~0UL;
-	if (shift >= BITS_PER_LONG)
-		return 0UL;
-	return ~0UL >> shift;
-}
-
-static __init void radix_tree_init_maxnodes(void)
-{
-	unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1];
-	unsigned int i, j;
-
-	for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
-		height_to_maxindex[i] = __maxindex(i);
-	for (i = 0; i < ARRAY_SIZE(height_to_maxnodes); i++) {
-		for (j = i; j > 0; j--)
-			height_to_maxnodes[i] += height_to_maxindex[j - 1] + 1;
-	}
-}
-
 static int radix_tree_cpu_dead(unsigned int cpu)
 {
 	struct radix_tree_preload *rtp;
@@ -1911,7 +1838,6 @@ void __init radix_tree_init(void)
 			sizeof(struct radix_tree_node), 0,
 			SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
 			radix_tree_node_ctor);
-	radix_tree_init_maxnodes();
 	ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
 					NULL, radix_tree_cpu_dead);
 	WARN_ON(ret < 0);
-- 
cgit v1.2.3


From adb9d9c4ccb1ff0bf1d376e65f36aa5573c75c1a Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Mon, 9 Apr 2018 16:52:21 -0400
Subject: radix tree: Remove radix_tree_clear_tags

The page cache was the only user of this interface and it has now
been converted to the XArray.  Transform the test into a test of
xas_init_marks().

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/radix-tree.h           |  2 --
 lib/radix-tree.c                     | 13 ------------
 lib/test_xarray.c                    | 40 ++++++++++++++++++++++++++++++++++++
 tools/testing/radix-tree/tag_check.c | 29 --------------------------
 4 files changed, 40 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 664d50c5c2ff..7b009af3bb1e 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -252,8 +252,6 @@ void radix_tree_iter_delete(struct radix_tree_root *,
 			struct radix_tree_iter *iter, void __rcu **slot);
 void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
 void *radix_tree_delete(struct radix_tree_root *, unsigned long);
-void radix_tree_clear_tags(struct radix_tree_root *, struct radix_tree_node *,
-			   void __rcu **slot);
 unsigned int radix_tree_gang_lookup(const struct radix_tree_root *,
 			void **results, unsigned long first_index,
 			unsigned int max_items);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index b57ddc3dbbbd..e92f2b6ae9c5 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1670,19 +1670,6 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 }
 EXPORT_SYMBOL(radix_tree_delete);
 
-void radix_tree_clear_tags(struct radix_tree_root *root,
-			   struct radix_tree_node *node,
-			   void __rcu **slot)
-{
-	if (node) {
-		unsigned int tag, offset = get_slot_offset(node, slot);
-		for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
-			node_tag_clear(root, node, tag, offset);
-	} else {
-		root_tag_clear_all(root);
-	}
-}
-
 /**
  *	radix_tree_tagged - test whether any items in the tree are tagged
  *	@root:		radix tree root
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 815daffdd8c9..ca86141641cb 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -213,12 +213,52 @@ static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index)
 	XA_BUG_ON(xa, !xa_empty(xa));
 }
 
+static noinline void check_xa_mark_2(struct xarray *xa)
+{
+	XA_STATE(xas, xa, 0);
+	unsigned long index;
+	unsigned int count = 0;
+	void *entry;
+
+	xa_store_index(xa, 0, GFP_KERNEL);
+	xa_set_mark(xa, 0, XA_MARK_0);
+	xas_lock(&xas);
+	xas_load(&xas);
+	xas_init_marks(&xas);
+	xas_unlock(&xas);
+	XA_BUG_ON(xa, !xa_get_mark(xa, 0, XA_MARK_0) == 0);
+
+	for (index = 3500; index < 4500; index++) {
+		xa_store_index(xa, index, GFP_KERNEL);
+		xa_set_mark(xa, index, XA_MARK_0);
+	}
+
+	xas_reset(&xas);
+	rcu_read_lock();
+	xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0)
+		count++;
+	rcu_read_unlock();
+	XA_BUG_ON(xa, count != 1000);
+
+	xas_lock(&xas);
+	xas_for_each(&xas, entry, ULONG_MAX) {
+		xas_init_marks(&xas);
+		XA_BUG_ON(xa, !xa_get_mark(xa, xas.xa_index, XA_MARK_0));
+		XA_BUG_ON(xa, !xas_get_mark(&xas, XA_MARK_0));
+	}
+	xas_unlock(&xas);
+
+	xa_destroy(xa);
+}
+
 static noinline void check_xa_mark(struct xarray *xa)
 {
 	unsigned long index;
 
 	for (index = 0; index < 16384; index += 4)
 		check_xa_mark_1(xa, index);
+
+	check_xa_mark_2(xa);
 }
 
 static noinline void check_xa_shrink(struct xarray *xa)
diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c
index 543181e4847b..56a42f1c5ab0 100644
--- a/tools/testing/radix-tree/tag_check.c
+++ b/tools/testing/radix-tree/tag_check.c
@@ -331,34 +331,6 @@ static void single_check(void)
 	item_kill_tree(&tree);
 }
 
-void radix_tree_clear_tags_test(void)
-{
-	unsigned long index;
-	struct radix_tree_node *node;
-	struct radix_tree_iter iter;
-	void **slot;
-
-	RADIX_TREE(tree, GFP_KERNEL);
-
-	item_insert(&tree, 0);
-	item_tag_set(&tree, 0, 0);
-	__radix_tree_lookup(&tree, 0, &node, &slot);
-	radix_tree_clear_tags(&tree, node, slot);
-	assert(item_tag_get(&tree, 0, 0) == 0);
-
-	for (index = 0; index < 1000; index++) {
-		item_insert(&tree, index);
-		item_tag_set(&tree, index, 0);
-	}
-
-	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
-		radix_tree_clear_tags(&tree, iter.node, slot);
-		assert(item_tag_get(&tree, iter.index, 0) == 0);
-	}
-
-	item_kill_tree(&tree);
-}
-
 void tag_check(void)
 {
 	single_check();
@@ -376,5 +348,4 @@ void tag_check(void)
 	thrash_tags();
 	rcu_barrier();
 	printv(2, "after thrash_tags: %d allocated\n", nr_allocated);
-	radix_tree_clear_tags_test();
 }
-- 
cgit v1.2.3


From 372266ba0267803564824b1c09f1bb7f3f3fc761 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Sat, 18 Aug 2018 07:09:22 -0400
Subject: radix tree test suite: Convert tag_tagged_items to XArray

The tag_tagged_items() function is supposed to test the page-writeback
tagging code.  Since that has been converted to the XArray, there's
not much point in testing the radix tree's tagging code.  This requires
using the pthread mutex embedded in the xarray instead of an external
lock, so remove the pthread mutexes which protect xarrays/radix trees.
Also remove radix_tree_iter_tag_set() as this was the last user.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/radix-tree.h                 |  2 --
 lib/radix-tree.c                           | 12 ----------
 tools/testing/radix-tree/iteration_check.c | 16 ++++++-------
 tools/testing/radix-tree/main.c            |  4 ++--
 tools/testing/radix-tree/multiorder.c      | 12 +++++-----
 tools/testing/radix-tree/regression1.c     | 17 +++++++-------
 tools/testing/radix-tree/regression2.c     |  8 +++----
 tools/testing/radix-tree/tag_check.c       |  4 ++--
 tools/testing/radix-tree/test.c            | 37 ++++++++++++------------------
 tools/testing/radix-tree/test.h            |  5 ++--
 10 files changed, 46 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 7b009af3bb1e..9a1460488163 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -264,8 +264,6 @@ void *radix_tree_tag_clear(struct radix_tree_root *,
 			unsigned long index, unsigned int tag);
 int radix_tree_tag_get(const struct radix_tree_root *,
 			unsigned long index, unsigned int tag);
-void radix_tree_iter_tag_set(struct radix_tree_root *,
-		const struct radix_tree_iter *iter, unsigned int tag);
 void radix_tree_iter_tag_clear(struct radix_tree_root *,
 		const struct radix_tree_iter *iter, unsigned int tag);
 unsigned int radix_tree_gang_lookup_tag(const struct radix_tree_root *,
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index e92f2b6ae9c5..f107dd2698e3 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1108,18 +1108,6 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
 }
 EXPORT_SYMBOL(radix_tree_tag_set);
 
-/**
- * radix_tree_iter_tag_set - set a tag on the current iterator entry
- * @root:	radix tree root
- * @iter:	iterator state
- * @tag:	tag to set
- */
-void radix_tree_iter_tag_set(struct radix_tree_root *root,
-			const struct radix_tree_iter *iter, unsigned int tag)
-{
-	node_tag_set(root, iter->node, tag, iter_offset(iter));
-}
-
 static void node_tag_clear(struct radix_tree_root *root,
 				struct radix_tree_node *node,
 				unsigned int tag, unsigned int offset)
diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c
index a92bab513701..d047327bb9ef 100644
--- a/tools/testing/radix-tree/iteration_check.c
+++ b/tools/testing/radix-tree/iteration_check.c
@@ -18,10 +18,9 @@
 
 #define NUM_THREADS	5
 #define MAX_IDX		100
-#define TAG		0
-#define NEW_TAG		1
+#define TAG		XA_MARK_0
+#define NEW_TAG		XA_MARK_1
 
-static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
 static pthread_t threads[NUM_THREADS];
 static unsigned int seeds[3];
 static RADIX_TREE(tree, GFP_KERNEL);
@@ -38,7 +37,7 @@ static void *add_entries_fn(void *arg)
 		int order;
 
 		for (pgoff = 0; pgoff < MAX_IDX; pgoff++) {
-			pthread_mutex_lock(&tree_lock);
+			xa_lock(&tree);
 			for (order = max_order; order >= 0; order--) {
 				if (item_insert_order(&tree, pgoff, order)
 						== 0) {
@@ -46,7 +45,7 @@ static void *add_entries_fn(void *arg)
 					break;
 				}
 			}
-			pthread_mutex_unlock(&tree_lock);
+			xa_unlock(&tree);
 		}
 	}
 
@@ -150,9 +149,9 @@ static void *remove_entries_fn(void *arg)
 
 		pgoff = rand_r(&seeds[2]) % MAX_IDX;
 
-		pthread_mutex_lock(&tree_lock);
+		xa_lock(&tree);
 		item_delete(&tree, pgoff);
-		pthread_mutex_unlock(&tree_lock);
+		xa_unlock(&tree);
 	}
 
 	rcu_unregister_thread();
@@ -165,8 +164,7 @@ static void *tag_entries_fn(void *arg)
 	rcu_register_thread();
 
 	while (!test_complete) {
-		tag_tagged_items(&tree, &tree_lock, 0, MAX_IDX, 10, TAG,
-					NEW_TAG);
+		tag_tagged_items(&tree, 0, MAX_IDX, 10, TAG, NEW_TAG);
 	}
 	rcu_unregister_thread();
 	return NULL;
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index 79589ea570ab..77a44c54998f 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -214,7 +214,7 @@ void copy_tag_check(void)
 	}
 
 //	printf("\ncopying tags...\n");
-	tagged = tag_tagged_items(&tree, NULL, start, end, ITEMS, 0, 1);
+	tagged = tag_tagged_items(&tree, start, end, ITEMS, XA_MARK_0, XA_MARK_1);
 
 //	printf("checking copied tags\n");
 	assert(tagged == count);
@@ -223,7 +223,7 @@ void copy_tag_check(void)
 	/* Copy tags in several rounds */
 //	printf("\ncopying tags...\n");
 	tmp = rand() % (count / 10 + 2);
-	tagged = tag_tagged_items(&tree, NULL, start, end, tmp, 0, 2);
+	tagged = tag_tagged_items(&tree, start, end, tmp, XA_MARK_0, XA_MARK_2);
 	assert(tagged == count);
 
 //	printf("%lu %lu %lu\n", tagged, tmp, count);
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 221e042d1b89..0436554a099a 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -59,7 +59,7 @@ static void __multiorder_tag_test(int index, int order)
 		assert(!radix_tree_tag_get(&tree, i, 1));
 	}
 
-	assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 1);
+	assert(tag_tagged_items(&tree, 0, ~0UL, 10, XA_MARK_0, XA_MARK_1) == 1);
 	assert(radix_tree_tag_clear(&tree, index, 0));
 
 	for_each_index(i, base, order) {
@@ -87,7 +87,7 @@ static void __multiorder_tag_test2(unsigned order, unsigned long index2)
 	assert(radix_tree_tag_set(&tree, 0, 0));
 	assert(radix_tree_tag_set(&tree, index2, 0));
 
-	assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 2);
+	assert(tag_tagged_items(&tree, 0, ~0UL, 10, XA_MARK_0, XA_MARK_1) == 2);
 
 	item_kill_tree(&tree);
 }
@@ -318,8 +318,8 @@ void multiorder_tagged_iteration(void)
 		}
 	}
 
-	assert(tag_tagged_items(&tree, NULL, 0, ~0UL, TAG_ENTRIES, 1, 2) ==
-				TAG_ENTRIES);
+	assert(tag_tagged_items(&tree, 0, ~0UL, TAG_ENTRIES, XA_MARK_1,
+				XA_MARK_2) == TAG_ENTRIES);
 
 	for (j = 0; j < 256; j++) {
 		int mask, k;
@@ -345,8 +345,8 @@ void multiorder_tagged_iteration(void)
 		}
 	}
 
-	assert(tag_tagged_items(&tree, NULL, 1, ~0UL, MT_NUM_ENTRIES * 2, 1, 0)
-			== TAG_ENTRIES);
+	assert(tag_tagged_items(&tree, 1, ~0UL, MT_NUM_ENTRIES * 2, XA_MARK_1,
+				XA_MARK_0) == TAG_ENTRIES);
 	i = 0;
 	radix_tree_for_each_tagged(slot, &tree, &iter, 0, 0) {
 		assert(iter.index == tag_index[i]);
diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c
index b4a4a7168986..a61c7bcbc72d 100644
--- a/tools/testing/radix-tree/regression1.c
+++ b/tools/testing/radix-tree/regression1.c
@@ -44,7 +44,6 @@
 #include "regression.h"
 
 static RADIX_TREE(mt_tree, GFP_KERNEL);
-static pthread_mutex_t mt_lock = PTHREAD_MUTEX_INITIALIZER;
 
 struct page {
 	pthread_mutex_t lock;
@@ -126,29 +125,29 @@ static void *regression1_fn(void *arg)
 			struct page *p;
 
 			p = page_alloc(0);
-			pthread_mutex_lock(&mt_lock);
+			xa_lock(&mt_tree);
 			radix_tree_insert(&mt_tree, 0, p);
-			pthread_mutex_unlock(&mt_lock);
+			xa_unlock(&mt_tree);
 
 			p = page_alloc(1);
-			pthread_mutex_lock(&mt_lock);
+			xa_lock(&mt_tree);
 			radix_tree_insert(&mt_tree, 1, p);
-			pthread_mutex_unlock(&mt_lock);
+			xa_unlock(&mt_tree);
 
-			pthread_mutex_lock(&mt_lock);
+			xa_lock(&mt_tree);
 			p = radix_tree_delete(&mt_tree, 1);
 			pthread_mutex_lock(&p->lock);
 			p->count--;
 			pthread_mutex_unlock(&p->lock);
-			pthread_mutex_unlock(&mt_lock);
+			xa_unlock(&mt_tree);
 			page_free(p);
 
-			pthread_mutex_lock(&mt_lock);
+			xa_lock(&mt_tree);
 			p = radix_tree_delete(&mt_tree, 0);
 			pthread_mutex_lock(&p->lock);
 			p->count--;
 			pthread_mutex_unlock(&p->lock);
-			pthread_mutex_unlock(&mt_lock);
+			xa_unlock(&mt_tree);
 			page_free(p);
 		}
 	} else {
diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c
index 424b91c77831..f2c7e640a919 100644
--- a/tools/testing/radix-tree/regression2.c
+++ b/tools/testing/radix-tree/regression2.c
@@ -53,9 +53,9 @@
 #include "regression.h"
 #include "test.h"
 
-#define PAGECACHE_TAG_DIRTY     0
-#define PAGECACHE_TAG_WRITEBACK 1
-#define PAGECACHE_TAG_TOWRITE   2
+#define PAGECACHE_TAG_DIRTY     XA_MARK_0
+#define PAGECACHE_TAG_WRITEBACK XA_MARK_1
+#define PAGECACHE_TAG_TOWRITE   XA_MARK_2
 
 static RADIX_TREE(mt_tree, GFP_KERNEL);
 unsigned long page_count = 0;
@@ -92,7 +92,7 @@ void regression2_test(void)
 	/* 1. */
 	start = 0;
 	end = max_slots - 2;
-	tag_tagged_items(&mt_tree, NULL, start, end, 1,
+	tag_tagged_items(&mt_tree, start, end, 1,
 				PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
 
 	/* 2. */
diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c
index 56a42f1c5ab0..f898957b1a19 100644
--- a/tools/testing/radix-tree/tag_check.c
+++ b/tools/testing/radix-tree/tag_check.c
@@ -24,7 +24,7 @@ __simple_checks(struct radix_tree_root *tree, unsigned long index, int tag)
 	item_tag_set(tree, index, tag);
 	ret = item_tag_get(tree, index, tag);
 	assert(ret != 0);
-	ret = tag_tagged_items(tree, NULL, first, ~0UL, 10, tag, !tag);
+	ret = tag_tagged_items(tree, first, ~0UL, 10, tag, !tag);
 	assert(ret == 1);
 	ret = item_tag_get(tree, index, !tag);
 	assert(ret != 0);
@@ -321,7 +321,7 @@ static void single_check(void)
 	assert(ret == 0);
 	verify_tag_consistency(&tree, 0);
 	verify_tag_consistency(&tree, 1);
-	ret = tag_tagged_items(&tree, NULL, first, 10, 10, 0, 1);
+	ret = tag_tagged_items(&tree, first, 10, 10, XA_MARK_0, XA_MARK_1);
 	assert(ret == 1);
 	ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 1);
 	assert(ret == 1);
diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c
index 470419bfd49d..d70adcd03d35 100644
--- a/tools/testing/radix-tree/test.c
+++ b/tools/testing/radix-tree/test.c
@@ -176,35 +176,28 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start,
 }
 
 /* Use the same pattern as tag_pages_for_writeback() in mm/page-writeback.c */
-int tag_tagged_items(struct radix_tree_root *root, pthread_mutex_t *lock,
-			unsigned long start, unsigned long end, unsigned batch,
-			unsigned iftag, unsigned thentag)
+int tag_tagged_items(struct xarray *xa, unsigned long start, unsigned long end,
+		unsigned batch, xa_mark_t iftag, xa_mark_t thentag)
 {
-	unsigned long tagged = 0;
-	struct radix_tree_iter iter;
-	void **slot;
+	XA_STATE(xas, xa, start);
+	unsigned int tagged = 0;
+	struct item *item;
 
 	if (batch == 0)
 		batch = 1;
 
-	if (lock)
-		pthread_mutex_lock(lock);
-	radix_tree_for_each_tagged(slot, root, &iter, start, iftag) {
-		if (iter.index > end)
-			break;
-		radix_tree_iter_tag_set(root, &iter, thentag);
-		tagged++;
-		if ((tagged % batch) != 0)
+	xas_lock_irq(&xas);
+	xas_for_each_marked(&xas, item, end, iftag) {
+		xas_set_mark(&xas, thentag);
+		if (++tagged % batch)
 			continue;
-		slot = radix_tree_iter_resume(slot, &iter);
-		if (lock) {
-			pthread_mutex_unlock(lock);
-			rcu_barrier();
-			pthread_mutex_lock(lock);
-		}
+
+		xas_pause(&xas);
+		xas_unlock_irq(&xas);
+		rcu_barrier();
+		xas_lock_irq(&xas);
 	}
-	if (lock)
-		pthread_mutex_unlock(lock);
+	xas_unlock_irq(&xas);
 
 	return tagged;
 }
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 9532c18c6cb1..100e9a456f91 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -29,9 +29,8 @@ void item_full_scan(struct radix_tree_root *root, unsigned long start,
 			unsigned long nr, int chunk);
 void item_kill_tree(struct radix_tree_root *root);
 
-int tag_tagged_items(struct radix_tree_root *, pthread_mutex_t *,
-			unsigned long start, unsigned long end, unsigned batch,
-			unsigned iftag, unsigned thentag);
+int tag_tagged_items(struct xarray *, unsigned long start, unsigned long end,
+		unsigned batch, xa_mark_t iftag, xa_mark_t thentag);
 
 void xarray_tests(void);
 void tag_check(void);
-- 
cgit v1.2.3


From 0e9446c35a80931044b6d8d2d74a9cabd248539f Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Wed, 15 Aug 2018 14:13:29 -0400
Subject: xarray: Add range store functionality

This version of xa_store_range() really only supports load and store.
Our only user only needs basic load and store functionality, so there's
no need to do the extra work to support marking and overlapping stores
correctly yet.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 Documentation/core-api/xarray.rst |  8 ++++
 include/linux/xarray.h            |  2 +
 lib/test_xarray.c                 | 34 ++++++++++++++
 lib/xarray.c                      | 97 ++++++++++++++++++++++++++++++++++++++-
 4 files changed, 139 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index 463e4c798ec1..a4e705108f42 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -98,6 +98,13 @@ the XArray by calling :c:func:`xa_for_each`.  You may prefer to use
 :c:func:`xa_find` or :c:func:`xa_find_after` to move to the next present
 entry in the XArray.
 
+Calling :c:func:`xa_store_range` stores the same entry in a range
+of indices.  If you do this, some of the other operations will behave
+in a slightly odd way.  For example, marking the entry at one index
+may result in the entry being marked at some, but not all of the other
+indices.  Storing into one index may result in the entry retrieved by
+some, but not all of the other indices changing.
+
 Finally, you can remove all entries from an XArray by calling
 :c:func:`xa_destroy`.  If the XArray entries are pointers, you may wish
 to free the entries first.  You can do this by iterating over all present
@@ -156,6 +163,7 @@ Takes xa_lock internally:
  * :c:func:`xa_erase_bh`
  * :c:func:`xa_erase_irq`
  * :c:func:`xa_cmpxchg`
+ * :c:func:`xa_store_range`
  * :c:func:`xa_alloc`
  * :c:func:`xa_alloc_bh`
  * :c:func:`xa_alloc_irq`
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index e0b57af52e9b..d9514928ddac 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -292,6 +292,8 @@ void *xa_store(struct xarray *, unsigned long index, void *entry, gfp_t);
 void *xa_cmpxchg(struct xarray *, unsigned long index,
 			void *old, void *entry, gfp_t);
 int xa_reserve(struct xarray *, unsigned long index, gfp_t);
+void *xa_store_range(struct xarray *, unsigned long first, unsigned long last,
+			void *entry, gfp_t);
 bool xa_get_mark(struct xarray *, unsigned long index, xa_mark_t);
 void xa_set_mark(struct xarray *, unsigned long index, xa_mark_t);
 void xa_clear_mark(struct xarray *, unsigned long index, xa_mark_t);
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 0f06a93b4d0e..aa47754150ce 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -1039,6 +1039,39 @@ static noinline void check_create_range(struct xarray *xa)
 	check_create_range_3();
 }
 
+static noinline void __check_store_range(struct xarray *xa, unsigned long first,
+		unsigned long last)
+{
+#ifdef CONFIG_XARRAY_MULTI
+	xa_store_range(xa, first, last, xa_mk_value(first), GFP_KERNEL);
+
+	XA_BUG_ON(xa, xa_load(xa, first) != xa_mk_value(first));
+	XA_BUG_ON(xa, xa_load(xa, last) != xa_mk_value(first));
+	XA_BUG_ON(xa, xa_load(xa, first - 1) != NULL);
+	XA_BUG_ON(xa, xa_load(xa, last + 1) != NULL);
+
+	xa_store_range(xa, first, last, NULL, GFP_KERNEL);
+#endif
+
+	XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_store_range(struct xarray *xa)
+{
+	unsigned long i, j;
+
+	for (i = 0; i < 128; i++) {
+		for (j = i; j < 128; j++) {
+			__check_store_range(xa, i, j);
+			__check_store_range(xa, 128 + i, 128 + j);
+			__check_store_range(xa, 4095 + i, 4095 + j);
+			__check_store_range(xa, 4096 + i, 4096 + j);
+			__check_store_range(xa, 123456 + i, 123456 + j);
+			__check_store_range(xa, UINT_MAX + i, UINT_MAX + j);
+		}
+	}
+}
+
 static LIST_HEAD(shadow_nodes);
 
 static void test_update_node(struct xa_node *node)
@@ -1184,6 +1217,7 @@ static int xarray_checks(void)
 	check_destroy(&array);
 	check_move(&array);
 	check_create_range(&array);
+	check_store_range(&array);
 	check_store_iter(&array);
 
 	check_workingset(&array, 0);
diff --git a/lib/xarray.c b/lib/xarray.c
index 9a0d49d4b5f0..8b176f009c08 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -376,6 +376,14 @@ static void *xas_alloc(struct xa_state *xas, unsigned int shift)
 	return node;
 }
 
+#ifdef CONFIG_XARRAY_MULTI
+/* Returns the number of indices covered by a given xa_state */
+static unsigned long xas_size(const struct xa_state *xas)
+{
+	return (xas->xa_sibs + 1UL) << xas->xa_shift;
+}
+#endif
+
 /*
  * Use this to calculate the maximum index that will need to be created
  * in order to add the entry described by @xas.  Because we cannot store a
@@ -388,8 +396,7 @@ static unsigned long xas_max(struct xa_state *xas)
 
 #ifdef CONFIG_XARRAY_MULTI
 	if (xas->xa_shift || xas->xa_sibs) {
-		unsigned long mask;
-		mask = (((xas->xa_sibs + 1UL) << xas->xa_shift) - 1);
+		unsigned long mask = xas_size(xas) - 1;
 		max |= mask;
 		if (mask == max)
 			max++;
@@ -1517,6 +1524,92 @@ int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
 }
 EXPORT_SYMBOL(xa_reserve);
 
+#ifdef CONFIG_XARRAY_MULTI
+static void xas_set_range(struct xa_state *xas, unsigned long first,
+		unsigned long last)
+{
+	unsigned int shift = 0;
+	unsigned long sibs = last - first;
+	unsigned int offset = XA_CHUNK_MASK;
+
+	xas_set(xas, first);
+
+	while ((first & XA_CHUNK_MASK) == 0) {
+		if (sibs < XA_CHUNK_MASK)
+			break;
+		if ((sibs == XA_CHUNK_MASK) && (offset < XA_CHUNK_MASK))
+			break;
+		shift += XA_CHUNK_SHIFT;
+		if (offset == XA_CHUNK_MASK)
+			offset = sibs & XA_CHUNK_MASK;
+		sibs >>= XA_CHUNK_SHIFT;
+		first >>= XA_CHUNK_SHIFT;
+	}
+
+	offset = first & XA_CHUNK_MASK;
+	if (offset + sibs > XA_CHUNK_MASK)
+		sibs = XA_CHUNK_MASK - offset;
+	if ((((first + sibs + 1) << shift) - 1) > last)
+		sibs -= 1;
+
+	xas->xa_shift = shift;
+	xas->xa_sibs = sibs;
+}
+
+/**
+ * xa_store_range() - Store this entry at a range of indices in the XArray.
+ * @xa: XArray.
+ * @first: First index to affect.
+ * @last: Last index to affect.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * After this function returns, loads from any index between @first and @last,
+ * inclusive will return @entry.
+ * Storing into an existing multislot entry updates the entry of every index.
+ * The marks associated with @index are unaffected unless @entry is %NULL.
+ *
+ * Context: Process context.  Takes and releases the xa_lock.  May sleep
+ * if the @gfp flags permit.
+ * Return: %NULL on success, xa_err(-EINVAL) if @entry cannot be stored in
+ * an XArray, or xa_err(-ENOMEM) if memory allocation failed.
+ */
+void *xa_store_range(struct xarray *xa, unsigned long first,
+		unsigned long last, void *entry, gfp_t gfp)
+{
+	XA_STATE(xas, xa, 0);
+
+	if (WARN_ON_ONCE(xa_is_internal(entry)))
+		return XA_ERROR(-EINVAL);
+	if (last < first)
+		return XA_ERROR(-EINVAL);
+
+	do {
+		xas_lock(&xas);
+		if (entry) {
+			unsigned int order = (last == ~0UL) ? 64 :
+						ilog2(last + 1);
+			xas_set_order(&xas, last, order);
+			xas_create(&xas);
+			if (xas_error(&xas))
+				goto unlock;
+		}
+		do {
+			xas_set_range(&xas, first, last);
+			xas_store(&xas, entry);
+			if (xas_error(&xas))
+				goto unlock;
+			first += xas_size(&xas);
+		} while (first <= last);
+unlock:
+		xas_unlock(&xas);
+	} while (xas_nomem(&xas, gfp));
+
+	return xas_result(&xas, NULL);
+}
+EXPORT_SYMBOL(xa_store_range);
+#endif /* CONFIG_XARRAY_MULTI */
+
 /**
  * __xa_alloc() - Find somewhere to store this entry in the XArray.
  * @xa: XArray.
-- 
cgit v1.2.3


From 3a08cd52c37c793ffc199f6fc2ecfc368e284b2d Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Sat, 22 Sep 2018 16:14:30 -0400
Subject: radix tree: Remove multiorder support

All users have now been converted to the XArray.  Removing the support
reduces code size and ensures new users will use the XArray instead.

Signed-off-by: Matthew Wilcox <willy@infradead.org>
---
 include/linux/radix-tree.h                    |  40 +----
 lib/Kconfig                                   |   4 -
 lib/radix-tree.c                              | 215 ++------------------------
 mm/Kconfig                                    |   4 +-
 tools/testing/radix-tree/generated/autoconf.h |   1 -
 5 files changed, 19 insertions(+), 245 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 9a1460488163..06c4c7a6c09c 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -96,7 +96,6 @@ static inline bool radix_tree_empty(const struct radix_tree_root *root)
  * @next_index:	one beyond the last index for this chunk
  * @tags:	bit-mask for tag-iterating
  * @node:	node that contains current slot
- * @shift:	shift for the node that holds our slots
  *
  * This radix tree iterator works in terms of "chunks" of slots.  A chunk is a
  * subinterval of slots contained within one radix tree leaf node.  It is
@@ -110,20 +109,8 @@ struct radix_tree_iter {
 	unsigned long	next_index;
 	unsigned long	tags;
 	struct radix_tree_node *node;
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-	unsigned int	shift;
-#endif
 };
 
-static inline unsigned int iter_shift(const struct radix_tree_iter *iter)
-{
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-	return iter->shift;
-#else
-	return 0;
-#endif
-}
-
 /**
  * Radix-tree synchronization
  *
@@ -230,13 +217,8 @@ static inline int radix_tree_exception(void *arg)
 	return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK);
 }
 
-int __radix_tree_insert(struct radix_tree_root *, unsigned long index,
-			unsigned order, void *);
-static inline int radix_tree_insert(struct radix_tree_root *root,
-			unsigned long index, void *entry)
-{
-	return __radix_tree_insert(root, index, 0, entry);
-}
+int radix_tree_insert(struct radix_tree_root *, unsigned long index,
+			void *);
 void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index,
 			  struct radix_tree_node **nodep, void __rcu ***slotp);
 void *radix_tree_lookup(const struct radix_tree_root *, unsigned long);
@@ -384,7 +366,7 @@ void __rcu **radix_tree_iter_retry(struct radix_tree_iter *iter)
 static inline unsigned long
 __radix_tree_iter_add(struct radix_tree_iter *iter, unsigned long slots)
 {
-	return iter->index + (slots << iter_shift(iter));
+	return iter->index + slots;
 }
 
 /**
@@ -409,20 +391,8 @@ void __rcu **__must_check radix_tree_iter_resume(void __rcu **slot,
 static __always_inline long
 radix_tree_chunk_size(struct radix_tree_iter *iter)
 {
-	return (iter->next_index - iter->index) >> iter_shift(iter);
-}
-
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-void __rcu **__radix_tree_next_slot(void __rcu **slot,
-				struct radix_tree_iter *iter, unsigned flags);
-#else
-/* Can't happen without sibling entries, but the compiler can't tell that */
-static inline void __rcu **__radix_tree_next_slot(void __rcu **slot,
-				struct radix_tree_iter *iter, unsigned flags)
-{
-	return slot;
+	return iter->next_index - iter->index;
 }
-#endif
 
 /**
  * radix_tree_next_slot - find next slot in chunk
@@ -482,8 +452,6 @@ static __always_inline void __rcu **radix_tree_next_slot(void __rcu **slot,
 	return NULL;
 
  found:
-	if (unlikely(radix_tree_is_internal_node(rcu_dereference_raw(*slot))))
-		return __radix_tree_next_slot(slot, iter, flags);
 	return slot;
 }
 
diff --git a/lib/Kconfig b/lib/Kconfig
index 40bfa6ccd294..a9965f4af4dd 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -405,10 +405,6 @@ config XARRAY_MULTI
 	  Support entries which occupy multiple consecutive indices in the
 	  XArray.
 
-config RADIX_TREE_MULTIORDER
-	bool
-	select XARRAY_MULTI
-
 config ASSOCIATIVE_ARRAY
 	bool
 	help
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index f107dd2698e3..1106bb6aa01e 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -110,11 +110,6 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
 	unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
 	void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);
 
-	if (xa_is_sibling(entry)) {
-		offset = xa_to_sibling(entry);
-		entry = rcu_dereference_raw(parent->slots[offset]);
-	}
-
 	*nodep = (void *)entry;
 	return offset;
 }
@@ -229,7 +224,7 @@ radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
 
 static unsigned int iter_offset(const struct radix_tree_iter *iter)
 {
-	return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK;
+	return iter->index & RADIX_TREE_MAP_MASK;
 }
 
 /*
@@ -506,16 +501,13 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root)
 
 		/*
 		 * The candidate node has more than one child, or its child
-		 * is not at the leftmost slot, or the child is a multiorder
-		 * entry, we cannot shrink.
+		 * is not at the leftmost slot, we cannot shrink.
 		 */
 		if (node->count != 1)
 			break;
 		child = rcu_dereference_raw(node->slots[0]);
 		if (!child)
 			break;
-		if (!radix_tree_is_internal_node(child) && node->shift)
-			break;
 
 		/*
 		 * For an IDR, we must not shrink entry 0 into the root in
@@ -613,7 +605,6 @@ static bool delete_node(struct radix_tree_root *root,
  *	__radix_tree_create	-	create a slot in a radix tree
  *	@root:		radix tree root
  *	@index:		index key
- *	@order:		index occupies 2^order aligned slots
  *	@nodep:		returns node
  *	@slotp:		returns slot
  *
@@ -627,21 +618,19 @@ static bool delete_node(struct radix_tree_root *root,
  *	Returns -ENOMEM, or 0 for success.
  */
 static int __radix_tree_create(struct radix_tree_root *root,
-		unsigned long index, unsigned order,
-		struct radix_tree_node **nodep, void __rcu ***slotp)
+		unsigned long index, struct radix_tree_node **nodep,
+		void __rcu ***slotp)
 {
 	struct radix_tree_node *node = NULL, *child;
 	void __rcu **slot = (void __rcu **)&root->xa_head;
 	unsigned long maxindex;
 	unsigned int shift, offset = 0;
-	unsigned long max = index | ((1UL << order) - 1);
+	unsigned long max = index;
 	gfp_t gfp = root_gfp_mask(root);
 
 	shift = radix_tree_load_root(root, &child, &maxindex);
 
 	/* Make sure the tree is high enough.  */
-	if (order > 0 && max == ((1UL << order) - 1))
-		max++;
 	if (max > maxindex) {
 		int error = radix_tree_extend(root, gfp, max, shift);
 		if (error < 0)
@@ -650,7 +639,7 @@ static int __radix_tree_create(struct radix_tree_root *root,
 		child = rcu_dereference_raw(root->xa_head);
 	}
 
-	while (shift > order) {
+	while (shift > 0) {
 		shift -= RADIX_TREE_MAP_SHIFT;
 		if (child == NULL) {
 			/* Have to add a child node.  */
@@ -711,70 +700,8 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
 	}
 }
 
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-static inline int insert_entries(struct radix_tree_node *node,
-		void __rcu **slot, void *item, unsigned order, bool replace)
-{
-	void *sibling;
-	unsigned i, n, tag, offset, tags = 0;
-
-	if (node) {
-		if (order > node->shift)
-			n = 1 << (order - node->shift);
-		else
-			n = 1;
-		offset = get_slot_offset(node, slot);
-	} else {
-		n = 1;
-		offset = 0;
-	}
-
-	if (n > 1) {
-		offset = offset & ~(n - 1);
-		slot = &node->slots[offset];
-	}
-	sibling = xa_mk_sibling(offset);
-
-	for (i = 0; i < n; i++) {
-		if (slot[i]) {
-			if (replace) {
-				node->count--;
-				for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
-					if (tag_get(node, tag, offset + i))
-						tags |= 1 << tag;
-			} else
-				return -EEXIST;
-		}
-	}
-
-	for (i = 0; i < n; i++) {
-		struct radix_tree_node *old = rcu_dereference_raw(slot[i]);
-		if (i) {
-			rcu_assign_pointer(slot[i], sibling);
-			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
-				if (tags & (1 << tag))
-					tag_clear(node, tag, offset + i);
-		} else {
-			rcu_assign_pointer(slot[i], item);
-			for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
-				if (tags & (1 << tag))
-					tag_set(node, tag, offset);
-		}
-		if (xa_is_node(old))
-			radix_tree_free_nodes(old);
-		if (xa_is_value(old))
-			node->nr_values--;
-	}
-	if (node) {
-		node->count += n;
-		if (xa_is_value(item))
-			node->nr_values += n;
-	}
-	return n;
-}
-#else
 static inline int insert_entries(struct radix_tree_node *node,
-		void __rcu **slot, void *item, unsigned order, bool replace)
+		void __rcu **slot, void *item, bool replace)
 {
 	if (*slot)
 		return -EEXIST;
@@ -786,19 +713,17 @@ static inline int insert_entries(struct radix_tree_node *node,
 	}
 	return 1;
 }
-#endif
 
 /**
  *	__radix_tree_insert    -    insert into a radix tree
  *	@root:		radix tree root
  *	@index:		index key
- *	@order:		key covers the 2^order indices around index
  *	@item:		item to insert
  *
  *	Insert an item into the radix tree at position @index.
  */
-int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
-			unsigned order, void *item)
+int radix_tree_insert(struct radix_tree_root *root, unsigned long index,
+			void *item)
 {
 	struct radix_tree_node *node;
 	void __rcu **slot;
@@ -806,11 +731,11 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
 
 	BUG_ON(radix_tree_is_internal_node(item));
 
-	error = __radix_tree_create(root, index, order, &node, &slot);
+	error = __radix_tree_create(root, index, &node, &slot);
 	if (error)
 		return error;
 
-	error = insert_entries(node, slot, item, order, false);
+	error = insert_entries(node, slot, item, false);
 	if (error < 0)
 		return error;
 
@@ -825,7 +750,7 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
 
 	return 0;
 }
-EXPORT_SYMBOL(__radix_tree_insert);
+EXPORT_SYMBOL(radix_tree_insert);
 
 /**
  *	__radix_tree_lookup	-	lookup an item in a radix tree
@@ -917,32 +842,12 @@ void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
 }
 EXPORT_SYMBOL(radix_tree_lookup);
 
-static inline void replace_sibling_entries(struct radix_tree_node *node,
-				void __rcu **slot, int count, int values)
-{
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-	unsigned offset = get_slot_offset(node, slot);
-	void *ptr = xa_mk_sibling(offset);
-
-	while (++offset < RADIX_TREE_MAP_SIZE) {
-		if (rcu_dereference_raw(node->slots[offset]) != ptr)
-			break;
-		if (count < 0) {
-			node->slots[offset] = NULL;
-			node->count--;
-		}
-		node->nr_values += values;
-	}
-#endif
-}
-
 static void replace_slot(void __rcu **slot, void *item,
 		struct radix_tree_node *node, int count, int values)
 {
 	if (node && (count || values)) {
 		node->count += count;
 		node->nr_values += values;
-		replace_sibling_entries(node, slot, count, values);
 	}
 
 	rcu_assign_pointer(*slot, item);
@@ -1223,14 +1128,6 @@ int radix_tree_tag_get(const struct radix_tree_root *root,
 }
 EXPORT_SYMBOL(radix_tree_tag_get);
 
-static inline void __set_iter_shift(struct radix_tree_iter *iter,
-					unsigned int shift)
-{
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-	iter->shift = shift;
-#endif
-}
-
 /* Construct iter->tags bit-mask from node->tags[tag] array */
 static void set_iter_tags(struct radix_tree_iter *iter,
 				struct radix_tree_node *node, unsigned offset,
@@ -1257,92 +1154,11 @@ static void set_iter_tags(struct radix_tree_iter *iter,
 	}
 }
 
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-static void __rcu **skip_siblings(struct radix_tree_node **nodep,
-			void __rcu **slot, struct radix_tree_iter *iter)
-{
-	while (iter->index < iter->next_index) {
-		*nodep = rcu_dereference_raw(*slot);
-		if (*nodep && !xa_is_sibling(*nodep))
-			return slot;
-		slot++;
-		iter->index = __radix_tree_iter_add(iter, 1);
-		iter->tags >>= 1;
-	}
-
-	*nodep = NULL;
-	return NULL;
-}
-
-void __rcu **__radix_tree_next_slot(void __rcu **slot,
-				struct radix_tree_iter *iter, unsigned flags)
-{
-	unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
-	struct radix_tree_node *node;
-
-	slot = skip_siblings(&node, slot, iter);
-
-	while (radix_tree_is_internal_node(node)) {
-		unsigned offset;
-		unsigned long next_index;
-
-		if (node == RADIX_TREE_RETRY)
-			return slot;
-		node = entry_to_node(node);
-		iter->node = node;
-		iter->shift = node->shift;
-
-		if (flags & RADIX_TREE_ITER_TAGGED) {
-			offset = radix_tree_find_next_bit(node, tag, 0);
-			if (offset == RADIX_TREE_MAP_SIZE)
-				return NULL;
-			slot = &node->slots[offset];
-			iter->index = __radix_tree_iter_add(iter, offset);
-			set_iter_tags(iter, node, offset, tag);
-			node = rcu_dereference_raw(*slot);
-		} else {
-			offset = 0;
-			slot = &node->slots[0];
-			for (;;) {
-				node = rcu_dereference_raw(*slot);
-				if (node)
-					break;
-				slot++;
-				offset++;
-				if (offset == RADIX_TREE_MAP_SIZE)
-					return NULL;
-			}
-			iter->index = __radix_tree_iter_add(iter, offset);
-		}
-		if ((flags & RADIX_TREE_ITER_CONTIG) && (offset > 0))
-			goto none;
-		next_index = (iter->index | shift_maxindex(iter->shift)) + 1;
-		if (next_index < iter->next_index)
-			iter->next_index = next_index;
-	}
-
-	return slot;
- none:
-	iter->next_index = 0;
-	return NULL;
-}
-EXPORT_SYMBOL(__radix_tree_next_slot);
-#else
-static void __rcu **skip_siblings(struct radix_tree_node **nodep,
-			void __rcu **slot, struct radix_tree_iter *iter)
-{
-	return slot;
-}
-#endif
-
 void __rcu **radix_tree_iter_resume(void __rcu **slot,
 					struct radix_tree_iter *iter)
 {
-	struct radix_tree_node *node;
-
 	slot++;
 	iter->index = __radix_tree_iter_add(iter, 1);
-	skip_siblings(&node, slot, iter);
 	iter->next_index = iter->index;
 	iter->tags = 0;
 	return NULL;
@@ -1393,7 +1209,6 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
 		iter->next_index = maxindex + 1;
 		iter->tags = 1;
 		iter->node = NULL;
-		__set_iter_shift(iter, 0);
 		return (void __rcu **)&root->xa_head;
 	}
 
@@ -1414,8 +1229,6 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
 				while (++offset	< RADIX_TREE_MAP_SIZE) {
 					void *slot = rcu_dereference_raw(
 							node->slots[offset]);
-					if (xa_is_sibling(slot))
-						continue;
 					if (slot)
 						break;
 				}
@@ -1436,10 +1249,9 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
 	} while (node->shift && radix_tree_is_internal_node(child));
 
 	/* Update the iterator state */
-	iter->index = (index &~ node_maxindex(node)) | (offset << node->shift);
+	iter->index = (index &~ node_maxindex(node)) | offset;
 	iter->next_index = (index | node_maxindex(node)) + 1;
 	iter->node = node;
-	__set_iter_shift(iter, node->shift);
 
 	if (flags & RADIX_TREE_ITER_TAGGED)
 		set_iter_tags(iter, node, offset, tag);
@@ -1750,7 +1562,6 @@ void __rcu **idr_get_free(struct radix_tree_root *root,
 	else
 		iter->next_index = 1;
 	iter->node = node;
-	__set_iter_shift(iter, shift);
 	set_iter_tags(iter, node, offset, IDR_FREE);
 
 	return slot;
diff --git a/mm/Kconfig b/mm/Kconfig
index de64ea658716..02301a89089e 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -379,7 +379,7 @@ config TRANSPARENT_HUGEPAGE
 	bool "Transparent Hugepage Support"
 	depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select COMPACTION
-	select RADIX_TREE_MULTIORDER
+	select XARRAY_MULTI
 	help
 	  Transparent Hugepages allows the kernel to use huge pages and
 	  huge tlb transparently to the applications whenever possible.
@@ -671,7 +671,7 @@ config ZONE_DEVICE
 	depends on MEMORY_HOTREMOVE
 	depends on SPARSEMEM_VMEMMAP
 	depends on ARCH_HAS_ZONE_DEVICE
-	select RADIX_TREE_MULTIORDER
+	select XARRAY_MULTI
 
 	help
 	  Device memory hotplug support allows for establishing pmem,
diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h
index ca8e03ad19ac..2218b3cc184e 100644
--- a/tools/testing/radix-tree/generated/autoconf.h
+++ b/tools/testing/radix-tree/generated/autoconf.h
@@ -1,2 +1 @@
-#define CONFIG_RADIX_TREE_MULTIORDER 1
 #define CONFIG_XARRAY_MULTI 1
-- 
cgit v1.2.3


From 94e6992bb560be8bffb47f287194adf070b57695 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 26 Sep 2018 18:03:16 +0200
Subject: libceph: bump CEPH_MSG_MAX_DATA_LEN

If the read is large enough, we end up spinning in the messenger:

  libceph: osd0 192.168.122.1:6801 io error
  libceph: osd0 192.168.122.1:6801 io error
  libceph: osd0 192.168.122.1:6801 io error

This is a receive side limit, so only reads were affected.

Cc: stable@vger.kernel.org
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/libceph.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 49c93b9308d7..68bb09c29ce8 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -81,7 +81,13 @@ struct ceph_options {
 
 #define CEPH_MSG_MAX_FRONT_LEN	(16*1024*1024)
 #define CEPH_MSG_MAX_MIDDLE_LEN	(16*1024*1024)
-#define CEPH_MSG_MAX_DATA_LEN	(16*1024*1024)
+
+/*
+ * Handle the largest possible rbd object in one message.
+ * There is no limit on the size of cephfs objects, but it has to obey
+ * rsize and wsize mount options anyway.
+ */
+#define CEPH_MSG_MAX_DATA_LEN	(32*1024*1024)
 
 #define CEPH_AUTH_NAME_DEFAULT   "guest"
 
-- 
cgit v1.2.3


From 24639ce56040a8ea890ad8836068c1ad8bd177c7 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 26 Sep 2018 19:12:07 +0200
Subject: libceph: osd_req_op_cls_init() doesn't need to take opcode

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 drivers/block/rbd.c             | 3 +--
 include/linux/ceph/osd_client.h | 5 ++---
 net/ceph/osd_client.c           | 9 ++++-----
 3 files changed, 7 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index f2fe692dda40..9cc7ee3b427f 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -2374,8 +2374,7 @@ static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
 	if (!obj_req->osd_req)
 		return -ENOMEM;
 
-	ret = osd_req_op_cls_init(obj_req->osd_req, 0, CEPH_OSD_OP_CALL, "rbd",
-				  "copyup");
+	ret = osd_req_op_cls_init(obj_req->osd_req, 0, "rbd", "copyup");
 	if (ret)
 		return ret;
 
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 02096da01845..f3e828460c91 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -444,9 +444,8 @@ extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
 					struct page **pages, u64 length,
 					u32 alignment, bool pages_from_pool,
 					bool own_pages);
-extern int osd_req_op_cls_init(struct ceph_osd_request *osd_req,
-					unsigned int which, u16 opcode,
-					const char *class, const char *method);
+int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
+			const char *class, const char *method);
 extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
 				 u16 opcode, const char *name, const void *value,
 				 size_t size, u8 cmp_op, u8 cmp_mode);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 60934bd8796c..a871b234cd90 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -767,15 +767,14 @@ void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
 EXPORT_SYMBOL(osd_req_op_extent_dup_last);
 
 int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
-			u16 opcode, const char *class, const char *method)
+			const char *class, const char *method)
 {
-	struct ceph_osd_req_op *op = _osd_req_op_init(osd_req, which,
-						      opcode, 0);
+	struct ceph_osd_req_op *op;
 	struct ceph_pagelist *pagelist;
 	size_t payload_len = 0;
 	size_t size;
 
-	BUG_ON(opcode != CEPH_OSD_OP_CALL);
+	op = _osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0);
 
 	pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
 	if (!pagelist)
@@ -4962,7 +4961,7 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
 	if (ret)
 		goto out_put_req;
 
-	ret = osd_req_op_cls_init(req, 0, CEPH_OSD_OP_CALL, class, method);
+	ret = osd_req_op_cls_init(req, 0, class, method);
 	if (ret)
 		goto out_put_req;
 
-- 
cgit v1.2.3


From 33165d472310262d8c79c7e4d1a17dc60cea7e35 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 28 Sep 2018 15:38:34 +0200
Subject: libceph: introduce ceph_pagelist_alloc()

struct ceph_pagelist cannot be embedded into anything else because it
has its own refcount.  Merge allocation and initialization together.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/acl.c                 |  3 +--
 fs/ceph/mds_client.c          |  3 +--
 fs/ceph/xattr.c               |  3 +--
 include/linux/ceph/pagelist.h | 11 +----------
 net/ceph/osd_client.c         | 14 ++++----------
 net/ceph/pagelist.c           | 20 ++++++++++++++++++++
 6 files changed, 28 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 8a9b562ae4ca..5f0103f40079 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -206,10 +206,9 @@ int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
 	tmp_buf = kmalloc(max(val_size1, val_size2), GFP_KERNEL);
 	if (!tmp_buf)
 		goto out_err;
-	pagelist = kmalloc(sizeof(struct ceph_pagelist), GFP_KERNEL);
+	pagelist = ceph_pagelist_alloc(GFP_KERNEL);
 	if (!pagelist)
 		goto out_err;
-	ceph_pagelist_init(pagelist);
 
 	err = ceph_pagelist_reserve(pagelist, PAGE_SIZE);
 	if (err)
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index bc43c822426a..580a79b9a91f 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -3126,10 +3126,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 
 	pr_info("mds%d reconnect start\n", mds);
 
-	pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
+	pagelist = ceph_pagelist_alloc(GFP_NOFS);
 	if (!pagelist)
 		goto fail_nopagelist;
-	ceph_pagelist_init(pagelist);
 
 	reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS, false);
 	if (!reply)
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 5cc8b94f8206..316f6ad10644 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -951,11 +951,10 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
 
 	if (size > 0) {
 		/* copy value into pagelist */
-		pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
+		pagelist = ceph_pagelist_alloc(GFP_NOFS);
 		if (!pagelist)
 			return -ENOMEM;
 
-		ceph_pagelist_init(pagelist);
 		err = ceph_pagelist_append(pagelist, value, size);
 		if (err)
 			goto out;
diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h
index d0223364349f..5dead8486fd8 100644
--- a/include/linux/ceph/pagelist.h
+++ b/include/linux/ceph/pagelist.h
@@ -23,16 +23,7 @@ struct ceph_pagelist_cursor {
 	size_t room;		    /* room remaining to reset to */
 };
 
-static inline void ceph_pagelist_init(struct ceph_pagelist *pl)
-{
-	INIT_LIST_HEAD(&pl->head);
-	pl->mapped_tail = NULL;
-	pl->length = 0;
-	pl->room = 0;
-	INIT_LIST_HEAD(&pl->free_list);
-	pl->num_pages_free = 0;
-	refcount_set(&pl->refcnt, 1);
-}
+struct ceph_pagelist *ceph_pagelist_alloc(gfp_t gfp_flags);
 
 extern void ceph_pagelist_release(struct ceph_pagelist *pl);
 
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index a871b234cd90..db2ebc9e5f1f 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -776,12 +776,10 @@ int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
 
 	op = _osd_req_op_init(osd_req, which, CEPH_OSD_OP_CALL, 0);
 
-	pagelist = kmalloc(sizeof (*pagelist), GFP_NOFS);
+	pagelist = ceph_pagelist_alloc(GFP_NOFS);
 	if (!pagelist)
 		return -ENOMEM;
 
-	ceph_pagelist_init(pagelist);
-
 	op->cls.class_name = class;
 	size = strlen(class);
 	BUG_ON(size > (size_t) U8_MAX);
@@ -814,12 +812,10 @@ int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
 
 	BUG_ON(opcode != CEPH_OSD_OP_SETXATTR && opcode != CEPH_OSD_OP_CMPXATTR);
 
-	pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
+	pagelist = ceph_pagelist_alloc(GFP_NOFS);
 	if (!pagelist)
 		return -ENOMEM;
 
-	ceph_pagelist_init(pagelist);
-
 	payload_len = strlen(name);
 	op->xattr.name_len = payload_len;
 	ceph_pagelist_append(pagelist, name, payload_len);
@@ -4598,11 +4594,10 @@ static int osd_req_op_notify_ack_init(struct ceph_osd_request *req, int which,
 
 	op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY_ACK, 0);
 
-	pl = kmalloc(sizeof(*pl), GFP_NOIO);
+	pl = ceph_pagelist_alloc(GFP_NOIO);
 	if (!pl)
 		return -ENOMEM;
 
-	ceph_pagelist_init(pl);
 	ret = ceph_pagelist_encode_64(pl, notify_id);
 	ret |= ceph_pagelist_encode_64(pl, cookie);
 	if (payload) {
@@ -4669,11 +4664,10 @@ static int osd_req_op_notify_init(struct ceph_osd_request *req, int which,
 	op = _osd_req_op_init(req, which, CEPH_OSD_OP_NOTIFY, 0);
 	op->notify.cookie = cookie;
 
-	pl = kmalloc(sizeof(*pl), GFP_NOIO);
+	pl = ceph_pagelist_alloc(GFP_NOIO);
 	if (!pl)
 		return -ENOMEM;
 
-	ceph_pagelist_init(pl);
 	ret = ceph_pagelist_encode_32(pl, 1); /* prot_ver */
 	ret |= ceph_pagelist_encode_32(pl, timeout);
 	ret |= ceph_pagelist_encode_32(pl, payload_len);
diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 2ea0564771d2..65e34f78b05d 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -6,6 +6,26 @@
 #include <linux/highmem.h>
 #include <linux/ceph/pagelist.h>
 
+struct ceph_pagelist *ceph_pagelist_alloc(gfp_t gfp_flags)
+{
+	struct ceph_pagelist *pl;
+
+	pl = kmalloc(sizeof(*pl), gfp_flags);
+	if (!pl)
+		return NULL;
+
+	INIT_LIST_HEAD(&pl->head);
+	pl->mapped_tail = NULL;
+	pl->length = 0;
+	pl->room = 0;
+	INIT_LIST_HEAD(&pl->free_list);
+	pl->num_pages_free = 0;
+	refcount_set(&pl->refcnt, 1);
+
+	return pl;
+}
+EXPORT_SYMBOL(ceph_pagelist_alloc);
+
 static void ceph_pagelist_unmap_tail(struct ceph_pagelist *pl)
 {
 	if (pl->mapped_tail) {
-- 
cgit v1.2.3


From 0d9c1ab3be4c0187663096a6a084421d0a1e45c6 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Mon, 15 Oct 2018 17:38:23 +0200
Subject: libceph: preallocate message data items

Currently message data items are allocated with ceph_msg_data_create()
in setup_request_data() inside send_request().  send_request() has never
been allowed to fail, so each allocation is followed by a BUG_ON:

  data = ceph_msg_data_create(...);
  BUG_ON(!data);

It's been this way since support for multiple message data items was
added in commit 6644ed7b7e04 ("libceph: make message data be a pointer")
in 3.10.

There is no reason to delay the allocation of message data items until
the last possible moment and we certainly don't need a linked list of
them as they are only ever appended to the end and never erased.  Make
ceph_msg_new2() take max_data_items and adapt the rest of the code.

Reported-by: Jerry Lee <leisurelysw24@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/mds_client.c           |   4 +-
 include/linux/ceph/messenger.h |  24 ++--------
 include/linux/ceph/msgpool.h   |  11 +++--
 net/ceph/messenger.c           | 106 +++++++++++++++--------------------------
 net/ceph/msgpool.c             |  25 ++++++----
 net/ceph/osd_client.c          | 102 +++++++++++++++++++++++++++++++++------
 6 files changed, 157 insertions(+), 115 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 97de674ea377..67a9aeb2f4ec 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2071,7 +2071,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
 	if (req->r_old_dentry_drop)
 		len += req->r_old_dentry->d_name.len;
 
-	msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, GFP_NOFS, false);
+	msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false);
 	if (!msg) {
 		msg = ERR_PTR(-ENOMEM);
 		goto out_free2;
@@ -3129,7 +3129,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	if (!pagelist)
 		goto fail_nopagelist;
 
-	reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS, false);
+	reply = ceph_msg_new2(CEPH_MSG_CLIENT_RECONNECT, 0, 1, GFP_NOFS, false);
 	if (!reply)
 		goto fail_nomsg;
 
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index fc2b4491ee0a..800a2128d411 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -82,22 +82,6 @@ enum ceph_msg_data_type {
 	CEPH_MSG_DATA_BVECS,	/* data source/destination is a bio_vec array */
 };
 
-static __inline__ bool ceph_msg_data_type_valid(enum ceph_msg_data_type type)
-{
-	switch (type) {
-	case CEPH_MSG_DATA_NONE:
-	case CEPH_MSG_DATA_PAGES:
-	case CEPH_MSG_DATA_PAGELIST:
-#ifdef CONFIG_BLOCK
-	case CEPH_MSG_DATA_BIO:
-#endif /* CONFIG_BLOCK */
-	case CEPH_MSG_DATA_BVECS:
-		return true;
-	default:
-		return false;
-	}
-}
-
 #ifdef CONFIG_BLOCK
 
 struct ceph_bio_iter {
@@ -181,7 +165,6 @@ struct ceph_bvec_iter {
 } while (0)
 
 struct ceph_msg_data {
-	struct list_head		links;	/* ceph_msg->data */
 	enum ceph_msg_data_type		type;
 	union {
 #ifdef CONFIG_BLOCK
@@ -202,7 +185,6 @@ struct ceph_msg_data {
 
 struct ceph_msg_data_cursor {
 	size_t			total_resid;	/* across all data items */
-	struct list_head	*data_head;	/* = &ceph_msg->data */
 
 	struct ceph_msg_data	*data;		/* current data item */
 	size_t			resid;		/* bytes not yet consumed */
@@ -240,7 +222,9 @@ struct ceph_msg {
 	struct ceph_buffer *middle;
 
 	size_t				data_length;
-	struct list_head		data;
+	struct ceph_msg_data		*data;
+	int				num_data_items;
+	int				max_data_items;
 	struct ceph_msg_data_cursor	cursor;
 
 	struct ceph_connection *con;
@@ -381,6 +365,8 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
 void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
 			     struct ceph_bvec_iter *bvec_pos);
 
+struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
+			       gfp_t flags, bool can_fail);
 extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
 				     bool can_fail);
 
diff --git a/include/linux/ceph/msgpool.h b/include/linux/ceph/msgpool.h
index 76c98a512758..729cdf700eae 100644
--- a/include/linux/ceph/msgpool.h
+++ b/include/linux/ceph/msgpool.h
@@ -13,14 +13,15 @@ struct ceph_msgpool {
 	mempool_t *pool;
 	int type;               /* preallocated message type */
 	int front_len;          /* preallocated payload size */
+	int max_data_items;
 };
 
-extern int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
-			     int front_len, int size, bool blocking,
-			     const char *name);
+int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
+		      int front_len, int max_data_items, int size,
+		      const char *name);
 extern void ceph_msgpool_destroy(struct ceph_msgpool *pool);
-extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *,
-					 int front_len);
+struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len,
+				  int max_data_items);
 extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *);
 
 #endif
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 76684edc43ef..88e35830198c 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -156,7 +156,6 @@ static bool con_flag_test_and_set(struct ceph_connection *con,
 /* Slab caches for frequently-allocated structures */
 
 static struct kmem_cache	*ceph_msg_cache;
-static struct kmem_cache	*ceph_msg_data_cache;
 
 /* static tag bytes (protocol control messages) */
 static char tag_msg = CEPH_MSGR_TAG_MSG;
@@ -235,23 +234,11 @@ static int ceph_msgr_slab_init(void)
 	if (!ceph_msg_cache)
 		return -ENOMEM;
 
-	BUG_ON(ceph_msg_data_cache);
-	ceph_msg_data_cache = KMEM_CACHE(ceph_msg_data, 0);
-	if (ceph_msg_data_cache)
-		return 0;
-
-	kmem_cache_destroy(ceph_msg_cache);
-	ceph_msg_cache = NULL;
-
-	return -ENOMEM;
+	return 0;
 }
 
 static void ceph_msgr_slab_exit(void)
 {
-	BUG_ON(!ceph_msg_data_cache);
-	kmem_cache_destroy(ceph_msg_data_cache);
-	ceph_msg_data_cache = NULL;
-
 	BUG_ON(!ceph_msg_cache);
 	kmem_cache_destroy(ceph_msg_cache);
 	ceph_msg_cache = NULL;
@@ -1141,16 +1128,13 @@ static void __ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor)
 static void ceph_msg_data_cursor_init(struct ceph_msg *msg, size_t length)
 {
 	struct ceph_msg_data_cursor *cursor = &msg->cursor;
-	struct ceph_msg_data *data;
 
 	BUG_ON(!length);
 	BUG_ON(length > msg->data_length);
-	BUG_ON(list_empty(&msg->data));
+	BUG_ON(!msg->num_data_items);
 
-	cursor->data_head = &msg->data;
 	cursor->total_resid = length;
-	data = list_first_entry(&msg->data, struct ceph_msg_data, links);
-	cursor->data = data;
+	cursor->data = msg->data;
 
 	__ceph_msg_data_cursor_init(cursor);
 }
@@ -1231,8 +1215,7 @@ static void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor,
 
 	if (!cursor->resid && cursor->total_resid) {
 		WARN_ON(!cursor->last_piece);
-		BUG_ON(list_is_last(&cursor->data->links, cursor->data_head));
-		cursor->data = list_next_entry(cursor->data, links);
+		cursor->data++;
 		__ceph_msg_data_cursor_init(cursor);
 		new_piece = true;
 	}
@@ -1248,9 +1231,6 @@ static size_t sizeof_footer(struct ceph_connection *con)
 
 static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
 {
-	BUG_ON(!msg);
-	BUG_ON(!data_len);
-
 	/* Initialize data cursor */
 
 	ceph_msg_data_cursor_init(msg, (size_t)data_len);
@@ -1590,7 +1570,7 @@ static int write_partial_message_data(struct ceph_connection *con)
 
 	dout("%s %p msg %p\n", __func__, con, msg);
 
-	if (list_empty(&msg->data))
+	if (!msg->num_data_items)
 		return -EINVAL;
 
 	/*
@@ -2347,8 +2327,7 @@ static int read_partial_msg_data(struct ceph_connection *con)
 	u32 crc = 0;
 	int ret;
 
-	BUG_ON(!msg);
-	if (list_empty(&msg->data))
+	if (!msg->num_data_items)
 		return -EIO;
 
 	if (do_datacrc)
@@ -3256,32 +3235,16 @@ bool ceph_con_keepalive_expired(struct ceph_connection *con,
 	return false;
 }
 
-static struct ceph_msg_data *ceph_msg_data_create(enum ceph_msg_data_type type)
+static struct ceph_msg_data *ceph_msg_data_add(struct ceph_msg *msg)
 {
-	struct ceph_msg_data *data;
-
-	if (WARN_ON(!ceph_msg_data_type_valid(type)))
-		return NULL;
-
-	data = kmem_cache_zalloc(ceph_msg_data_cache, GFP_NOFS);
-	if (!data)
-		return NULL;
-
-	data->type = type;
-	INIT_LIST_HEAD(&data->links);
-
-	return data;
+	BUG_ON(msg->num_data_items >= msg->max_data_items);
+	return &msg->data[msg->num_data_items++];
 }
 
 static void ceph_msg_data_destroy(struct ceph_msg_data *data)
 {
-	if (!data)
-		return;
-
-	WARN_ON(!list_empty(&data->links));
 	if (data->type == CEPH_MSG_DATA_PAGELIST)
 		ceph_pagelist_release(data->pagelist);
-	kmem_cache_free(ceph_msg_data_cache, data);
 }
 
 void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
@@ -3292,13 +3255,12 @@ void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages,
 	BUG_ON(!pages);
 	BUG_ON(!length);
 
-	data = ceph_msg_data_create(CEPH_MSG_DATA_PAGES);
-	BUG_ON(!data);
+	data = ceph_msg_data_add(msg);
+	data->type = CEPH_MSG_DATA_PAGES;
 	data->pages = pages;
 	data->length = length;
 	data->alignment = alignment & ~PAGE_MASK;
 
-	list_add_tail(&data->links, &msg->data);
 	msg->data_length += length;
 }
 EXPORT_SYMBOL(ceph_msg_data_add_pages);
@@ -3311,12 +3273,11 @@ void ceph_msg_data_add_pagelist(struct ceph_msg *msg,
 	BUG_ON(!pagelist);
 	BUG_ON(!pagelist->length);
 
-	data = ceph_msg_data_create(CEPH_MSG_DATA_PAGELIST);
-	BUG_ON(!data);
+	data = ceph_msg_data_add(msg);
+	data->type = CEPH_MSG_DATA_PAGELIST;
 	refcount_inc(&pagelist->refcnt);
 	data->pagelist = pagelist;
 
-	list_add_tail(&data->links, &msg->data);
 	msg->data_length += pagelist->length;
 }
 EXPORT_SYMBOL(ceph_msg_data_add_pagelist);
@@ -3327,12 +3288,11 @@ void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos,
 {
 	struct ceph_msg_data *data;
 
-	data = ceph_msg_data_create(CEPH_MSG_DATA_BIO);
-	BUG_ON(!data);
+	data = ceph_msg_data_add(msg);
+	data->type = CEPH_MSG_DATA_BIO;
 	data->bio_pos = *bio_pos;
 	data->bio_length = length;
 
-	list_add_tail(&data->links, &msg->data);
 	msg->data_length += length;
 }
 EXPORT_SYMBOL(ceph_msg_data_add_bio);
@@ -3343,11 +3303,10 @@ void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
 {
 	struct ceph_msg_data *data;
 
-	data = ceph_msg_data_create(CEPH_MSG_DATA_BVECS);
-	BUG_ON(!data);
+	data = ceph_msg_data_add(msg);
+	data->type = CEPH_MSG_DATA_BVECS;
 	data->bvec_pos = *bvec_pos;
 
-	list_add_tail(&data->links, &msg->data);
 	msg->data_length += bvec_pos->iter.bi_size;
 }
 EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
@@ -3356,8 +3315,8 @@ EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
  * construct a new message with given type, size
  * the new msg has a ref count of 1.
  */
-struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
-			      bool can_fail)
+struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
+			       gfp_t flags, bool can_fail)
 {
 	struct ceph_msg *m;
 
@@ -3371,7 +3330,6 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
 
 	INIT_LIST_HEAD(&m->list_head);
 	kref_init(&m->kref);
-	INIT_LIST_HEAD(&m->data);
 
 	/* front */
 	if (front_len) {
@@ -3386,6 +3344,15 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
 	}
 	m->front_alloc_len = m->front.iov_len = front_len;
 
+	if (max_data_items) {
+		m->data = kmalloc_array(max_data_items, sizeof(*m->data),
+					flags);
+		if (!m->data)
+			goto out2;
+
+		m->max_data_items = max_data_items;
+	}
+
 	dout("ceph_msg_new %p front %d\n", m, front_len);
 	return m;
 
@@ -3402,6 +3369,13 @@ out:
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(ceph_msg_new2);
+
+struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags,
+			      bool can_fail)
+{
+	return ceph_msg_new2(type, front_len, 0, flags, can_fail);
+}
 EXPORT_SYMBOL(ceph_msg_new);
 
 /*
@@ -3497,13 +3471,14 @@ static void ceph_msg_free(struct ceph_msg *m)
 {
 	dout("%s %p\n", __func__, m);
 	kvfree(m->front.iov_base);
+	kfree(m->data);
 	kmem_cache_free(ceph_msg_cache, m);
 }
 
 static void ceph_msg_release(struct kref *kref)
 {
 	struct ceph_msg *m = container_of(kref, struct ceph_msg, kref);
-	struct ceph_msg_data *data, *next;
+	int i;
 
 	dout("%s %p\n", __func__, m);
 	WARN_ON(!list_empty(&m->list_head));
@@ -3516,11 +3491,8 @@ static void ceph_msg_release(struct kref *kref)
 		m->middle = NULL;
 	}
 
-	list_for_each_entry_safe(data, next, &m->data, links) {
-		list_del_init(&data->links);
-		ceph_msg_data_destroy(data);
-	}
-	m->data_length = 0;
+	for (i = 0; i < m->num_data_items; i++)
+		ceph_msg_data_destroy(&m->data[i]);
 
 	if (m->pool)
 		ceph_msgpool_put(m->pool, m);
diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c
index 3dddc074f0d7..e3ecb80cd182 100644
--- a/net/ceph/msgpool.c
+++ b/net/ceph/msgpool.c
@@ -14,7 +14,8 @@ static void *msgpool_alloc(gfp_t gfp_mask, void *arg)
 	struct ceph_msgpool *pool = arg;
 	struct ceph_msg *msg;
 
-	msg = ceph_msg_new(pool->type, pool->front_len, gfp_mask, true);
+	msg = ceph_msg_new2(pool->type, pool->front_len, pool->max_data_items,
+			    gfp_mask, true);
 	if (!msg) {
 		dout("msgpool_alloc %s failed\n", pool->name);
 	} else {
@@ -35,11 +36,13 @@ static void msgpool_free(void *element, void *arg)
 }
 
 int ceph_msgpool_init(struct ceph_msgpool *pool, int type,
-		      int front_len, int size, bool blocking, const char *name)
+		      int front_len, int max_data_items, int size,
+		      const char *name)
 {
 	dout("msgpool %s init\n", name);
 	pool->type = type;
 	pool->front_len = front_len;
+	pool->max_data_items = max_data_items;
 	pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool);
 	if (!pool->pool)
 		return -ENOMEM;
@@ -53,18 +56,21 @@ void ceph_msgpool_destroy(struct ceph_msgpool *pool)
 	mempool_destroy(pool->pool);
 }
 
-struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool,
-				  int front_len)
+struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len,
+				  int max_data_items)
 {
 	struct ceph_msg *msg;
 
-	if (front_len > pool->front_len) {
-		dout("msgpool_get %s need front %d, pool size is %d\n",
-		       pool->name, front_len, pool->front_len);
+	if (front_len > pool->front_len ||
+	    max_data_items > pool->max_data_items) {
+		pr_warn_ratelimited("%s need %d/%d, pool %s has %d/%d\n",
+		    __func__, front_len, max_data_items, pool->name,
+		    pool->front_len, pool->max_data_items);
 		WARN_ON_ONCE(1);
 
 		/* try to alloc a fresh message */
-		return ceph_msg_new(pool->type, front_len, GFP_NOFS, false);
+		return ceph_msg_new2(pool->type, front_len, max_data_items,
+				     GFP_NOFS, false);
 	}
 
 	msg = mempool_alloc(pool->pool, GFP_NOFS);
@@ -80,6 +86,9 @@ void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg)
 	msg->front.iov_len = pool->front_len;
 	msg->hdr.front_len = cpu_to_le32(pool->front_len);
 
+	msg->data_length = 0;
+	msg->num_data_items = 0;
+
 	kref_init(&msg->kref);  /* retake single ref */
 	mempool_free(msg, pool->pool);
 }
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 7ac7f21ff317..cf0bd2cce848 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -614,12 +614,15 @@ static int ceph_oloc_encoding_size(const struct ceph_object_locator *oloc)
 	return 8 + 4 + 4 + 4 + (oloc->pool_ns ? oloc->pool_ns->len : 0);
 }
 
-int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
+static int __ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp,
+				      int num_request_data_items,
+				      int num_reply_data_items)
 {
 	struct ceph_osd_client *osdc = req->r_osdc;
 	struct ceph_msg *msg;
 	int msg_size;
 
+	WARN_ON(req->r_request || req->r_reply);
 	WARN_ON(ceph_oid_empty(&req->r_base_oid));
 	WARN_ON(ceph_oloc_empty(&req->r_base_oloc));
 
@@ -641,9 +644,11 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
 	msg_size += 4 + 8; /* retry_attempt, features */
 
 	if (req->r_mempool)
-		msg = ceph_msgpool_get(&osdc->msgpool_op, msg_size);
+		msg = ceph_msgpool_get(&osdc->msgpool_op, msg_size,
+				       num_request_data_items);
 	else
-		msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, gfp, true);
+		msg = ceph_msg_new2(CEPH_MSG_OSD_OP, msg_size,
+				    num_request_data_items, gfp, true);
 	if (!msg)
 		return -ENOMEM;
 
@@ -656,9 +661,11 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
 	msg_size += req->r_num_ops * sizeof(struct ceph_osd_op);
 
 	if (req->r_mempool)
-		msg = ceph_msgpool_get(&osdc->msgpool_op_reply, msg_size);
+		msg = ceph_msgpool_get(&osdc->msgpool_op_reply, msg_size,
+				       num_reply_data_items);
 	else
-		msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, msg_size, gfp, true);
+		msg = ceph_msg_new2(CEPH_MSG_OSD_OPREPLY, msg_size,
+				    num_reply_data_items, gfp, true);
 	if (!msg)
 		return -ENOMEM;
 
@@ -666,7 +673,6 @@ int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
 
 	return 0;
 }
-EXPORT_SYMBOL(ceph_osdc_alloc_messages);
 
 static bool osd_req_opcode_valid(u16 opcode)
 {
@@ -679,6 +685,64 @@ __CEPH_FORALL_OSD_OPS(GENERATE_CASE)
 	}
 }
 
+static void get_num_data_items(struct ceph_osd_request *req,
+			       int *num_request_data_items,
+			       int *num_reply_data_items)
+{
+	struct ceph_osd_req_op *op;
+
+	*num_request_data_items = 0;
+	*num_reply_data_items = 0;
+
+	for (op = req->r_ops; op != &req->r_ops[req->r_num_ops]; op++) {
+		switch (op->op) {
+		/* request */
+		case CEPH_OSD_OP_WRITE:
+		case CEPH_OSD_OP_WRITEFULL:
+		case CEPH_OSD_OP_SETXATTR:
+		case CEPH_OSD_OP_CMPXATTR:
+		case CEPH_OSD_OP_NOTIFY_ACK:
+			*num_request_data_items += 1;
+			break;
+
+		/* reply */
+		case CEPH_OSD_OP_STAT:
+		case CEPH_OSD_OP_READ:
+		case CEPH_OSD_OP_LIST_WATCHERS:
+			*num_reply_data_items += 1;
+			break;
+
+		/* both */
+		case CEPH_OSD_OP_NOTIFY:
+			*num_request_data_items += 1;
+			*num_reply_data_items += 1;
+			break;
+		case CEPH_OSD_OP_CALL:
+			*num_request_data_items += 2;
+			*num_reply_data_items += 1;
+			break;
+
+		default:
+			WARN_ON(!osd_req_opcode_valid(op->op));
+			break;
+		}
+	}
+}
+
+/*
+ * oid, oloc and OSD op opcode(s) must be filled in before this function
+ * is called.
+ */
+int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp)
+{
+	int num_request_data_items, num_reply_data_items;
+
+	get_num_data_items(req, &num_request_data_items, &num_reply_data_items);
+	return __ceph_osdc_alloc_messages(req, gfp, num_request_data_items,
+					  num_reply_data_items);
+}
+EXPORT_SYMBOL(ceph_osdc_alloc_messages);
+
 /*
  * This is an osd op init function for opcodes that have no data or
  * other information associated with them.  It also serves as a
@@ -1035,7 +1099,15 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
 	if (flags & CEPH_OSD_FLAG_WRITE)
 		req->r_data_offset = off;
 
-	r = ceph_osdc_alloc_messages(req, GFP_NOFS);
+	if (num_ops > 1)
+		/*
+		 * This is a special case for ceph_writepages_start(), but it
+		 * also covers ceph_uninline_data().  If more multi-op request
+		 * use cases emerge, we will need a separate helper.
+		 */
+		r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_ops, 0);
+	else
+		r = ceph_osdc_alloc_messages(req, GFP_NOFS);
 	if (r)
 		goto fail;
 
@@ -1842,13 +1914,16 @@ static bool should_plug_request(struct ceph_osd_request *req)
 	return true;
 }
 
+/*
+ * Keep get_num_data_items() in sync with this function.
+ */
 static void setup_request_data(struct ceph_osd_request *req,
 			       struct ceph_msg *msg)
 {
 	u32 data_len = 0;
 	int i;
 
-	if (!list_empty(&msg->data))
+	if (msg->num_data_items)
 		return;
 
 	WARN_ON(msg->data_length);
@@ -4325,9 +4400,7 @@ static void handle_watch_notify(struct ceph_osd_client *osdc,
 			     lreq->notify_id, notify_id);
 		} else if (!completion_done(&lreq->notify_finish_wait)) {
 			struct ceph_msg_data *data =
-			    list_first_entry_or_null(&msg->data,
-						     struct ceph_msg_data,
-						     links);
+			    msg->num_data_items ? &msg->data[0] : NULL;
 
 			if (data) {
 				if (lreq->preply_pages) {
@@ -5036,11 +5109,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
 		goto out_map;
 
 	err = ceph_msgpool_init(&osdc->msgpool_op, CEPH_MSG_OSD_OP,
-				PAGE_SIZE, 10, true, "osd_op");
+				PAGE_SIZE, CEPH_OSD_SLAB_OPS, 10, "osd_op");
 	if (err < 0)
 		goto out_mempool;
 	err = ceph_msgpool_init(&osdc->msgpool_op_reply, CEPH_MSG_OSD_OPREPLY,
-				PAGE_SIZE, 10, true, "osd_op_reply");
+				PAGE_SIZE, CEPH_OSD_SLAB_OPS, 10,
+				"osd_op_reply");
 	if (err < 0)
 		goto out_msgpool;
 
@@ -5310,7 +5384,7 @@ static struct ceph_msg *alloc_msg_with_page_vector(struct ceph_msg_header *hdr)
 	u32 front_len = le32_to_cpu(hdr->front_len);
 	u32 data_len = le32_to_cpu(hdr->data_len);
 
-	m = ceph_msg_new(type, front_len, GFP_NOIO, false);
+	m = ceph_msg_new2(type, front_len, 1, GFP_NOIO, false);
 	if (!m)
 		return NULL;
 
-- 
cgit v1.2.3


From 23ddf9bea900b4ce39e5707e1ddf66062cfe6d91 Mon Sep 17 00:00:00 2001
From: Luis Henriques <lhenriques@suse.com>
Date: Mon, 15 Oct 2018 16:45:58 +0100
Subject: libceph: support the RADOS copy-from operation

Add support for performing remote object copies using the 'copy-from'
operation.

[ Add COPY_FROM to get_num_data_items(). ]

Signed-off-by: Luis Henriques <lhenriques@suse.com>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 include/linux/ceph/osd_client.h | 17 ++++++++
 include/linux/ceph/rados.h      | 28 +++++++++++++
 net/ceph/osd_client.c           | 90 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 135 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index f3e828460c91..7a2af5034278 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -136,6 +136,13 @@ struct ceph_osd_req_op {
 			u64 expected_object_size;
 			u64 expected_write_size;
 		} alloc_hint;
+		struct {
+			u64 snapid;
+			u64 src_version;
+			u8 flags;
+			u32 src_fadvise_flags;
+			struct ceph_osd_data osd_data;
+		} copy_from;
 	};
 };
 
@@ -510,6 +517,16 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc,
 				struct timespec64 *mtime,
 				struct page **pages, int nr_pages);
 
+int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
+			u64 src_snapid, u64 src_version,
+			struct ceph_object_id *src_oid,
+			struct ceph_object_locator *src_oloc,
+			u32 src_fadvise_flags,
+			struct ceph_object_id *dst_oid,
+			struct ceph_object_locator *dst_oloc,
+			u32 dst_fadvise_flags,
+			u8 copy_from_flags);
+
 /* watch/notify */
 struct ceph_osd_linger_request *
 ceph_osdc_watch(struct ceph_osd_client *osdc,
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index f1988387c5ad..3eb0e55665b4 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -410,6 +410,14 @@ enum {
 enum {
 	CEPH_OSD_OP_FLAG_EXCL = 1,      /* EXCL object create */
 	CEPH_OSD_OP_FLAG_FAILOK = 2,    /* continue despite failure */
+	CEPH_OSD_OP_FLAG_FADVISE_RANDOM	    = 0x4, /* the op is random */
+	CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL = 0x8, /* the op is sequential */
+	CEPH_OSD_OP_FLAG_FADVISE_WILLNEED   = 0x10,/* data will be accessed in
+						      the near future */
+	CEPH_OSD_OP_FLAG_FADVISE_DONTNEED   = 0x20,/* data will not be accessed
+						      in the near future */
+	CEPH_OSD_OP_FLAG_FADVISE_NOCACHE    = 0x40,/* data will be accessed only
+						      once by this client */
 };
 
 #define EOLDSNAPC    ERESTART  /* ORDERSNAP flag set; writer has old snapc*/
@@ -431,6 +439,15 @@ enum {
 	CEPH_OSD_CMPXATTR_MODE_U64    = 2
 };
 
+enum {
+	CEPH_OSD_COPY_FROM_FLAG_FLUSH = 1,       /* part of a flush operation */
+	CEPH_OSD_COPY_FROM_FLAG_IGNORE_OVERLAY = 2, /* ignore pool overlay */
+	CEPH_OSD_COPY_FROM_FLAG_IGNORE_CACHE = 4,   /* ignore osd cache logic */
+	CEPH_OSD_COPY_FROM_FLAG_MAP_SNAP_CLONE = 8, /* map snap direct to
+						     * cloneid */
+	CEPH_OSD_COPY_FROM_FLAG_RWORDERED = 16,     /* order with write */
+};
+
 enum {
 	CEPH_OSD_WATCH_OP_UNWATCH = 0,
 	CEPH_OSD_WATCH_OP_LEGACY_WATCH = 1,
@@ -497,6 +514,17 @@ struct ceph_osd_op {
 			__le64 expected_object_size;
 			__le64 expected_write_size;
 		} __attribute__ ((packed)) alloc_hint;
+		struct {
+			__le64 snapid;
+			__le64 src_version;
+			__u8 flags; /* CEPH_OSD_COPY_FROM_FLAG_* */
+			/*
+			 * CEPH_OSD_OP_FLAG_FADVISE_*: fadvise flags
+			 * for src object, flags for dest object are in
+			 * ceph_osd_op::flags.
+			 */
+			__le32 src_fadvise_flags;
+		} __attribute__ ((packed)) copy_from;
 	};
 	__le32 payload_len;
 } __attribute__ ((packed));
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index a0148de51cc6..d23a9f81f3d7 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -410,6 +410,9 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
 	case CEPH_OSD_OP_LIST_WATCHERS:
 		ceph_osd_data_release(&op->list_watchers.response_data);
 		break;
+	case CEPH_OSD_OP_COPY_FROM:
+		ceph_osd_data_release(&op->copy_from.osd_data);
+		break;
 	default:
 		break;
 	}
@@ -702,6 +705,7 @@ static void get_num_data_items(struct ceph_osd_request *req,
 		case CEPH_OSD_OP_SETXATTR:
 		case CEPH_OSD_OP_CMPXATTR:
 		case CEPH_OSD_OP_NOTIFY_ACK:
+		case CEPH_OSD_OP_COPY_FROM:
 			*num_request_data_items += 1;
 			break;
 
@@ -1016,6 +1020,14 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst,
 	case CEPH_OSD_OP_CREATE:
 	case CEPH_OSD_OP_DELETE:
 		break;
+	case CEPH_OSD_OP_COPY_FROM:
+		dst->copy_from.snapid = cpu_to_le64(src->copy_from.snapid);
+		dst->copy_from.src_version =
+			cpu_to_le64(src->copy_from.src_version);
+		dst->copy_from.flags = src->copy_from.flags;
+		dst->copy_from.src_fadvise_flags =
+			cpu_to_le32(src->copy_from.src_fadvise_flags);
+		break;
 	default:
 		pr_err("unsupported osd opcode %s\n",
 			ceph_osd_op_name(src->op));
@@ -1947,6 +1959,10 @@ static void setup_request_data(struct ceph_osd_request *req)
 			ceph_osdc_msg_data_add(request_msg,
 					       &op->notify_ack.request_data);
 			break;
+		case CEPH_OSD_OP_COPY_FROM:
+			ceph_osdc_msg_data_add(request_msg,
+					       &op->copy_from.osd_data);
+			break;
 
 		/* reply */
 		case CEPH_OSD_OP_STAT:
@@ -5255,6 +5271,80 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
 }
 EXPORT_SYMBOL(ceph_osdc_writepages);
 
+static int osd_req_op_copy_from_init(struct ceph_osd_request *req,
+				     u64 src_snapid, u64 src_version,
+				     struct ceph_object_id *src_oid,
+				     struct ceph_object_locator *src_oloc,
+				     u32 src_fadvise_flags,
+				     u32 dst_fadvise_flags,
+				     u8 copy_from_flags)
+{
+	struct ceph_osd_req_op *op;
+	struct page **pages;
+	void *p, *end;
+
+	pages = ceph_alloc_page_vector(1, GFP_KERNEL);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
+
+	op = _osd_req_op_init(req, 0, CEPH_OSD_OP_COPY_FROM, dst_fadvise_flags);
+	op->copy_from.snapid = src_snapid;
+	op->copy_from.src_version = src_version;
+	op->copy_from.flags = copy_from_flags;
+	op->copy_from.src_fadvise_flags = src_fadvise_flags;
+
+	p = page_address(pages[0]);
+	end = p + PAGE_SIZE;
+	ceph_encode_string(&p, end, src_oid->name, src_oid->name_len);
+	encode_oloc(&p, end, src_oloc);
+	op->indata_len = PAGE_SIZE - (end - p);
+
+	ceph_osd_data_pages_init(&op->copy_from.osd_data, pages,
+				 op->indata_len, 0, false, true);
+	return 0;
+}
+
+int ceph_osdc_copy_from(struct ceph_osd_client *osdc,
+			u64 src_snapid, u64 src_version,
+			struct ceph_object_id *src_oid,
+			struct ceph_object_locator *src_oloc,
+			u32 src_fadvise_flags,
+			struct ceph_object_id *dst_oid,
+			struct ceph_object_locator *dst_oloc,
+			u32 dst_fadvise_flags,
+			u8 copy_from_flags)
+{
+	struct ceph_osd_request *req;
+	int ret;
+
+	req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	req->r_flags = CEPH_OSD_FLAG_WRITE;
+
+	ceph_oloc_copy(&req->r_t.base_oloc, dst_oloc);
+	ceph_oid_copy(&req->r_t.base_oid, dst_oid);
+
+	ret = osd_req_op_copy_from_init(req, src_snapid, src_version, src_oid,
+					src_oloc, src_fadvise_flags,
+					dst_fadvise_flags, copy_from_flags);
+	if (ret)
+		goto out;
+
+	ret = ceph_osdc_alloc_messages(req, GFP_KERNEL);
+	if (ret)
+		goto out;
+
+	ceph_osdc_start_request(osdc, req, false);
+	ret = ceph_osdc_wait_request(osdc, req);
+
+out:
+	ceph_osdc_put_request(req);
+	return ret;
+}
+EXPORT_SYMBOL(ceph_osdc_copy_from);
+
 int __init ceph_osdc_setup(void)
 {
 	size_t size = sizeof(struct ceph_osd_request) +
-- 
cgit v1.2.3


From 1227daa43bce1318ff6fb54e6cd862b4f60245c7 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 17 Oct 2018 17:20:35 -0700
Subject: pstore/ram: Clarify resource reservation labels

When ramoops reserved a memory region in the kernel, it had an unhelpful
label of "persistent_memory". When reading /proc/iomem, it would be
repeated many times, did not hint that it was ramoops in particular,
and didn't clarify very much about what each was used for:

400000000-407ffffff : Persistent Memory (legacy)
  400000000-400000fff : persistent_memory
  400001000-400001fff : persistent_memory
...
  4000ff000-4000fffff : persistent_memory

Instead, this adds meaningful labels for how the various regions are
being used:

400000000-407ffffff : Persistent Memory (legacy)
  400000000-400000fff : ramoops:dump(0/252)
  400001000-400001fff : ramoops:dump(1/252)
...
  4000fc000-4000fcfff : ramoops:dump(252/252)
  4000fd000-4000fdfff : ramoops:console
  4000fe000-4000fe3ff : ramoops:ftrace(0/3)
  4000fe400-4000fe7ff : ramoops:ftrace(1/3)
  4000fe800-4000febff : ramoops:ftrace(2/3)
  4000fec00-4000fefff : ramoops:ftrace(3/3)
  4000ff000-4000fffff : ramoops:pmsg

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Joel Fernandes (Google) <joel@joelfernandes.org>
Tested-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
Tested-by: Guenter Roeck <groeck@chromium.org>
---
 fs/pstore/ram.c            | 16 +++++++++++++---
 fs/pstore/ram_core.c       | 11 +++++++----
 include/linux/pstore_ram.h |  3 ++-
 3 files changed, 22 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 98e48d1a9776..712960e117fe 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -587,9 +587,16 @@ static int ramoops_init_przs(const char *name,
 		goto fail;
 
 	for (i = 0; i < *cnt; i++) {
+		char *label;
+
+		if (*cnt == 1)
+			label = kasprintf(GFP_KERNEL, "ramoops:%s", name);
+		else
+			label = kasprintf(GFP_KERNEL, "ramoops:%s(%d/%d)",
+					  name, i, *cnt - 1);
 		prz_ar[i] = persistent_ram_new(*paddr, zone_sz, sig,
-						  &cxt->ecc_info,
-						  cxt->memtype, flags);
+					       &cxt->ecc_info,
+					       cxt->memtype, flags, label);
 		if (IS_ERR(prz_ar[i])) {
 			err = PTR_ERR(prz_ar[i]);
 			dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n",
@@ -619,6 +626,8 @@ static int ramoops_init_prz(const char *name,
 			    struct persistent_ram_zone **prz,
 			    phys_addr_t *paddr, size_t sz, u32 sig)
 {
+	char *label;
+
 	if (!sz)
 		return 0;
 
@@ -629,8 +638,9 @@ static int ramoops_init_prz(const char *name,
 		return -ENOMEM;
 	}
 
+	label = kasprintf(GFP_KERNEL, "ramoops:%s", name);
 	*prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info,
-				  cxt->memtype, 0);
+				  cxt->memtype, 0, label);
 	if (IS_ERR(*prz)) {
 		int err = PTR_ERR(*prz);
 
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 951a14edcf51..f200dcefb3b4 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -433,11 +433,11 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size,
 }
 
 static void *persistent_ram_iomap(phys_addr_t start, size_t size,
-		unsigned int memtype)
+		unsigned int memtype, char *label)
 {
 	void *va;
 
-	if (!request_mem_region(start, size, "persistent_ram")) {
+	if (!request_mem_region(start, size, label ?: "ramoops")) {
 		pr_err("request mem region (0x%llx@0x%llx) failed\n",
 			(unsigned long long)size, (unsigned long long)start);
 		return NULL;
@@ -460,7 +460,8 @@ static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size,
 	if (pfn_valid(start >> PAGE_SHIFT))
 		prz->vaddr = persistent_ram_vmap(start, size, memtype);
 	else
-		prz->vaddr = persistent_ram_iomap(start, size, memtype);
+		prz->vaddr = persistent_ram_iomap(start, size, memtype,
+						  prz->label);
 
 	if (!prz->vaddr) {
 		pr_err("%s: Failed to map 0x%llx pages at 0x%llx\n", __func__,
@@ -530,12 +531,13 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
 	prz->ecc_info.par = NULL;
 
 	persistent_ram_free_old(prz);
+	kfree(prz->label);
 	kfree(prz);
 }
 
 struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
 			u32 sig, struct persistent_ram_ecc_info *ecc_info,
-			unsigned int memtype, u32 flags)
+			unsigned int memtype, u32 flags, char *label)
 {
 	struct persistent_ram_zone *prz;
 	int ret = -ENOMEM;
@@ -549,6 +551,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
 	/* Initialize general buffer state. */
 	raw_spin_lock_init(&prz->buffer_lock);
 	prz->flags = flags;
+	prz->label = label;
 
 	ret = persistent_ram_buffer_map(start, size, prz, memtype);
 	if (ret)
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index e6d226464838..602d64725222 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -46,6 +46,7 @@ struct persistent_ram_zone {
 	phys_addr_t paddr;
 	size_t size;
 	void *vaddr;
+	char *label;
 	struct persistent_ram_buffer *buffer;
 	size_t buffer_size;
 	u32 flags;
@@ -65,7 +66,7 @@ struct persistent_ram_zone {
 
 struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
 			u32 sig, struct persistent_ram_ecc_info *ecc_info,
-			unsigned int memtype, u32 flags);
+			unsigned int memtype, u32 flags, char *label);
 void persistent_ram_free(struct persistent_ram_zone *prz);
 void persistent_ram_zap(struct persistent_ram_zone *prz);
 
-- 
cgit v1.2.3


From af033b2aa8a874fd5737fafe90d159136527b5b4 Mon Sep 17 00:00:00 2001
From: Chao Yu <yuchao0@huawei.com>
Date: Thu, 20 Sep 2018 20:05:00 +0800
Subject: f2fs: guarantee journalled quota data by checkpoint

For journalled quota mode, let checkpoint to flush dquot dirty data
and quota file data to guarntee persistence of all quota sysfile in
last checkpoint, by this way, we can avoid corrupting quota sysfile
when encountering SPO.

The implementation is as below:

1. add a global state SBI_QUOTA_NEED_FLUSH to indicate that there is
cached dquot metadata changes in quota subsystem, and later checkpoint
should:
 a) flush dquot metadata into quota file.
 b) flush quota file to storage to keep file usage be consistent.

2. add a global state SBI_QUOTA_NEED_REPAIR to indicate that quota
operation failed due to -EIO or -ENOSPC, so later,
 a) checkpoint will skip syncing dquot metadata.
 b) CP_QUOTA_NEED_FSCK_FLAG will be set in last cp pack to give a
    hint for fsck repairing.

3. add a global state SBI_QUOTA_SKIP_FLUSH, in checkpoint, if quota
data updating is very heavy, it may cause hungtask in block_operation().
To avoid this, if our retry time exceed threshold, let's just skip
flushing and retry in next checkpoint().

Signed-off-by: Weichao Guo <guoweichao@huawei.com>
Signed-off-by: Chao Yu <yuchao0@huawei.com>
[Jaegeuk Kim: avoid warnings and set fsck flag]
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c    |  62 +++++++++++++++++++++++--
 fs/f2fs/data.c          |  16 +++++--
 fs/f2fs/f2fs.h          |  49 ++++++++++++++++----
 fs/f2fs/file.c          |  31 ++++++++++---
 fs/f2fs/inline.c        |   4 +-
 fs/f2fs/inode.c         |  11 +++--
 fs/f2fs/namei.c         |   4 --
 fs/f2fs/recovery.c      |  43 +++++++++++++++--
 fs/f2fs/super.c         | 121 +++++++++++++++++++++++++++++++++++++++++++-----
 include/linux/f2fs_fs.h |   1 +
 10 files changed, 294 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index eb6ac79640f8..9c28ea439e0b 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1079,6 +1079,21 @@ static void __prepare_cp_block(struct f2fs_sb_info *sbi)
 	ckpt->next_free_nid = cpu_to_le32(last_nid);
 }
 
+static bool __need_flush_quota(struct f2fs_sb_info *sbi)
+{
+	if (!is_journalled_quota(sbi))
+		return false;
+	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
+		return false;
+	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
+		return false;
+	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_FLUSH))
+		return true;
+	if (get_pages(sbi, F2FS_DIRTY_QDATA))
+		return true;
+	return false;
+}
+
 /*
  * Freeze all the FS-operations for checkpoint.
  */
@@ -1090,12 +1105,36 @@ static int block_operations(struct f2fs_sb_info *sbi)
 		.for_reclaim = 0,
 	};
 	struct blk_plug plug;
-	int err = 0;
+	int err = 0, cnt = 0;
 
 	blk_start_plug(&plug);
 
-retry_flush_dents:
+retry_flush_quotas:
+	if (__need_flush_quota(sbi)) {
+		int locked;
+
+		if (++cnt > DEFAULT_RETRY_QUOTA_FLUSH_COUNT) {
+			set_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
+			f2fs_lock_all(sbi);
+			goto retry_flush_dents;
+		}
+		clear_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+
+		/* only failed during mount/umount/freeze/quotactl */
+		locked = down_read_trylock(&sbi->sb->s_umount);
+		f2fs_quota_sync(sbi->sb, -1);
+		if (locked)
+			up_read(&sbi->sb->s_umount);
+	}
+
 	f2fs_lock_all(sbi);
+	if (__need_flush_quota(sbi)) {
+		f2fs_unlock_all(sbi);
+		cond_resched();
+		goto retry_flush_quotas;
+	}
+
+retry_flush_dents:
 	/* write all the dirty dentry pages */
 	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
 		f2fs_unlock_all(sbi);
@@ -1103,7 +1142,7 @@ retry_flush_dents:
 		if (err)
 			goto out;
 		cond_resched();
-		goto retry_flush_dents;
+		goto retry_flush_quotas;
 	}
 
 	/*
@@ -1112,6 +1151,12 @@ retry_flush_dents:
 	 */
 	down_write(&sbi->node_change);
 
+	if (__need_flush_quota(sbi)) {
+		up_write(&sbi->node_change);
+		f2fs_unlock_all(sbi);
+		goto retry_flush_quotas;
+	}
+
 	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
 		up_write(&sbi->node_change);
 		f2fs_unlock_all(sbi);
@@ -1119,7 +1164,7 @@ retry_flush_dents:
 		if (err)
 			goto out;
 		cond_resched();
-		goto retry_flush_dents;
+		goto retry_flush_quotas;
 	}
 
 retry_flush_nodes:
@@ -1215,6 +1260,14 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	else
 		__clear_ckpt_flags(ckpt, CP_DISABLED_FLAG);
 
+	if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
+		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+	else
+		__clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+
+	if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
+		__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
+
 	/* set this flag to activate crc|cp_ver for recovery */
 	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
 	__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
@@ -1422,6 +1475,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 
 	clear_sbi_flag(sbi, SBI_IS_DIRTY);
 	clear_sbi_flag(sbi, SBI_NEED_CP);
+	clear_sbi_flag(sbi, SBI_QUOTA_SKIP_FLUSH);
 	sbi->unusable_block_count = 0;
 	__set_cp_next_pack(sbi);
 
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 00b37a1bd15c..106f116466bf 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -46,7 +46,7 @@ static bool __is_cp_guaranteed(struct page *page)
 			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
 			S_ISDIR(inode->i_mode) ||
 			(S_ISREG(inode->i_mode) &&
-			is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
+			(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
 			is_cold_data(page))
 		return true;
 	return false;
@@ -1766,6 +1766,8 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
 		return true;
 	if (S_ISDIR(inode->i_mode))
 		return true;
+	if (IS_NOQUOTA(inode))
+		return true;
 	if (f2fs_is_atomic_file(inode))
 		return true;
 	if (fio) {
@@ -2016,7 +2018,7 @@ out:
 	}
 
 	unlock_page(page);
-	if (!S_ISDIR(inode->i_mode))
+	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode))
 		f2fs_balance_fs(sbi, need_balance_fs);
 
 	if (unlikely(f2fs_cp_error(sbi))) {
@@ -2207,6 +2209,8 @@ static inline bool __should_serialize_io(struct inode *inode,
 {
 	if (!S_ISREG(inode->i_mode))
 		return false;
+	if (IS_NOQUOTA(inode))
+		return false;
 	if (wbc->sync_mode != WB_SYNC_ALL)
 		return true;
 	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
@@ -2236,7 +2240,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping,
 	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
 		goto skip_write;
 
-	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
+	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
+			wbc->sync_mode == WB_SYNC_NONE &&
 			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
 			f2fs_available_free_memory(sbi, DIRTY_DENTS))
 		goto skip_write;
@@ -2301,7 +2306,7 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
 		down_write(&F2FS_I(inode)->i_mmap_sem);
 
 		truncate_pagecache(inode, i_size);
-		f2fs_truncate_blocks(inode, i_size, true);
+		f2fs_truncate_blocks(inode, i_size, true, true);
 
 		up_write(&F2FS_I(inode)->i_mmap_sem);
 		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -2440,7 +2445,8 @@ repeat:
 	if (err)
 		goto fail;
 
-	if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
+	if (need_balance && !IS_NOQUOTA(inode) &&
+			has_not_enough_free_secs(sbi, 0, 0)) {
 		unlock_page(page);
 		f2fs_balance_fs(sbi, true);
 		lock_page(page);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 5c80eca194b5..f447cbc2295f 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -531,6 +531,9 @@ enum {
 
 #define DEFAULT_RETRY_IO_COUNT	8	/* maximum retry read IO count */
 
+/* maximum retry quota flush count */
+#define DEFAULT_RETRY_QUOTA_FLUSH_COUNT		8
+
 #define F2FS_LINK_MAX	0xffffffff	/* maximum link count per file */
 
 #define MAX_DIR_RA_PAGES	4	/* maximum ra pages of dir */
@@ -1099,6 +1102,9 @@ enum {
 	SBI_IS_SHUTDOWN,			/* shutdown by ioctl */
 	SBI_IS_RECOVERED,			/* recovered orphan/data */
 	SBI_CP_DISABLED,			/* CP was disabled last mount */
+	SBI_QUOTA_NEED_FLUSH,			/* need to flush quota info in CP */
+	SBI_QUOTA_SKIP_FLUSH,			/* skip flushing quota in current CP */
+	SBI_QUOTA_NEED_REPAIR,			/* quota file may be corrupted */
 };
 
 enum {
@@ -1923,12 +1929,18 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 {
 	block_t	valid_block_count;
 	unsigned int valid_node_count;
-	bool quota = inode && !is_inode;
+	int err;
 
-	if (quota) {
-		int ret = dquot_reserve_block(inode, 1);
-		if (ret)
-			return ret;
+	if (is_inode) {
+		if (inode) {
+			err = dquot_alloc_inode(inode);
+			if (err)
+				return err;
+		}
+	} else {
+		err = dquot_reserve_block(inode, 1);
+		if (err)
+			return err;
 	}
 
 	if (time_to_inject(sbi, FAULT_BLOCK)) {
@@ -1972,8 +1984,12 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 	return 0;
 
 enospc:
-	if (quota)
+	if (is_inode) {
+		if (inode)
+			dquot_free_inode(inode);
+	} else {
 		dquot_release_reservation_block(inode, 1);
+	}
 	return -ENOSPC;
 }
 
@@ -1994,7 +2010,9 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi,
 
 	spin_unlock(&sbi->stat_lock);
 
-	if (!is_inode)
+	if (is_inode)
+		dquot_free_inode(inode);
+	else
 		f2fs_i_blocks_write(inode, 1, false, true);
 }
 
@@ -2782,7 +2800,8 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
  */
 int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
 void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
-int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
+int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
+							bool buf_write);
 int f2fs_truncate(struct inode *inode);
 int f2fs_getattr(const struct path *path, struct kstat *stat,
 			u32 request_mask, unsigned int flags);
@@ -2870,6 +2889,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode)
 int f2fs_inode_dirtied(struct inode *inode, bool sync);
 void f2fs_inode_synced(struct inode *inode);
 int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
+int f2fs_quota_sync(struct super_block *sb, int type);
 void f2fs_quota_off_umount(struct super_block *sb);
 int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
 int f2fs_sync_fs(struct super_block *sb, int sync);
@@ -3564,3 +3584,16 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
 #endif
 
 #endif
+
+static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
+{
+#ifdef CONFIG_QUOTA
+	if (f2fs_sb_has_quota_ino(sbi->sb))
+		return true;
+	if (F2FS_OPTION(sbi).s_qf_names[USRQUOTA] ||
+		F2FS_OPTION(sbi).s_qf_names[GRPQUOTA] ||
+		F2FS_OPTION(sbi).s_qf_names[PRJQUOTA])
+		return true;
+#endif
+	return false;
+}
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 543c742f8bd7..971463e0589e 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -586,7 +586,8 @@ truncate_out:
 	return 0;
 }
 
-int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
+int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
+							bool buf_write)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct dnode_of_data dn;
@@ -594,6 +595,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
 	int count = 0, err = 0;
 	struct page *ipage;
 	bool truncate_page = false;
+	int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO;
 
 	trace_f2fs_truncate_blocks_enter(inode, from);
 
@@ -603,7 +605,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
 		goto free_partial;
 
 	if (lock)
-		f2fs_lock_op(sbi);
+		__do_map_lock(sbi, flag, true);
 
 	ipage = f2fs_get_node_page(sbi, inode->i_ino);
 	if (IS_ERR(ipage)) {
@@ -641,7 +643,7 @@ free_next:
 	err = f2fs_truncate_inode_blocks(inode, free_from);
 out:
 	if (lock)
-		f2fs_unlock_op(sbi);
+		__do_map_lock(sbi, flag, false);
 free_partial:
 	/* lastly zero out the first data page */
 	if (!err)
@@ -676,7 +678,7 @@ int f2fs_truncate(struct inode *inode)
 			return err;
 	}
 
-	err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
+	err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
 	if (err)
 		return err;
 
@@ -785,9 +787,24 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
 		!uid_eq(attr->ia_uid, inode->i_uid)) ||
 		(attr->ia_valid & ATTR_GID &&
 		!gid_eq(attr->ia_gid, inode->i_gid))) {
+		f2fs_lock_op(F2FS_I_SB(inode));
 		err = dquot_transfer(inode, attr);
-		if (err)
+		if (err) {
+			set_sbi_flag(F2FS_I_SB(inode),
+					SBI_QUOTA_NEED_REPAIR);
+			f2fs_unlock_op(F2FS_I_SB(inode));
 			return err;
+		}
+		/*
+		 * update uid/gid under lock_op(), so that dquot and inode can
+		 * be updated atomically.
+		 */
+		if (attr->ia_valid & ATTR_UID)
+			inode->i_uid = attr->ia_uid;
+		if (attr->ia_valid & ATTR_GID)
+			inode->i_gid = attr->ia_gid;
+		f2fs_mark_inode_dirty_sync(inode, true);
+		f2fs_unlock_op(F2FS_I_SB(inode));
 	}
 
 	if (attr->ia_valid & ATTR_SIZE) {
@@ -1242,7 +1259,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
 	new_size = i_size_read(inode) - len;
 	truncate_pagecache(inode, new_size);
 
-	ret = f2fs_truncate_blocks(inode, new_size, true);
+	ret = f2fs_truncate_blocks(inode, new_size, true, false);
 	up_write(&F2FS_I(inode)->i_mmap_sem);
 	if (!ret)
 		f2fs_i_size_write(inode, new_size);
@@ -1427,7 +1444,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
 	f2fs_balance_fs(sbi, true);
 
 	down_write(&F2FS_I(inode)->i_mmap_sem);
-	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
+	ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
 	up_write(&F2FS_I(inode)->i_mmap_sem);
 	if (ret)
 		return ret;
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 425d740f87fd..cb31a719b048 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -298,7 +298,7 @@ process_inline:
 		clear_inode_flag(inode, FI_INLINE_DATA);
 		f2fs_put_page(ipage, 1);
 	} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
-		if (f2fs_truncate_blocks(inode, 0, false))
+		if (f2fs_truncate_blocks(inode, 0, false, false))
 			return false;
 		goto process_inline;
 	}
@@ -470,7 +470,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
 	return 0;
 punch_dentry_pages:
 	truncate_inode_pages(&dir->i_data, 0);
-	f2fs_truncate_blocks(dir, 0, false);
+	f2fs_truncate_blocks(dir, 0, false, false);
 	f2fs_remove_dirty_inode(dir);
 	return err;
 }
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 57a7a15239d6..91ceee0ed4c4 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -654,7 +654,11 @@ void f2fs_evict_inode(struct inode *inode)
 	if (inode->i_nlink || is_bad_inode(inode))
 		goto no_delete;
 
-	dquot_initialize(inode);
+	err = dquot_initialize(inode);
+	if (err) {
+		err = 0;
+		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+	}
 
 	f2fs_remove_ino_entry(sbi, inode->i_ino, APPEND_INO);
 	f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO);
@@ -686,9 +690,10 @@ retry:
 		goto retry;
 	}
 
-	if (err)
+	if (err) {
 		f2fs_update_inode_page(inode);
-	dquot_free_inode(inode);
+		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+	}
 	sb_end_intwrite(inode->i_sb);
 no_delete:
 	dquot_drop(inode);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 54295b5c1822..99299ede7429 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -72,10 +72,6 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
 	if (err)
 		goto fail_drop;
 
-	err = dquot_alloc_inode(inode);
-	if (err)
-		goto fail_drop;
-
 	set_inode_flag(inode, FI_NEW_INODE);
 
 	/* If the directory encrypted, then we should encrypt the inode. */
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 875d2e205791..df2123759ac7 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -195,6 +195,33 @@ out:
 	return err;
 }
 
+static int recover_quota_data(struct inode *inode, struct page *page)
+{
+	struct f2fs_inode *raw = F2FS_INODE(page);
+	struct iattr attr;
+	uid_t i_uid = le32_to_cpu(raw->i_uid);
+	gid_t i_gid = le32_to_cpu(raw->i_gid);
+	int err;
+
+	memset(&attr, 0, sizeof(attr));
+
+	attr.ia_uid = make_kuid(inode->i_sb->s_user_ns, i_uid);
+	attr.ia_gid = make_kgid(inode->i_sb->s_user_ns, i_gid);
+
+	if (!uid_eq(attr.ia_uid, inode->i_uid))
+		attr.ia_valid |= ATTR_UID;
+	if (!gid_eq(attr.ia_gid, inode->i_gid))
+		attr.ia_valid |= ATTR_GID;
+
+	if (!attr.ia_valid)
+		return 0;
+
+	err = dquot_transfer(inode, &attr);
+	if (err)
+		set_sbi_flag(F2FS_I_SB(inode), SBI_QUOTA_NEED_REPAIR);
+	return err;
+}
+
 static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
 {
 	if (ri->i_inline & F2FS_PIN_FILE)
@@ -207,12 +234,18 @@ static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
 		clear_inode_flag(inode, FI_DATA_EXIST);
 }
 
-static void recover_inode(struct inode *inode, struct page *page)
+static int recover_inode(struct inode *inode, struct page *page)
 {
 	struct f2fs_inode *raw = F2FS_INODE(page);
 	char *name;
+	int err;
 
 	inode->i_mode = le16_to_cpu(raw->i_mode);
+
+	err = recover_quota_data(inode, page);
+	if (err)
+		return err;
+
 	i_uid_write(inode, le32_to_cpu(raw->i_uid));
 	i_gid_write(inode, le32_to_cpu(raw->i_gid));
 
@@ -254,6 +287,7 @@ static void recover_inode(struct inode *inode, struct page *page)
 	f2fs_msg(inode->i_sb, KERN_NOTICE,
 		"recover_inode: ino = %x, name = %s, inline = %x",
 			ino_of_node(page), name, raw->i_inline);
+	return 0;
 }
 
 static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
@@ -622,8 +656,11 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
 		 * In this case, we can lose the latest inode(x).
 		 * So, call recover_inode for the inode update.
 		 */
-		if (IS_INODE(page))
-			recover_inode(entry->inode, page);
+		if (IS_INODE(page)) {
+			err = recover_inode(entry->inode, page);
+			if (err)
+				break;
+		}
 		if (entry->last_dentry == blkaddr) {
 			err = recover_dentry(entry->inode, page, dir_list);
 			if (err) {
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index f7814bb26a13..af58b2cc21b8 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1711,6 +1711,7 @@ repeat:
 				congestion_wait(BLK_RW_ASYNC, HZ/50);
 				goto repeat;
 			}
+			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
 			return PTR_ERR(page);
 		}
 
@@ -1722,6 +1723,7 @@ repeat:
 		}
 		if (unlikely(!PageUptodate(page))) {
 			f2fs_put_page(page, 1);
+			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
 			return -EIO;
 		}
 
@@ -1763,6 +1765,7 @@ retry:
 				congestion_wait(BLK_RW_ASYNC, HZ/50);
 				goto retry;
 			}
+			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
 			break;
 		}
 
@@ -1799,6 +1802,12 @@ static qsize_t *f2fs_get_reserved_space(struct inode *inode)
 
 static int f2fs_quota_on_mount(struct f2fs_sb_info *sbi, int type)
 {
+	if (is_set_ckpt_flags(sbi, CP_QUOTA_NEED_FSCK_FLAG)) {
+		f2fs_msg(sbi->sb, KERN_ERR,
+			"quota sysfile may be corrupted, skip loading it");
+		return 0;
+	}
+
 	return dquot_quota_on_mount(sbi->sb, F2FS_OPTION(sbi).s_qf_names[type],
 					F2FS_OPTION(sbi).s_jquota_fmt, type);
 }
@@ -1869,7 +1878,14 @@ static int f2fs_enable_quotas(struct super_block *sb)
 		test_opt(F2FS_SB(sb), PRJQUOTA),
 	};
 
-	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
+	if (is_set_ckpt_flags(F2FS_SB(sb), CP_QUOTA_NEED_FSCK_FLAG)) {
+		f2fs_msg(sb, KERN_ERR,
+			"quota file may be corrupted, skip loading it");
+		return 0;
+	}
+
+	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
+
 	for (type = 0; type < MAXQUOTAS; type++) {
 		qf_inum = f2fs_qf_ino(sb, type);
 		if (qf_inum) {
@@ -1883,6 +1899,8 @@ static int f2fs_enable_quotas(struct super_block *sb)
 					"fsck to fix.", type, err);
 				for (type--; type >= 0; type--)
 					dquot_quota_off(sb, type);
+				set_sbi_flag(F2FS_SB(sb),
+						SBI_QUOTA_NEED_REPAIR);
 				return err;
 			}
 		}
@@ -1890,35 +1908,51 @@ static int f2fs_enable_quotas(struct super_block *sb)
 	return 0;
 }
 
-static int f2fs_quota_sync(struct super_block *sb, int type)
+int f2fs_quota_sync(struct super_block *sb, int type)
 {
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
 	struct quota_info *dqopt = sb_dqopt(sb);
 	int cnt;
 	int ret;
 
 	ret = dquot_writeback_dquots(sb, type);
 	if (ret)
-		return ret;
+		goto out;
 
 	/*
 	 * Now when everything is written we can discard the pagecache so
 	 * that userspace sees the changes.
 	 */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		struct address_space *mapping;
+
 		if (type != -1 && cnt != type)
 			continue;
 		if (!sb_has_quota_active(sb, cnt))
 			continue;
 
-		ret = filemap_write_and_wait(dqopt->files[cnt]->i_mapping);
+		mapping = dqopt->files[cnt]->i_mapping;
+
+		ret = filemap_fdatawrite(mapping);
 		if (ret)
-			return ret;
+			goto out;
+
+		/* if we are using journalled quota */
+		if (is_journalled_quota(sbi))
+			continue;
+
+		ret = filemap_fdatawait(mapping);
+		if (ret)
+			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
 
 		inode_lock(dqopt->files[cnt]);
 		truncate_inode_pages(&dqopt->files[cnt]->i_data, 0);
 		inode_unlock(dqopt->files[cnt]);
 	}
-	return 0;
+out:
+	if (ret)
+		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+	return ret;
 }
 
 static int f2fs_quota_on(struct super_block *sb, int type, int format_id,
@@ -1986,7 +2020,7 @@ void f2fs_quota_off_umount(struct super_block *sb)
 				"Fail to turn off disk quota "
 				"(type: %d, err: %d, ret:%d), Please "
 				"run fsck to fix it.", type, err, ret);
-			set_sbi_flag(F2FS_SB(sb), SBI_NEED_FSCK);
+			set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
 		}
 	}
 }
@@ -2003,6 +2037,61 @@ static void f2fs_truncate_quota_inode_pages(struct super_block *sb)
 	}
 }
 
+static int f2fs_dquot_commit(struct dquot *dquot)
+{
+	int ret;
+
+	ret = dquot_commit(dquot);
+	if (ret < 0)
+		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
+	return ret;
+}
+
+static int f2fs_dquot_acquire(struct dquot *dquot)
+{
+	int ret;
+
+	ret = dquot_acquire(dquot);
+	if (ret < 0)
+		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
+
+	return ret;
+}
+
+static int f2fs_dquot_release(struct dquot *dquot)
+{
+	int ret;
+
+	ret = dquot_release(dquot);
+	if (ret < 0)
+		set_sbi_flag(F2FS_SB(dquot->dq_sb), SBI_QUOTA_NEED_REPAIR);
+	return ret;
+}
+
+static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot)
+{
+	struct super_block *sb = dquot->dq_sb;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	int ret;
+
+	ret = dquot_mark_dquot_dirty(dquot);
+
+	/* if we are using journalled quota */
+	if (is_journalled_quota(sbi))
+		set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH);
+
+	return ret;
+}
+
+static int f2fs_dquot_commit_info(struct super_block *sb, int type)
+{
+	int ret;
+
+	ret = dquot_commit_info(sb, type);
+	if (ret < 0)
+		set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
+	return ret;
+}
 
 static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
 {
@@ -2012,11 +2101,11 @@ static int f2fs_get_projid(struct inode *inode, kprojid_t *projid)
 
 static const struct dquot_operations f2fs_quota_operations = {
 	.get_reserved_space = f2fs_get_reserved_space,
-	.write_dquot	= dquot_commit,
-	.acquire_dquot	= dquot_acquire,
-	.release_dquot	= dquot_release,
-	.mark_dirty	= dquot_mark_dquot_dirty,
-	.write_info	= dquot_commit_info,
+	.write_dquot	= f2fs_dquot_commit,
+	.acquire_dquot	= f2fs_dquot_acquire,
+	.release_dquot	= f2fs_dquot_release,
+	.mark_dirty	= f2fs_dquot_mark_dquot_dirty,
+	.write_info	= f2fs_dquot_commit_info,
 	.alloc_dquot	= dquot_alloc,
 	.destroy_dquot	= dquot_destroy,
 	.get_projid	= f2fs_get_projid,
@@ -2034,6 +2123,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = {
 	.get_nextdqblk	= dquot_get_next_dqblk,
 };
 #else
+int f2fs_quota_sync(struct super_block *sb, int type)
+{
+	return 0;
+}
+
 void f2fs_quota_off_umount(struct super_block *sb)
 {
 }
@@ -3104,6 +3198,9 @@ try_onemore:
 		goto free_meta_inode;
 	}
 
+	if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
+		set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
+
 	/* Initialize device list */
 	err = f2fs_scan_devices(sbi);
 	if (err) {
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 8b9c7dc0260c..d7711048ef93 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -117,6 +117,7 @@ struct f2fs_super_block {
  * For checkpoint
  */
 #define CP_DISABLED_FLAG		0x00001000
+#define CP_QUOTA_NEED_FSCK_FLAG		0x00000800
 #define CP_LARGE_NAT_BITMAP_FLAG	0x00000400
 #define CP_NOCRC_RECOVERY_FLAG	0x00000200
 #define CP_TRIMMED_FLAG		0x00000100
-- 
cgit v1.2.3


From 876dcf2f3aaa0f68d437b368b93a4c4b81521191 Mon Sep 17 00:00:00 2001
From: Olivier Brunel <jjk@jjacky.com>
Date: Sat, 20 Oct 2018 19:39:56 +0200
Subject: umh: Add command line to user mode helpers

User mode helpers were spawned without a command line, and because
an empty command line is used by many tools to identify processes as
kernel threads, this could cause some issues.

Notably during killing spree on shutdown, since such helper would then
be skipped (i.e. not killed) which would result in the process remaining
alive, and thus preventing unmouting of the rootfs (as experienced with
the bpfilter umh).

Fixes: 449325b52b7a ("umh: introduce fork_usermode_blob() helper")
Signed-off-by: Olivier Brunel <jjk@jjacky.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/umh.h |  1 +
 kernel/umh.c        | 16 ++++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/umh.h b/include/linux/umh.h
index 5c812acbb80a..235f51b62c71 100644
--- a/include/linux/umh.h
+++ b/include/linux/umh.h
@@ -44,6 +44,7 @@ struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
 			  int (*init)(struct subprocess_info *info, struct cred *new),
 			  void (*cleanup)(struct subprocess_info *), void *data);
 struct umh_info {
+	const char *cmdline;
 	struct file *pipe_to_umh;
 	struct file *pipe_from_umh;
 	pid_t pid;
diff --git a/kernel/umh.c b/kernel/umh.c
index c449858946af..0baa672e023c 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -405,11 +405,19 @@ struct subprocess_info *call_usermodehelper_setup_file(struct file *file,
 		void (*cleanup)(struct subprocess_info *info), void *data)
 {
 	struct subprocess_info *sub_info;
+	struct umh_info *info = data;
+	const char *cmdline = (info->cmdline) ? info->cmdline : "usermodehelper";
 
 	sub_info = kzalloc(sizeof(struct subprocess_info), GFP_KERNEL);
 	if (!sub_info)
 		return NULL;
 
+	sub_info->argv = argv_split(GFP_KERNEL, cmdline, NULL);
+	if (!sub_info->argv) {
+		kfree(sub_info);
+		return NULL;
+	}
+
 	INIT_WORK(&sub_info->work, call_usermodehelper_exec_work);
 	sub_info->path = "none";
 	sub_info->file = file;
@@ -458,10 +466,11 @@ static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
 	return 0;
 }
 
-static void umh_save_pid(struct subprocess_info *info)
+static void umh_clean_and_save_pid(struct subprocess_info *info)
 {
 	struct umh_info *umh_info = info->data;
 
+	argv_free(info->argv);
 	umh_info->pid = info->pid;
 }
 
@@ -471,6 +480,9 @@ static void umh_save_pid(struct subprocess_info *info)
  * @len: length of the blob
  * @info: information about usermode process (shouldn't be NULL)
  *
+ * If info->cmdline is set it will be used as command line for the
+ * user process, else "usermodehelper" is used.
+ *
  * Returns either negative error or zero which indicates success
  * in executing a blob of bytes as a usermode process. In such
  * case 'struct umh_info *info' is populated with two pipes
@@ -500,7 +512,7 @@ int fork_usermode_blob(void *data, size_t len, struct umh_info *info)
 
 	err = -ENOMEM;
 	sub_info = call_usermodehelper_setup_file(file, umh_pipe_setup,
-						  umh_save_pid, info);
+						  umh_clean_and_save_pid, info);
 	if (!sub_info)
 		goto out;
 
-- 
cgit v1.2.3


From 39b27ad9c71cb72b07c9cd85191a8fe723e5b21a Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Tue, 7 Aug 2018 18:11:22 +0200
Subject: mfd: sec-core: Add SPDX license identifiers

Replace GPL v2.0+ license statements with SPDX license identifiers.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/sec-core.c              | 16 ++++------------
 drivers/mfd/sec-irq.c               | 16 ++++------------
 include/linux/mfd/samsung/core.h    | 11 ++---------
 include/linux/mfd/samsung/irq.h     | 10 ++--------
 include/linux/mfd/samsung/rtc.h     | 15 ++-------------
 include/linux/mfd/samsung/s2mpa01.h |  7 +------
 include/linux/mfd/samsung/s2mps11.h |  9 +--------
 include/linux/mfd/samsung/s2mps13.h | 14 +-------------
 include/linux/mfd/samsung/s2mps14.h | 14 +-------------
 include/linux/mfd/samsung/s2mps15.h | 11 +----------
 include/linux/mfd/samsung/s2mpu02.h | 14 +-------------
 include/linux/mfd/samsung/s5m8763.h | 10 ++--------
 include/linux/mfd/samsung/s5m8767.h | 10 ++--------
 13 files changed, 24 insertions(+), 133 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/sec-core.c b/drivers/mfd/sec-core.c
index 9613b4257302..e0835c9df7a1 100644
--- a/drivers/mfd/sec-core.c
+++ b/drivers/mfd/sec-core.c
@@ -1,15 +1,7 @@
-/*
- * sec-core.c
- *
- * Copyright (c) 2012 Samsung Electronics Co., Ltd
- *              http://www.samsung.com
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright (c) 2012 Samsung Electronics Co., Ltd
+//              http://www.samsung.com
 
 #include <linux/module.h>
 #include <linux/moduleparam.h>
diff --git a/drivers/mfd/sec-irq.c b/drivers/mfd/sec-irq.c
index 5eb59c233d52..295d24d4501d 100644
--- a/drivers/mfd/sec-irq.c
+++ b/drivers/mfd/sec-irq.c
@@ -1,15 +1,7 @@
-/*
- * sec-irq.c
- *
- * Copyright (c) 2011-2014 Samsung Electronics Co., Ltd
- *              http://www.samsung.com
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright (c) 2011-2014 Samsung Electronics Co., Ltd
+//              http://www.samsung.com
 
 #include <linux/device.h>
 #include <linux/interrupt.h>
diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h
index 28f4ae76271d..3ca17eb89aa2 100644
--- a/include/linux/mfd/samsung/core.h
+++ b/include/linux/mfd/samsung/core.h
@@ -1,14 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
- * core.h
- *
- * copyright (c) 2011 Samsung Electronics Co., Ltd
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd
  *              http://www.samsung.com
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
  */
 
 #ifndef __LINUX_MFD_SEC_CORE_H
diff --git a/include/linux/mfd/samsung/irq.h b/include/linux/mfd/samsung/irq.h
index 667aa40486dd..6cfe4201a106 100644
--- a/include/linux/mfd/samsung/irq.h
+++ b/include/linux/mfd/samsung/irq.h
@@ -1,13 +1,7 @@
-/* irq.h
- *
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
  * Copyright (c) 2012 Samsung Electronics Co., Ltd
  *              http://www.samsung.com
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
  */
 
 #ifndef __LINUX_MFD_SEC_IRQ_H
diff --git a/include/linux/mfd/samsung/rtc.h b/include/linux/mfd/samsung/rtc.h
index 9ed2871ea335..0204decfc9aa 100644
--- a/include/linux/mfd/samsung/rtc.h
+++ b/include/linux/mfd/samsung/rtc.h
@@ -1,18 +1,7 @@
-/* rtc.h
- *
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
  * Copyright (c) 2011-2014 Samsung Electronics Co., Ltd
  *              http://www.samsung.com
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 #ifndef __LINUX_MFD_SEC_RTC_H
diff --git a/include/linux/mfd/samsung/s2mpa01.h b/include/linux/mfd/samsung/s2mpa01.h
index 2766108bca2f..0762e9de6f2f 100644
--- a/include/linux/mfd/samsung/s2mpa01.h
+++ b/include/linux/mfd/samsung/s2mpa01.h
@@ -1,12 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * Copyright (c) 2013 Samsung Electronics Co., Ltd
  *		http://www.samsung.com
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
  */
 
 #ifndef __LINUX_MFD_S2MPA01_H
diff --git a/include/linux/mfd/samsung/s2mps11.h b/include/linux/mfd/samsung/s2mps11.h
index 2c14eeca46f0..6e7668a389a1 100644
--- a/include/linux/mfd/samsung/s2mps11.h
+++ b/include/linux/mfd/samsung/s2mps11.h
@@ -1,14 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
- * s2mps11.h
- *
  * Copyright (c) 2012 Samsung Electronics Co., Ltd
  *              http://www.samsung.com
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
  */
 
 #ifndef __LINUX_MFD_S2MPS11_H
diff --git a/include/linux/mfd/samsung/s2mps13.h b/include/linux/mfd/samsung/s2mps13.h
index 239e977ba45d..b96d8a11dcd3 100644
--- a/include/linux/mfd/samsung/s2mps13.h
+++ b/include/linux/mfd/samsung/s2mps13.h
@@ -1,19 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
- * s2mps13.h
- *
  * Copyright (c) 2014 Samsung Electronics Co., Ltd
  *              http://www.samsung.com
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 #ifndef __LINUX_MFD_S2MPS13_H
diff --git a/include/linux/mfd/samsung/s2mps14.h b/include/linux/mfd/samsung/s2mps14.h
index c92f4782afb5..f4afa0cfc24f 100644
--- a/include/linux/mfd/samsung/s2mps14.h
+++ b/include/linux/mfd/samsung/s2mps14.h
@@ -1,19 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
- * s2mps14.h
- *
  * Copyright (c) 2014 Samsung Electronics Co., Ltd
  *              http://www.samsung.com
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 #ifndef __LINUX_MFD_S2MPS14_H
diff --git a/include/linux/mfd/samsung/s2mps15.h b/include/linux/mfd/samsung/s2mps15.h
index 36d35287c3c0..eac6bf74b72e 100644
--- a/include/linux/mfd/samsung/s2mps15.h
+++ b/include/linux/mfd/samsung/s2mps15.h
@@ -1,16 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * Copyright (c) 2015 Samsung Electronics Co., Ltd
  *              http://www.samsung.com
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #ifndef __LINUX_MFD_S2MPS15_H
diff --git a/include/linux/mfd/samsung/s2mpu02.h b/include/linux/mfd/samsung/s2mpu02.h
index 47ae9bc583a7..76cd5380cf0f 100644
--- a/include/linux/mfd/samsung/s2mpu02.h
+++ b/include/linux/mfd/samsung/s2mpu02.h
@@ -1,19 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
- * s2mpu02.h
- *
  * Copyright (c) 2014 Samsung Electronics Co., Ltd
  *              http://www.samsung.com
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 #ifndef __LINUX_MFD_S2MPU02_H
diff --git a/include/linux/mfd/samsung/s5m8763.h b/include/linux/mfd/samsung/s5m8763.h
index e025418e5589..c534f086ca16 100644
--- a/include/linux/mfd/samsung/s5m8763.h
+++ b/include/linux/mfd/samsung/s5m8763.h
@@ -1,13 +1,7 @@
-/*  s5m8763.h
- *
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
  * Copyright (c) 2011 Samsung Electronics Co., Ltd
  *              http://www.samsung.com
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
  */
 
 #ifndef __LINUX_MFD_S5M8763_H
diff --git a/include/linux/mfd/samsung/s5m8767.h b/include/linux/mfd/samsung/s5m8767.h
index 243b58fec33d..704f8d80e96e 100644
--- a/include/linux/mfd/samsung/s5m8767.h
+++ b/include/linux/mfd/samsung/s5m8767.h
@@ -1,13 +1,7 @@
-/*  s5m8767.h
- *
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
  * Copyright (c) 2011 Samsung Electronics Co., Ltd
  *              http://www.samsung.com
- *
- *  This program is free software; you can redistribute  it and/or modify it
- *  under  the terms of  the GNU General  Public License as published by the
- *  Free Software Foundation;  either version 2 of the  License, or (at your
- *  option) any later version.
- *
  */
 
 #ifndef __LINUX_MFD_S5M8767_H
-- 
cgit v1.2.3


From d7d8d7a2405f8b6296660648e893fc3442269d81 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzk@kernel.org>
Date: Tue, 7 Aug 2018 18:11:23 +0200
Subject: mfd: maxim: Add SPDX license identifiers

Replace GPL v2.0+ license statements with SPDX license identifiers.

Signed-off-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/max14577.c               | 28 +++++++++-------------------
 drivers/mfd/max77686.c               | 32 +++++++++-----------------------
 drivers/mfd/max77693.c               | 34 ++++++++++------------------------
 drivers/mfd/max77843.c               | 19 +++++++------------
 drivers/mfd/max8997-irq.c            | 30 ++++++++----------------------
 drivers/mfd/max8997.c                | 30 ++++++++----------------------
 drivers/mfd/max8998-irq.c            | 18 ++++++------------
 drivers/mfd/max8998.c                | 28 +++++++---------------------
 include/linux/mfd/max14577-private.h | 11 +----------
 include/linux/mfd/max14577.h         | 11 +----------
 include/linux/mfd/max77686-private.h | 15 +--------------
 include/linux/mfd/max77686.h         | 15 +--------------
 include/linux/mfd/max77693-common.h  |  6 +-----
 include/linux/mfd/max77693-private.h | 15 +--------------
 include/linux/mfd/max77693.h         | 15 +--------------
 include/linux/mfd/max77843-private.h |  6 +-----
 include/linux/mfd/max8997-private.h  | 15 +--------------
 include/linux/mfd/max8997.h          | 15 +--------------
 include/linux/mfd/max8998-private.h  | 15 +--------------
 include/linux/mfd/max8998.h          | 15 +--------------
 20 files changed, 76 insertions(+), 297 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/max14577.c b/drivers/mfd/max14577.c
index 6cbe96b28f42..ebb13d5de530 100644
--- a/drivers/mfd/max14577.c
+++ b/drivers/mfd/max14577.c
@@ -1,22 +1,12 @@
-/*
- * max14577.c - mfd core driver for the Maxim 14577/77836
- *
- * Copyright (C) 2014 Samsung Electronics
- * Chanwoo Choi <cw00.choi@samsung.com>
- * Krzysztof Kozlowski <krzk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * This driver is based on max8997.c
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// max14577.c - mfd core driver for the Maxim 14577/77836
+//
+// Copyright (C) 2014 Samsung Electronics
+// Chanwoo Choi <cw00.choi@samsung.com>
+// Krzysztof Kozlowski <krzk@kernel.org>
+//
+// This driver is based on max8997.c
 
 #include <linux/err.h>
 #include <linux/module.h>
diff --git a/drivers/mfd/max77686.c b/drivers/mfd/max77686.c
index b0e8e13c0049..71faf503844b 100644
--- a/drivers/mfd/max77686.c
+++ b/drivers/mfd/max77686.c
@@ -1,26 +1,12 @@
-/*
- * max77686.c - mfd core driver for the Maxim 77686/802
- *
- * Copyright (C) 2012 Samsung Electronics
- * Chiwoong Byun <woong.byun@samsung.com>
- * Jonghwa Lee <jonghwa3.lee@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- * This driver is based on max8997.c
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// max77686.c - mfd core driver for the Maxim 77686/802
+//
+// Copyright (C) 2012 Samsung Electronics
+// Chiwoong Byun <woong.byun@samsung.com>
+// Jonghwa Lee <jonghwa3.lee@samsung.com>
+//
+//This driver is based on max8997.c
 
 #include <linux/export.h>
 #include <linux/slab.h>
diff --git a/drivers/mfd/max77693.c b/drivers/mfd/max77693.c
index 1c05ea0cba61..901d99d65924 100644
--- a/drivers/mfd/max77693.c
+++ b/drivers/mfd/max77693.c
@@ -1,27 +1,13 @@
-/*
- * max77693.c - mfd core driver for the MAX 77693
- *
- * Copyright (C) 2012 Samsung Electronics
- * SangYoung Son <hello.son@samsung.com>
- *
- * This program is not provided / owned by Maxim Integrated Products.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- * This driver is based on max8997.c
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// max77693.c - mfd core driver for the MAX 77693
+//
+// Copyright (C) 2012 Samsung Electronics
+// SangYoung Son <hello.son@samsung.com>
+//
+// This program is not provided / owned by Maxim Integrated Products.
+//
+// This driver is based on max8997.c
 
 #include <linux/module.h>
 #include <linux/slab.h>
diff --git a/drivers/mfd/max77843.c b/drivers/mfd/max77843.c
index da9612dbb222..25cbb2242b26 100644
--- a/drivers/mfd/max77843.c
+++ b/drivers/mfd/max77843.c
@@ -1,15 +1,10 @@
-/*
- * MFD core driver for the Maxim MAX77843
- *
- * Copyright (C) 2015 Samsung Electronics
- * Author: Jaewon Kim <jaewon02.kim@samsung.com>
- * Author: Beomho Seo <beomho.seo@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// MFD core driver for the Maxim MAX77843
+//
+// Copyright (C) 2015 Samsung Electronics
+// Author: Jaewon Kim <jaewon02.kim@samsung.com>
+// Author: Beomho Seo <beomho.seo@samsung.com>
 
 #include <linux/err.h>
 #include <linux/i2c.h>
diff --git a/drivers/mfd/max8997-irq.c b/drivers/mfd/max8997-irq.c
index 326f17b632a7..93a3b1698d9c 100644
--- a/drivers/mfd/max8997-irq.c
+++ b/drivers/mfd/max8997-irq.c
@@ -1,25 +1,11 @@
-/*
- * max8997-irq.c - Interrupt controller support for MAX8997
- *
- * Copyright (C) 2011 Samsung Electronics Co.Ltd
- * MyungJoo Ham <myungjoo.ham@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- * This driver is based on max8998-irq.c
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// max8997-irq.c - Interrupt controller support for MAX8997
+//
+// Copyright (C) 2011 Samsung Electronics Co.Ltd
+// MyungJoo Ham <myungjoo.ham@samsung.com>
+//
+// This driver is based on max8998-irq.c
 
 #include <linux/err.h>
 #include <linux/irq.h>
diff --git a/drivers/mfd/max8997.c b/drivers/mfd/max8997.c
index 3f554c447521..8dbae31911a1 100644
--- a/drivers/mfd/max8997.c
+++ b/drivers/mfd/max8997.c
@@ -1,25 +1,11 @@
-/*
- * max8997.c - mfd core driver for the Maxim 8966 and 8997
- *
- * Copyright (C) 2011 Samsung Electronics
- * MyungJoo Ham <myungjoo.ham@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- * This driver is based on max8998.c
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// max8997.c - mfd core driver for the Maxim 8966 and 8997
+//
+// Copyright (C) 2011 Samsung Electronics
+// MyungJoo Ham <myungjoo.ham@samsung.com>
+//
+// This driver is based on max8998.c
 
 #include <linux/err.h>
 #include <linux/slab.h>
diff --git a/drivers/mfd/max8998-irq.c b/drivers/mfd/max8998-irq.c
index 90bad9ffa7e2..83b6f510bc05 100644
--- a/drivers/mfd/max8998-irq.c
+++ b/drivers/mfd/max8998-irq.c
@@ -1,15 +1,9 @@
-/*
- * Interrupt controller support for MAX8998
- *
- * Copyright (C) 2010 Samsung Electronics Co.Ltd
- * Author: Joonyoung Shim <jy0922.shim@samsung.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- *
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Interrupt controller support for MAX8998
+//
+// Copyright (C) 2010 Samsung Electronics Co.Ltd
+// Author: Joonyoung Shim <jy0922.shim@samsung.com>
 
 #include <linux/device.h>
 #include <linux/interrupt.h>
diff --git a/drivers/mfd/max8998.c b/drivers/mfd/max8998.c
index b1d3f70782d9..56409df120f8 100644
--- a/drivers/mfd/max8998.c
+++ b/drivers/mfd/max8998.c
@@ -1,24 +1,10 @@
-/*
- * max8998.c - mfd core driver for the Maxim 8998
- *
- *  Copyright (C) 2009-2010 Samsung Electronics
- *  Kyungmin Park <kyungmin.park@samsung.com>
- *  Marek Szyprowski <m.szyprowski@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
+// SPDX-License-Identifier: GPL-2.0+
+//
+// max8998.c - mfd core driver for the Maxim 8998
+//
+//  Copyright (C) 2009-2010 Samsung Electronics
+//  Kyungmin Park <kyungmin.park@samsung.com>
+//  Marek Szyprowski <m.szyprowski@samsung.com>
 
 #include <linux/err.h>
 #include <linux/init.h>
diff --git a/include/linux/mfd/max14577-private.h b/include/linux/mfd/max14577-private.h
index df75234f979d..a21374f8ad26 100644
--- a/include/linux/mfd/max14577-private.h
+++ b/include/linux/mfd/max14577-private.h
@@ -1,19 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * max14577-private.h - Common API for the Maxim 14577/77836 internal sub chip
  *
  * Copyright (C) 2014 Samsung Electrnoics
  * Chanwoo Choi <cw00.choi@samsung.com>
  * Krzysztof Kozlowski <krzk@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
  */
 
 #ifndef __MAX14577_PRIVATE_H__
diff --git a/include/linux/mfd/max14577.h b/include/linux/mfd/max14577.h
index d81b52bb8bee..8b3ef891ba42 100644
--- a/include/linux/mfd/max14577.h
+++ b/include/linux/mfd/max14577.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * max14577.h - Driver for the Maxim 14577/77836
  *
@@ -5,16 +6,6 @@
  * Chanwoo Choi <cw00.choi@samsung.com>
  * Krzysztof Kozlowski <krzk@kernel.org>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * This driver is based on max8997.h
  *
  * MAX14577 has MUIC, Charger devices.
diff --git a/include/linux/mfd/max77686-private.h b/include/linux/mfd/max77686-private.h
index 643dae777b43..833e578e051e 100644
--- a/include/linux/mfd/max77686-private.h
+++ b/include/linux/mfd/max77686-private.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * max77686-private.h - Voltage regulator driver for the Maxim 77686/802
  *
  *  Copyright (C) 2012 Samsung Electrnoics
  *  Chiwoong Byun <woong.byun@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
 #ifndef __LINUX_MFD_MAX77686_PRIV_H
diff --git a/include/linux/mfd/max77686.h b/include/linux/mfd/max77686.h
index d4b72d519115..d0fb510875e6 100644
--- a/include/linux/mfd/max77686.h
+++ b/include/linux/mfd/max77686.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * max77686.h - Driver for the Maxim 77686/802
  *
  *  Copyright (C) 2012 Samsung Electrnoics
  *  Chiwoong Byun <woong.byun@samsung.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
  * This driver is based on max8997.h
  *
  * MAX77686 has PMIC, RTC devices.
diff --git a/include/linux/mfd/max77693-common.h b/include/linux/mfd/max77693-common.h
index 095b121aa725..a5bce099f1ed 100644
--- a/include/linux/mfd/max77693-common.h
+++ b/include/linux/mfd/max77693-common.h
@@ -1,12 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * Common data shared between Maxim 77693 and 77843 drivers
  *
  * Copyright (C) 2015 Samsung Electronics
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __LINUX_MFD_MAX77693_COMMON_H
diff --git a/include/linux/mfd/max77693-private.h b/include/linux/mfd/max77693-private.h
index 3c7a63b98ad6..e798c81aec31 100644
--- a/include/linux/mfd/max77693-private.h
+++ b/include/linux/mfd/max77693-private.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * max77693-private.h - Voltage regulator driver for the Maxim 77693
  *
@@ -5,20 +6,6 @@
  *  SangYoung Son <hello.son@samsung.com>
  *
  * This program is not provided / owned by Maxim Integrated Products.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
 #ifndef __LINUX_MFD_MAX77693_PRIV_H
diff --git a/include/linux/mfd/max77693.h b/include/linux/mfd/max77693.h
index d450f687301b..c67c16ba8649 100644
--- a/include/linux/mfd/max77693.h
+++ b/include/linux/mfd/max77693.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * max77693.h - Driver for the Maxim 77693
  *
@@ -6,20 +7,6 @@
  *
  * This program is not provided / owned by Maxim Integrated Products.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
  * This driver is based on max8997.h
  *
  * MAX77693 has PMIC, Charger, Flash LED, Haptic, MUIC devices.
diff --git a/include/linux/mfd/max77843-private.h b/include/linux/mfd/max77843-private.h
index b8908bf8d315..0bc7454c4dbe 100644
--- a/include/linux/mfd/max77843-private.h
+++ b/include/linux/mfd/max77843-private.h
@@ -1,14 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * Common variables for the Maxim MAX77843 driver
  *
  * Copyright (C) 2015 Samsung Electronics
  * Author: Jaewon Kim <jaewon02.kim@samsung.com>
  * Author: Beomho Seo <beomho.seo@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __MAX77843_PRIVATE_H_
diff --git a/include/linux/mfd/max8997-private.h b/include/linux/mfd/max8997-private.h
index 78c76cd4d37b..a10cd6945232 100644
--- a/include/linux/mfd/max8997-private.h
+++ b/include/linux/mfd/max8997-private.h
@@ -1,22 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * max8997-private.h - Voltage regulator driver for the Maxim 8997
  *
  *  Copyright (C) 2010 Samsung Electrnoics
  *  MyungJoo Ham <myungjoo.ham@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
 #ifndef __LINUX_MFD_MAX8997_PRIV_H
diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h
index cf815577bd68..3c4e920b4727 100644
--- a/include/linux/mfd/max8997.h
+++ b/include/linux/mfd/max8997.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * max8997.h - Driver for the Maxim 8997/8966
  *
  *  Copyright (C) 2009-2010 Samsung Electrnoics
  *  MyungJoo Ham <myungjoo.ham@samsung.com>
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
  * This driver is based on max8998.h
  *
  * MAX8997 has PMIC, MUIC, HAPTIC, RTC, FLASH, and Fuel Gauge devices.
diff --git a/include/linux/mfd/max8998-private.h b/include/linux/mfd/max8998-private.h
index d68ada502ff3..6deb5f577602 100644
--- a/include/linux/mfd/max8998-private.h
+++ b/include/linux/mfd/max8998-private.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * max8998-private.h - Voltage regulator driver for the Maxim 8998
  *
  *  Copyright (C) 2009-2010 Samsung Electrnoics
  *  Kyungmin Park <kyungmin.park@samsung.com>
  *  Marek Szyprowski <m.szyprowski@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
 #ifndef __LINUX_MFD_MAX8998_PRIV_H
diff --git a/include/linux/mfd/max8998.h b/include/linux/mfd/max8998.h
index e3956a654cbc..061af220dcd3 100644
--- a/include/linux/mfd/max8998.h
+++ b/include/linux/mfd/max8998.h
@@ -1,23 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * max8998.h - Voltage regulator driver for the Maxim 8998
  *
  *  Copyright (C) 2009-2010 Samsung Electrnoics
  *  Kyungmin Park <kyungmin.park@samsung.com>
  *  Marek Szyprowski <m.szyprowski@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
 #ifndef __LINUX_MFD_MAX8998_H
-- 
cgit v1.2.3


From 3d51ec93a564a0f87d1276f067732be904816a53 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Tue, 21 Aug 2018 19:16:12 +0200
Subject: mfd: Add ingenic-tcu.h header

This header contains macros for the registers that are present in the
regmap shared by all the drivers related to the TCU (Timer Counter Unit)
of the Ingenic JZ47xx SoCs.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/ingenic-tcu.h | 56 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 include/linux/mfd/ingenic-tcu.h

(limited to 'include/linux')

diff --git a/include/linux/mfd/ingenic-tcu.h b/include/linux/mfd/ingenic-tcu.h
new file mode 100644
index 000000000000..ab16ad283def
--- /dev/null
+++ b/include/linux/mfd/ingenic-tcu.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Header file for the Ingenic JZ47xx TCU driver
+ */
+#ifndef __LINUX_MFD_INGENIC_TCU_H_
+#define __LINUX_MFD_INGENIC_TCU_H_
+
+#include <linux/bitops.h>
+
+#define TCU_REG_WDT_TDR		0x00
+#define TCU_REG_WDT_TCER	0x04
+#define TCU_REG_WDT_TCNT	0x08
+#define TCU_REG_WDT_TCSR	0x0c
+#define TCU_REG_TER		0x10
+#define TCU_REG_TESR		0x14
+#define TCU_REG_TECR		0x18
+#define TCU_REG_TSR		0x1c
+#define TCU_REG_TFR		0x20
+#define TCU_REG_TFSR		0x24
+#define TCU_REG_TFCR		0x28
+#define TCU_REG_TSSR		0x2c
+#define TCU_REG_TMR		0x30
+#define TCU_REG_TMSR		0x34
+#define TCU_REG_TMCR		0x38
+#define TCU_REG_TSCR		0x3c
+#define TCU_REG_TDFR0		0x40
+#define TCU_REG_TDHR0		0x44
+#define TCU_REG_TCNT0		0x48
+#define TCU_REG_TCSR0		0x4c
+#define TCU_REG_OST_DR		0xe0
+#define TCU_REG_OST_CNTL	0xe4
+#define TCU_REG_OST_CNTH	0xe8
+#define TCU_REG_OST_TCSR	0xec
+#define TCU_REG_TSTR		0xf0
+#define TCU_REG_TSTSR		0xf4
+#define TCU_REG_TSTCR		0xf8
+#define TCU_REG_OST_CNTHBUF	0xfc
+
+#define TCU_TCSR_RESERVED_BITS		0x3f
+#define TCU_TCSR_PARENT_CLOCK_MASK	0x07
+#define TCU_TCSR_PRESCALE_LSB		3
+#define TCU_TCSR_PRESCALE_MASK		0x38
+
+#define TCU_TCSR_PWM_SD		BIT(9)	/* 0: Shutdown abruptly 1: gracefully */
+#define TCU_TCSR_PWM_INITL_HIGH	BIT(8)	/* Sets the initial output level */
+#define TCU_TCSR_PWM_EN		BIT(7)	/* PWM pin output enable */
+
+#define TCU_WDT_TCER_TCEN	BIT(0)	/* Watchdog timer enable */
+
+#define TCU_CHANNEL_STRIDE	0x10
+#define TCU_REG_TDFRc(c)	(TCU_REG_TDFR0 + ((c) * TCU_CHANNEL_STRIDE))
+#define TCU_REG_TDHRc(c)	(TCU_REG_TDHR0 + ((c) * TCU_CHANNEL_STRIDE))
+#define TCU_REG_TCNTc(c)	(TCU_REG_TCNT0 + ((c) * TCU_CHANNEL_STRIDE))
+#define TCU_REG_TCSRc(c)	(TCU_REG_TCSR0 + ((c) * TCU_CHANNEL_STRIDE))
+
+#endif /* __LINUX_MFD_INGENIC_TCU_H_ */
-- 
cgit v1.2.3


From 6360e40f421fba0a11bbabaa211c9c22c5cf5e61 Mon Sep 17 00:00:00 2001
From: Richard Fitzgerald <rf@opensource.cirrus.com>
Date: Tue, 28 Aug 2018 09:41:58 +0100
Subject: mfd: madera: Remove unused forward reference

The madera_irqchip_pdata struct was replaced by the irq_flags
member of struct madera_pdata so the forward reference is
obsolete.

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 include/linux/mfd/madera/pdata.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mfd/madera/pdata.h b/include/linux/mfd/madera/pdata.h
index 0b311f39c8f4..8dc852402dbb 100644
--- a/include/linux/mfd/madera/pdata.h
+++ b/include/linux/mfd/madera/pdata.h
@@ -24,7 +24,6 @@
 
 struct gpio_desc;
 struct pinctrl_map;
-struct madera_irqchip_pdata;
 struct madera_codec_pdata;
 
 /**
-- 
cgit v1.2.3


From 55143439b7b501882bea9d95a54adfe00ffc79a3 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Tue, 28 Aug 2018 17:02:40 -0300
Subject: mfd: mc13xxx-core: Fix PMIC shutdown when reading ADC values

When trying to read any MC13892 ADC channel on a imx51-babbage board:

The MC13892 PMIC shutdowns completely.

After debugging this issue and comparing the MC13892 and MC13783
initializations done in the vendor kernel, it was noticed that the
CHRGRAWDIV bit of the ADC0 register was not being set.

This bit is set by default after power on, but the driver was
clearing it.

After setting this bit it is possible to read the ADC values correctly.

Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Tested-by: Chris Healy <cphealy@gmail.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/mc13xxx-core.c  | 3 ++-
 include/linux/mfd/mc13xxx.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c
index c63e331738c1..f475e848252f 100644
--- a/drivers/mfd/mc13xxx-core.c
+++ b/drivers/mfd/mc13xxx-core.c
@@ -276,7 +276,8 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode,
 
 	mc13xxx_reg_read(mc13xxx, MC13XXX_ADC0, &old_adc0);
 
-	adc0 = MC13XXX_ADC0_ADINC1 | MC13XXX_ADC0_ADINC2;
+	adc0 = MC13XXX_ADC0_ADINC1 | MC13XXX_ADC0_ADINC2 |
+	       MC13XXX_ADC0_CHRGRAWDIV;
 	adc1 = MC13XXX_ADC1_ADEN | MC13XXX_ADC1_ADTRIGIGN | MC13XXX_ADC1_ASC;
 
 	/*
diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h
index 54a3cd808f9e..2ad9bdc0a5ec 100644
--- a/include/linux/mfd/mc13xxx.h
+++ b/include/linux/mfd/mc13xxx.h
@@ -249,6 +249,7 @@ struct mc13xxx_platform_data {
 #define MC13XXX_ADC0_TSMOD0		(1 << 12)
 #define MC13XXX_ADC0_TSMOD1		(1 << 13)
 #define MC13XXX_ADC0_TSMOD2		(1 << 14)
+#define MC13XXX_ADC0_CHRGRAWDIV		(1 << 15)
 #define MC13XXX_ADC0_ADINC1		(1 << 16)
 #define MC13XXX_ADC0_ADINC2		(1 << 17)
 
-- 
cgit v1.2.3


From 9f8ddee1dab836ca758ca8fc555ab5a3aaa5d3fd Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 30 Aug 2018 19:52:52 +0300
Subject: mfd: intel_soc_pmic_bxtwc: Chain power button IRQs as well

Power button IRQ actually has a second level of interrupts to
distinguish between UI and POWER buttons. Moreover, current
implementation looks awkward in approach to handle second level IRQs by
first level related IRQ chip.

To address above issues, split power button IRQ to be chained as well.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/intel_soc_pmic_bxtwc.c | 41 ++++++++++++++++++++++++++++----------
 include/linux/mfd/intel_soc_pmic.h |  1 +
 2 files changed, 32 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/intel_soc_pmic_bxtwc.c b/drivers/mfd/intel_soc_pmic_bxtwc.c
index 15bc052704a6..9ca1f8c015de 100644
--- a/drivers/mfd/intel_soc_pmic_bxtwc.c
+++ b/drivers/mfd/intel_soc_pmic_bxtwc.c
@@ -31,8 +31,8 @@
 
 /* Interrupt Status Registers */
 #define BXTWC_IRQLVL1		0x4E02
-#define BXTWC_PWRBTNIRQ		0x4E03
 
+#define BXTWC_PWRBTNIRQ		0x4E03
 #define BXTWC_THRM0IRQ		0x4E04
 #define BXTWC_THRM1IRQ		0x4E05
 #define BXTWC_THRM2IRQ		0x4E06
@@ -47,10 +47,9 @@
 
 /* Interrupt MASK Registers */
 #define BXTWC_MIRQLVL1		0x4E0E
-#define BXTWC_MPWRTNIRQ		0x4E0F
-
 #define BXTWC_MIRQLVL1_MCHGR	BIT(5)
 
+#define BXTWC_MPWRBTNIRQ	0x4E0F
 #define BXTWC_MTHRM0IRQ		0x4E12
 #define BXTWC_MTHRM1IRQ		0x4E13
 #define BXTWC_MTHRM2IRQ		0x4E14
@@ -66,9 +65,7 @@
 /* Whiskey Cove PMIC share same ACPI ID between different platforms */
 #define BROXTON_PMIC_WC_HRV	4
 
-/* Manage in two IRQ chips since mask registers are not consecutive */
 enum bxtwc_irqs {
-	/* Level 1 */
 	BXTWC_PWRBTN_LVL1_IRQ = 0,
 	BXTWC_TMU_LVL1_IRQ,
 	BXTWC_THRM_LVL1_IRQ,
@@ -77,9 +74,11 @@ enum bxtwc_irqs {
 	BXTWC_CHGR_LVL1_IRQ,
 	BXTWC_GPIO_LVL1_IRQ,
 	BXTWC_CRIT_LVL1_IRQ,
+};
 
-	/* Level 2 */
-	BXTWC_PWRBTN_IRQ,
+enum bxtwc_irqs_pwrbtn {
+	BXTWC_PWRBTN_IRQ = 0,
+	BXTWC_UIBTN_IRQ,
 };
 
 enum bxtwc_irqs_bcu {
@@ -113,7 +112,10 @@ static const struct regmap_irq bxtwc_regmap_irqs[] = {
 	REGMAP_IRQ_REG(BXTWC_CHGR_LVL1_IRQ, 0, BIT(5)),
 	REGMAP_IRQ_REG(BXTWC_GPIO_LVL1_IRQ, 0, BIT(6)),
 	REGMAP_IRQ_REG(BXTWC_CRIT_LVL1_IRQ, 0, BIT(7)),
-	REGMAP_IRQ_REG(BXTWC_PWRBTN_IRQ, 1, 0x03),
+};
+
+static const struct regmap_irq bxtwc_regmap_irqs_pwrbtn[] = {
+	REGMAP_IRQ_REG(BXTWC_PWRBTN_IRQ, 0, 0x01),
 };
 
 static const struct regmap_irq bxtwc_regmap_irqs_bcu[] = {
@@ -125,7 +127,7 @@ static const struct regmap_irq bxtwc_regmap_irqs_adc[] = {
 };
 
 static const struct regmap_irq bxtwc_regmap_irqs_chgr[] = {
-	REGMAP_IRQ_REG(BXTWC_USBC_IRQ, 0, BIT(5)),
+	REGMAP_IRQ_REG(BXTWC_USBC_IRQ, 0, 0x20),
 	REGMAP_IRQ_REG(BXTWC_CHGR0_IRQ, 0, 0x1f),
 	REGMAP_IRQ_REG(BXTWC_CHGR1_IRQ, 1, 0x1f),
 };
@@ -144,7 +146,16 @@ static struct regmap_irq_chip bxtwc_regmap_irq_chip = {
 	.mask_base = BXTWC_MIRQLVL1,
 	.irqs = bxtwc_regmap_irqs,
 	.num_irqs = ARRAY_SIZE(bxtwc_regmap_irqs),
-	.num_regs = 2,
+	.num_regs = 1,
+};
+
+static struct regmap_irq_chip bxtwc_regmap_irq_chip_pwrbtn = {
+	.name = "bxtwc_irq_chip_pwrbtn",
+	.status_base = BXTWC_PWRBTNIRQ,
+	.mask_base = BXTWC_MPWRBTNIRQ,
+	.irqs = bxtwc_regmap_irqs_pwrbtn,
+	.num_irqs = ARRAY_SIZE(bxtwc_regmap_irqs_pwrbtn),
+	.num_regs = 1,
 };
 
 static struct regmap_irq_chip bxtwc_regmap_irq_chip_tmu = {
@@ -472,6 +483,16 @@ static int bxtwc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	ret = bxtwc_add_chained_irq_chip(pmic, pmic->irq_chip_data,
+					 BXTWC_PWRBTN_LVL1_IRQ,
+					 IRQF_ONESHOT,
+					 &bxtwc_regmap_irq_chip_pwrbtn,
+					 &pmic->irq_chip_data_pwrbtn);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to add PWRBTN IRQ chip\n");
+		return ret;
+	}
+
 	ret = bxtwc_add_chained_irq_chip(pmic, pmic->irq_chip_data,
 					 BXTWC_TMU_LVL1_IRQ,
 					 IRQF_ONESHOT,
diff --git a/include/linux/mfd/intel_soc_pmic.h b/include/linux/mfd/intel_soc_pmic.h
index 5aacdb017a9f..806a4f095312 100644
--- a/include/linux/mfd/intel_soc_pmic.h
+++ b/include/linux/mfd/intel_soc_pmic.h
@@ -25,6 +25,7 @@ struct intel_soc_pmic {
 	int irq;
 	struct regmap *regmap;
 	struct regmap_irq_chip_data *irq_chip_data;
+	struct regmap_irq_chip_data *irq_chip_data_pwrbtn;
 	struct regmap_irq_chip_data *irq_chip_data_tmu;
 	struct regmap_irq_chip_data *irq_chip_data_bcu;
 	struct regmap_irq_chip_data *irq_chip_data_adc;
-- 
cgit v1.2.3


From 26c7e05a69d3843abb0e5389e7f538cd175c0f09 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 30 Aug 2018 19:52:54 +0300
Subject: mfd: Convert Intel PMIC drivers to use SPDX identifier 1;5201;0c
 Reduce size of duplicated comments by switching to use SPDX identifier.

No functional change.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/intel_msic.c                 |  5 +----
 drivers/mfd/intel_soc_pmic_bxtwc.c       | 10 +---------
 drivers/mfd/intel_soc_pmic_chtdc_ti.c    |  5 +----
 drivers/mfd/intel_soc_pmic_chtwc.c       |  5 +----
 drivers/mfd/intel_soc_pmic_core.c        | 12 ++----------
 drivers/mfd/intel_soc_pmic_core.h        | 12 ++----------
 drivers/mfd/intel_soc_pmic_crc.c         | 12 ++----------
 include/linux/mfd/intel_msic.h           |  7 ++-----
 include/linux/mfd/intel_soc_pmic.h       | 12 ++----------
 include/linux/mfd/intel_soc_pmic_bxtwc.h | 10 +---------
 10 files changed, 15 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/intel_msic.c b/drivers/mfd/intel_msic.c
index 441de7b3d231..bb24c2a07900 100644
--- a/drivers/mfd/intel_msic.c
+++ b/drivers/mfd/intel_msic.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Driver for Intel MSIC
  *
  * Copyright (C) 2011, Intel Corporation
  * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/err.h>
diff --git a/drivers/mfd/intel_soc_pmic_bxtwc.c b/drivers/mfd/intel_soc_pmic_bxtwc.c
index c878724497a9..6310c3bdb991 100644
--- a/drivers/mfd/intel_soc_pmic_bxtwc.c
+++ b/drivers/mfd/intel_soc_pmic_bxtwc.c
@@ -1,16 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * MFD core driver for Intel Broxton Whiskey Cove PMIC
  *
  * Copyright (C) 2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #include <linux/acpi.h>
diff --git a/drivers/mfd/intel_soc_pmic_chtdc_ti.c b/drivers/mfd/intel_soc_pmic_chtdc_ti.c
index 861277c6580a..64b5c3cc30e7 100644
--- a/drivers/mfd/intel_soc_pmic_chtdc_ti.c
+++ b/drivers/mfd/intel_soc_pmic_chtdc_ti.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Device access for Dollar Cove TI PMIC
  *
@@ -6,10 +7,6 @@
  *
  * Cleanup and forward-ported
  *   Copyright (c) 2017 Takashi Iwai <tiwai@suse.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/acpi.h>
diff --git a/drivers/mfd/intel_soc_pmic_chtwc.c b/drivers/mfd/intel_soc_pmic_chtwc.c
index b35da01d5bcf..64a3aece9c5e 100644
--- a/drivers/mfd/intel_soc_pmic_chtwc.c
+++ b/drivers/mfd/intel_soc_pmic_chtwc.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * MFD core driver for Intel Cherrytrail Whiskey Cove PMIC
  *
@@ -5,10 +6,6 @@
  *
  * Based on various non upstream patches to support the CHT Whiskey Cove PMIC:
  * Copyright (C) 2013-2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/acpi.h>
diff --git a/drivers/mfd/intel_soc_pmic_core.c b/drivers/mfd/intel_soc_pmic_core.c
index 170d5ed16a3b..c9f35378d391 100644
--- a/drivers/mfd/intel_soc_pmic_core.c
+++ b/drivers/mfd/intel_soc_pmic_core.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * intel_soc_pmic_core.c - Intel SoC PMIC MFD Driver
+ * Intel SoC PMIC MFD Driver
  *
  * Copyright (C) 2013, 2014 Intel Corporation. All rights reserved.
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Author: Yang, Bin <bin.yang@intel.com>
  * Author: Zhu, Lejun <lejun.zhu@linux.intel.com>
  */
diff --git a/drivers/mfd/intel_soc_pmic_core.h b/drivers/mfd/intel_soc_pmic_core.h
index 90a1416d4dac..d490685845eb 100644
--- a/drivers/mfd/intel_soc_pmic_core.h
+++ b/drivers/mfd/intel_soc_pmic_core.h
@@ -1,17 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
- * intel_soc_pmic_core.h - Intel SoC PMIC MFD Driver
+ * Intel SoC PMIC MFD Driver
  *
  * Copyright (C) 2012-2014 Intel Corporation. All rights reserved.
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Author: Yang, Bin <bin.yang@intel.com>
  * Author: Zhu, Lejun <lejun.zhu@linux.intel.com>
  */
diff --git a/drivers/mfd/intel_soc_pmic_crc.c b/drivers/mfd/intel_soc_pmic_crc.c
index 5ac6f3710294..b6ab72fa0569 100644
--- a/drivers/mfd/intel_soc_pmic_crc.c
+++ b/drivers/mfd/intel_soc_pmic_crc.c
@@ -1,17 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
- * intel_soc_pmic_crc.c - Device access for Crystal Cove PMIC
+ * Device access for Crystal Cove PMIC
  *
  * Copyright (C) 2013, 2014 Intel Corporation. All rights reserved.
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Author: Yang, Bin <bin.yang@intel.com>
  * Author: Zhu, Lejun <lejun.zhu@linux.intel.com>
  */
diff --git a/include/linux/mfd/intel_msic.h b/include/linux/mfd/intel_msic.h
index 439a7a617bc9..317e8608cf41 100644
--- a/include/linux/mfd/intel_msic.h
+++ b/include/linux/mfd/intel_msic.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
- * include/linux/mfd/intel_msic.h - Core interface for Intel MSIC
+ * Core interface for Intel MSIC
  *
  * Copyright (C) 2011, Intel Corporation
  * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef __LINUX_MFD_INTEL_MSIC_H__
diff --git a/include/linux/mfd/intel_soc_pmic.h b/include/linux/mfd/intel_soc_pmic.h
index 806a4f095312..ed1dfba5e5f9 100644
--- a/include/linux/mfd/intel_soc_pmic.h
+++ b/include/linux/mfd/intel_soc_pmic.h
@@ -1,17 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
- * intel_soc_pmic.h - Intel SoC PMIC Driver
+ * Intel SoC PMIC Driver
  *
  * Copyright (C) 2012-2014 Intel Corporation. All rights reserved.
  *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Author: Yang, Bin <bin.yang@intel.com>
  * Author: Zhu, Lejun <lejun.zhu@linux.intel.com>
  */
diff --git a/include/linux/mfd/intel_soc_pmic_bxtwc.h b/include/linux/mfd/intel_soc_pmic_bxtwc.h
index 0c351bc85d2d..9be566cc58c6 100644
--- a/include/linux/mfd/intel_soc_pmic_bxtwc.h
+++ b/include/linux/mfd/intel_soc_pmic_bxtwc.h
@@ -1,16 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Header file for Intel Broxton Whiskey Cove PMIC
  *
  * Copyright (C) 2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
  */
 
 #ifndef __INTEL_BXTWC_H__
-- 
cgit v1.2.3


From efddff27c886e729a7f84a7205bd84d7d4af7336 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Wed, 5 Sep 2018 13:54:07 +0200
Subject: mfd: max8997: Enale irq-wakeup unconditionally

IRQ wake up support for MAX8997 driver was initially configured by
respective property in pdata. However, after the driver conversion to
device-tree, setting it was left as 'todo'. Nowadays most of other PMIC MFD
drivers initialized from device-tree assume that they can be an irq wakeup
source, so enable it also for MAX8997. This fixes support for wakeup from
MAX8997 RTC alarm.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/max8997.c       | 8 +-------
 include/linux/mfd/max8997.h | 1 -
 2 files changed, 1 insertion(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/max8997.c b/drivers/mfd/max8997.c
index 8dbae31911a1..f99e8da99782 100644
--- a/drivers/mfd/max8997.c
+++ b/drivers/mfd/max8997.c
@@ -139,12 +139,6 @@ static struct max8997_platform_data *max8997_i2c_parse_dt_pdata(
 
 	pd->ono = irq_of_parse_and_map(dev->of_node, 1);
 
-	/*
-	 * ToDo: the 'wakeup' member in the platform data is more of a linux
-	 * specfic information. Hence, there is no binding for that yet and
-	 * not parsed here.
-	 */
-
 	return pd;
 }
 
@@ -232,7 +226,7 @@ static int max8997_i2c_probe(struct i2c_client *i2c,
 	 */
 
 	/* MAX8997 has a power button input. */
-	device_init_wakeup(max8997->dev, pdata->wakeup);
+	device_init_wakeup(max8997->dev, true);
 
 	return ret;
 
diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h
index 3c4e920b4727..e955e2f0a2cc 100644
--- a/include/linux/mfd/max8997.h
+++ b/include/linux/mfd/max8997.h
@@ -165,7 +165,6 @@ struct max8997_led_platform_data {
 struct max8997_platform_data {
 	/* IRQ */
 	int ono;
-	int wakeup;
 
 	/* ---- PMIC ---- */
 	struct max8997_regulator_data *regulators;
-- 
cgit v1.2.3


From 7a6a395b16c3fdcd2ede2b0ce4bf63f6dafc1fea Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Tue, 11 Sep 2018 23:01:33 +0200
Subject: mfd: ti-lmu: Switch to GPIOD

Use new descriptor based API instead of the legacy one.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Signed-off-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/mfd/ti-lmu.c       | 28 ++++++++++++----------------
 include/linux/mfd/ti-lmu.h |  3 ++-
 2 files changed, 14 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/ti-lmu.c b/drivers/mfd/ti-lmu.c
index 990437e5ed0a..e14cb9f41b44 100644
--- a/drivers/mfd/ti-lmu.c
+++ b/drivers/mfd/ti-lmu.c
@@ -12,7 +12,7 @@
 
 #include <linux/delay.h>
 #include <linux/err.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
 #include <linux/kernel.h>
 #include <linux/mfd/core.h>
@@ -21,7 +21,6 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
 #include <linux/slab.h>
 
 struct ti_lmu_data {
@@ -32,17 +31,8 @@ struct ti_lmu_data {
 
 static int ti_lmu_enable_hw(struct ti_lmu *lmu, enum ti_lmu_id id)
 {
-	int ret;
-
-	if (gpio_is_valid(lmu->en_gpio)) {
-		ret = devm_gpio_request_one(lmu->dev, lmu->en_gpio,
-					    GPIOF_OUT_INIT_HIGH, "lmu_hwen");
-		if (ret) {
-			dev_err(lmu->dev, "Can not request enable GPIO: %d\n",
-				ret);
-			return ret;
-		}
-	}
+	if (lmu->en_gpio)
+		gpiod_set_value(lmu->en_gpio, 1);
 
 	/* Delay about 1ms after HW enable pin control */
 	usleep_range(1000, 1500);
@@ -59,8 +49,8 @@ static int ti_lmu_enable_hw(struct ti_lmu *lmu, enum ti_lmu_id id)
 
 static void ti_lmu_disable_hw(struct ti_lmu *lmu)
 {
-	if (gpio_is_valid(lmu->en_gpio))
-		gpio_set_value(lmu->en_gpio, 0);
+	if (lmu->en_gpio)
+		gpiod_set_value(lmu->en_gpio, 0);
 }
 
 static const struct mfd_cell lm3532_devices[] = {
@@ -204,7 +194,13 @@ static int ti_lmu_probe(struct i2c_client *cl, const struct i2c_device_id *id)
 		return PTR_ERR(lmu->regmap);
 
 	/* HW enable pin control and additional power up sequence if required */
-	lmu->en_gpio = of_get_named_gpio(dev->of_node, "enable-gpios", 0);
+	lmu->en_gpio = devm_gpiod_get_optional(dev, "enable", GPIOD_OUT_HIGH);
+	if (IS_ERR(lmu->en_gpio)) {
+		ret = PTR_ERR(lmu->en_gpio);
+		dev_err(dev, "Can not request enable GPIO: %d\n", ret);
+		return ret;
+	}
+
 	ret = ti_lmu_enable_hw(lmu, id->driver_data);
 	if (ret)
 		return ret;
diff --git a/include/linux/mfd/ti-lmu.h b/include/linux/mfd/ti-lmu.h
index 09d5f30384e5..1ef51ed36be5 100644
--- a/include/linux/mfd/ti-lmu.h
+++ b/include/linux/mfd/ti-lmu.h
@@ -16,6 +16,7 @@
 #include <linux/gpio.h>
 #include <linux/notifier.h>
 #include <linux/regmap.h>
+#include <linux/gpio/consumer.h>
 
 /* Notifier event */
 #define LMU_EVENT_MONITOR_DONE		0x01
@@ -81,7 +82,7 @@ enum lm363x_regulator_id {
 struct ti_lmu {
 	struct device *dev;
 	struct regmap *regmap;
-	int en_gpio;
+	struct gpio_desc *en_gpio;
 	struct blocking_notifier_head notifier;
 };
 #endif
-- 
cgit v1.2.3


From 07d02a67b7faae56e184f6c35f78de47f06da37f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 12 Oct 2018 13:28:26 -0400
Subject: SUNRPC: Simplify lookup code

We no longer need to worry about whether or not the entry is hashed in
order to figure out if the contents are valid. We only care whether or
not the refcount is non-zero.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/auth.h | 12 +++++-------
 net/sunrpc/auth.c           | 19 ++++++++-----------
 2 files changed, 13 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 2c97a3933ef9..a7fc8f5a2dad 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -206,11 +206,11 @@ bool			rpcauth_cred_key_to_expire(struct rpc_auth *, struct rpc_cred *);
 char *			rpcauth_stringify_acceptor(struct rpc_cred *);
 
 static inline
-struct rpc_cred *	get_rpccred(struct rpc_cred *cred)
+struct rpc_cred *get_rpccred(struct rpc_cred *cred)
 {
-	if (cred != NULL)
-		atomic_inc(&cred->cr_count);
-	return cred;
+	if (cred != NULL && atomic_inc_not_zero(&cred->cr_count))
+		return cred;
+	return NULL;
 }
 
 /**
@@ -226,9 +226,7 @@ struct rpc_cred *	get_rpccred(struct rpc_cred *cred)
 static inline struct rpc_cred *
 get_rpccred_rcu(struct rpc_cred *cred)
 {
-	if (atomic_inc_not_zero(&cred->cr_count))
-		return cred;
-	return NULL;
+	return get_rpccred(cred);
 }
 
 #endif /* __KERNEL__ */
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index c1576b110974..77748e572686 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -588,19 +588,15 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
 		if (!entry->cr_ops->crmatch(acred, entry, flags))
 			continue;
 		if (flags & RPCAUTH_LOOKUP_RCU) {
-			if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) &&
-			    !test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags))
-				cred = entry;
+			if (test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags) ||
+			    atomic_read(&entry->cr_count) == 0)
+				continue;
+			cred = entry;
 			break;
 		}
-		spin_lock(&cache->lock);
-		if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) {
-			spin_unlock(&cache->lock);
-			continue;
-		}
 		cred = get_rpccred(entry);
-		spin_unlock(&cache->lock);
-		break;
+		if (cred)
+			break;
 	}
 	rcu_read_unlock();
 
@@ -621,7 +617,8 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
 		if (!entry->cr_ops->crmatch(acred, entry, flags))
 			continue;
 		cred = get_rpccred(entry);
-		break;
+		if (cred)
+			break;
 	}
 	if (cred == NULL) {
 		cred = new;
-- 
cgit v1.2.3


From 79b181810285a6b9b7a1aed25c365c9e1782e22a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Sun, 14 Oct 2018 10:34:31 -0400
Subject: SUNRPC: Convert auth creds to use refcount_t

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/auth.h |  4 ++--
 net/sunrpc/auth.c           | 14 +++++++-------
 net/sunrpc/auth_null.c      |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index a7fc8f5a2dad..a71d4bd191e7 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -67,7 +67,7 @@ struct rpc_cred {
 	const struct rpc_credops *cr_ops;
 	unsigned long		cr_expire;	/* when to gc */
 	unsigned long		cr_flags;	/* various flags */
-	atomic_t		cr_count;	/* ref count */
+	refcount_t		cr_count;	/* ref count */
 
 	kuid_t			cr_uid;
 
@@ -208,7 +208,7 @@ char *			rpcauth_stringify_acceptor(struct rpc_cred *);
 static inline
 struct rpc_cred *get_rpccred(struct rpc_cred *cred)
 {
-	if (cred != NULL && atomic_inc_not_zero(&cred->cr_count))
+	if (cred != NULL && refcount_inc_not_zero(&cred->cr_count))
 		return cred;
 	return NULL;
 }
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 77748e572686..4903eda5dd61 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -495,7 +495,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
 
 		if (nr_to_scan-- == 0)
 			break;
-		if (atomic_read(&cred->cr_count) > 1) {
+		if (refcount_read(&cred->cr_count) > 1) {
 			rpcauth_lru_remove_locked(cred);
 			continue;
 		}
@@ -589,7 +589,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
 			continue;
 		if (flags & RPCAUTH_LOOKUP_RCU) {
 			if (test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags) ||
-			    atomic_read(&entry->cr_count) == 0)
+			    refcount_read(&entry->cr_count) == 0)
 				continue;
 			cred = entry;
 			break;
@@ -623,7 +623,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
 	if (cred == NULL) {
 		cred = new;
 		set_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
-		atomic_inc(&cred->cr_count);
+		refcount_inc(&cred->cr_count);
 		hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]);
 	} else
 		list_add_tail(&new->cr_lru, &free);
@@ -670,7 +670,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
 {
 	INIT_HLIST_NODE(&cred->cr_hash);
 	INIT_LIST_HEAD(&cred->cr_lru);
-	atomic_set(&cred->cr_count, 1);
+	refcount_set(&cred->cr_count, 1);
 	cred->cr_auth = auth;
 	cred->cr_ops = ops;
 	cred->cr_expire = jiffies;
@@ -739,9 +739,9 @@ put_rpccred(struct rpc_cred *cred)
 	if (cred == NULL)
 		return;
 	rcu_read_lock();
-	if (atomic_dec_and_test(&cred->cr_count))
+	if (refcount_dec_and_test(&cred->cr_count))
 		goto destroy;
-	if (atomic_read(&cred->cr_count) != 1 ||
+	if (refcount_read(&cred->cr_count) != 1 ||
 	    !test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags))
 		goto out;
 	if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) {
@@ -752,7 +752,7 @@ put_rpccred(struct rpc_cred *cred)
 			rpcauth_lru_remove(cred);
 	} else if (rpcauth_unhash_cred(cred)) {
 		rpcauth_lru_remove(cred);
-		if (atomic_dec_and_test(&cred->cr_count))
+		if (refcount_dec_and_test(&cred->cr_count))
 			goto destroy;
 	}
 out:
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index a7c00b4959f3..ea816d7000a4 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -138,6 +138,6 @@ struct rpc_cred null_cred = {
 	.cr_lru		= LIST_HEAD_INIT(null_cred.cr_lru),
 	.cr_auth	= &null_auth,
 	.cr_ops		= &null_credops,
-	.cr_count	= ATOMIC_INIT(2),
+	.cr_count	= REFCOUNT_INIT(2),
 	.cr_flags	= 1UL << RPCAUTH_CRED_UPTODATE,
 };
-- 
cgit v1.2.3


From 331bc71cb1751d78f6807ad8e6162b07c67cdd1b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Sun, 14 Oct 2018 10:40:29 -0400
Subject: SUNRPC: Convert the auth cred cache to use refcount_t

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/auth.h    | 2 +-
 net/sunrpc/auth.c              | 2 +-
 net/sunrpc/auth_generic.c      | 2 +-
 net/sunrpc/auth_gss/auth_gss.c | 4 ++--
 net/sunrpc/auth_null.c         | 4 ++--
 net/sunrpc/auth_unix.c         | 4 ++--
 6 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index a71d4bd191e7..c4db9424b63b 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -100,7 +100,7 @@ struct rpc_auth {
 						 * differ from the flavor in
 						 * au_ops->au_flavor in gss
 						 * case) */
-	atomic_t		au_count;	/* Reference counter */
+	refcount_t		au_count;	/* Reference counter */
 
 	struct rpc_cred_cache *	au_credcache;
 	/* per-flavor data */
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 4903eda5dd61..ad8ead738981 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -284,7 +284,7 @@ EXPORT_SYMBOL_GPL(rpcauth_create);
 void
 rpcauth_release(struct rpc_auth *auth)
 {
-	if (!atomic_dec_and_test(&auth->au_count))
+	if (!refcount_dec_and_test(&auth->au_count))
 		return;
 	auth->au_ops->destroy(auth);
 }
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index f1df9837f1ac..d8831b988b1e 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -274,7 +274,7 @@ static const struct rpc_authops generic_auth_ops = {
 
 static struct rpc_auth generic_auth = {
 	.au_ops = &generic_auth_ops,
-	.au_count = ATOMIC_INIT(0),
+	.au_count = REFCOUNT_INIT(1),
 };
 
 static bool generic_key_to_expire(struct rpc_cred *cred)
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index c898a7c75e84..30f970cdc7f6 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1058,7 +1058,7 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 	auth->au_flavor = flavor;
 	if (gss_pseudoflavor_to_datatouch(gss_auth->mech, flavor))
 		auth->au_flags |= RPCAUTH_AUTH_DATATOUCH;
-	atomic_set(&auth->au_count, 1);
+	refcount_set(&auth->au_count, 1);
 	kref_init(&gss_auth->kref);
 
 	err = rpcauth_init_credcache(auth);
@@ -1187,7 +1187,7 @@ gss_auth_find_or_add_hashed(const struct rpc_auth_create_args *args,
 			if (strcmp(gss_auth->target_name, args->target_name))
 				continue;
 		}
-		if (!atomic_inc_not_zero(&gss_auth->rpc_auth.au_count))
+		if (!refcount_inc_not_zero(&gss_auth->rpc_auth.au_count))
 			continue;
 		goto out;
 	}
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index ea816d7000a4..2694a1bc026b 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -21,7 +21,7 @@ static struct rpc_cred null_cred;
 static struct rpc_auth *
 nul_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
-	atomic_inc(&null_auth.au_count);
+	refcount_inc(&null_auth.au_count);
 	return &null_auth;
 }
 
@@ -119,7 +119,7 @@ struct rpc_auth null_auth = {
 	.au_flags	= RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
 	.au_ops		= &authnull_ops,
 	.au_flavor	= RPC_AUTH_NULL,
-	.au_count	= ATOMIC_INIT(0),
+	.au_count	= REFCOUNT_INIT(1),
 };
 
 static
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 185e56d4f9ae..4c1c7e56288f 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -34,7 +34,7 @@ unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
 {
 	dprintk("RPC:       creating UNIX authenticator for client %p\n",
 			clnt);
-	atomic_inc(&unix_auth.au_count);
+	refcount_inc(&unix_auth.au_count);
 	return &unix_auth;
 }
 
@@ -239,7 +239,7 @@ struct rpc_auth		unix_auth = {
 	.au_flags	= RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
 	.au_ops		= &authunix_ops,
 	.au_flavor	= RPC_AUTH_UNIX,
-	.au_count	= ATOMIC_INIT(0),
+	.au_count	= REFCOUNT_INIT(1),
 };
 
 static
-- 
cgit v1.2.3


From 00e23707442a75b404392cef1405ab4fd498de6b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 22 Oct 2018 13:07:28 +0100
Subject: iov_iter: Use accessor function

Use accessor functions to access an iterator's type and direction.  This
allows for the possibility of using some other method of determining the
type of iterator than if-chains with bitwise-AND conditions.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 block/bio.c           |  2 +-
 fs/block_dev.c        |  2 +-
 fs/ceph/file.c        |  2 +-
 fs/cifs/file.c        |  4 ++--
 fs/cifs/misc.c        |  2 +-
 fs/cifs/smbdirect.c   | 17 ++++++++++++----
 fs/direct-io.c        |  2 +-
 fs/fuse/file.c        |  2 +-
 fs/iomap.c            |  2 +-
 include/linux/uio.h   | 48 +++++++++++++++++++++++++++++--------------
 lib/iov_iter.c        | 56 +++++++++++++++++++++++++--------------------------
 mm/filemap.c          |  2 +-
 net/9p/trans_virtio.c |  2 +-
 net/tls/tls_sw.c      |  4 ++--
 14 files changed, 87 insertions(+), 60 deletions(-)

(limited to 'include/linux')

diff --git a/block/bio.c b/block/bio.c
index 0093bed81c0e..c55f36bbe12a 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1255,7 +1255,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
 	/*
 	 * success
 	 */
-	if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) ||
+	if ((iov_iter_rw(iter) == WRITE && (!map_data || !map_data->null_mapped)) ||
 	    (map_data && map_data->from_user)) {
 		ret = bio_copy_from_iter(bio, iter);
 		if (ret)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 38b8ce05cbc7..a80b4f0ee7c4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -349,7 +349,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 
 	dio->size = 0;
 	dio->multi_bio = false;
-	dio->should_dirty = is_read && (iter->type == ITER_IOVEC);
+	dio->should_dirty = is_read && iter_is_iovec(iter);
 
 	blk_start_plug(&plug);
 	for (;;) {
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 92ab20433682..524ecc95b04d 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -658,7 +658,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
 	if (ret < 0)
 		return ret;
 
-	if (unlikely(to->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(to))) {
 		size_t page_off;
 		ret = iov_iter_get_pages_alloc(to, &pages, len,
 					       &page_off);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8d41ca7bfcf1..dcdbcb6f09f8 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2990,7 +2990,7 @@ cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
 		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
 		size_t written;
 
-		if (unlikely(iter->type & ITER_PIPE)) {
+		if (unlikely(iov_iter_is_pipe(iter))) {
 			void *addr = kmap_atomic(page);
 
 			written = copy_to_iter(addr, copy, iter);
@@ -3302,7 +3302,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
 	if (!is_sync_kiocb(iocb))
 		ctx->iocb = iocb;
 
-	if (to->type == ITER_IOVEC)
+	if (iter_is_iovec(to))
 		ctx->should_dirty = true;
 
 	rc = setup_aio_ctx_iter(ctx, to, READ);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 6926685e513c..7b5b960a04b8 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -786,7 +786,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
 	struct page **pages = NULL;
 	struct bio_vec *bv = NULL;
 
-	if (iter->type & ITER_KVEC) {
+	if (iov_iter_is_kvec(iter)) {
 		memcpy(&ctx->iter, iter, sizeof(struct iov_iter));
 		ctx->len = count;
 		iov_iter_advance(iter, count);
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 5fdb9a509a97..5b05bf255268 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -2054,14 +2054,22 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
 
 	info->smbd_recv_pending++;
 
-	switch (msg->msg_iter.type) {
-	case READ | ITER_KVEC:
+	if (iov_iter_rw(&msg->msg_iter) == WRITE) {
+		/* It's a bug in upper layer to get there */
+		cifs_dbg(VFS, "CIFS: invalid msg iter dir %u\n",
+			 iov_iter_rw(&msg->msg_iter));
+		rc = -EINVAL;
+		goto out;
+	}
+
+	switch (iov_iter_type(&msg->msg_iter)) {
+	case ITER_KVEC:
 		buf = msg->msg_iter.kvec->iov_base;
 		to_read = msg->msg_iter.kvec->iov_len;
 		rc = smbd_recv_buf(info, buf, to_read);
 		break;
 
-	case READ | ITER_BVEC:
+	case ITER_BVEC:
 		page = msg->msg_iter.bvec->bv_page;
 		page_offset = msg->msg_iter.bvec->bv_offset;
 		to_read = msg->msg_iter.bvec->bv_len;
@@ -2071,10 +2079,11 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
 	default:
 		/* It's a bug in upper layer to get there */
 		cifs_dbg(VFS, "CIFS: invalid msg type %d\n",
-			msg->msg_iter.type);
+			 iov_iter_type(&msg->msg_iter));
 		rc = -EINVAL;
 	}
 
+out:
 	info->smbd_recv_pending--;
 	wake_up(&info->wait_smbd_recv_pending);
 
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 093fb54cd316..722d17c88edb 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1313,7 +1313,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	spin_lock_init(&dio->bio_lock);
 	dio->refcount = 1;
 
-	dio->should_dirty = (iter->type == ITER_IOVEC);
+	dio->should_dirty = iter_is_iovec(iter) && iov_iter_rw(iter) == READ;
 	sdio.iter = iter;
 	sdio.final_block_in_request = end >> blkbits;
 
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 32d0b883e74f..c9ccd45156dc 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -1271,7 +1271,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
 	ssize_t ret = 0;
 
 	/* Special case for kernel I/O: can copy directly into the buffer */
-	if (ii->type & ITER_KVEC) {
+	if (iov_iter_is_kvec(ii)) {
 		unsigned long user_addr = fuse_get_user_addr(ii);
 		size_t frag_size = fuse_get_frag_size(ii, *nbytesp);
 
diff --git a/fs/iomap.c b/fs/iomap.c
index ec15cf2ec696..2c53400aa802 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1795,7 +1795,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		if (pos >= dio->i_size)
 			goto out_free_dio;
 
-		if (iter->type == ITER_IOVEC)
+		if (iter_is_iovec(iter) && iov_iter_rw(iter) == READ)
 			dio->flags |= IOMAP_DIO_DIRTY;
 	} else {
 		flags |= IOMAP_WRITE;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 422b1c01ee0d..fcabc959c794 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -21,7 +21,7 @@ struct kvec {
 	size_t iov_len;
 };
 
-enum {
+enum iter_type {
 	ITER_IOVEC = 0,
 	ITER_KVEC = 2,
 	ITER_BVEC = 4,
@@ -47,6 +47,36 @@ struct iov_iter {
 	};
 };
 
+static inline enum iter_type iov_iter_type(const struct iov_iter *i)
+{
+	return i->type & ~(READ | WRITE);
+}
+
+static inline bool iter_is_iovec(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_IOVEC;
+}
+
+static inline bool iov_iter_is_kvec(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_KVEC;
+}
+
+static inline bool iov_iter_is_bvec(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_BVEC;
+}
+
+static inline bool iov_iter_is_pipe(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_PIPE;
+}
+
+static inline unsigned char iov_iter_rw(const struct iov_iter *i)
+{
+	return i->type & (READ | WRITE);
+}
+
 /*
  * Total number of bytes covered by an iovec.
  *
@@ -74,7 +104,8 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter)
 }
 
 #define iov_for_each(iov, iter, start)				\
-	if (!((start).type & (ITER_BVEC | ITER_PIPE)))		\
+	if (iov_iter_type(start) == ITER_IOVEC ||		\
+	    iov_iter_type(start) == ITER_KVEC)			\
 	for (iter = (start);					\
 	     (iter).count &&					\
 	     ((iov = iov_iter_iovec(&(iter))), 1);		\
@@ -202,19 +233,6 @@ static inline size_t iov_iter_count(const struct iov_iter *i)
 	return i->count;
 }
 
-static inline bool iter_is_iovec(const struct iov_iter *i)
-{
-	return !(i->type & (ITER_BVEC | ITER_KVEC | ITER_PIPE));
-}
-
-/*
- * Get one of READ or WRITE out of iter->type without any other flags OR'd in
- * with it.
- *
- * The ?: is just for type safety.
- */
-#define iov_iter_rw(i) ((0 ? (struct iov_iter *)0 : (i))->type & (READ | WRITE))
-
 /*
  * Cap the iov_iter by given limit; note that the second argument is
  * *not* the new size - it's upper limit for such.  Passing it a value
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 8be175df3075..42d39116a556 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -558,7 +558,7 @@ static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	const char *from = addr;
-	if (unlikely(i->type & ITER_PIPE))
+	if (unlikely(iov_iter_is_pipe(i)))
 		return copy_pipe_to_iter(addr, bytes, i);
 	if (iter_is_iovec(i))
 		might_fault();
@@ -658,7 +658,7 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
 	const char *from = addr;
 	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
 
-	if (unlikely(i->type & ITER_PIPE))
+	if (unlikely(iov_iter_is_pipe(i)))
 		return copy_pipe_to_iter_mcsafe(addr, bytes, i);
 	if (iter_is_iovec(i))
 		might_fault();
@@ -692,7 +692,7 @@ EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return 0;
 	}
@@ -712,7 +712,7 @@ EXPORT_SYMBOL(_copy_from_iter);
 bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return false;
 	}
@@ -739,7 +739,7 @@ EXPORT_SYMBOL(_copy_from_iter_full);
 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return 0;
 	}
@@ -773,7 +773,7 @@ EXPORT_SYMBOL(_copy_from_iter_nocache);
 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return 0;
 	}
@@ -794,7 +794,7 @@ EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
 bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
 {
 	char *to = addr;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return false;
 	}
@@ -836,7 +836,7 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
 		kunmap_atomic(kaddr);
 		return wanted;
-	} else if (likely(!(i->type & ITER_PIPE)))
+	} else if (likely(!iov_iter_is_pipe(i)))
 		return copy_page_to_iter_iovec(page, offset, bytes, i);
 	else
 		return copy_page_to_iter_pipe(page, offset, bytes, i);
@@ -848,7 +848,7 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 {
 	if (unlikely(!page_copy_sane(page, offset, bytes)))
 		return 0;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return 0;
 	}
@@ -888,7 +888,7 @@ static size_t pipe_zero(size_t bytes, struct iov_iter *i)
 
 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
 {
-	if (unlikely(i->type & ITER_PIPE))
+	if (unlikely(iov_iter_is_pipe(i)))
 		return pipe_zero(bytes, i);
 	iterate_and_advance(i, bytes, v,
 		clear_user(v.iov_base, v.iov_len),
@@ -908,7 +908,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 		kunmap_atomic(kaddr);
 		return 0;
 	}
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		kunmap_atomic(kaddr);
 		WARN_ON(1);
 		return 0;
@@ -972,7 +972,7 @@ static void pipe_advance(struct iov_iter *i, size_t size)
 
 void iov_iter_advance(struct iov_iter *i, size_t size)
 {
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		pipe_advance(i, size);
 		return;
 	}
@@ -987,7 +987,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
 	if (WARN_ON(unroll > MAX_RW_COUNT))
 		return;
 	i->count += unroll;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		struct pipe_inode_info *pipe = i->pipe;
 		int idx = i->idx;
 		size_t off = i->iov_offset;
@@ -1016,7 +1016,7 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
 		return;
 	}
 	unroll -= i->iov_offset;
-	if (i->type & ITER_BVEC) {
+	if (iov_iter_is_bvec(i)) {
 		const struct bio_vec *bvec = i->bvec;
 		while (1) {
 			size_t n = (--bvec)->bv_len;
@@ -1049,11 +1049,11 @@ EXPORT_SYMBOL(iov_iter_revert);
  */
 size_t iov_iter_single_seg_count(const struct iov_iter *i)
 {
-	if (unlikely(i->type & ITER_PIPE))
+	if (unlikely(iov_iter_is_pipe(i)))
 		return i->count;	// it is a silly place, anyway
 	if (i->nr_segs == 1)
 		return i->count;
-	else if (i->type & ITER_BVEC)
+	else if (iov_iter_is_bvec(i))
 		return min(i->count, i->bvec->bv_len - i->iov_offset);
 	else
 		return min(i->count, i->iov->iov_len - i->iov_offset);
@@ -1106,7 +1106,7 @@ unsigned long iov_iter_alignment(const struct iov_iter *i)
 	unsigned long res = 0;
 	size_t size = i->count;
 
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
 			return size | i->iov_offset;
 		return size;
@@ -1125,7 +1125,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 	unsigned long res = 0;
 	size_t size = i->count;
 
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return ~0U;
 	}
@@ -1193,7 +1193,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 	if (maxsize > i->count)
 		maxsize = i->count;
 
-	if (unlikely(i->type & ITER_PIPE))
+	if (unlikely(iov_iter_is_pipe(i)))
 		return pipe_get_pages(i, pages, maxsize, maxpages, start);
 	iterate_all_kinds(i, maxsize, v, ({
 		unsigned long addr = (unsigned long)v.iov_base;
@@ -1205,7 +1205,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 			len = maxpages * PAGE_SIZE;
 		addr &= ~(PAGE_SIZE - 1);
 		n = DIV_ROUND_UP(len, PAGE_SIZE);
-		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
+		res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, pages);
 		if (unlikely(res < 0))
 			return res;
 		return (res == n ? len : res * PAGE_SIZE) - *start;
@@ -1270,7 +1270,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 	if (maxsize > i->count)
 		maxsize = i->count;
 
-	if (unlikely(i->type & ITER_PIPE))
+	if (unlikely(iov_iter_is_pipe(i)))
 		return pipe_get_pages_alloc(i, pages, maxsize, start);
 	iterate_all_kinds(i, maxsize, v, ({
 		unsigned long addr = (unsigned long)v.iov_base;
@@ -1283,7 +1283,7 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 		p = get_pages_array(n);
 		if (!p)
 			return -ENOMEM;
-		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
+		res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, p);
 		if (unlikely(res < 0)) {
 			kvfree(p);
 			return res;
@@ -1313,7 +1313,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
 	__wsum sum, next;
 	size_t off = 0;
 	sum = *csum;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return 0;
 	}
@@ -1355,7 +1355,7 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
 	__wsum sum, next;
 	size_t off = 0;
 	sum = *csum;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);
 		return false;
 	}
@@ -1400,7 +1400,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
 	__wsum sum, next;
 	size_t off = 0;
 	sum = *csum;
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		WARN_ON(1);	/* for now */
 		return 0;
 	}
@@ -1443,7 +1443,7 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
 	if (!size)
 		return 0;
 
-	if (unlikely(i->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(i))) {
 		struct pipe_inode_info *pipe = i->pipe;
 		size_t off;
 		int idx;
@@ -1481,11 +1481,11 @@ EXPORT_SYMBOL(iov_iter_npages);
 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 {
 	*new = *old;
-	if (unlikely(new->type & ITER_PIPE)) {
+	if (unlikely(iov_iter_is_pipe(new))) {
 		WARN_ON(1);
 		return NULL;
 	}
-	if (new->type & ITER_BVEC)
+	if (iov_iter_is_bvec(new))
 		return new->bvec = kmemdup(new->bvec,
 				    new->nr_segs * sizeof(struct bio_vec),
 				    flags);
diff --git a/mm/filemap.c b/mm/filemap.c
index 52517f28e6f4..bdeee0168ea7 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2122,7 +2122,7 @@ find_page:
 					!mapping->a_ops->is_partially_uptodate)
 				goto page_not_up_to_date;
 			/* pipes can't handle partially uptodate pages */
-			if (unlikely(iter->type & ITER_PIPE))
+			if (unlikely(iov_iter_is_pipe(iter)))
 				goto page_not_up_to_date;
 			if (!trylock_page(page))
 				goto page_not_up_to_date;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 7728b0acde09..4d7d2070e9c8 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -322,7 +322,7 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
 	if (!iov_iter_count(data))
 		return 0;
 
-	if (!(data->type & ITER_KVEC)) {
+	if (iov_iter_is_kvec(data)) {
 		int n;
 		/*
 		 * We allow only p9_max_pages pinned. We wait for the
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index a525fc4c2a4b..ad64b9c8b600 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -799,7 +799,7 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 	struct crypto_tfm *tfm = crypto_aead_tfm(ctx->aead_send);
 	bool async_capable = tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
 	unsigned char record_type = TLS_RECORD_TYPE_DATA;
-	bool is_kvec = msg->msg_iter.type & ITER_KVEC;
+	bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
 	bool eor = !(msg->msg_flags & MSG_MORE);
 	size_t try_to_copy, copied = 0;
 	struct sk_msg *msg_pl, *msg_en;
@@ -1457,7 +1457,7 @@ int tls_sw_recvmsg(struct sock *sk,
 	bool cmsg = false;
 	int target, err = 0;
 	long timeo;
-	bool is_kvec = msg->msg_iter.type & ITER_KVEC;
+	bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
 	int num_async = 0;
 
 	flags |= nonblock;
-- 
cgit v1.2.3


From aa563d7bca6e882ec2bdae24603c8f016401a144 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:56 +0100
Subject: iov_iter: Separate type from direction and use accessor functions

In the iov_iter struct, separate the iterator type from the iterator
direction and use accessor functions to access them in most places.

Convert a bunch of places to use switch-statements to access them rather
then chains of bitwise-AND statements.  This makes it easier to add further
iterator types.  Also, this can be more efficient as to implement a switch
of small contiguous integers, the compiler can use ~50% fewer compare
instructions than it has to use bitwise-and instructions.

Further, cease passing the iterator type into the iterator setup function.
The iterator function can set that itself.  Only the direction is required.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 drivers/block/drbd/drbd_main.c           |  2 +-
 drivers/block/drbd/drbd_receiver.c       |  2 +-
 drivers/block/loop.c                     |  9 ++++-----
 drivers/block/nbd.c                      | 12 +++++-------
 drivers/fsi/fsi-sbefifo.c                |  4 ++--
 drivers/isdn/mISDN/l1oip_core.c          |  3 +--
 drivers/misc/vmw_vmci/vmci_queue_pair.c  |  6 +++---
 drivers/nvme/target/io-cmd-file.c        |  2 +-
 drivers/target/iscsi/iscsi_target_util.c |  6 ++----
 drivers/target/target_core_file.c        |  6 +++---
 drivers/usb/usbip/usbip_common.c         |  2 +-
 drivers/xen/pvcalls-back.c               |  8 ++++----
 fs/9p/vfs_addr.c                         |  4 ++--
 fs/9p/vfs_dir.c                          |  2 +-
 fs/9p/xattr.c                            |  4 ++--
 fs/afs/rxrpc.c                           | 15 +++++++--------
 fs/ceph/file.c                           |  5 ++---
 fs/cifs/connect.c                        |  4 ++--
 fs/cifs/misc.c                           |  2 +-
 fs/cifs/smb2ops.c                        |  4 ++--
 fs/cifs/transport.c                      |  8 +++-----
 fs/dlm/lowcomms.c                        |  2 +-
 fs/nfsd/vfs.c                            |  4 ++--
 fs/ocfs2/cluster/tcp.c                   |  2 +-
 fs/orangefs/inode.c                      |  2 +-
 fs/splice.c                              |  7 +++----
 include/linux/uio.h                      | 10 +++++-----
 lib/iov_iter.c                           | 28 +++++++++++++++-------------
 mm/page_io.c                             |  2 +-
 net/9p/client.c                          |  2 +-
 net/bluetooth/6lowpan.c                  |  2 +-
 net/bluetooth/a2mp.c                     |  2 +-
 net/bluetooth/smp.c                      |  2 +-
 net/ceph/messenger.c                     |  6 +++---
 net/netfilter/ipvs/ip_vs_sync.c          |  2 +-
 net/smc/smc_clc.c                        |  4 ++--
 net/socket.c                             |  6 +++---
 net/sunrpc/svcsock.c                     |  2 +-
 net/tipc/topsrv.c                        |  2 +-
 net/tls/tls_device.c                     |  4 ++--
 40 files changed, 96 insertions(+), 105 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index ef8212a4b73e..ded9735d44af 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1856,7 +1856,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock,
 
 	/* THINK  if (signal_pending) return ... ? */
 
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iov, 1, size);
+	iov_iter_kvec(&msg.msg_iter, WRITE, &iov, 1, size);
 
 	if (sock == connection->data.socket) {
 		rcu_read_lock();
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 75f6b47169e6..fcc70642b004 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -516,7 +516,7 @@ static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flag
 	struct msghdr msg = {
 		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
 	};
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, size);
+	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size);
 	return sock_recvmsg(sock, &msg, msg.msg_flags);
 }
 
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index ea9debf59b22..cb0cc8685076 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -268,7 +268,7 @@ static int lo_write_bvec(struct file *file, struct bio_vec *bvec, loff_t *ppos)
 	struct iov_iter i;
 	ssize_t bw;
 
-	iov_iter_bvec(&i, ITER_BVEC | WRITE, bvec, 1, bvec->bv_len);
+	iov_iter_bvec(&i, WRITE, bvec, 1, bvec->bv_len);
 
 	file_start_write(file);
 	bw = vfs_iter_write(file, &i, ppos, 0);
@@ -346,7 +346,7 @@ static int lo_read_simple(struct loop_device *lo, struct request *rq,
 	ssize_t len;
 
 	rq_for_each_segment(bvec, rq, iter) {
-		iov_iter_bvec(&i, ITER_BVEC, &bvec, 1, bvec.bv_len);
+		iov_iter_bvec(&i, READ, &bvec, 1, bvec.bv_len);
 		len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
 		if (len < 0)
 			return len;
@@ -387,7 +387,7 @@ static int lo_read_transfer(struct loop_device *lo, struct request *rq,
 		b.bv_offset = 0;
 		b.bv_len = bvec.bv_len;
 
-		iov_iter_bvec(&i, ITER_BVEC, &b, 1, b.bv_len);
+		iov_iter_bvec(&i, READ, &b, 1, b.bv_len);
 		len = vfs_iter_read(lo->lo_backing_file, &i, &pos, 0);
 		if (len < 0) {
 			ret = len;
@@ -554,8 +554,7 @@ static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
 	}
 	atomic_set(&cmd->ref, 2);
 
-	iov_iter_bvec(&iter, ITER_BVEC | rw, bvec,
-		      segments, blk_rq_bytes(rq));
+	iov_iter_bvec(&iter, rw, bvec, segments, blk_rq_bytes(rq));
 	iter.iov_offset = offset;
 
 	cmd->iocb.ki_pos = pos;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 14a51254c3db..4d4d6129ff66 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -473,7 +473,7 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
 	u32 nbd_cmd_flags = 0;
 	int sent = nsock->sent, skip = 0;
 
-	iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
+	iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
 
 	switch (req_op(req)) {
 	case REQ_OP_DISCARD:
@@ -564,8 +564,7 @@ send_pages:
 
 			dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
 				req, bvec.bv_len);
-			iov_iter_bvec(&from, ITER_BVEC | WRITE,
-				      &bvec, 1, bvec.bv_len);
+			iov_iter_bvec(&from, WRITE, &bvec, 1, bvec.bv_len);
 			if (skip) {
 				if (skip >= iov_iter_count(&from)) {
 					skip -= iov_iter_count(&from);
@@ -624,7 +623,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 	int ret = 0;
 
 	reply.magic = 0;
-	iov_iter_kvec(&to, READ | ITER_KVEC, &iov, 1, sizeof(reply));
+	iov_iter_kvec(&to, READ, &iov, 1, sizeof(reply));
 	result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
 	if (result <= 0) {
 		if (!nbd_disconnected(config))
@@ -678,8 +677,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
 		struct bio_vec bvec;
 
 		rq_for_each_segment(bvec, req, iter) {
-			iov_iter_bvec(&to, ITER_BVEC | READ,
-				      &bvec, 1, bvec.bv_len);
+			iov_iter_bvec(&to, READ, &bvec, 1, bvec.bv_len);
 			result = sock_xmit(nbd, index, 0, &to, MSG_WAITALL, NULL);
 			if (result <= 0) {
 				dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
@@ -1073,7 +1071,7 @@ static void send_disconnects(struct nbd_device *nbd)
 	for (i = 0; i < config->num_connections; i++) {
 		struct nbd_sock *nsock = config->socks[i];
 
-		iov_iter_kvec(&from, WRITE | ITER_KVEC, &iov, 1, sizeof(request));
+		iov_iter_kvec(&from, WRITE, &iov, 1, sizeof(request));
 		mutex_lock(&nsock->tx_lock);
 		ret = sock_xmit(nbd, i, 1, &from, 0, NULL);
 		if (ret <= 0)
diff --git a/drivers/fsi/fsi-sbefifo.c b/drivers/fsi/fsi-sbefifo.c
index ae861342626e..d92f5b87c251 100644
--- a/drivers/fsi/fsi-sbefifo.c
+++ b/drivers/fsi/fsi-sbefifo.c
@@ -638,7 +638,7 @@ static void sbefifo_collect_async_ffdc(struct sbefifo *sbefifo)
 	}
         ffdc_iov.iov_base = ffdc;
 	ffdc_iov.iov_len = SBEFIFO_MAX_FFDC_SIZE;
-        iov_iter_kvec(&ffdc_iter, WRITE | ITER_KVEC, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE);
+        iov_iter_kvec(&ffdc_iter, WRITE, &ffdc_iov, 1, SBEFIFO_MAX_FFDC_SIZE);
 	cmd[0] = cpu_to_be32(2);
 	cmd[1] = cpu_to_be32(SBEFIFO_CMD_GET_SBE_FFDC);
 	rc = sbefifo_do_command(sbefifo, cmd, 2, &ffdc_iter);
@@ -735,7 +735,7 @@ int sbefifo_submit(struct device *dev, const __be32 *command, size_t cmd_len,
 	rbytes = (*resp_len) * sizeof(__be32);
 	resp_iov.iov_base = response;
 	resp_iov.iov_len = rbytes;
-        iov_iter_kvec(&resp_iter, WRITE | ITER_KVEC, &resp_iov, 1, rbytes);
+        iov_iter_kvec(&resp_iter, WRITE, &resp_iov, 1, rbytes);
 
 	/* Perform the command */
 	mutex_lock(&sbefifo->lock);
diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c
index b05022f94f18..072bb5e36c18 100644
--- a/drivers/isdn/mISDN/l1oip_core.c
+++ b/drivers/isdn/mISDN/l1oip_core.c
@@ -718,8 +718,7 @@ l1oip_socket_thread(void *data)
 		printk(KERN_DEBUG "%s: socket created and open\n",
 		       __func__);
 	while (!signal_pending(current)) {
-		iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1,
-				recvbuf_size);
+		iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, recvbuf_size);
 		recvlen = sock_recvmsg(socket, &msg, 0);
 		if (recvlen > 0) {
 			l1oip_socket_parse(hc, &sin_rx, recvbuf, recvlen);
diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c
index bd52f29b4a4e..264f4ed8eef2 100644
--- a/drivers/misc/vmw_vmci/vmci_queue_pair.c
+++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c
@@ -3030,7 +3030,7 @@ ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair,
 	if (!qpair || !buf)
 		return VMCI_ERROR_INVALID_ARGS;
 
-	iov_iter_kvec(&from, WRITE | ITER_KVEC, &v, 1, buf_size);
+	iov_iter_kvec(&from, WRITE, &v, 1, buf_size);
 
 	qp_lock(qpair);
 
@@ -3074,7 +3074,7 @@ ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair,
 	if (!qpair || !buf)
 		return VMCI_ERROR_INVALID_ARGS;
 
-	iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size);
+	iov_iter_kvec(&to, READ, &v, 1, buf_size);
 
 	qp_lock(qpair);
 
@@ -3119,7 +3119,7 @@ ssize_t vmci_qpair_peek(struct vmci_qp *qpair,
 	if (!qpair || !buf)
 		return VMCI_ERROR_INVALID_ARGS;
 
-	iov_iter_kvec(&to, READ | ITER_KVEC, &v, 1, buf_size);
+	iov_iter_kvec(&to, READ, &v, 1, buf_size);
 
 	qp_lock(qpair);
 
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index 81a9dc5290a8..97f282eca3c2 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -101,7 +101,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
 		rw = READ;
 	}
 
-	iov_iter_bvec(&iter, ITER_BVEC | rw, req->f.bvec, nr_segs, count);
+	iov_iter_bvec(&iter, rw, req->f.bvec, nr_segs, count);
 
 	iocb->ki_pos = pos;
 	iocb->ki_filp = req->ns->file;
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 49be1e41290c..991482576417 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -1258,8 +1258,7 @@ static int iscsit_do_rx_data(
 		return -1;
 
 	memset(&msg, 0, sizeof(struct msghdr));
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC,
-		      count->iov, count->iov_count, data);
+	iov_iter_kvec(&msg.msg_iter, READ, count->iov, count->iov_count, data);
 
 	while (msg_data_left(&msg)) {
 		rx_loop = sock_recvmsg(conn->sock, &msg, MSG_WAITALL);
@@ -1315,8 +1314,7 @@ int tx_data(
 
 	memset(&msg, 0, sizeof(struct msghdr));
 
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC,
-		      iov, iov_count, data);
+	iov_iter_kvec(&msg.msg_iter, WRITE, iov, iov_count, data);
 
 	while (msg_data_left(&msg)) {
 		int tx_loop = sock_sendmsg(conn->sock, &msg);
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 16751ae55d7b..49b110d1b972 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -303,7 +303,7 @@ fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 		len += sg->length;
 	}
 
-	iov_iter_bvec(&iter, ITER_BVEC | is_write, bvec, sgl_nents, len);
+	iov_iter_bvec(&iter, is_write, bvec, sgl_nents, len);
 
 	aio_cmd->cmd = cmd;
 	aio_cmd->len = len;
@@ -353,7 +353,7 @@ static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
 		len += sg->length;
 	}
 
-	iov_iter_bvec(&iter, ITER_BVEC, bvec, sgl_nents, len);
+	iov_iter_bvec(&iter, READ, bvec, sgl_nents, len);
 	if (is_write)
 		ret = vfs_iter_write(fd, &iter, &pos, 0);
 	else
@@ -490,7 +490,7 @@ fd_execute_write_same(struct se_cmd *cmd)
 		len += se_dev->dev_attrib.block_size;
 	}
 
-	iov_iter_bvec(&iter, ITER_BVEC, bvec, nolb, len);
+	iov_iter_bvec(&iter, READ, bvec, nolb, len);
 	ret = vfs_iter_write(fd_dev->fd_file, &iter, &pos, 0);
 
 	kfree(bvec);
diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
index 9756752c0681..45da3e01c7b0 100644
--- a/drivers/usb/usbip/usbip_common.c
+++ b/drivers/usb/usbip/usbip_common.c
@@ -309,7 +309,7 @@ int usbip_recv(struct socket *sock, void *buf, int size)
 	if (!sock || !buf || !size)
 		return -EINVAL;
 
-	iov_iter_kvec(&msg.msg_iter, READ|ITER_KVEC, &iov, 1, size);
+	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, size);
 
 	usbip_dbg_xmit("enter\n");
 
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index b1092fbefa63..2e5d845b5091 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -137,13 +137,13 @@ static void pvcalls_conn_back_read(void *opaque)
 	if (masked_prod < masked_cons) {
 		vec[0].iov_base = data->in + masked_prod;
 		vec[0].iov_len = wanted;
-		iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 1, wanted);
+		iov_iter_kvec(&msg.msg_iter, WRITE, vec, 1, wanted);
 	} else {
 		vec[0].iov_base = data->in + masked_prod;
 		vec[0].iov_len = array_size - masked_prod;
 		vec[1].iov_base = data->in;
 		vec[1].iov_len = wanted - vec[0].iov_len;
-		iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 2, wanted);
+		iov_iter_kvec(&msg.msg_iter, WRITE, vec, 2, wanted);
 	}
 
 	atomic_set(&map->read, 0);
@@ -195,13 +195,13 @@ static void pvcalls_conn_back_write(struct sock_mapping *map)
 	if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) {
 		vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
 		vec[0].iov_len = size;
-		iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 1, size);
+		iov_iter_kvec(&msg.msg_iter, READ, vec, 1, size);
 	} else {
 		vec[0].iov_base = data->out + pvcalls_mask(cons, array_size);
 		vec[0].iov_len = array_size - pvcalls_mask(cons, array_size);
 		vec[1].iov_base = data->out;
 		vec[1].iov_len = size - vec[0].iov_len;
-		iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 2, size);
+		iov_iter_kvec(&msg.msg_iter, READ, vec, 2, size);
 	}
 
 	atomic_set(&map->write, 0);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index e1cbdfdb7c68..0bcbcc20f769 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -65,7 +65,7 @@ static int v9fs_fid_readpage(struct p9_fid *fid, struct page *page)
 	if (retval == 0)
 		return retval;
 
-	iov_iter_bvec(&to, ITER_BVEC | READ, &bvec, 1, PAGE_SIZE);
+	iov_iter_bvec(&to, READ, &bvec, 1, PAGE_SIZE);
 
 	retval = p9_client_read(fid, page_offset(page), &to, &err);
 	if (err) {
@@ -175,7 +175,7 @@ static int v9fs_vfs_writepage_locked(struct page *page)
 	bvec.bv_page = page;
 	bvec.bv_offset = 0;
 	bvec.bv_len = len;
-	iov_iter_bvec(&from, ITER_BVEC | WRITE, &bvec, 1, len);
+	iov_iter_bvec(&from, WRITE, &bvec, 1, len);
 
 	/* We should have writeback_fid always set */
 	BUG_ON(!v9inode->writeback_fid);
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index b0405d6aac85..d5db3c968a03 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -133,7 +133,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
 		if (rdir->tail == rdir->head) {
 			struct iov_iter to;
 			int n;
-			iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buflen);
+			iov_iter_kvec(&to, READ, &kvec, 1, buflen);
 			n = p9_client_read(file->private_data, ctx->pos, &to,
 					   &err);
 			if (err)
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index 352abc39e891..ac8ff8ca4c11 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -32,7 +32,7 @@ ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
 	struct iov_iter to;
 	int err;
 
-	iov_iter_kvec(&to, READ | ITER_KVEC, &kvec, 1, buffer_size);
+	iov_iter_kvec(&to, READ, &kvec, 1, buffer_size);
 
 	attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
 	if (IS_ERR(attr_fid)) {
@@ -107,7 +107,7 @@ int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
 	struct iov_iter from;
 	int retval, err;
 
-	iov_iter_kvec(&from, WRITE | ITER_KVEC, &kvec, 1, value_len);
+	iov_iter_kvec(&from, WRITE, &kvec, 1, value_len);
 
 	p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu flags = %d\n",
 		 name, value_len, flags);
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 77a83790a31f..639c16882e93 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -286,7 +286,7 @@ static void afs_load_bvec(struct afs_call *call, struct msghdr *msg,
 		offset = 0;
 	}
 
-	iov_iter_bvec(&msg->msg_iter, WRITE | ITER_BVEC, bv, nr, bytes);
+	iov_iter_bvec(&msg->msg_iter, WRITE, bv, nr, bytes);
 }
 
 /*
@@ -401,8 +401,7 @@ long afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call,
 
 	msg.msg_name		= NULL;
 	msg.msg_namelen		= 0;
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1,
-		      call->request_size);
+	iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, call->request_size);
 	msg.msg_control		= NULL;
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= MSG_WAITALL | (call->send_pages ? MSG_MORE : 0);
@@ -432,7 +431,7 @@ error_do_abort:
 		rxrpc_kernel_abort_call(call->net->socket, rxcall,
 					RX_USER_ABORT, ret, "KSD");
 	} else {
-		iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, NULL, 0, 0);
+		iov_iter_kvec(&msg.msg_iter, READ, NULL, 0, 0);
 		rxrpc_kernel_recv_data(call->net->socket, rxcall,
 				       &msg.msg_iter, false,
 				       &call->abort_code, &call->service_id);
@@ -468,7 +467,7 @@ static void afs_deliver_to_call(struct afs_call *call)
 		if (state == AFS_CALL_SV_AWAIT_ACK) {
 			struct iov_iter iter;
 
-			iov_iter_kvec(&iter, READ | ITER_KVEC, NULL, 0, 0);
+			iov_iter_kvec(&iter, READ, NULL, 0, 0);
 			ret = rxrpc_kernel_recv_data(call->net->socket,
 						     call->rxcall, &iter, false,
 						     &remote_abort,
@@ -825,7 +824,7 @@ void afs_send_empty_reply(struct afs_call *call)
 
 	msg.msg_name		= NULL;
 	msg.msg_namelen		= 0;
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
+	iov_iter_kvec(&msg.msg_iter, WRITE, NULL, 0, 0);
 	msg.msg_control		= NULL;
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= 0;
@@ -864,7 +863,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 	iov[0].iov_len		= len;
 	msg.msg_name		= NULL;
 	msg.msg_namelen		= 0;
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iov, 1, len);
+	iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
 	msg.msg_control		= NULL;
 	msg.msg_controllen	= 0;
 	msg.msg_flags		= 0;
@@ -905,7 +904,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
 
 	iov.iov_base = buf + call->offset;
 	iov.iov_len = count - call->offset;
-	iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, count - call->offset);
+	iov_iter_kvec(&iter, READ, &iov, 1, count - call->offset);
 
 	ret = rxrpc_kernel_recv_data(net->socket, call->rxcall, &iter,
 				     want_more, &remote_abort,
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 524ecc95b04d..5dd433aa9b23 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -821,7 +821,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
 				aio_req->total_len = rc + zlen;
 			}
 
-			iov_iter_bvec(&i, ITER_BVEC, osd_data->bvec_pos.bvecs,
+			iov_iter_bvec(&i, READ, osd_data->bvec_pos.bvecs,
 				      osd_data->num_bvecs,
 				      osd_data->bvec_pos.iter.bi_size);
 			iov_iter_advance(&i, rc);
@@ -1044,8 +1044,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
 				int zlen = min_t(size_t, len - ret,
 						 size - pos - ret);
 
-				iov_iter_bvec(&i, ITER_BVEC, bvecs, num_pages,
-					      len);
+				iov_iter_bvec(&i, READ, bvecs, num_pages, len);
 				iov_iter_advance(&i, ret);
 				iov_iter_zero(zlen, &i);
 				ret += zlen;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 52d71b64c0c6..11bcd2fb90b1 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -588,7 +588,7 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
 {
 	struct msghdr smb_msg;
 	struct kvec iov = {.iov_base = buf, .iov_len = to_read};
-	iov_iter_kvec(&smb_msg.msg_iter, READ | ITER_KVEC, &iov, 1, to_read);
+	iov_iter_kvec(&smb_msg.msg_iter, READ, &iov, 1, to_read);
 
 	return cifs_readv_from_socket(server, &smb_msg);
 }
@@ -600,7 +600,7 @@ cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page,
 	struct msghdr smb_msg;
 	struct bio_vec bv = {
 		.bv_page = page, .bv_len = to_read, .bv_offset = page_offset};
-	iov_iter_bvec(&smb_msg.msg_iter, READ | ITER_BVEC, &bv, 1, to_read);
+	iov_iter_bvec(&smb_msg.msg_iter, READ, &bv, 1, to_read);
 	return cifs_readv_from_socket(server, &smb_msg);
 }
 
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 7b5b960a04b8..10cff44832d8 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -857,7 +857,7 @@ setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw)
 	ctx->bv = bv;
 	ctx->len = saved_len - count;
 	ctx->npages = npages;
-	iov_iter_bvec(&ctx->iter, ITER_BVEC | rw, ctx->bv, npages, ctx->len);
+	iov_iter_bvec(&ctx->iter, rw, ctx->bv, npages, ctx->len);
 	return 0;
 }
 
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 89985a0a6819..26f8a65b8722 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -2962,13 +2962,13 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid,
 			return 0;
 		}
 
-		iov_iter_bvec(&iter, WRITE | ITER_BVEC, bvec, npages, data_len);
+		iov_iter_bvec(&iter, WRITE, bvec, npages, data_len);
 	} else if (buf_len >= data_offset + data_len) {
 		/* read response payload is in buf */
 		WARN_ONCE(npages > 0, "read data can be either in buf or in pages");
 		iov.iov_base = buf + data_offset;
 		iov.iov_len = data_len;
-		iov_iter_kvec(&iter, WRITE | ITER_KVEC, &iov, 1, data_len);
+		iov_iter_kvec(&iter, WRITE, &iov, 1, data_len);
 	} else {
 		/* read response payload cannot be in both buf and pages */
 		WARN_ONCE(1, "buf can not contain only a part of read data");
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index b48f43963da6..146b618802a5 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -307,8 +307,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
 			.iov_base = &rfc1002_marker,
 			.iov_len  = 4
 		};
-		iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC, &hiov,
-			      1, 4);
+		iov_iter_kvec(&smb_msg.msg_iter, WRITE, &hiov, 1, 4);
 		rc = smb_send_kvec(server, &smb_msg, &sent);
 		if (rc < 0)
 			goto uncork;
@@ -329,8 +328,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
 			size += iov[i].iov_len;
 		}
 
-		iov_iter_kvec(&smb_msg.msg_iter, WRITE | ITER_KVEC,
-			      iov, n_vec, size);
+		iov_iter_kvec(&smb_msg.msg_iter, WRITE, iov, n_vec, size);
 
 		rc = smb_send_kvec(server, &smb_msg, &sent);
 		if (rc < 0)
@@ -346,7 +344,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
 			rqst_page_get_length(&rqst[j], i, &bvec.bv_len,
 					     &bvec.bv_offset);
 
-			iov_iter_bvec(&smb_msg.msg_iter, WRITE | ITER_BVEC,
+			iov_iter_bvec(&smb_msg.msg_iter, WRITE,
 				      &bvec, 1, bvec.bv_len);
 			rc = smb_send_kvec(server, &smb_msg, &sent);
 			if (rc < 0)
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index a5e4a221435c..76976d6e50f9 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -674,7 +674,7 @@ static int receive_from_sock(struct connection *con)
 		nvec = 2;
 	}
 	len = iov[0].iov_len + iov[1].iov_len;
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nvec, len);
+	iov_iter_kvec(&msg.msg_iter, READ, iov, nvec, len);
 
 	r = ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT | MSG_NOSIGNAL);
 	if (ret <= 0)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b53e76391e52..8b90f5480904 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -923,7 +923,7 @@ __be32 nfsd_readv(struct svc_rqst *rqstp, struct svc_fh *fhp,
 	int host_err;
 
 	trace_nfsd_read_vector(rqstp, fhp, offset, *count);
-	iov_iter_kvec(&iter, READ | ITER_KVEC, vec, vlen, *count);
+	iov_iter_kvec(&iter, READ, vec, vlen, *count);
 	host_err = vfs_iter_read(file, &iter, &offset, 0);
 	return nfsd_finish_read(rqstp, fhp, file, offset, count, host_err);
 }
@@ -999,7 +999,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 	if (stable && !use_wgather)
 		flags |= RWF_SYNC;
 
-	iov_iter_kvec(&iter, WRITE | ITER_KVEC, vec, vlen, *cnt);
+	iov_iter_kvec(&iter, WRITE, vec, vlen, *cnt);
 	host_err = vfs_iter_write(file, &iter, &pos, flags);
 	if (host_err < 0)
 		goto out_nfserr;
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 7d9eea7d4a87..e9f236af1927 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -916,7 +916,7 @@ static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len)
 {
 	struct kvec vec = { .iov_len = len, .iov_base = data, };
 	struct msghdr msg = { .msg_flags = MSG_DONTWAIT, };
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, len);
+	iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, len);
 	return sock_recvmsg(sock, &msg, MSG_DONTWAIT);
 }
 
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 31932879b716..136a8bdc1d91 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -25,7 +25,7 @@ static int read_one_page(struct page *page)
 	struct iov_iter to;
 	struct bio_vec bv = {.bv_page = page, .bv_len = PAGE_SIZE};
 
-	iov_iter_bvec(&to, ITER_BVEC | READ, &bv, 1, PAGE_SIZE);
+	iov_iter_bvec(&to, READ, &bv, 1, PAGE_SIZE);
 
 	gossip_debug(GOSSIP_INODE_DEBUG,
 		    "orangefs_readpage called with page %p\n",
diff --git a/fs/splice.c b/fs/splice.c
index b3daa971f597..3553f1956508 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -301,7 +301,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
 	struct kiocb kiocb;
 	int idx, ret;
 
-	iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len);
+	iov_iter_pipe(&to, READ, pipe, len);
 	idx = to.idx;
 	init_sync_kiocb(&kiocb, in);
 	kiocb.ki_pos = *ppos;
@@ -386,7 +386,7 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
 	 */
 	offset = *ppos & ~PAGE_MASK;
 
-	iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len + offset);
+	iov_iter_pipe(&to, READ, pipe, len + offset);
 
 	res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base);
 	if (res <= 0)
@@ -745,8 +745,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 			left -= this_len;
 		}
 
-		iov_iter_bvec(&from, ITER_BVEC | WRITE, array, n,
-			      sd.total_len - left);
+		iov_iter_bvec(&from, WRITE, array, n, sd.total_len - left);
 		ret = vfs_iter_write(out, &from, &sd.pos, 0);
 		if (ret <= 0)
 			break;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index fcabc959c794..3d8f1acc142c 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -29,7 +29,7 @@ enum iter_type {
 };
 
 struct iov_iter {
-	int type;
+	unsigned int type;
 	size_t iov_offset;
 	size_t count;
 	union {
@@ -212,13 +212,13 @@ size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
 unsigned long iov_iter_alignment(const struct iov_iter *i);
 unsigned long iov_iter_gap_alignment(const struct iov_iter *i);
-void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
+void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
-void iov_iter_kvec(struct iov_iter *i, int direction, const struct kvec *kvec,
+void iov_iter_kvec(struct iov_iter *i, unsigned int direction, const struct kvec *kvec,
 			unsigned long nr_segs, size_t count);
-void iov_iter_bvec(struct iov_iter *i, int direction, const struct bio_vec *bvec,
+void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_vec *bvec,
 			unsigned long nr_segs, size_t count);
-void iov_iter_pipe(struct iov_iter *i, int direction, struct pipe_inode_info *pipe,
+void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
 			size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
 			size_t maxsize, unsigned maxpages, size_t *start);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 42d39116a556..c42b928b15ef 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -428,17 +428,19 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
 }
 EXPORT_SYMBOL(iov_iter_fault_in_readable);
 
-void iov_iter_init(struct iov_iter *i, int direction,
+void iov_iter_init(struct iov_iter *i, unsigned int direction,
 			const struct iovec *iov, unsigned long nr_segs,
 			size_t count)
 {
+	WARN_ON(direction & ~(READ | WRITE));
+	direction &= READ | WRITE;
+
 	/* It will get better.  Eventually... */
 	if (uaccess_kernel()) {
-		direction |= ITER_KVEC;
-		i->type = direction;
+		i->type = ITER_KVEC | direction;
 		i->kvec = (struct kvec *)iov;
 	} else {
-		i->type = direction;
+		i->type = ITER_IOVEC | direction;
 		i->iov = iov;
 	}
 	i->nr_segs = nr_segs;
@@ -1060,12 +1062,12 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_single_seg_count);
 
-void iov_iter_kvec(struct iov_iter *i, int direction,
+void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
 			const struct kvec *kvec, unsigned long nr_segs,
 			size_t count)
 {
-	BUG_ON(!(direction & ITER_KVEC));
-	i->type = direction;
+	WARN_ON(direction & ~(READ | WRITE));
+	i->type = ITER_KVEC | (direction & (READ | WRITE));
 	i->kvec = kvec;
 	i->nr_segs = nr_segs;
 	i->iov_offset = 0;
@@ -1073,12 +1075,12 @@ void iov_iter_kvec(struct iov_iter *i, int direction,
 }
 EXPORT_SYMBOL(iov_iter_kvec);
 
-void iov_iter_bvec(struct iov_iter *i, int direction,
+void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
 			const struct bio_vec *bvec, unsigned long nr_segs,
 			size_t count)
 {
-	BUG_ON(!(direction & ITER_BVEC));
-	i->type = direction;
+	WARN_ON(direction & ~(READ | WRITE));
+	i->type = ITER_BVEC | (direction & (READ | WRITE));
 	i->bvec = bvec;
 	i->nr_segs = nr_segs;
 	i->iov_offset = 0;
@@ -1086,13 +1088,13 @@ void iov_iter_bvec(struct iov_iter *i, int direction,
 }
 EXPORT_SYMBOL(iov_iter_bvec);
 
-void iov_iter_pipe(struct iov_iter *i, int direction,
+void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
 			struct pipe_inode_info *pipe,
 			size_t count)
 {
-	BUG_ON(direction != ITER_PIPE);
+	BUG_ON(direction != READ);
 	WARN_ON(pipe->nrbufs == pipe->buffers);
-	i->type = direction;
+	i->type = ITER_PIPE | READ;
 	i->pipe = pipe;
 	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
 	i->iov_offset = 0;
diff --git a/mm/page_io.c b/mm/page_io.c
index aafd19ec1db4..86de453a60cf 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -294,7 +294,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		};
 		struct iov_iter from;
 
-		iov_iter_bvec(&from, ITER_BVEC | WRITE, &bv, 1, PAGE_SIZE);
+		iov_iter_bvec(&from, WRITE, &bv, 1, PAGE_SIZE);
 		init_sync_kiocb(&kiocb, swap_file);
 		kiocb.ki_pos = page_file_offset(page);
 
diff --git a/net/9p/client.c b/net/9p/client.c
index deae53a7dffc..a9cd1401bd09 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -2070,7 +2070,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
 	struct kvec kv = {.iov_base = data, .iov_len = count};
 	struct iov_iter to;
 
-	iov_iter_kvec(&to, READ | ITER_KVEC, &kv, 1, count);
+	iov_iter_kvec(&to, READ, &kv, 1, count);
 
 	p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
 				fid->fid, (unsigned long long) offset, count);
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 4e2576fc0c59..828e87fe8027 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -467,7 +467,7 @@ static int send_pkt(struct l2cap_chan *chan, struct sk_buff *skb,
 	iv.iov_len = skb->len;
 
 	memset(&msg, 0, sizeof(msg));
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, skb->len);
+	iov_iter_kvec(&msg.msg_iter, WRITE, &iv, 1, skb->len);
 
 	err = l2cap_chan_send(chan, &msg, skb->len);
 	if (err > 0) {
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 51c2cf2d8923..58fc6333d412 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -63,7 +63,7 @@ static void a2mp_send(struct amp_mgr *mgr, u8 code, u8 ident, u16 len, void *dat
 
 	memset(&msg, 0, sizeof(msg));
 
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, &iv, 1, total_len);
+	iov_iter_kvec(&msg.msg_iter, WRITE, &iv, 1, total_len);
 
 	l2cap_chan_send(chan, &msg, total_len);
 
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index a1c1b7e8a45c..c822e626761b 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -622,7 +622,7 @@ static void smp_send_cmd(struct l2cap_conn *conn, u8 code, u16 len, void *data)
 
 	memset(&msg, 0, sizeof(msg));
 
-	iov_iter_kvec(&msg.msg_iter, WRITE | ITER_KVEC, iv, 2, 1 + len);
+	iov_iter_kvec(&msg.msg_iter, WRITE, iv, 2, 1 + len);
 
 	l2cap_chan_send(chan, &msg, 1 + len);
 
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 0a187196aeed..e493ff77b378 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -526,7 +526,7 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
 	if (!buf)
 		msg.msg_flags |= MSG_TRUNC;
 
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len);
+	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, len);
 	r = sock_recvmsg(sock, &msg, msg.msg_flags);
 	if (r == -EAGAIN)
 		r = 0;
@@ -545,7 +545,7 @@ static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
 	int r;
 
 	BUG_ON(page_offset + length > PAGE_SIZE);
-	iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, &bvec, 1, length);
+	iov_iter_bvec(&msg.msg_iter, READ, &bvec, 1, length);
 	r = sock_recvmsg(sock, &msg, msg.msg_flags);
 	if (r == -EAGAIN)
 		r = 0;
@@ -607,7 +607,7 @@ static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
 	else
 		msg.msg_flags |= MSG_EOR;  /* superfluous, but what the hell */
 
-	iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size);
+	iov_iter_bvec(&msg.msg_iter, WRITE, &bvec, 1, size);
 	ret = sock_sendmsg(sock, &msg);
 	if (ret == -EAGAIN)
 		ret = 0;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index d4020c5e831d..2526be6b3d90 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1616,7 +1616,7 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
 	EnterFunction(7);
 
 	/* Receive a packet */
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, buflen);
+	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, buflen);
 	len = sock_recvmsg(sock, &msg, MSG_DONTWAIT);
 	if (len < 0)
 		return len;
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 52241d679cc9..89c3a8c7859a 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -286,7 +286,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 	 */
 	krflags = MSG_PEEK | MSG_WAITALL;
 	smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1,
+	iov_iter_kvec(&msg.msg_iter, READ, &vec, 1,
 			sizeof(struct smc_clc_msg_hdr));
 	len = sock_recvmsg(smc->clcsock, &msg, krflags);
 	if (signal_pending(current)) {
@@ -325,7 +325,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 
 	/* receive the complete CLC message */
 	memset(&msg, 0, sizeof(struct msghdr));
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, datlen);
+	iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, datlen);
 	krflags = MSG_WAITALL;
 	len = sock_recvmsg(smc->clcsock, &msg, krflags);
 	if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
diff --git a/net/socket.c b/net/socket.c
index b68801c7d0ab..fae408abea54 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -635,7 +635,7 @@ EXPORT_SYMBOL(sock_sendmsg);
 int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 		   struct kvec *vec, size_t num, size_t size)
 {
-	iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+	iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
 	return sock_sendmsg(sock, msg);
 }
 EXPORT_SYMBOL(kernel_sendmsg);
@@ -648,7 +648,7 @@ int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
 	if (!sock->ops->sendmsg_locked)
 		return sock_no_sendmsg_locked(sk, msg, size);
 
-	iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+	iov_iter_kvec(&msg->msg_iter, WRITE, vec, num, size);
 
 	return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg));
 }
@@ -823,7 +823,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 	mm_segment_t oldfs = get_fs();
 	int result;
 
-	iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
+	iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
 	set_fs(KERNEL_DS);
 	result = sock_recvmsg(sock, msg, flags);
 	set_fs(oldfs);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 5445145e639c..0b46ec0bf74e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -338,7 +338,7 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
 	rqstp->rq_xprt_hlen = 0;
 
 	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, nr, buflen);
+	iov_iter_kvec(&msg.msg_iter, READ, iov, nr, buflen);
 	len = sock_recvmsg(svsk->sk_sock, &msg, msg.msg_flags);
 	/* If we read a full record, then assume there may be more
 	 * data to read (stream based sockets only!)
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index d8956f7daac4..afa02eeec403 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -394,7 +394,7 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con)
 	iov.iov_base = &s;
 	iov.iov_len = sizeof(s);
 	msg.msg_name = NULL;
-	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
+	iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, iov.iov_len);
 	ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
 	if (ret == -EWOULDBLOCK)
 		return -EWOULDBLOCK;
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 276edbc04f38..d753e362d2d9 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -489,7 +489,7 @@ int tls_device_sendpage(struct sock *sk, struct page *page,
 
 	iov.iov_base = kaddr + offset;
 	iov.iov_len = size;
-	iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, &iov, 1, size);
+	iov_iter_kvec(&msg_iter, WRITE, &iov, 1, size);
 	rc = tls_push_data(sk, &msg_iter, size,
 			   flags, TLS_RECORD_TYPE_DATA);
 	kunmap(page);
@@ -538,7 +538,7 @@ static int tls_device_push_pending_record(struct sock *sk, int flags)
 {
 	struct iov_iter	msg_iter;
 
-	iov_iter_kvec(&msg_iter, WRITE | ITER_KVEC, NULL, 0, 0);
+	iov_iter_kvec(&msg_iter, WRITE, NULL, 0, 0);
 	return tls_push_data(sk, &msg_iter, 0, flags, TLS_RECORD_TYPE_DATA);
 }
 
-- 
cgit v1.2.3


From 9ea9ce0427aab02a2fd88fc608267cf6952119f1 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 20 Oct 2018 00:57:56 +0100
Subject: iov_iter: Add I/O discard iterator

Add a new iterator, ITER_DISCARD, that can only be used in READ mode and
just discards any data copied to it.

This is useful in a network filesystem for discarding any unwanted data
sent by a server.

Signed-off-by: David Howells <dhowells@redhat.com>
---
 include/linux/uio.h |  7 +++++++
 lib/iov_iter.c      | 55 ++++++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 55 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 3d8f1acc142c..55ce99ddb912 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -26,6 +26,7 @@ enum iter_type {
 	ITER_KVEC = 2,
 	ITER_BVEC = 4,
 	ITER_PIPE = 8,
+	ITER_DISCARD = 16,
 };
 
 struct iov_iter {
@@ -72,6 +73,11 @@ static inline bool iov_iter_is_pipe(const struct iov_iter *i)
 	return iov_iter_type(i) == ITER_PIPE;
 }
 
+static inline bool iov_iter_is_discard(const struct iov_iter *i)
+{
+	return iov_iter_type(i) == ITER_DISCARD;
+}
+
 static inline unsigned char iov_iter_rw(const struct iov_iter *i)
 {
 	return i->type & (READ | WRITE);
@@ -220,6 +226,7 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction, const struct bio_
 			unsigned long nr_segs, size_t count);
 void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode_info *pipe,
 			size_t count);
+void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
 			size_t maxsize, unsigned maxpages, size_t *start);
 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index c42b928b15ef..7ebccb5c1637 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -83,6 +83,7 @@
 			const struct kvec *kvec;		\
 			struct kvec v;				\
 			iterate_kvec(i, n, v, kvec, skip, (K))	\
+		} else if (unlikely(i->type & ITER_DISCARD)) {	\
 		} else {					\
 			const struct iovec *iov;		\
 			struct iovec v;				\
@@ -114,6 +115,8 @@
 			}					\
 			i->nr_segs -= kvec - i->kvec;		\
 			i->kvec = kvec;				\
+		} else if (unlikely(i->type & ITER_DISCARD)) {	\
+			skip += n;				\
 		} else {					\
 			const struct iovec *iov;		\
 			struct iovec v;				\
@@ -838,7 +841,9 @@ size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
 		kunmap_atomic(kaddr);
 		return wanted;
-	} else if (likely(!iov_iter_is_pipe(i)))
+	} else if (unlikely(iov_iter_is_discard(i)))
+		return bytes;
+	else if (likely(!iov_iter_is_pipe(i)))
 		return copy_page_to_iter_iovec(page, offset, bytes, i);
 	else
 		return copy_page_to_iter_pipe(page, offset, bytes, i);
@@ -850,7 +855,7 @@ size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
 {
 	if (unlikely(!page_copy_sane(page, offset, bytes)))
 		return 0;
-	if (unlikely(iov_iter_is_pipe(i))) {
+	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
 		WARN_ON(1);
 		return 0;
 	}
@@ -910,7 +915,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page,
 		kunmap_atomic(kaddr);
 		return 0;
 	}
-	if (unlikely(iov_iter_is_pipe(i))) {
+	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
 		kunmap_atomic(kaddr);
 		WARN_ON(1);
 		return 0;
@@ -978,6 +983,10 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
 		pipe_advance(i, size);
 		return;
 	}
+	if (unlikely(iov_iter_is_discard(i))) {
+		i->count -= size;
+		return;
+	}
 	iterate_and_advance(i, size, v, 0, 0, 0)
 }
 EXPORT_SYMBOL(iov_iter_advance);
@@ -1013,6 +1022,8 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll)
 		pipe_truncate(i);
 		return;
 	}
+	if (unlikely(iov_iter_is_discard(i)))
+		return;
 	if (unroll <= i->iov_offset) {
 		i->iov_offset -= unroll;
 		return;
@@ -1055,6 +1066,8 @@ size_t iov_iter_single_seg_count(const struct iov_iter *i)
 		return i->count;	// it is a silly place, anyway
 	if (i->nr_segs == 1)
 		return i->count;
+	if (unlikely(iov_iter_is_discard(i)))
+		return i->count;
 	else if (iov_iter_is_bvec(i))
 		return min(i->count, i->bvec->bv_len - i->iov_offset);
 	else
@@ -1103,6 +1116,24 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
 }
 EXPORT_SYMBOL(iov_iter_pipe);
 
+/**
+ * iov_iter_discard - Initialise an I/O iterator that discards data
+ * @i: The iterator to initialise.
+ * @direction: The direction of the transfer.
+ * @count: The size of the I/O buffer in bytes.
+ *
+ * Set up an I/O iterator that just discards everything that's written to it.
+ * It's only available as a READ iterator.
+ */
+void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
+{
+	BUG_ON(direction != READ);
+	i->type = ITER_DISCARD | READ;
+	i->count = count;
+	i->iov_offset = 0;
+}
+EXPORT_SYMBOL(iov_iter_discard);
+
 unsigned long iov_iter_alignment(const struct iov_iter *i)
 {
 	unsigned long res = 0;
@@ -1127,7 +1158,7 @@ unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
 	unsigned long res = 0;
 	size_t size = i->count;
 
-	if (unlikely(iov_iter_is_pipe(i))) {
+	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
 		WARN_ON(1);
 		return ~0U;
 	}
@@ -1197,6 +1228,9 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 
 	if (unlikely(iov_iter_is_pipe(i)))
 		return pipe_get_pages(i, pages, maxsize, maxpages, start);
+	if (unlikely(iov_iter_is_discard(i)))
+		return -EFAULT;
+
 	iterate_all_kinds(i, maxsize, v, ({
 		unsigned long addr = (unsigned long)v.iov_base;
 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -1274,6 +1308,9 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
 
 	if (unlikely(iov_iter_is_pipe(i)))
 		return pipe_get_pages_alloc(i, pages, maxsize, start);
+	if (unlikely(iov_iter_is_discard(i)))
+		return -EFAULT;
+
 	iterate_all_kinds(i, maxsize, v, ({
 		unsigned long addr = (unsigned long)v.iov_base;
 		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
@@ -1315,7 +1352,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
 	__wsum sum, next;
 	size_t off = 0;
 	sum = *csum;
-	if (unlikely(iov_iter_is_pipe(i))) {
+	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
 		WARN_ON(1);
 		return 0;
 	}
@@ -1357,7 +1394,7 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
 	__wsum sum, next;
 	size_t off = 0;
 	sum = *csum;
-	if (unlikely(iov_iter_is_pipe(i))) {
+	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
 		WARN_ON(1);
 		return false;
 	}
@@ -1402,7 +1439,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
 	__wsum sum, next;
 	size_t off = 0;
 	sum = *csum;
-	if (unlikely(iov_iter_is_pipe(i))) {
+	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
 		WARN_ON(1);	/* for now */
 		return 0;
 	}
@@ -1444,6 +1481,8 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
 
 	if (!size)
 		return 0;
+	if (unlikely(iov_iter_is_discard(i)))
+		return 0;
 
 	if (unlikely(iov_iter_is_pipe(i))) {
 		struct pipe_inode_info *pipe = i->pipe;
@@ -1487,6 +1526,8 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 		WARN_ON(1);
 		return NULL;
 	}
+	if (unlikely(iov_iter_is_discard(new)))
+		return NULL;
 	if (iov_iter_is_bvec(new))
 		return new->bvec = kmemdup(new->bvec,
 				    new->nr_segs * sizeof(struct bio_vec),
-- 
cgit v1.2.3


From 721fb6fbfd2132164c2e8777cc837f9b2c1794dc Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 17 Oct 2018 13:07:05 +0200
Subject: fsnotify: Fix busy inodes during unmount

Detaching of mark connector from fsnotify_put_mark() can race with
unmounting of the filesystem like:

  CPU1				CPU2
fsnotify_put_mark()
  spin_lock(&conn->lock);
  ...
  inode = fsnotify_detach_connector_from_object(conn)
  spin_unlock(&conn->lock);
				generic_shutdown_super()
				  fsnotify_unmount_inodes()
				    sees connector detached for inode
				      -> nothing to do
				  evict_inode()
				    barfs on pending inode reference
  iput(inode);

Resulting in "Busy inodes after unmount" message and possible kernel
oops. Make fsnotify_unmount_inodes() properly wait for outstanding inode
references from detached connectors.

Note that the accounting of outstanding inode references in the
superblock can cause some cacheline contention on the counter. OTOH it
happens only during deletion of the last notification mark from an inode
(or during unlinking of watched inode) and that is not too bad. I have
measured time to create & delete inotify watch 100000 times from 64
processes in parallel (each process having its own inotify group and its
own file on a shared superblock) on a 64 CPU machine. Average and
standard deviation of 15 runs look like:

	Avg		Stddev
Vanilla	9.817400	0.276165
Fixed	9.710467	0.228294

So there's no statistically significant difference.

Fixes: 6b3f05d24d35 ("fsnotify: Detach mark from object list when last reference is dropped")
CC: stable@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fsnotify.c |  3 +++
 fs/notify/mark.c     | 39 +++++++++++++++++++++++++++++++--------
 include/linux/fs.h   |  3 +++
 3 files changed, 37 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 875975504409..2172ba516c61 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -96,6 +96,9 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
 
 	if (iput_inode)
 		iput(iput_inode);
+	/* Wait for outstanding inode references from connectors */
+	wait_var_event(&sb->s_fsnotify_inode_refs,
+		       !atomic_long_read(&sb->s_fsnotify_inode_refs));
 }
 
 void fsnotify_sb_delete(struct super_block *sb)
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index b5172ccb2e60..d2dd16cb5989 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -181,17 +181,20 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work)
 	}
 }
 
-static struct inode *fsnotify_detach_connector_from_object(
-					struct fsnotify_mark_connector *conn)
+static void *fsnotify_detach_connector_from_object(
+					struct fsnotify_mark_connector *conn,
+					unsigned int *type)
 {
 	struct inode *inode = NULL;
 
+	*type = conn->type;
 	if (conn->type == FSNOTIFY_OBJ_TYPE_DETACHED)
 		return NULL;
 
 	if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) {
 		inode = fsnotify_conn_inode(conn);
 		inode->i_fsnotify_mask = 0;
+		atomic_long_inc(&inode->i_sb->s_fsnotify_inode_refs);
 	} else if (conn->type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) {
 		fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0;
 	} else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) {
@@ -215,10 +218,29 @@ static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark)
 	fsnotify_put_group(group);
 }
 
+/* Drop object reference originally held by a connector */
+static void fsnotify_drop_object(unsigned int type, void *objp)
+{
+	struct inode *inode;
+	struct super_block *sb;
+
+	if (!objp)
+		return;
+	/* Currently only inode references are passed to be dropped */
+	if (WARN_ON_ONCE(type != FSNOTIFY_OBJ_TYPE_INODE))
+		return;
+	inode = objp;
+	sb = inode->i_sb;
+	iput(inode);
+	if (atomic_long_dec_and_test(&sb->s_fsnotify_inode_refs))
+		wake_up_var(&sb->s_fsnotify_inode_refs);
+}
+
 void fsnotify_put_mark(struct fsnotify_mark *mark)
 {
 	struct fsnotify_mark_connector *conn;
-	struct inode *inode = NULL;
+	void *objp = NULL;
+	unsigned int type = FSNOTIFY_OBJ_TYPE_DETACHED;
 	bool free_conn = false;
 
 	/* Catch marks that were actually never attached to object */
@@ -238,7 +260,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
 	conn = mark->connector;
 	hlist_del_init_rcu(&mark->obj_list);
 	if (hlist_empty(&conn->list)) {
-		inode = fsnotify_detach_connector_from_object(conn);
+		objp = fsnotify_detach_connector_from_object(conn, &type);
 		free_conn = true;
 	} else {
 		__fsnotify_recalc_mask(conn);
@@ -246,7 +268,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
 	mark->connector = NULL;
 	spin_unlock(&conn->lock);
 
-	iput(inode);
+	fsnotify_drop_object(type, objp);
 
 	if (free_conn) {
 		spin_lock(&destroy_lock);
@@ -713,7 +735,8 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp)
 {
 	struct fsnotify_mark_connector *conn;
 	struct fsnotify_mark *mark, *old_mark = NULL;
-	struct inode *inode;
+	void *objp;
+	unsigned int type;
 
 	conn = fsnotify_grab_connector(connp);
 	if (!conn)
@@ -739,11 +762,11 @@ void fsnotify_destroy_marks(fsnotify_connp_t *connp)
 	 * mark references get dropped. It would lead to strange results such
 	 * as delaying inode deletion or blocking unmount.
 	 */
-	inode = fsnotify_detach_connector_from_object(conn);
+	objp = fsnotify_detach_connector_from_object(conn, &type);
 	spin_unlock(&conn->lock);
 	if (old_mark)
 		fsnotify_put_mark(old_mark);
-	iput(inode);
+	fsnotify_drop_object(type, objp);
 }
 
 /*
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6da94deb957f..00b23b21e78a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1437,6 +1437,9 @@ struct super_block {
 	/* Number of inodes with nlink == 0 but still referenced */
 	atomic_long_t s_remove_count;
 
+	/* Pending fsnotify inode refs */
+	atomic_long_t s_fsnotify_inode_refs;
+
 	/* Being remounted read-only */
 	int s_readonly_remount;
 
-- 
cgit v1.2.3


From a7fe5190c03f8137ef08db84a58dd4daf2c4785d Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Thu, 4 Oct 2018 14:04:03 +0200
Subject: cpuidle: menu: Remove get_loadavg() from the performance multiplier

The function get_loadavg() returns almost always zero. To be more
precise, statistically speaking for a total of 1023379 times passing
in the function, the load is equal to zero 1020728 times, greater than
100, 610 times, the remaining is between 0 and 5.

In 2011, the get_loadavg() was removed from the Android tree because
of the above [1]. At this time, the load was:

unsigned long this_cpu_load(void)
{
        struct rq *this = this_rq();
        return this->cpu_load[0];
}

In 2014, the code was changed by commit 372ba8cb46b2 (cpuidle: menu: Lookup CPU
runqueues less) and the load is:

void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
{
        struct rq *rq = this_rq();
        *nr_waiters = atomic_read(&rq->nr_iowait);
        *load = rq->load.weight;
}

with the same result.

Both measurements show using the load in this code path does no matter
anymore. Removing it.

[1] https://android.googlesource.com/kernel/common/+/4dedd9f124703207895777ac6e91dacde0f7cc17

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/menu.c | 25 ++++++-------------------
 include/linux/sched/stat.h       |  1 -
 kernel/sched/core.c              |  7 -------
 3 files changed, 6 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 575a68f31761..76df4f947f07 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -134,11 +134,6 @@ struct menu_device {
 #define LOAD_INT(x) ((x) >> FSHIFT)
 #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
 
-static inline int get_loadavg(unsigned long load)
-{
-	return LOAD_INT(load) * 10 + LOAD_FRAC(load) / 10;
-}
-
 static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters)
 {
 	int bucket = 0;
@@ -172,18 +167,10 @@ static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters
  * to be, the higher this multiplier, and thus the higher
  * the barrier to go to an expensive C state.
  */
-static inline int performance_multiplier(unsigned long nr_iowaiters, unsigned long load)
+static inline int performance_multiplier(unsigned long nr_iowaiters)
 {
-	int mult = 1;
-
-	/* for higher loadavg, we are more reluctant */
-
-	mult += 2 * get_loadavg(load);
-
-	/* for IO wait tasks (per cpu!) we add 5x each */
-	mult += 10 * nr_iowaiters;
-
-	return mult;
+	/* for IO wait tasks (per cpu!) we add 10x each */
+	return 1 + 10 * nr_iowaiters;
 }
 
 static DEFINE_PER_CPU(struct menu_device, menu_devices);
@@ -301,7 +288,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	int idx;
 	unsigned int interactivity_req;
 	unsigned int predicted_us;
-	unsigned long nr_iowaiters, cpu_load;
+	unsigned long nr_iowaiters;
 	ktime_t delta_next;
 
 	if (data->needs_update) {
@@ -312,7 +299,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	/* determine the expected residency time, round up */
 	data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));
 
-	get_iowait_load(&nr_iowaiters, &cpu_load);
+	nr_iowaiters = nr_iowait_cpu(dev->cpu);
 	data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
 
 	if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
@@ -356,7 +343,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		 * Use the performance multiplier and the user-configurable
 		 * latency_req to determine the maximum exit latency.
 		 */
-		interactivity_req = predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
+		interactivity_req = predicted_us / performance_multiplier(nr_iowaiters);
 		if (latency_req > interactivity_req)
 			latency_req = interactivity_req;
 	}
diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h
index 04f1321d14c4..f30954cc059d 100644
--- a/include/linux/sched/stat.h
+++ b/include/linux/sched/stat.h
@@ -20,7 +20,6 @@ extern unsigned long nr_running(void);
 extern bool single_task_running(void);
 extern unsigned long nr_iowait(void);
 extern unsigned long nr_iowait_cpu(int cpu);
-extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
 
 static inline int sched_info_on(void)
 {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9245c56b8f5f..c8d5c279be14 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2887,13 +2887,6 @@ unsigned long nr_iowait_cpu(int cpu)
 	return atomic_read(&cpu_rq(cpu)->nr_iowait);
 }
 
-void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
-{
-	struct rq *rq = this_rq();
-	*nr_waiters = atomic_read(&rq->nr_iowait);
-	*load = rq->load.weight;
-}
-
 /*
  * IO-wait accounting, and how its mostly bollocks (on SMP).
  *
-- 
cgit v1.2.3


From a91e138022bc29b5d2bbc56b41de3e0db6261e28 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Fri, 12 Oct 2018 19:08:43 +0900
Subject: block: Introduce blkdev_nr_zones() helper

Introduce the blkdev_nr_zones() helper function to get the total
number of zones of a zoned block device. This number is always 0 for a
regular block device (q->limits.zoned == BLK_ZONED_NONE case).

Replace hard-coded number of zones calculation in dmz_get_zoned_device()
with a call to this helper.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-zoned.c            | 27 +++++++++++++++++++++++++++
 drivers/md/dm-zoned-target.c |  3 +--
 include/linux/blkdev.h       |  5 +++++
 3 files changed, 33 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index c461cf63f1f4..32e377f755d8 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -63,6 +63,33 @@ void __blk_req_zone_write_unlock(struct request *rq)
 }
 EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
 
+static inline unsigned int __blkdev_nr_zones(struct request_queue *q,
+					     sector_t nr_sectors)
+{
+	unsigned long zone_sectors = blk_queue_zone_sectors(q);
+
+	return (nr_sectors + zone_sectors - 1) >> ilog2(zone_sectors);
+}
+
+/**
+ * blkdev_nr_zones - Get number of zones
+ * @bdev:	Target block device
+ *
+ * Description:
+ *    Return the total number of zones of a zoned block device.
+ *    For a regular block device, the number of zones is always 0.
+ */
+unsigned int blkdev_nr_zones(struct block_device *bdev)
+{
+	struct request_queue *q = bdev_get_queue(bdev);
+
+	if (!blk_queue_is_zoned(q))
+		return 0;
+
+	return __blkdev_nr_zones(q, bdev->bd_part->nr_sects);
+}
+EXPORT_SYMBOL_GPL(blkdev_nr_zones);
+
 /*
  * Check that a zone report belongs to the partition.
  * If yes, fix its start sector and write pointer, copy it in the
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index a44183ff4be0..12d96a263623 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -702,8 +702,7 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path)
 	dev->zone_nr_blocks = dmz_sect2blk(dev->zone_nr_sectors);
 	dev->zone_nr_blocks_shift = ilog2(dev->zone_nr_blocks);
 
-	dev->nr_zones = (dev->capacity + dev->zone_nr_sectors - 1)
-		>> dev->zone_nr_sectors_shift;
+	dev->nr_zones = blkdev_nr_zones(dev->bdev);
 
 	dmz->dev = dev;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7d423721b327..ca5fdc1b7745 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -401,6 +401,7 @@ struct blk_zone_report_hdr {
 	u8		padding[60];
 };
 
+extern unsigned int blkdev_nr_zones(struct block_device *bdev);
 extern int blkdev_report_zones(struct block_device *bdev,
 			       sector_t sector, struct blk_zone *zones,
 			       unsigned int *nr_zones, gfp_t gfp_mask);
@@ -414,6 +415,10 @@ extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
 
 #else /* CONFIG_BLK_DEV_ZONED */
 
+static inline unsigned int blkdev_nr_zones(struct block_device *bdev)
+{
+	return 0;
+}
 static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
 					    fmode_t mode, unsigned int cmd,
 					    unsigned long arg)
-- 
cgit v1.2.3


From 965b652e901886ea54f93c60027b5be76328d958 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Fri, 12 Oct 2018 19:08:48 +0900
Subject: block: Expose queue nr_zones in sysfs

Expose through sysfs the nr_zones field of struct request_queue.
Exposing this value helps in debugging disk issues as well as
facilitating scripts based use of the disk (e.g. blktests).

For zoned block devices, the nr_zones field indicates the total number
of zones of the device calculated using the known disk capacity and
zone size. This number of zones is always 0 for regular block devices.

Since nr_zones is defined conditionally with CONFIG_BLK_DEV_ZONED,
introduce the blk_queue_nr_zones() function to return the correct value
for any device, regardless if CONFIG_BLK_DEV_ZONED is set.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-sysfs.c      | 11 +++++++++++
 include/linux/blkdev.h | 10 ++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 3772671cf2bc..92be8092ca4f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -300,6 +300,11 @@ static ssize_t queue_zoned_show(struct request_queue *q, char *page)
 	}
 }
 
+static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(blk_queue_nr_zones(q), page);
+}
+
 static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
 {
 	return queue_var_show((blk_queue_nomerges(q) << 1) |
@@ -637,6 +642,11 @@ static struct queue_sysfs_entry queue_zoned_entry = {
 	.show = queue_zoned_show,
 };
 
+static struct queue_sysfs_entry queue_nr_zones_entry = {
+	.attr = {.name = "nr_zones", .mode = 0444 },
+	.show = queue_nr_zones_show,
+};
+
 static struct queue_sysfs_entry queue_nomerges_entry = {
 	.attr = {.name = "nomerges", .mode = 0644 },
 	.show = queue_nomerges_show,
@@ -727,6 +737,7 @@ static struct attribute *default_attrs[] = {
 	&queue_write_zeroes_max_entry.attr,
 	&queue_nonrot_entry.attr,
 	&queue_zoned_entry.attr,
+	&queue_nr_zones_entry.attr,
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
 	&queue_iostats_entry.attr,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ca5fdc1b7745..6bb845f9601a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -811,6 +811,11 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
 }
 
 #ifdef CONFIG_BLK_DEV_ZONED
+static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
+{
+	return blk_queue_is_zoned(q) ? q->nr_zones : 0;
+}
+
 static inline unsigned int blk_queue_zone_no(struct request_queue *q,
 					     sector_t sector)
 {
@@ -826,6 +831,11 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
 		return false;
 	return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
 }
+#else /* CONFIG_BLK_DEV_ZONED */
+static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
+{
+	return 0;
+}
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 static inline bool rq_is_sync(struct request *rq)
-- 
cgit v1.2.3


From e76239a3748c90a8b0e197f8f4544a8ce52f126e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 12 Oct 2018 19:08:49 +0900
Subject: block: add a report_zones method

Dispatching a report zones command through the request queue is a major
pain due to the command reply payload rewriting necessary. Given that
blkdev_report_zones() is executing everything synchronously, implement
report zones as a block device file operation instead, allowing major
simplification of the code in many places.

sd, null-blk, dm-linear and dm-flakey being the only block device
drivers supporting exposing zoned block devices, these drivers are
modified to provide the device side implementation of the
report_zones() block device file operation.

For device mappers, a new report_zones() target type operation is
defined so that the upper block layer calls blkdev_report_zones() can
be propagated down to the underlying devices of the dm targets.
Implementation for this new operation is added to the dm-linear and
dm-flakey targets.

Reviewed-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
[Damien]
* Changed method block_device argument to gendisk
* Various bug fixes and improvements
* Added support for null_blk, dm-linear and dm-flakey.
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c               |   1 -
 block/blk-mq-debugfs.c         |   1 -
 block/blk-zoned.c              | 164 +++++++++++++--------------------------
 drivers/block/null_blk.h       |  11 ++-
 drivers/block/null_blk_main.c  |  23 +-----
 drivers/block/null_blk_zoned.c |  57 ++++----------
 drivers/md/dm-flakey.c         |  30 +++++---
 drivers/md/dm-linear.c         |  35 +++++----
 drivers/md/dm.c                | 169 ++++++++++++++++++++---------------------
 drivers/scsi/sd.c              |  13 +---
 drivers/scsi/sd.h              |  11 ++-
 drivers/scsi/sd_zbc.c          | 153 ++++++++++++-------------------------
 include/linux/blk_types.h      |   2 -
 include/linux/blkdev.h         |   8 +-
 include/linux/device-mapper.h  |  12 ++-
 include/trace/events/f2fs.h    |   1 -
 16 files changed, 266 insertions(+), 425 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-core.c b/block/blk-core.c
index 3ed60723e242..bc6ea87d10e0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2300,7 +2300,6 @@ generic_make_request_checks(struct bio *bio)
 		if (!q->limits.max_write_same_sectors)
 			goto not_supported;
 		break;
-	case REQ_OP_ZONE_REPORT:
 	case REQ_OP_ZONE_RESET:
 		if (!blk_queue_is_zoned(q))
 			goto not_supported;
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 41b86f50d126..10b284a1f18d 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -283,7 +283,6 @@ static const char *const op_name[] = {
 	REQ_OP_NAME(WRITE),
 	REQ_OP_NAME(FLUSH),
 	REQ_OP_NAME(DISCARD),
-	REQ_OP_NAME(ZONE_REPORT),
 	REQ_OP_NAME(SECURE_ERASE),
 	REQ_OP_NAME(ZONE_RESET),
 	REQ_OP_NAME(WRITE_SAME),
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 5d967fd39fbd..90cf503091d5 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -93,13 +93,10 @@ unsigned int blkdev_nr_zones(struct block_device *bdev)
 EXPORT_SYMBOL_GPL(blkdev_nr_zones);
 
 /*
- * Check that a zone report belongs to the partition.
- * If yes, fix its start sector and write pointer, copy it in the
- * zone information array and return true. Return false otherwise.
+ * Check that a zone report belongs to this partition, and if yes, fix its start
+ * sector and write pointer and return true. Return false otherwise.
  */
-static bool blkdev_report_zone(struct block_device *bdev,
-			       struct blk_zone *rep,
-			       struct blk_zone *zone)
+static bool blkdev_report_zone(struct block_device *bdev, struct blk_zone *rep)
 {
 	sector_t offset = get_start_sect(bdev);
 
@@ -114,11 +111,36 @@ static bool blkdev_report_zone(struct block_device *bdev,
 		rep->wp = rep->start + rep->len;
 	else
 		rep->wp -= offset;
-	memcpy(zone, rep, sizeof(struct blk_zone));
-
 	return true;
 }
 
+static int blk_report_zones(struct gendisk *disk, sector_t sector,
+			    struct blk_zone *zones, unsigned int *nr_zones,
+			    gfp_t gfp_mask)
+{
+	struct request_queue *q = disk->queue;
+	unsigned int z = 0, n, nrz = *nr_zones;
+	sector_t capacity = get_capacity(disk);
+	int ret;
+
+	while (z < nrz && sector < capacity) {
+		n = nrz - z;
+		ret = disk->fops->report_zones(disk, sector, &zones[z], &n,
+					       gfp_mask);
+		if (ret)
+			return ret;
+		if (!n)
+			break;
+		sector += blk_queue_zone_sectors(q) * n;
+		z += n;
+	}
+
+	WARN_ON(z > *nr_zones);
+	*nr_zones = z;
+
+	return 0;
+}
+
 /**
  * blkdev_report_zones - Get zones information
  * @bdev:	Target block device
@@ -133,130 +155,46 @@ static bool blkdev_report_zone(struct block_device *bdev,
  *    requested by @nr_zones. The number of zones actually reported is
  *    returned in @nr_zones.
  */
-int blkdev_report_zones(struct block_device *bdev,
-			sector_t sector,
-			struct blk_zone *zones,
-			unsigned int *nr_zones,
+int blkdev_report_zones(struct block_device *bdev, sector_t sector,
+			struct blk_zone *zones, unsigned int *nr_zones,
 			gfp_t gfp_mask)
 {
 	struct request_queue *q = bdev_get_queue(bdev);
-	struct blk_zone_report_hdr *hdr;
-	unsigned int nrz = *nr_zones;
-	struct page *page;
-	unsigned int nr_rep;
-	size_t rep_bytes;
-	unsigned int nr_pages;
-	struct bio *bio;
-	struct bio_vec *bv;
-	unsigned int i, n, nz;
-	unsigned int ofst;
-	void *addr;
+	unsigned int i, nrz;
 	int ret;
 
-	if (!q)
-		return -ENXIO;
-
 	if (!blk_queue_is_zoned(q))
 		return -EOPNOTSUPP;
 
-	if (!nrz)
-		return 0;
-
-	if (sector > bdev->bd_part->nr_sects) {
-		*nr_zones = 0;
-		return 0;
-	}
-
 	/*
-	 * The zone report has a header. So make room for it in the
-	 * payload. Also make sure that the report fits in a single BIO
-	 * that will not be split down the stack.
+	 * A block device that advertized itself as zoned must have a
+	 * report_zones method. If it does not have one defined, the device
+	 * driver has a bug. So warn about that.
 	 */
-	rep_bytes = sizeof(struct blk_zone_report_hdr) +
-		sizeof(struct blk_zone) * nrz;
-	rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
-	if (rep_bytes > (queue_max_sectors(q) << 9))
-		rep_bytes = queue_max_sectors(q) << 9;
-
-	nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
-			 rep_bytes >> PAGE_SHIFT);
-	nr_pages = min_t(unsigned int, nr_pages,
-			 queue_max_segments(q));
-
-	bio = bio_alloc(gfp_mask, nr_pages);
-	if (!bio)
-		return -ENOMEM;
+	if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zones))
+		return -EOPNOTSUPP;
 
-	bio_set_dev(bio, bdev);
-	bio->bi_iter.bi_sector = blk_zone_start(q, sector);
-	bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);
-
-	for (i = 0; i < nr_pages; i++) {
-		page = alloc_page(gfp_mask);
-		if (!page) {
-			ret = -ENOMEM;
-			goto out;
-		}
-		if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
-			__free_page(page);
-			break;
-		}
+	if (!*nr_zones || sector >= bdev->bd_part->nr_sects) {
+		*nr_zones = 0;
+		return 0;
 	}
 
-	if (i == 0)
-		ret = -ENOMEM;
-	else
-		ret = submit_bio_wait(bio);
+	nrz = min(*nr_zones,
+		  __blkdev_nr_zones(q, bdev->bd_part->nr_sects - sector));
+	ret = blk_report_zones(bdev->bd_disk, get_start_sect(bdev) + sector,
+			       zones, &nrz, gfp_mask);
 	if (ret)
-		goto out;
-
-	/*
-	 * Process the report result: skip the header and go through the
-	 * reported zones to fixup and fixup the zone information for
-	 * partitions. At the same time, return the zone information into
-	 * the zone array.
-	 */
-	n = 0;
-	nz = 0;
-	nr_rep = 0;
-	bio_for_each_segment_all(bv, bio, i) {
+		return ret;
 
-		if (!bv->bv_page)
+	for (i = 0; i < nrz; i++) {
+		if (!blkdev_report_zone(bdev, zones))
 			break;
-
-		addr = kmap_atomic(bv->bv_page);
-
-		/* Get header in the first page */
-		ofst = 0;
-		if (!nr_rep) {
-			hdr = addr;
-			nr_rep = hdr->nr_zones;
-			ofst = sizeof(struct blk_zone_report_hdr);
-		}
-
-		/* Fixup and report zones */
-		while (ofst < bv->bv_len &&
-		       n < nr_rep && nz < nrz) {
-			if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
-				nz++;
-			ofst += sizeof(struct blk_zone);
-			n++;
-		}
-
-		kunmap_atomic(addr);
-
-		if (n >= nr_rep || nz >= nrz)
-			break;
-
+		zones++;
 	}
 
-	*nr_zones = nz;
-out:
-	bio_for_each_segment_all(bv, bio, i)
-		__free_page(bv->bv_page);
-	bio_put(bio);
+	*nr_zones = i;
 
-	return ret;
+	return 0;
 }
 EXPORT_SYMBOL_GPL(blkdev_report_zones);
 
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index 34e0030f0592..7685df43f1ef 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -87,7 +87,9 @@ struct nullb {
 #ifdef CONFIG_BLK_DEV_ZONED
 int null_zone_init(struct nullb_device *dev);
 void null_zone_exit(struct nullb_device *dev);
-blk_status_t null_zone_report(struct nullb *nullb, struct bio *bio);
+int null_zone_report(struct gendisk *disk, sector_t sector,
+		     struct blk_zone *zones, unsigned int *nr_zones,
+		     gfp_t gfp_mask);
 void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
 			unsigned int nr_sectors);
 void null_zone_reset(struct nullb_cmd *cmd, sector_t sector);
@@ -97,10 +99,11 @@ static inline int null_zone_init(struct nullb_device *dev)
 	return -EINVAL;
 }
 static inline void null_zone_exit(struct nullb_device *dev) {}
-static inline blk_status_t null_zone_report(struct nullb *nullb,
-					    struct bio *bio)
+static inline int null_zone_report(struct gendisk *disk, sector_t sector,
+				   struct blk_zone *zones,
+				   unsigned int *nr_zones, gfp_t gfp_mask)
 {
-	return BLK_STS_NOTSUPP;
+	return -EOPNOTSUPP;
 }
 static inline void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
 				   unsigned int nr_sectors)
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index e94591021682..5ba426dbf377 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -1129,34 +1129,12 @@ static void null_restart_queue_async(struct nullb *nullb)
 		blk_mq_start_stopped_hw_queues(q, true);
 }
 
-static bool cmd_report_zone(struct nullb *nullb, struct nullb_cmd *cmd)
-{
-	struct nullb_device *dev = cmd->nq->dev;
-
-	if (dev->queue_mode == NULL_Q_BIO) {
-		if (bio_op(cmd->bio) == REQ_OP_ZONE_REPORT) {
-			cmd->error = null_zone_report(nullb, cmd->bio);
-			return true;
-		}
-	} else {
-		if (req_op(cmd->rq) == REQ_OP_ZONE_REPORT) {
-			cmd->error = null_zone_report(nullb, cmd->rq->bio);
-			return true;
-		}
-	}
-
-	return false;
-}
-
 static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
 {
 	struct nullb_device *dev = cmd->nq->dev;
 	struct nullb *nullb = dev->nullb;
 	int err = 0;
 
-	if (cmd_report_zone(nullb, cmd))
-		goto out;
-
 	if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
 		struct request *rq = cmd->rq;
 
@@ -1443,6 +1421,7 @@ static const struct block_device_operations null_fops = {
 	.owner =	THIS_MODULE,
 	.open =		null_open,
 	.release =	null_release,
+	.report_zones =	null_zone_report,
 };
 
 static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index 7c6b86d98700..c0b0e4a3fa8f 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -48,54 +48,27 @@ void null_zone_exit(struct nullb_device *dev)
 	kvfree(dev->zones);
 }
 
-static void null_zone_fill_bio(struct nullb_device *dev, struct bio *bio,
-			       unsigned int zno, unsigned int nr_zones)
+int null_zone_report(struct gendisk *disk, sector_t sector,
+		     struct blk_zone *zones, unsigned int *nr_zones,
+		     gfp_t gfp_mask)
 {
-	struct blk_zone_report_hdr *hdr = NULL;
-	struct bio_vec bvec;
-	struct bvec_iter iter;
-	void *addr;
-	unsigned int zones_to_cpy;
-
-	bio_for_each_segment(bvec, bio, iter) {
-		addr = kmap_atomic(bvec.bv_page);
-
-		zones_to_cpy = bvec.bv_len / sizeof(struct blk_zone);
-
-		if (!hdr) {
-			hdr = (struct blk_zone_report_hdr *)addr;
-			hdr->nr_zones = nr_zones;
-			zones_to_cpy--;
-			addr += sizeof(struct blk_zone_report_hdr);
-		}
-
-		zones_to_cpy = min_t(unsigned int, zones_to_cpy, nr_zones);
-
-		memcpy(addr, &dev->zones[zno],
-				zones_to_cpy * sizeof(struct blk_zone));
-
-		kunmap_atomic(addr);
+	struct nullb *nullb = disk->private_data;
+	struct nullb_device *dev = nullb->dev;
+	unsigned int zno, nrz = 0;
 
-		nr_zones -= zones_to_cpy;
-		zno += zones_to_cpy;
+	if (!dev->zoned)
+		/* Not a zoned null device */
+		return -EOPNOTSUPP;
 
-		if (!nr_zones)
-			break;
+	zno = null_zone_no(dev, sector);
+	if (zno < dev->nr_zones) {
+		nrz = min_t(unsigned int, *nr_zones, dev->nr_zones - zno);
+		memcpy(zones, &dev->zones[zno], nrz * sizeof(struct blk_zone));
 	}
-}
 
-blk_status_t null_zone_report(struct nullb *nullb, struct bio *bio)
-{
-	struct nullb_device *dev = nullb->dev;
-	unsigned int zno = null_zone_no(dev, bio->bi_iter.bi_sector);
-	unsigned int nr_zones = dev->nr_zones - zno;
-	unsigned int max_zones;
+	*nr_zones = nrz;
 
-	max_zones = (bio->bi_iter.bi_size / sizeof(struct blk_zone)) - 1;
-	nr_zones = min_t(unsigned int, nr_zones, max_zones);
-	null_zone_fill_bio(nullb->dev, bio, zno, nr_zones);
-
-	return BLK_STS_OK;
+	return 0;
 }
 
 void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 32aabe27b37c..3cb97fa4c11d 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -315,10 +315,6 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
 	if (bio_op(bio) == REQ_OP_ZONE_RESET)
 		goto map_bio;
 
-	/* We need to remap reported zones, so remember the BIO iter */
-	if (bio_op(bio) == REQ_OP_ZONE_REPORT)
-		goto map_bio;
-
 	/* Are we alive ? */
 	elapsed = (jiffies - fc->start_time) / HZ;
 	if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) {
@@ -380,11 +376,6 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio,
 	if (bio_op(bio) == REQ_OP_ZONE_RESET)
 		return DM_ENDIO_DONE;
 
-	if (bio_op(bio) == REQ_OP_ZONE_REPORT) {
-		dm_remap_zone_report(ti, bio, fc->start);
-		return DM_ENDIO_DONE;
-	}
-
 	if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
 		if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
 		    all_corrupt_bio_flags_match(bio, fc)) {
@@ -457,6 +448,26 @@ static int flakey_prepare_ioctl(struct dm_target *ti, struct block_device **bdev
 	return 0;
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
+static int flakey_report_zones(struct dm_target *ti, sector_t sector,
+			       struct blk_zone *zones, unsigned int *nr_zones,
+			       gfp_t gfp_mask)
+{
+	struct flakey_c *fc = ti->private;
+	int ret;
+
+	/* Do report and remap it */
+	ret = blkdev_report_zones(fc->dev->bdev, flakey_map_sector(ti, sector),
+				  zones, nr_zones, gfp_mask);
+	if (ret != 0)
+		return ret;
+
+	if (*nr_zones)
+		dm_remap_zone_report(ti, fc->start, zones, nr_zones);
+	return 0;
+}
+#endif
+
 static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data)
 {
 	struct flakey_c *fc = ti->private;
@@ -469,6 +480,7 @@ static struct target_type flakey_target = {
 	.version = {1, 5, 0},
 #ifdef CONFIG_BLK_DEV_ZONED
 	.features = DM_TARGET_ZONED_HM,
+	.report_zones = flakey_report_zones,
 #endif
 	.module = THIS_MODULE,
 	.ctr    = flakey_ctr,
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 2f7c44a006c4..8d7ddee6ac4d 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -102,19 +102,6 @@ static int linear_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_REMAPPED;
 }
 
-#ifdef CONFIG_BLK_DEV_ZONED
-static int linear_end_io(struct dm_target *ti, struct bio *bio,
-			 blk_status_t *error)
-{
-	struct linear_c *lc = ti->private;
-
-	if (!*error && bio_op(bio) == REQ_OP_ZONE_REPORT)
-		dm_remap_zone_report(ti, bio, lc->start);
-
-	return DM_ENDIO_DONE;
-}
-#endif
-
 static void linear_status(struct dm_target *ti, status_type_t type,
 			  unsigned status_flags, char *result, unsigned maxlen)
 {
@@ -148,6 +135,26 @@ static int linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev
 	return 0;
 }
 
+#ifdef CONFIG_BLK_DEV_ZONED
+static int linear_report_zones(struct dm_target *ti, sector_t sector,
+			       struct blk_zone *zones, unsigned int *nr_zones,
+			       gfp_t gfp_mask)
+{
+	struct linear_c *lc = (struct linear_c *) ti->private;
+	int ret;
+
+	/* Do report and remap it */
+	ret = blkdev_report_zones(lc->dev->bdev, linear_map_sector(ti, sector),
+				  zones, nr_zones, gfp_mask);
+	if (ret != 0)
+		return ret;
+
+	if (*nr_zones)
+		dm_remap_zone_report(ti, lc->start, zones, nr_zones);
+	return 0;
+}
+#endif
+
 static int linear_iterate_devices(struct dm_target *ti,
 				  iterate_devices_callout_fn fn, void *data)
 {
@@ -211,8 +218,8 @@ static struct target_type linear_target = {
 	.name   = "linear",
 	.version = {1, 4, 0},
 #ifdef CONFIG_BLK_DEV_ZONED
-	.end_io = linear_end_io,
 	.features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM,
+	.report_zones = linear_report_zones,
 #else
 	.features = DM_TARGET_PASSES_INTEGRITY,
 #endif
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 45abb54037fc..6be21dc210a1 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -458,6 +458,57 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return dm_get_geometry(md, geo);
 }
 
+static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
+			       struct blk_zone *zones, unsigned int *nr_zones,
+			       gfp_t gfp_mask)
+{
+#ifdef CONFIG_BLK_DEV_ZONED
+	struct mapped_device *md = disk->private_data;
+	struct dm_target *tgt;
+	struct dm_table *map;
+	int srcu_idx, ret;
+
+	if (dm_suspended_md(md))
+		return -EAGAIN;
+
+	map = dm_get_live_table(md, &srcu_idx);
+	if (!map)
+		return -EIO;
+
+	tgt = dm_table_find_target(map, sector);
+	if (!dm_target_is_valid(tgt)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	/*
+	 * If we are executing this, we already know that the block device
+	 * is a zoned device and so each target should have support for that
+	 * type of drive. A missing report_zones method means that the target
+	 * driver has a problem.
+	 */
+	if (WARN_ON(!tgt->type->report_zones)) {
+		ret = -EIO;
+		goto out;
+	}
+
+	/*
+	 * blkdev_report_zones() will loop and call this again to cover all the
+	 * zones of the target, eventually moving on to the next target.
+	 * So there is no need to loop here trying to fill the entire array
+	 * of zones.
+	 */
+	ret = tgt->type->report_zones(tgt, sector, zones,
+				      nr_zones, gfp_mask);
+
+out:
+	dm_put_live_table(md, srcu_idx);
+	return ret;
+#else
+	return -ENOTSUPP;
+#endif
+}
+
 static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
 			    struct block_device **bdev)
 	__acquires(md->io_barrier)
@@ -1155,93 +1206,49 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
 EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
 
 /*
- * The zone descriptors obtained with a zone report indicate zone positions
- * within the target backing device, regardless of that device is a partition
- * and regardless of the target mapping start sector on the device or partition.
- * The zone descriptors start sector and write pointer position must be adjusted
- * to match their relative position within the dm device.
- * A target may call dm_remap_zone_report() after completion of a
- * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained from the
- * backing device.
+ * The zone descriptors obtained with a zone report indicate
+ * zone positions within the underlying device of the target. The zone
+ * descriptors must be remapped to match their position within the dm device.
+ * The caller target should obtain the zones information using
+ * blkdev_report_zones() to ensure that remapping for partition offset is
+ * already handled.
  */
-void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start)
+void dm_remap_zone_report(struct dm_target *ti, sector_t start,
+			  struct blk_zone *zones, unsigned int *nr_zones)
 {
 #ifdef CONFIG_BLK_DEV_ZONED
-	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
-	struct bio *report_bio = tio->io->orig_bio;
-	struct blk_zone_report_hdr *hdr = NULL;
 	struct blk_zone *zone;
-	unsigned int nr_rep = 0;
-	unsigned int ofst;
-	sector_t part_offset;
-	struct bio_vec bvec;
-	struct bvec_iter iter;
-	void *addr;
-
-	if (bio->bi_status)
-		return;
-
-	/*
-	 * bio sector was incremented by the request size on completion. Taking
-	 * into account the original request sector, the target start offset on
-	 * the backing device and the target mapping offset (ti->begin), the
-	 * start sector of the backing device. The partition offset is always 0
-	 * if the target uses a whole device.
-	 */
-	part_offset = bio->bi_iter.bi_sector + ti->begin - (start + bio_end_sector(report_bio));
+	unsigned int nrz = *nr_zones;
+	int i;
 
 	/*
-	 * Remap the start sector of the reported zones. For sequential zones,
-	 * also remap the write pointer position.
+	 * Remap the start sector and write pointer position of the zones in
+	 * the array. Since we may have obtained from the target underlying
+	 * device more zones that the target size, also adjust the number
+	 * of zones.
 	 */
-	bio_for_each_segment(bvec, report_bio, iter) {
-		addr = kmap_atomic(bvec.bv_page);
-
-		/* Remember the report header in the first page */
-		if (!hdr) {
-			hdr = addr;
-			ofst = sizeof(struct blk_zone_report_hdr);
-		} else
-			ofst = 0;
-
-		/* Set zones start sector */
-		while (hdr->nr_zones && ofst < bvec.bv_len) {
-			zone = addr + ofst;
-			zone->start -= part_offset;
-			if (zone->start >= start + ti->len) {
-				hdr->nr_zones = 0;
-				break;
-			}
-			zone->start = zone->start + ti->begin - start;
-			if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
-				if (zone->cond == BLK_ZONE_COND_FULL)
-					zone->wp = zone->start + zone->len;
-				else if (zone->cond == BLK_ZONE_COND_EMPTY)
-					zone->wp = zone->start;
-				else
-					zone->wp = zone->wp + ti->begin - start - part_offset;
-			}
-			ofst += sizeof(struct blk_zone);
-			hdr->nr_zones--;
-			nr_rep++;
+	for (i = 0; i < nrz; i++) {
+		zone = zones + i;
+		if (zone->start >= start + ti->len) {
+			memset(zone, 0, sizeof(struct blk_zone) * (nrz - i));
+			break;
 		}
 
-		if (addr != hdr)
-			kunmap_atomic(addr);
+		zone->start = zone->start + ti->begin - start;
+		if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+			continue;
 
-		if (!hdr->nr_zones)
-			break;
-	}
-
-	if (hdr) {
-		hdr->nr_zones = nr_rep;
-		kunmap_atomic(hdr);
+		if (zone->cond == BLK_ZONE_COND_FULL)
+			zone->wp = zone->start + zone->len;
+		else if (zone->cond == BLK_ZONE_COND_EMPTY)
+			zone->wp = zone->start;
+		else
+			zone->wp = zone->wp + ti->begin - start;
 	}
 
-	bio_advance(report_bio, report_bio->bi_iter.bi_size);
-
+	*nr_zones = i;
 #else /* !CONFIG_BLK_DEV_ZONED */
-	bio->bi_status = BLK_STS_NOTSUPP;
+	*nr_zones = 0;
 #endif
 }
 EXPORT_SYMBOL_GPL(dm_remap_zone_report);
@@ -1327,8 +1334,7 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio,
 			return r;
 	}
 
-	if (bio_op(bio) != REQ_OP_ZONE_REPORT)
-		bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
+	bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
 	clone->bi_iter.bi_size = to_bytes(len);
 
 	if (unlikely(bio_integrity(bio) != NULL))
@@ -1541,7 +1547,6 @@ static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti,
  */
 static int __split_and_process_non_flush(struct clone_info *ci)
 {
-	struct bio *bio = ci->bio;
 	struct dm_target *ti;
 	unsigned len;
 	int r;
@@ -1553,11 +1558,7 @@ static int __split_and_process_non_flush(struct clone_info *ci)
 	if (unlikely(__process_abnormal_io(ci, ti, &r)))
 		return r;
 
-	if (bio_op(bio) == REQ_OP_ZONE_REPORT)
-		len = ci->sector_count;
-	else
-		len = min_t(sector_t, max_io_len(ci->sector, ti),
-			    ci->sector_count);
+	len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
 
 	r = __clone_and_map_data_bio(ci, ti, ci->sector, &len);
 	if (r < 0)
@@ -1616,9 +1617,6 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
 				 * We take a clone of the original to store in
 				 * ci.io->orig_bio to be used by end_io_acct() and
 				 * for dec_pending to use for completion handling.
-				 * As this path is not used for REQ_OP_ZONE_REPORT,
-				 * the usage of io->orig_bio in dm_remap_zone_report()
-				 * won't be affected by this reassignment.
 				 */
 				struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count,
 							  GFP_NOIO, &md->queue->bio_split);
@@ -3167,6 +3165,7 @@ static const struct block_device_operations dm_blk_dops = {
 	.release = dm_blk_close,
 	.ioctl = dm_blk_ioctl,
 	.getgeo = dm_blk_getgeo,
+	.report_zones = dm_blk_report_zones,
 	.pr_ops = &dm_pr_ops,
 	.owner = THIS_MODULE
 };
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index b762d0fd773c..42c0f299021d 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1272,8 +1272,6 @@ static int sd_init_command(struct scsi_cmnd *cmd)
 	case REQ_OP_READ:
 	case REQ_OP_WRITE:
 		return sd_setup_read_write_cmnd(cmd);
-	case REQ_OP_ZONE_REPORT:
-		return sd_zbc_setup_report_cmnd(cmd);
 	case REQ_OP_ZONE_RESET:
 		return sd_zbc_setup_reset_cmnd(cmd);
 	default:
@@ -1802,6 +1800,7 @@ static const struct block_device_operations sd_fops = {
 	.check_events		= sd_check_events,
 	.revalidate_disk	= sd_revalidate_disk,
 	.unlock_native_capacity	= sd_unlock_native_capacity,
+	.report_zones		= sd_zbc_report_zones,
 	.pr_ops			= &sd_pr_ops,
 };
 
@@ -1953,16 +1952,6 @@ static int sd_done(struct scsi_cmnd *SCpnt)
 			scsi_set_resid(SCpnt, blk_rq_bytes(req));
 		}
 		break;
-	case REQ_OP_ZONE_REPORT:
-		if (!result) {
-			good_bytes = scsi_bufflen(SCpnt)
-				- scsi_get_resid(SCpnt);
-			scsi_set_resid(SCpnt, 0);
-		} else {
-			good_bytes = 0;
-			scsi_set_resid(SCpnt, blk_rq_bytes(req));
-		}
-		break;
 	default:
 		/*
 		 * In case of bogus fw or device, we could end up having
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index a7d4f50b67d4..f72f20fd0d8b 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -273,10 +273,12 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp)
 extern int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer);
 extern void sd_zbc_remove(struct scsi_disk *sdkp);
 extern void sd_zbc_print_zones(struct scsi_disk *sdkp);
-extern int sd_zbc_setup_report_cmnd(struct scsi_cmnd *cmd);
 extern int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd);
 extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
 			    struct scsi_sense_hdr *sshdr);
+extern int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
+			       struct blk_zone *zones, unsigned int *nr_zones,
+			       gfp_t gfp_mask);
 
 #else /* CONFIG_BLK_DEV_ZONED */
 
@@ -290,11 +292,6 @@ static inline void sd_zbc_remove(struct scsi_disk *sdkp) {}
 
 static inline void sd_zbc_print_zones(struct scsi_disk *sdkp) {}
 
-static inline int sd_zbc_setup_report_cmnd(struct scsi_cmnd *cmd)
-{
-	return BLKPREP_INVALID;
-}
-
 static inline int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
 {
 	return BLKPREP_INVALID;
@@ -304,6 +301,8 @@ static inline void sd_zbc_complete(struct scsi_cmnd *cmd,
 				   unsigned int good_bytes,
 				   struct scsi_sense_hdr *sshdr) {}
 
+#define sd_zbc_report_zones NULL
+
 #endif /* CONFIG_BLK_DEV_ZONED */
 
 #endif /* _SCSI_DISK_H */
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 0678e1e108b0..0f2cfc81fce3 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -62,7 +62,7 @@ static void sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
 }
 
 /**
- * sd_zbc_report_zones - Issue a REPORT ZONES scsi command.
+ * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command.
  * @sdkp: The target disk
  * @buf: Buffer to use for the reply
  * @buflen: the buffer size
@@ -75,9 +75,9 @@ static void sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
  * zones and will only report the count of zones fitting in the command reply
  * buffer.
  */
-static int sd_zbc_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
-			       unsigned int buflen, sector_t lba,
-			       bool partial)
+static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
+				  unsigned int buflen, sector_t lba,
+				  bool partial)
 {
 	struct scsi_device *sdp = sdkp->device;
 	const int timeout = sdp->request_queue->rq_timeout;
@@ -118,108 +118,56 @@ static int sd_zbc_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
 }
 
 /**
- * sd_zbc_setup_report_cmnd - Prepare a REPORT ZONES scsi command
- * @cmd: The command to setup
+ * sd_zbc_report_zones - Disk report zones operation.
+ * @disk: The target disk
+ * @sector: Start 512B sector of the report
+ * @zones: Array of zone descriptors
+ * @nr_zones: Number of descriptors in the array
+ * @gfp_mask: Memory allocation mask
  *
- * Call in sd_init_command() for a REQ_OP_ZONE_REPORT request.
+ * Execute a report zones command on the target disk.
  */
-int sd_zbc_setup_report_cmnd(struct scsi_cmnd *cmd)
+int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
+			struct blk_zone *zones, unsigned int *nr_zones,
+			gfp_t gfp_mask)
 {
-	struct request *rq = cmd->request;
-	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
-	sector_t lba, sector = blk_rq_pos(rq);
-	unsigned int nr_bytes = blk_rq_bytes(rq);
-	int ret;
-
-	WARN_ON(nr_bytes == 0);
+	struct scsi_disk *sdkp = scsi_disk(disk);
+	unsigned int i, buflen, nrz = *nr_zones;
+	unsigned char *buf;
+	size_t offset = 0;
+	int ret = 0;
 
 	if (!sd_is_zoned(sdkp))
 		/* Not a zoned device */
-		return BLKPREP_KILL;
-
-	ret = scsi_init_io(cmd);
-	if (ret != BLKPREP_OK)
-		return ret;
-
-	cmd->cmd_len = 16;
-	memset(cmd->cmnd, 0, cmd->cmd_len);
-	cmd->cmnd[0] = ZBC_IN;
-	cmd->cmnd[1] = ZI_REPORT_ZONES;
-	lba = sectors_to_logical(sdkp->device, sector);
-	put_unaligned_be64(lba, &cmd->cmnd[2]);
-	put_unaligned_be32(nr_bytes, &cmd->cmnd[10]);
-	/* Do partial report for speeding things up */
-	cmd->cmnd[14] = ZBC_REPORT_ZONE_PARTIAL;
-
-	cmd->sc_data_direction = DMA_FROM_DEVICE;
-	cmd->sdb.length = nr_bytes;
-	cmd->transfersize = sdkp->device->sector_size;
-	cmd->allowed = 0;
+		return -EOPNOTSUPP;
 
-	return BLKPREP_OK;
-}
-
-/**
- * sd_zbc_report_zones_complete - Process a REPORT ZONES scsi command reply.
- * @scmd: The completed report zones command
- * @good_bytes: reply size in bytes
- *
- * Convert all reported zone descriptors to struct blk_zone. The conversion
- * is done in-place, directly in the request specified sg buffer.
- */
-static void sd_zbc_report_zones_complete(struct scsi_cmnd *scmd,
-					 unsigned int good_bytes)
-{
-	struct request *rq = scmd->request;
-	struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
-	struct sg_mapping_iter miter;
-	struct blk_zone_report_hdr hdr;
-	struct blk_zone zone;
-	unsigned int offset, bytes = 0;
-	unsigned long flags;
-	u8 *buf;
-
-	if (good_bytes < 64)
-		return;
-
-	memset(&hdr, 0, sizeof(struct blk_zone_report_hdr));
-
-	sg_miter_start(&miter, scsi_sglist(scmd), scsi_sg_count(scmd),
-		       SG_MITER_TO_SG | SG_MITER_ATOMIC);
+	/*
+	 * Get a reply buffer for the number of requested zones plus a header.
+	 * For ATA, buffers must be aligned to 512B.
+	 */
+	buflen = roundup((nrz + 1) * 64, 512);
+	buf = kmalloc(buflen, gfp_mask);
+	if (!buf)
+		return -ENOMEM;
 
-	local_irq_save(flags);
-	while (sg_miter_next(&miter) && bytes < good_bytes) {
+	ret = sd_zbc_do_report_zones(sdkp, buf, buflen,
+			sectors_to_logical(sdkp->device, sector), true);
+	if (ret)
+		goto out_free_buf;
 
-		buf = miter.addr;
-		offset = 0;
+	nrz = min(nrz, get_unaligned_be32(&buf[0]) / 64);
+	for (i = 0; i < nrz; i++) {
+		offset += 64;
+		sd_zbc_parse_report(sdkp, buf + offset, zones);
+		zones++;
+	}
 
-		if (bytes == 0) {
-			/* Set the report header */
-			hdr.nr_zones = min_t(unsigned int,
-					 (good_bytes - 64) / 64,
-					 get_unaligned_be32(&buf[0]) / 64);
-			memcpy(buf, &hdr, sizeof(struct blk_zone_report_hdr));
-			offset += 64;
-			bytes += 64;
-		}
+	*nr_zones = nrz;
 
-		/* Parse zone descriptors */
-		while (offset < miter.length && hdr.nr_zones) {
-			WARN_ON(offset > miter.length);
-			buf = miter.addr + offset;
-			sd_zbc_parse_report(sdkp, buf, &zone);
-			memcpy(buf, &zone, sizeof(struct blk_zone));
-			offset += 64;
-			bytes += 64;
-			hdr.nr_zones--;
-		}
-
-		if (!hdr.nr_zones)
-			break;
+out_free_buf:
+	kfree(buf);
 
-	}
-	sg_miter_stop(&miter);
-	local_irq_restore(flags);
+	return ret;
 }
 
 /**
@@ -302,13 +250,6 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
 	case REQ_OP_WRITE_ZEROES:
 	case REQ_OP_WRITE_SAME:
 		break;
-
-	case REQ_OP_ZONE_REPORT:
-
-		if (!result)
-			sd_zbc_report_zones_complete(cmd, good_bytes);
-		break;
-
 	}
 }
 
@@ -390,7 +331,7 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
 		return -ENOMEM;
 
 	/* Do a report zone to get max_lba and the same field */
-	ret = sd_zbc_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 0, false);
+	ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 0, false);
 	if (ret)
 		goto out_free;
 
@@ -447,8 +388,8 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
 		}
 
 		if (block < sdkp->capacity) {
-			ret = sd_zbc_report_zones(sdkp, buf,
-						  SD_ZBC_BUF_SIZE, block, true);
+			ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
+						     block, true);
 			if (ret)
 				goto out_free;
 		}
@@ -565,8 +506,8 @@ sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift,
 		goto out;
 
 	while (lba < sdkp->capacity) {
-		ret = sd_zbc_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
-					  lba, true);
+		ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, lba,
+					     true);
 		if (ret)
 			goto out;
 		lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 9578c7ab1eb6..093a818c5b68 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -283,8 +283,6 @@ enum req_opf {
 	REQ_OP_FLUSH		= 2,
 	/* discard sectors */
 	REQ_OP_DISCARD		= 3,
-	/* get zone information */
-	REQ_OP_ZONE_REPORT	= 4,
 	/* securely erase sectors */
 	REQ_OP_SECURE_ERASE	= 5,
 	/* seset a zone write pointer */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6bb845f9601a..51fe6472ce02 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -396,11 +396,6 @@ struct queue_limits {
 
 #ifdef CONFIG_BLK_DEV_ZONED
 
-struct blk_zone_report_hdr {
-	unsigned int	nr_zones;
-	u8		padding[60];
-};
-
 extern unsigned int blkdev_nr_zones(struct block_device *bdev);
 extern int blkdev_report_zones(struct block_device *bdev,
 			       sector_t sector, struct blk_zone *zones,
@@ -1867,6 +1862,9 @@ struct block_device_operations {
 	int (*getgeo)(struct block_device *, struct hd_geometry *);
 	/* this callback is with swap_lock and sometimes page table lock held */
 	void (*swap_slot_free_notify) (struct block_device *, unsigned long);
+	int (*report_zones)(struct gendisk *, sector_t sector,
+			    struct blk_zone *zones, unsigned int *nr_zones,
+			    gfp_t gfp_mask);
 	struct module *owner;
 	const struct pr_ops *pr_ops;
 };
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 6fb0808e87c8..a23b396a8edc 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -92,6 +92,11 @@ typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv,
 
 typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev);
 
+typedef int (*dm_report_zones_fn) (struct dm_target *ti, sector_t sector,
+				   struct blk_zone *zones,
+				   unsigned int *nr_zones,
+				   gfp_t gfp_mask);
+
 /*
  * These iteration functions are typically used to check (and combine)
  * properties of underlying devices.
@@ -180,6 +185,9 @@ struct target_type {
 	dm_status_fn status;
 	dm_message_fn message;
 	dm_prepare_ioctl_fn prepare_ioctl;
+#ifdef CONFIG_BLK_DEV_ZONED
+	dm_report_zones_fn report_zones;
+#endif
 	dm_busy_fn busy;
 	dm_iterate_devices_fn iterate_devices;
 	dm_io_hints_fn io_hints;
@@ -420,8 +428,8 @@ struct gendisk *dm_disk(struct mapped_device *md);
 int dm_suspended(struct dm_target *ti);
 int dm_noflush_suspending(struct dm_target *ti);
 void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors);
-void dm_remap_zone_report(struct dm_target *ti, struct bio *bio,
-			  sector_t start);
+void dm_remap_zone_report(struct dm_target *ti, sector_t start,
+			  struct blk_zone *zones, unsigned int *nr_zones);
 union map_info *dm_get_rq_mapinfo(struct request *rq);
 
 struct queue_limits *dm_get_queue_limits(struct mapped_device *md);
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 795698925d20..3ec73f17ee2a 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -82,7 +82,6 @@ TRACE_DEFINE_ENUM(CP_TRIMMED);
 		{ REQ_OP_WRITE,			"WRITE" },		\
 		{ REQ_OP_FLUSH,			"FLUSH" },		\
 		{ REQ_OP_DISCARD,		"DISCARD" },		\
-		{ REQ_OP_ZONE_REPORT,		"ZONE_REPORT" },	\
 		{ REQ_OP_SECURE_ERASE,		"SECURE_ERASE" },	\
 		{ REQ_OP_ZONE_RESET,		"ZONE_RESET" },		\
 		{ REQ_OP_WRITE_SAME,		"WRITE_SAME" },		\
-- 
cgit v1.2.3


From bf5054569653c491ece544cc7ee333ae53b47121 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@wdc.com>
Date: Fri, 12 Oct 2018 19:08:50 +0900
Subject: block: Introduce blk_revalidate_disk_zones()

Drivers exposing zoned block devices have to initialize and maintain
correctness (i.e. revalidate) of the device zone bitmaps attached to
the device request queue (seq_zones_bitmap and seq_zones_wlock).

To simplify coding this, introduce a generic helper function
blk_revalidate_disk_zones() suitable for most (and likely all) cases.
This new function always update the seq_zones_bitmap and seq_zones_wlock
bitmaps as well as the queue nr_zones field when called for a disk
using a request based queue. For a disk using a BIO based queue, only
the number of zones is updated since these queues do not have
schedulers and so do not need the zone bitmaps.

With this change, the zone bitmap initialization code in sd_zbc.c can be
replaced with a call to this function in sd_zbc_read_zones(), which is
called from the disk revalidate block operation method.

A call to blk_revalidate_disk_zones() is also added to the null_blk
driver for devices created with the zoned mode enabled.

Finally, to ensure that zoned devices created with dm-linear or
dm-flakey expose the correct number of zones through sysfs, a call to
blk_revalidate_disk_zones() is added to dm_table_set_restrictions().

The zone bitmaps allocated and initialized with
blk_revalidate_disk_zones() are freed automatically from
__blk_release_queue() using the block internal function
blk_queue_free_zone_bitmaps().

Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-sysfs.c             |   2 +
 block/blk-zoned.c             | 136 ++++++++++++++++++++++++++
 block/blk.h                   |   6 ++
 drivers/block/null_blk_main.c |   7 ++
 drivers/md/dm-table.c         |  10 ++
 drivers/scsi/sd.c             |   2 -
 drivers/scsi/sd.h             |   4 -
 drivers/scsi/sd_zbc.c         | 218 +++++-------------------------------------
 include/linux/blkdev.h        |   7 ++
 9 files changed, 194 insertions(+), 198 deletions(-)

(limited to 'include/linux')

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 92be8092ca4f..0641533597f1 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -852,6 +852,8 @@ static void __blk_release_queue(struct work_struct *work)
 	if (q->queue_tags)
 		__blk_queue_free_tags(q);
 
+	blk_queue_free_zone_bitmaps(q);
+
 	if (!q->mq_ops) {
 		if (q->exit_rq_fn)
 			q->exit_rq_fn(q, q->fq->flush_rq);
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 90cf503091d5..13ba2011a306 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/rbtree.h>
 #include <linux/blkdev.h>
+#include <linux/blk-mq.h>
 
 #include "blk.h"
 
@@ -359,3 +360,138 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
 	return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
 				  GFP_KERNEL);
 }
+
+static inline unsigned long *blk_alloc_zone_bitmap(int node,
+						   unsigned int nr_zones)
+{
+	return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
+			    GFP_NOIO, node);
+}
+
+/*
+ * Allocate an array of struct blk_zone to get nr_zones zone information.
+ * The allocated array may be smaller than nr_zones.
+ */
+static struct blk_zone *blk_alloc_zones(int node, unsigned int *nr_zones)
+{
+	size_t size = *nr_zones * sizeof(struct blk_zone);
+	struct page *page;
+	int order;
+
+	for (order = get_order(size); order > 0; order--) {
+		page = alloc_pages_node(node, GFP_NOIO | __GFP_ZERO, order);
+		if (page) {
+			*nr_zones = min_t(unsigned int, *nr_zones,
+				(PAGE_SIZE << order) / sizeof(struct blk_zone));
+			return page_address(page);
+		}
+	}
+
+	return NULL;
+}
+
+void blk_queue_free_zone_bitmaps(struct request_queue *q)
+{
+	kfree(q->seq_zones_bitmap);
+	q->seq_zones_bitmap = NULL;
+	kfree(q->seq_zones_wlock);
+	q->seq_zones_wlock = NULL;
+}
+
+/**
+ * blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps
+ * @disk:	Target disk
+ *
+ * Helper function for low-level device drivers to (re) allocate and initialize
+ * a disk request queue zone bitmaps. This functions should normally be called
+ * within the disk ->revalidate method. For BIO based queues, no zone bitmap
+ * is allocated.
+ */
+int blk_revalidate_disk_zones(struct gendisk *disk)
+{
+	struct request_queue *q = disk->queue;
+	unsigned int nr_zones = __blkdev_nr_zones(q, get_capacity(disk));
+	unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
+	unsigned int i, rep_nr_zones = 0, z = 0, nrz;
+	struct blk_zone *zones = NULL;
+	sector_t sector = 0;
+	int ret = 0;
+
+	/*
+	 * BIO based queues do not use a scheduler so only q->nr_zones
+	 * needs to be updated so that the sysfs exposed value is correct.
+	 */
+	if (!queue_is_rq_based(q)) {
+		q->nr_zones = nr_zones;
+		return 0;
+	}
+
+	if (!blk_queue_is_zoned(q) || !nr_zones) {
+		nr_zones = 0;
+		goto update;
+	}
+
+	/* Allocate bitmaps */
+	ret = -ENOMEM;
+	seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones);
+	if (!seq_zones_wlock)
+		goto out;
+	seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones);
+	if (!seq_zones_bitmap)
+		goto out;
+
+	/* Get zone information and initialize seq_zones_bitmap */
+	rep_nr_zones = nr_zones;
+	zones = blk_alloc_zones(q->node, &rep_nr_zones);
+	if (!zones)
+		goto out;
+
+	while (z < nr_zones) {
+		nrz = min(nr_zones - z, rep_nr_zones);
+		ret = blk_report_zones(disk, sector, zones, &nrz, GFP_NOIO);
+		if (ret)
+			goto out;
+		if (!nrz)
+			break;
+		for (i = 0; i < nrz; i++) {
+			if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
+				set_bit(z, seq_zones_bitmap);
+			z++;
+		}
+		sector += nrz * blk_queue_zone_sectors(q);
+	}
+
+	if (WARN_ON(z != nr_zones)) {
+		ret = -EIO;
+		goto out;
+	}
+
+update:
+	/*
+	 * Install the new bitmaps, making sure the queue is stopped and
+	 * all I/Os are completed (i.e. a scheduler is not referencing the
+	 * bitmaps).
+	 */
+	blk_mq_freeze_queue(q);
+	q->nr_zones = nr_zones;
+	swap(q->seq_zones_wlock, seq_zones_wlock);
+	swap(q->seq_zones_bitmap, seq_zones_bitmap);
+	blk_mq_unfreeze_queue(q);
+
+out:
+	free_pages((unsigned long)zones,
+		   get_order(rep_nr_zones * sizeof(struct blk_zone)));
+	kfree(seq_zones_wlock);
+	kfree(seq_zones_bitmap);
+
+	if (ret) {
+		pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
+		blk_mq_freeze_queue(q);
+		blk_queue_free_zone_bitmaps(q);
+		blk_mq_unfreeze_queue(q);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
+
diff --git a/block/blk.h b/block/blk.h
index 93574baaa6b8..a1841b8ff129 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -490,4 +490,10 @@ static inline int blk_iolatency_init(struct request_queue *q) { return 0; }
 
 struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp);
 
+#ifdef CONFIG_BLK_DEV_ZONED
+void blk_queue_free_zone_bitmaps(struct request_queue *q);
+#else
+static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
+#endif
+
 #endif /* BLK_INTERNAL_H */
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 5ba426dbf377..09339203dfba 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -1528,6 +1528,13 @@ static int null_gendisk_register(struct nullb *nullb)
 	disk->queue		= nullb->q;
 	strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
 
+	if (nullb->dev->zoned) {
+		int ret = blk_revalidate_disk_zones(disk);
+
+		if (ret != 0)
+			return ret;
+	}
+
 	add_disk(disk);
 	return 0;
 }
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 3d0e2c198f06..fb4bea20657b 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1937,6 +1937,16 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
 	 */
 	if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
 		blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
+
+	/*
+	 * For a zoned target, the number of zones should be updated for the
+	 * correct value to be exposed in sysfs queue/nr_zones. For a BIO based
+	 * target, this is all that is needed. For a request based target, the
+	 * queue zone bitmaps must also be updated.
+	 * Use blk_revalidate_disk_zones() to handle this.
+	 */
+	if (blk_queue_is_zoned(q))
+		blk_revalidate_disk_zones(t->md->disk);
 }
 
 unsigned int dm_table_get_num_targets(struct dm_table *t)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 42c0f299021d..3bb2b3351e35 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3414,8 +3414,6 @@ static int sd_remove(struct device *dev)
 	del_gendisk(sdkp->disk);
 	sd_shutdown(dev);
 
-	sd_zbc_remove(sdkp);
-
 	free_opal_dev(sdkp->opal_dev);
 
 	blk_register_region(devt, SD_MINORS, NULL,
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index f72f20fd0d8b..1d63f3a23ffb 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -76,7 +76,6 @@ struct scsi_disk {
 #ifdef CONFIG_BLK_DEV_ZONED
 	u32		nr_zones;
 	u32		zone_blocks;
-	u32		zone_shift;
 	u32		zones_optimal_open;
 	u32		zones_optimal_nonseq;
 	u32		zones_max_open;
@@ -271,7 +270,6 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp)
 #ifdef CONFIG_BLK_DEV_ZONED
 
 extern int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer);
-extern void sd_zbc_remove(struct scsi_disk *sdkp);
 extern void sd_zbc_print_zones(struct scsi_disk *sdkp);
 extern int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd);
 extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
@@ -288,8 +286,6 @@ static inline int sd_zbc_read_zones(struct scsi_disk *sdkp,
 	return 0;
 }
 
-static inline void sd_zbc_remove(struct scsi_disk *sdkp) {}
-
 static inline void sd_zbc_print_zones(struct scsi_disk *sdkp) {}
 
 static inline int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 0f2cfc81fce3..e06c48c866e4 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -425,191 +425,10 @@ out_free:
 	return ret;
 }
 
-/**
- * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone).
- * @nr_zones: Number of zones to allocate space for.
- * @numa_node: NUMA node to allocate the memory from.
- */
-static inline unsigned long *
-sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node)
-{
-	return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
-			    GFP_KERNEL, numa_node);
-}
-
-/**
- * sd_zbc_get_seq_zones - Parse report zones reply to identify sequential zones
- * @sdkp: disk used
- * @buf: report reply buffer
- * @buflen: length of @buf
- * @zone_shift: logarithm base 2 of the number of blocks in a zone
- * @seq_zones_bitmap: bitmap of sequential zones to set
- *
- * Parse reported zone descriptors in @buf to identify sequential zones and
- * set the reported zone bit in @seq_zones_bitmap accordingly.
- * Since read-only and offline zones cannot be written, do not
- * mark them as sequential in the bitmap.
- * Return the LBA after the last zone reported.
- */
-static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
-				     unsigned int buflen, u32 zone_shift,
-				     unsigned long *seq_zones_bitmap)
-{
-	sector_t lba, next_lba = sdkp->capacity;
-	unsigned int buf_len, list_length;
-	unsigned char *rec;
-	u8 type, cond;
-
-	list_length = get_unaligned_be32(&buf[0]) + 64;
-	buf_len = min(list_length, buflen);
-	rec = buf + 64;
-
-	while (rec < buf + buf_len) {
-		type = rec[0] & 0x0f;
-		cond = (rec[1] >> 4) & 0xf;
-		lba = get_unaligned_be64(&rec[16]);
-		if (type != ZBC_ZONE_TYPE_CONV &&
-		    cond != ZBC_ZONE_COND_READONLY &&
-		    cond != ZBC_ZONE_COND_OFFLINE)
-			set_bit(lba >> zone_shift, seq_zones_bitmap);
-		next_lba = lba + get_unaligned_be64(&rec[8]);
-		rec += 64;
-	}
-
-	return next_lba;
-}
-
-/**
- * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap.
- * @sdkp: target disk
- * @zone_shift: logarithm base 2 of the number of blocks in a zone
- * @nr_zones: number of zones to set up a seq zone bitmap for
- *
- * Allocate a zone bitmap and initialize it by identifying sequential zones.
- */
-static unsigned long *
-sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift,
-			      u32 nr_zones)
-{
-	struct request_queue *q = sdkp->disk->queue;
-	unsigned long *seq_zones_bitmap;
-	sector_t lba = 0;
-	unsigned char *buf;
-	int ret = -ENOMEM;
-
-	seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node);
-	if (!seq_zones_bitmap)
-		return ERR_PTR(-ENOMEM);
-
-	buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
-	if (!buf)
-		goto out;
-
-	while (lba < sdkp->capacity) {
-		ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, lba,
-					     true);
-		if (ret)
-			goto out;
-		lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
-					   zone_shift, seq_zones_bitmap);
-	}
-
-	if (lba != sdkp->capacity) {
-		/* Something went wrong */
-		ret = -EIO;
-	}
-
-out:
-	kfree(buf);
-	if (ret) {
-		kfree(seq_zones_bitmap);
-		return ERR_PTR(ret);
-	}
-	return seq_zones_bitmap;
-}
-
-static void sd_zbc_cleanup(struct scsi_disk *sdkp)
-{
-	struct request_queue *q = sdkp->disk->queue;
-
-	kfree(q->seq_zones_bitmap);
-	q->seq_zones_bitmap = NULL;
-
-	kfree(q->seq_zones_wlock);
-	q->seq_zones_wlock = NULL;
-
-	q->nr_zones = 0;
-}
-
-static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks)
-{
-	struct request_queue *q = sdkp->disk->queue;
-	u32 zone_shift = ilog2(zone_blocks);
-	u32 nr_zones;
-	int ret;
-
-	/* chunk_sectors indicates the zone size */
-	blk_queue_chunk_sectors(q,
-			logical_to_sectors(sdkp->device, zone_blocks));
-	nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift;
-
-	/*
-	 * Initialize the device request queue information if the number
-	 * of zones changed.
-	 */
-	if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) {
-		unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
-		size_t zone_bitmap_size;
-
-		if (nr_zones) {
-			seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones,
-								   q->node);
-			if (!seq_zones_wlock) {
-				ret = -ENOMEM;
-				goto err;
-			}
-
-			seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp,
-							zone_shift, nr_zones);
-			if (IS_ERR(seq_zones_bitmap)) {
-				ret = PTR_ERR(seq_zones_bitmap);
-				kfree(seq_zones_wlock);
-				goto err;
-			}
-		}
-		zone_bitmap_size = BITS_TO_LONGS(nr_zones) *
-			sizeof(unsigned long);
-		blk_mq_freeze_queue(q);
-		if (q->nr_zones != nr_zones) {
-			/* READ16/WRITE16 is mandatory for ZBC disks */
-			sdkp->device->use_16_for_rw = 1;
-			sdkp->device->use_10_for_rw = 0;
-
-			sdkp->zone_blocks = zone_blocks;
-			sdkp->zone_shift = zone_shift;
-			sdkp->nr_zones = nr_zones;
-			q->nr_zones = nr_zones;
-			swap(q->seq_zones_wlock, seq_zones_wlock);
-			swap(q->seq_zones_bitmap, seq_zones_bitmap);
-		} else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap,
-				  zone_bitmap_size) != 0) {
-			memcpy(q->seq_zones_bitmap, seq_zones_bitmap,
-			       zone_bitmap_size);
-		}
-		blk_mq_unfreeze_queue(q);
-		kfree(seq_zones_wlock);
-		kfree(seq_zones_bitmap);
-	}
-
-	return 0;
-
-err:
-	sd_zbc_cleanup(sdkp);
-	return ret;
-}
-
 int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
 {
+	struct gendisk *disk = sdkp->disk;
+	unsigned int nr_zones;
 	u32 zone_blocks;
 	int ret;
 
@@ -634,24 +453,39 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
 		goto err;
 
 	/* The drive satisfies the kernel restrictions: set it up */
-	ret = sd_zbc_setup(sdkp, zone_blocks);
-	if (ret)
-		goto err;
+	blk_queue_chunk_sectors(sdkp->disk->queue,
+			logical_to_sectors(sdkp->device, zone_blocks));
+	nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
+
+	/* READ16/WRITE16 is mandatory for ZBC disks */
+	sdkp->device->use_16_for_rw = 1;
+	sdkp->device->use_10_for_rw = 0;
+
+	/*
+	 * If something changed, revalidate the disk zone bitmaps once we have
+	 * the capacity, that is on the second revalidate execution during disk
+	 * scan and always during normal revalidate.
+	 */
+	if (sdkp->first_scan)
+		return 0;
+	if (sdkp->zone_blocks != zone_blocks ||
+	    sdkp->nr_zones != nr_zones ||
+	    disk->queue->nr_zones != nr_zones) {
+		ret = blk_revalidate_disk_zones(disk);
+		if (ret != 0)
+			goto err;
+		sdkp->zone_blocks = zone_blocks;
+		sdkp->nr_zones = nr_zones;
+	}
 
 	return 0;
 
 err:
 	sdkp->capacity = 0;
-	sd_zbc_cleanup(sdkp);
 
 	return ret;
 }
 
-void sd_zbc_remove(struct scsi_disk *sdkp)
-{
-	sd_zbc_cleanup(sdkp);
-}
-
 void sd_zbc_print_zones(struct scsi_disk *sdkp)
 {
 	if (!sd_is_zoned(sdkp) || !sdkp->capacity)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 51fe6472ce02..4293dc1cd160 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -402,6 +402,7 @@ extern int blkdev_report_zones(struct block_device *bdev,
 			       unsigned int *nr_zones, gfp_t gfp_mask);
 extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
 			      sector_t nr_sectors, gfp_t gfp_mask);
+extern int blk_revalidate_disk_zones(struct gendisk *disk);
 
 extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
 				     unsigned int cmd, unsigned long arg);
@@ -414,6 +415,12 @@ static inline unsigned int blkdev_nr_zones(struct block_device *bdev)
 {
 	return 0;
 }
+
+static inline int blk_revalidate_disk_zones(struct gendisk *disk)
+{
+	return 0;
+}
+
 static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
 					    fmode_t mode, unsigned int cmd,
 					    unsigned long arg)
-- 
cgit v1.2.3


From ede95a63b5e84ddeea6b0c473b36ab8bfd8c6ce3 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 23 Oct 2018 01:11:04 +0200
Subject: bpf: add bpf_jit_limit knob to restrict unpriv allocations

Rick reported that the BPF JIT could potentially fill the entire module
space with BPF programs from unprivileged users which would prevent later
attempts to load normal kernel modules or privileged BPF programs, for
example. If JIT was enabled but unsuccessful to generate the image, then
before commit 290af86629b2 ("bpf: introduce BPF_JIT_ALWAYS_ON config")
we would always fall back to the BPF interpreter. Nowadays in the case
where the CONFIG_BPF_JIT_ALWAYS_ON could be set, then the load will abort
with a failure since the BPF interpreter was compiled out.

Add a global limit and enforce it for unprivileged users such that in case
of BPF interpreter compiled out we fail once the limit has been reached
or we fall back to BPF interpreter earlier w/o using module mem if latter
was compiled in. In a next step, fair share among unprivileged users can
be resolved in particular for the case where we would fail hard once limit
is reached.

Fixes: 290af86629b2 ("bpf: introduce BPF_JIT_ALWAYS_ON config")
Fixes: 0a14842f5a3c ("net: filter: Just In Time compiler for x86-64")
Co-Developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Jann Horn <jannh@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/sysctl/net.txt |  8 ++++++++
 include/linux/filter.h       |  1 +
 kernel/bpf/core.c            | 49 +++++++++++++++++++++++++++++++++++++++++---
 net/core/sysctl_net_core.c   | 10 +++++++--
 4 files changed, 63 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index 9ecde517728c..2793d4eac55f 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -92,6 +92,14 @@ Values :
 	0 - disable JIT kallsyms export (default value)
 	1 - enable JIT kallsyms export for privileged users only
 
+bpf_jit_limit
+-------------
+
+This enforces a global limit for memory allocations to the BPF JIT
+compiler in order to reject unprivileged JIT requests once it has
+been surpassed. bpf_jit_limit contains the value of the global limit
+in bytes.
+
 dev_weight
 --------------
 
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 91b4c934f02e..de629b706d1d 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -854,6 +854,7 @@ bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
 extern int bpf_jit_enable;
 extern int bpf_jit_harden;
 extern int bpf_jit_kallsyms;
+extern int bpf_jit_limit;
 
 typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7c7eeea8cffc..6377225b2082 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -365,10 +365,13 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
 }
 
 #ifdef CONFIG_BPF_JIT
+# define BPF_JIT_LIMIT_DEFAULT	(PAGE_SIZE * 40000)
+
 /* All BPF JIT sysctl knobs here. */
 int bpf_jit_enable   __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
 int bpf_jit_harden   __read_mostly;
 int bpf_jit_kallsyms __read_mostly;
+int bpf_jit_limit    __read_mostly = BPF_JIT_LIMIT_DEFAULT;
 
 static __always_inline void
 bpf_get_prog_addr_region(const struct bpf_prog *prog,
@@ -577,27 +580,64 @@ int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 	return ret;
 }
 
+static atomic_long_t bpf_jit_current;
+
+#if defined(MODULES_VADDR)
+static int __init bpf_jit_charge_init(void)
+{
+	/* Only used as heuristic here to derive limit. */
+	bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2,
+					    PAGE_SIZE), INT_MAX);
+	return 0;
+}
+pure_initcall(bpf_jit_charge_init);
+#endif
+
+static int bpf_jit_charge_modmem(u32 pages)
+{
+	if (atomic_long_add_return(pages, &bpf_jit_current) >
+	    (bpf_jit_limit >> PAGE_SHIFT)) {
+		if (!capable(CAP_SYS_ADMIN)) {
+			atomic_long_sub(pages, &bpf_jit_current);
+			return -EPERM;
+		}
+	}
+
+	return 0;
+}
+
+static void bpf_jit_uncharge_modmem(u32 pages)
+{
+	atomic_long_sub(pages, &bpf_jit_current);
+}
+
 struct bpf_binary_header *
 bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 		     unsigned int alignment,
 		     bpf_jit_fill_hole_t bpf_fill_ill_insns)
 {
 	struct bpf_binary_header *hdr;
-	unsigned int size, hole, start;
+	u32 size, hole, start, pages;
 
 	/* Most of BPF filters are really small, but if some of them
 	 * fill a page, allow at least 128 extra bytes to insert a
 	 * random section of illegal instructions.
 	 */
 	size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
+	pages = size / PAGE_SIZE;
+
+	if (bpf_jit_charge_modmem(pages))
+		return NULL;
 	hdr = module_alloc(size);
-	if (hdr == NULL)
+	if (!hdr) {
+		bpf_jit_uncharge_modmem(pages);
 		return NULL;
+	}
 
 	/* Fill space with illegal/arch-dep instructions. */
 	bpf_fill_ill_insns(hdr, size);
 
-	hdr->pages = size / PAGE_SIZE;
+	hdr->pages = pages;
 	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
 		     PAGE_SIZE - sizeof(*hdr));
 	start = (get_random_int() % hole) & ~(alignment - 1);
@@ -610,7 +650,10 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 
 void bpf_jit_binary_free(struct bpf_binary_header *hdr)
 {
+	u32 pages = hdr->pages;
+
 	module_memfree(hdr);
+	bpf_jit_uncharge_modmem(pages);
 }
 
 /* This symbol is only overridden by archs that have different
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b1a2c5e38530..37b4667128a3 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -279,7 +279,6 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
 	return ret;
 }
 
-# ifdef CONFIG_HAVE_EBPF_JIT
 static int
 proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
 				    void __user *buffer, size_t *lenp,
@@ -290,7 +289,6 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
 
 	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 }
-# endif
 #endif
 
 static struct ctl_table net_core_table[] = {
@@ -397,6 +395,14 @@ static struct ctl_table net_core_table[] = {
 		.extra2		= &one,
 	},
 # endif
+	{
+		.procname	= "bpf_jit_limit",
+		.data		= &bpf_jit_limit,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
+		.extra1		= &one,
+	},
 #endif
 	{
 		.procname	= "netdev_tstamp_prequeue",
-- 
cgit v1.2.3


From 70025f84e5b79627a6739533c4fe7cef5b605886 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 9 Oct 2018 17:46:51 +0100
Subject: KEYS: Provide key type operations for asymmetric key ops [ver #2]

Provide five new operations in the key_type struct that can be used to
provide access to asymmetric key operations.  These will be implemented for
the asymmetric key type in a later patch and may refer to a key retained in
RAM by the kernel or a key retained in crypto hardware.

     int (*asym_query)(const struct kernel_pkey_params *params,
		       struct kernel_pkey_query *info);
     int (*asym_eds_op)(struct kernel_pkey_params *params,
			const void *in, void *out);
     int (*asym_verify_signature)(struct kernel_pkey_params *params,
			          const void *in, const void *in2);

Since encrypt, decrypt and sign are identical in their interfaces, they're
rolled together in the asym_eds_op() operation and there's an operation ID
in the params argument to distinguish them.

Verify is different in that we supply the data and the signature instead
and get an error value (or 0) as the only result on the expectation that
this may well be how a hardware crypto device may work.

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Denis Kenzior <denkenz@gmail.com>
Tested-by: Denis Kenzior <denkenz@gmail.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 Documentation/security/keys/core.rst | 106 +++++++++++++++++++++++++++++++++++
 include/linux/key-type.h             |  11 ++++
 include/linux/keyctl.h               |  46 +++++++++++++++
 include/uapi/linux/keyctl.h          |   5 ++
 4 files changed, 168 insertions(+)
 create mode 100644 include/linux/keyctl.h

(limited to 'include/linux')

diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst
index 9ce7256c6edb..c144978479d5 100644
--- a/Documentation/security/keys/core.rst
+++ b/Documentation/security/keys/core.rst
@@ -1483,6 +1483,112 @@ The structure has a number of fields, some of which are mandatory:
      attempted key link operation. If there is no match, -EINVAL is returned.
 
 
+  *  ``int (*asym_eds_op)(struct kernel_pkey_params *params,
+			  const void *in, void *out);``
+     ``int (*asym_verify_signature)(struct kernel_pkey_params *params,
+				    const void *in, const void *in2);``
+
+     These methods are optional.  If provided the first allows a key to be
+     used to encrypt, decrypt or sign a blob of data, and the second allows a
+     key to verify a signature.
+
+     In all cases, the following information is provided in the params block::
+
+	struct kernel_pkey_params {
+		struct key	*key;
+		const char	*encoding;
+		const char	*hash_algo;
+		char		*info;
+		__u32		in_len;
+		union {
+			__u32	out_len;
+			__u32	in2_len;
+		};
+		enum kernel_pkey_operation op : 8;
+	};
+
+     This includes the key to be used; a string indicating the encoding to use
+     (for instance, "pkcs1" may be used with an RSA key to indicate
+     RSASSA-PKCS1-v1.5 or RSAES-PKCS1-v1.5 encoding or "raw" if no encoding);
+     the name of the hash algorithm used to generate the data for a signature
+     (if appropriate); the sizes of the input and output (or second input)
+     buffers; and the ID of the operation to be performed.
+
+     For a given operation ID, the input and output buffers are used as
+     follows::
+
+	Operation ID		in,in_len	out,out_len	in2,in2_len
+	=======================	===============	===============	===============
+	kernel_pkey_encrypt	Raw data	Encrypted data	-
+	kernel_pkey_decrypt	Encrypted data	Raw data	-
+	kernel_pkey_sign	Raw data	Signature	-
+	kernel_pkey_verify	Raw data	-		Signature
+
+     asym_eds_op() deals with encryption, decryption and signature creation as
+     specified by params->op.  Note that params->op is also set for
+     asym_verify_signature().
+
+     Encrypting and signature creation both take raw data in the input buffer
+     and return the encrypted result in the output buffer.  Padding may have
+     been added if an encoding was set.  In the case of signature creation,
+     depending on the encoding, the padding created may need to indicate the
+     digest algorithm - the name of which should be supplied in hash_algo.
+
+     Decryption takes encrypted data in the input buffer and returns the raw
+     data in the output buffer.  Padding will get checked and stripped off if
+     an encoding was set.
+
+     Verification takes raw data in the input buffer and the signature in the
+     second input buffer and checks that the one matches the other.  Padding
+     will be validated.  Depending on the encoding, the digest algorithm used
+     to generate the raw data may need to be indicated in hash_algo.
+
+     If successful, asym_eds_op() should return the number of bytes written
+     into the output buffer.  asym_verify_signature() should return 0.
+
+     A variety of errors may be returned, including EOPNOTSUPP if the operation
+     is not supported; EKEYREJECTED if verification fails; ENOPKG if the
+     required crypto isn't available.
+
+
+  *  ``int (*asym_query)(const struct kernel_pkey_params *params,
+			 struct kernel_pkey_query *info);``
+
+     This method is optional.  If provided it allows information about the
+     public or asymmetric key held in the key to be determined.
+
+     The parameter block is as for asym_eds_op() and co. but in_len and out_len
+     are unused.  The encoding and hash_algo fields should be used to reduce
+     the returned buffer/data sizes as appropriate.
+
+     If successful, the following information is filled in::
+
+	struct kernel_pkey_query {
+		__u32		supported_ops;
+		__u32		key_size;
+		__u16		max_data_size;
+		__u16		max_sig_size;
+		__u16		max_enc_size;
+		__u16		max_dec_size;
+	};
+
+     The supported_ops field will contain a bitmask indicating what operations
+     are supported by the key, including encryption of a blob, decryption of a
+     blob, signing a blob and verifying the signature on a blob.  The following
+     constants are defined for this::
+
+	KEYCTL_SUPPORTS_{ENCRYPT,DECRYPT,SIGN,VERIFY}
+
+     The key_size field is the size of the key in bits.  max_data_size and
+     max_sig_size are the maximum raw data and signature sizes for creation and
+     verification of a signature; max_enc_size and max_dec_size are the maximum
+     raw data and signature sizes for encryption and decryption.  The
+     max_*_size fields are measured in bytes.
+
+     If successful, 0 will be returned.  If the key doesn't support this,
+     EOPNOTSUPP will be returned.
+
+
 Request-Key Callback Service
 ============================
 
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index 05d8fb5a06c4..bc9af551fc83 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -17,6 +17,9 @@
 
 #ifdef CONFIG_KEYS
 
+struct kernel_pkey_query;
+struct kernel_pkey_params;
+
 /*
  * key under-construction record
  * - passed to the request_key actor if supplied
@@ -155,6 +158,14 @@ struct key_type {
 	 */
 	struct key_restriction *(*lookup_restriction)(const char *params);
 
+	/* Asymmetric key accessor functions. */
+	int (*asym_query)(const struct kernel_pkey_params *params,
+			  struct kernel_pkey_query *info);
+	int (*asym_eds_op)(struct kernel_pkey_params *params,
+			   const void *in, void *out);
+	int (*asym_verify_signature)(struct kernel_pkey_params *params,
+				     const void *in, const void *in2);
+
 	/* internal fields */
 	struct list_head	link;		/* link in types list */
 	struct lock_class_key	lock_class;	/* key->sem lock class */
diff --git a/include/linux/keyctl.h b/include/linux/keyctl.h
new file mode 100644
index 000000000000..c7c48c79ce0e
--- /dev/null
+++ b/include/linux/keyctl.h
@@ -0,0 +1,46 @@
+/* keyctl kernel bits
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef __LINUX_KEYCTL_H
+#define __LINUX_KEYCTL_H
+
+#include <uapi/linux/keyctl.h>
+
+struct kernel_pkey_query {
+	__u32		supported_ops;	/* Which ops are supported */
+	__u32		key_size;	/* Size of the key in bits */
+	__u16		max_data_size;	/* Maximum size of raw data to sign in bytes */
+	__u16		max_sig_size;	/* Maximum size of signature in bytes */
+	__u16		max_enc_size;	/* Maximum size of encrypted blob in bytes */
+	__u16		max_dec_size;	/* Maximum size of decrypted blob in bytes */
+};
+
+enum kernel_pkey_operation {
+	kernel_pkey_encrypt,
+	kernel_pkey_decrypt,
+	kernel_pkey_sign,
+	kernel_pkey_verify,
+};
+
+struct kernel_pkey_params {
+	struct key	*key;
+	const char	*encoding;	/* Encoding (eg. "oaep" or "raw" for none) */
+	const char	*hash_algo;	/* Digest algorithm used (eg. "sha1") or NULL if N/A */
+	char		*info;		/* Modified info string to be released later */
+	__u32		in_len;		/* Input data size */
+	union {
+		__u32	out_len;	/* Output buffer size (enc/dec/sign) */
+		__u32	in2_len;	/* 2nd input data size (verify) */
+	};
+	enum kernel_pkey_operation op : 8;
+};
+
+#endif /* __LINUX_KEYCTL_H */
diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h
index 0f3cb13db8e9..1d1e9f2877af 100644
--- a/include/uapi/linux/keyctl.h
+++ b/include/uapi/linux/keyctl.h
@@ -82,4 +82,9 @@ struct keyctl_kdf_params {
 	__u32 __spare[8];
 };
 
+#define KEYCTL_SUPPORTS_ENCRYPT		0x01
+#define KEYCTL_SUPPORTS_DECRYPT		0x02
+#define KEYCTL_SUPPORTS_SIGN		0x04
+#define KEYCTL_SUPPORTS_VERIFY		0x08
+
 #endif /*  _LINUX_KEYCTL_H */
-- 
cgit v1.2.3


From 4d54954a197175c0dcb3c82af0c0740d0c5f827a Mon Sep 17 00:00:00 2001
From: Sebastien Boisvert <sebhtml@videotron.qc.ca>
Date: Fri, 26 Oct 2018 15:02:23 -0700
Subject: include/linux/pfn_t.h: force '~' to be parsed as an unary operator

Tracing the event "fs_dax:dax_pmd_insert_mapping" with perf produces this
warning:

      [fs_dax:dax_pmd_insert_mapping] unknown op '~'

It is printed in process_op (tools/lib/traceevent/event-parse.c) because
'~' is parsed as a binary operator.

perf reads the format of fs_dax:dax_pmd_insert_mapping ("print fmt") from
/sys/kernel/debug/tracing/events/fs_dax/dax_pmd_insert_mapping/format .

The format contains:

~(((u64) ~(~(((1UL) << 12)-1)))
         ^
         \ interpreted as a binary operator by process_op().

This part is generated in the declaration of the event class
dax_pmd_insert_mapping_class in include/trace/events/fs_dax.h :

		__print_flags_u64(__entry->pfn_val & PFN_FLAGS_MASK, "|",
			PFN_FLAGS_TRACE),

This patch adds a pair of parentheses in the declaration of PFN_FLAGS_MASK
to make sure that '~' is parsed as a unary operator by perf.

The part of the format that was problematic is now:

~(((u64) (~(~(((1UL) << 12)-1))))

Now, all the '~' are parsed as unary operators.

Link: http://lkml.kernel.org/r/20181021145939.8760-1-sebhtml@videotron.qc.ca
Signed-off-by: Sebastien Boisvert <sebhtml@videotron.qc.ca>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Cc: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: "Tzvetomir Stoyanov (VMware)" <tz.stoyanov@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Ross Zwisler <zwisler@kernel.org>
Cc: Elenie Godzaridis <arangradient@gmail.com>
Cc: <stable@vger.kerenl.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pfn_t.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index 21713dc14ce2..673546ba7342 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -10,7 +10,7 @@
  * PFN_DEV - pfn is not covered by system memmap by default
  * PFN_MAP - pfn has a dynamic page mapping established by a device driver
  */
-#define PFN_FLAGS_MASK (((u64) ~PAGE_MASK) << (BITS_PER_LONG_LONG - PAGE_SHIFT))
+#define PFN_FLAGS_MASK (((u64) (~PAGE_MASK)) << (BITS_PER_LONG_LONG - PAGE_SHIFT))
 #define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1))
 #define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2))
 #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
-- 
cgit v1.2.3


From 74f213ea25b99fddcf34cbe07dabdb01136bcd86 Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <aryabinin@virtuozzo.com>
Date: Fri, 26 Oct 2018 15:02:27 -0700
Subject: include/linux/linkage.h: align weak symbols

Since WEAK() supposed to be used instead of ENTRY() to define weak
symbols, but unlike ENTRY() it doesn't have ALIGN directive.  It seems
there is no actual reason to not have, so let's add ALIGN to WEAK() too.

Link: http://lkml.kernel.org/r/20180920135631.23833-1-aryabinin@virtuozzo.com
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Will Deacon <will.deacon@arm.com>, Catalin Marinas <catalin.marinas@arm.com>
Cc: Kyeongdon Kim <kyeongdon.kim@lge.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/linkage.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index d7618c41f74c..7c47b1a471d4 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -90,6 +90,7 @@
 #ifndef WEAK
 #define WEAK(name)	   \
 	.weak name ASM_NL   \
+	ALIGN ASM_NL \
 	name:
 #endif
 
-- 
cgit v1.2.3


From 5780a02fd1e87641ad6a8dd6891a1e890cf45c5d Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Fri, 26 Oct 2018 15:02:59 -0700
Subject: fs/iomap.c: change return type to vm_fault_t

Change iomap_page_mkwrite() return type to vm_fault_t.

see commit 1c8f422059ae ("mm: change return type to vm_fault_t") for
reference.

Link: http://lkml.kernel.org/r/20180827172050.GA18673@jordon-HP-15-Notebook-PC
Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Reviewed-by: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/iomap.c            | 2 +-
 include/linux/iomap.h | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/iomap.c b/fs/iomap.c
index ec15cf2ec696..90c2febc93ac 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1057,7 +1057,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
 	return length;
 }
 
-int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
+vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops)
 {
 	struct page *page = vmf->page;
 	struct inode *inode = file_inode(vmf->vma->vm_file);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 3555d54bf79a..9a4258154b25 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -6,6 +6,7 @@
 #include <linux/bitmap.h>
 #include <linux/mm.h>
 #include <linux/types.h>
+#include <linux/mm_types.h>
 
 struct address_space;
 struct fiemap_extent_info;
@@ -141,7 +142,8 @@ int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
 		bool *did_zero, const struct iomap_ops *ops);
 int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 		const struct iomap_ops *ops);
-int iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops);
+vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf,
+			const struct iomap_ops *ops);
 int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		loff_t start, loff_t len, const struct iomap_ops *ops);
 loff_t iomap_seek_hole(struct inode *inode, loff_t offset,
-- 
cgit v1.2.3


From 9b6f7e163cd0f468d1b9696b785659d3c27c8667 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 26 Oct 2018 15:03:19 -0700
Subject: mm: rework memcg kernel stack accounting

If CONFIG_VMAP_STACK is set, kernel stacks are allocated using
__vmalloc_node_range() with __GFP_ACCOUNT.  So kernel stack pages are
charged against corresponding memory cgroups on allocation and uncharged
on releasing them.

The problem is that we do cache kernel stacks in small per-cpu caches and
do reuse them for new tasks, which can belong to different memory cgroups.

Each stack page still holds a reference to the original cgroup, so the
cgroup can't be released until the vmap area is released.

To make this happen we need more than two subsequent exits without forks
in between on the current cpu, which makes it very unlikely to happen.  As
a result, I saw a significant number of dying cgroups (in theory, up to 2
* number_of_cpu + number_of_tasks), which can't be released even by
significant memory pressure.

As a cgroup structure can take a significant amount of memory (first of
all, per-cpu data like memcg statistics), it leads to a noticeable waste
of memory.

Link: http://lkml.kernel.org/r/20180827162621.30187-1-guro@fb.com
Fixes: ac496bf48d97 ("fork: Optimize task creation by caching two thread stacks per CPU if CONFIG_VMAP_STACK=y")
Signed-off-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Michal Hocko <mhocko@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h | 13 ++++++++++-
 kernel/fork.c              | 55 +++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 61 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 652f602167df..4399cc3f00e4 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1268,10 +1268,11 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
 void memcg_kmem_put_cache(struct kmem_cache *cachep);
 int memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
 			    struct mem_cgroup *memcg);
+
+#ifdef CONFIG_MEMCG_KMEM
 int memcg_kmem_charge(struct page *page, gfp_t gfp, int order);
 void memcg_kmem_uncharge(struct page *page, int order);
 
-#ifdef CONFIG_MEMCG_KMEM
 extern struct static_key_false memcg_kmem_enabled_key;
 extern struct workqueue_struct *memcg_kmem_cache_wq;
 
@@ -1307,6 +1308,16 @@ extern int memcg_expand_shrinker_maps(int new_id);
 extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
 				   int nid, int shrinker_id);
 #else
+
+static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
+{
+	return 0;
+}
+
+static inline void memcg_kmem_uncharge(struct page *page, int order)
+{
+}
+
 #define for_each_memcg_cache_index(_idx)	\
 	for (; NULL; )
 
diff --git a/kernel/fork.c b/kernel/fork.c
index f0b58479534f..3c719fec46c5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -223,9 +223,14 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 		return s->addr;
 	}
 
+	/*
+	 * Allocated stacks are cached and later reused by new threads,
+	 * so memcg accounting is performed manually on assigning/releasing
+	 * stacks to tasks. Drop __GFP_ACCOUNT.
+	 */
 	stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
 				     VMALLOC_START, VMALLOC_END,
-				     THREADINFO_GFP,
+				     THREADINFO_GFP & ~__GFP_ACCOUNT,
 				     PAGE_KERNEL,
 				     0, node, __builtin_return_address(0));
 
@@ -248,9 +253,19 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 static inline void free_thread_stack(struct task_struct *tsk)
 {
 #ifdef CONFIG_VMAP_STACK
-	if (task_stack_vm_area(tsk)) {
+	struct vm_struct *vm = task_stack_vm_area(tsk);
+
+	if (vm) {
 		int i;
 
+		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+			mod_memcg_page_state(vm->pages[i],
+					     MEMCG_KERNEL_STACK_KB,
+					     -(int)(PAGE_SIZE / 1024));
+
+			memcg_kmem_uncharge(vm->pages[i], 0);
+		}
+
 		for (i = 0; i < NR_CACHED_STACKS; i++) {
 			if (this_cpu_cmpxchg(cached_stacks[i],
 					NULL, tsk->stack_vm_area) != NULL)
@@ -351,10 +366,6 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
 					    NR_KERNEL_STACK_KB,
 					    PAGE_SIZE / 1024 * account);
 		}
-
-		/* All stack pages belong to the same memcg. */
-		mod_memcg_page_state(vm->pages[0], MEMCG_KERNEL_STACK_KB,
-				     account * (THREAD_SIZE / 1024));
 	} else {
 		/*
 		 * All stack pages are in the same zone and belong to the
@@ -370,6 +381,35 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
 	}
 }
 
+static int memcg_charge_kernel_stack(struct task_struct *tsk)
+{
+#ifdef CONFIG_VMAP_STACK
+	struct vm_struct *vm = task_stack_vm_area(tsk);
+	int ret;
+
+	if (vm) {
+		int i;
+
+		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
+			/*
+			 * If memcg_kmem_charge() fails, page->mem_cgroup
+			 * pointer is NULL, and both memcg_kmem_uncharge()
+			 * and mod_memcg_page_state() in free_thread_stack()
+			 * will ignore this page. So it's safe.
+			 */
+			ret = memcg_kmem_charge(vm->pages[i], GFP_KERNEL, 0);
+			if (ret)
+				return ret;
+
+			mod_memcg_page_state(vm->pages[i],
+					     MEMCG_KERNEL_STACK_KB,
+					     PAGE_SIZE / 1024);
+		}
+	}
+#endif
+	return 0;
+}
+
 static void release_task_stack(struct task_struct *tsk)
 {
 	if (WARN_ON(tsk->state != TASK_DEAD))
@@ -807,6 +847,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	if (!stack)
 		goto free_tsk;
 
+	if (memcg_charge_kernel_stack(tsk))
+		goto free_stack;
+
 	stack_vm_area = task_stack_vm_area(tsk);
 
 	err = arch_dup_task_struct(tsk, orig);
-- 
cgit v1.2.3


From 68600f623d69da428c6163275f97ca126e1a8ec5 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 26 Oct 2018 15:03:27 -0700
Subject: mm: don't miss the last page because of round-off error

I've noticed, that dying memory cgroups are often pinned in memory by a
single pagecache page.  Even under moderate memory pressure they sometimes
stayed in such state for a long time.  That looked strange.

My investigation showed that the problem is caused by applying the LRU
pressure balancing math:

  scan = div64_u64(scan * fraction[lru], denominator),

where

  denominator = fraction[anon] + fraction[file] + 1.

Because fraction[lru] is always less than denominator, if the initial scan
size is 1, the result is always 0.

This means the last page is not scanned and has
no chances to be reclaimed.

Fix this by rounding up the result of the division.

In practice this change significantly improves the speed of dying cgroups
reclaim.

[guro@fb.com: prevent double calculation of DIV64_U64_ROUND_UP() arguments]
  Link: http://lkml.kernel.org/r/20180829213311.GA13501@castle
Link: http://lkml.kernel.org/r/20180827162621.30187-3-guro@fb.com
Signed-off-by: Roman Gushchin <guro@fb.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/math64.h | 3 +++
 mm/vmscan.c            | 6 ++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/math64.h b/include/linux/math64.h
index 837f2f2d1d34..bb2c84afb80c 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -281,4 +281,7 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor)
 }
 #endif /* mul_u64_u32_div */
 
+#define DIV64_U64_ROUND_UP(ll, d)	\
+	({ u64 _tmp = (d); div64_u64((ll) + _tmp - 1, _tmp); })
+
 #endif /* _LINUX_MATH64_H */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c5ef7240cbcb..961401c46334 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2456,9 +2456,11 @@ out:
 			/*
 			 * Scan types proportional to swappiness and
 			 * their relative recent reclaim efficiency.
+			 * Make sure we don't miss the last page
+			 * because of a round-off error.
 			 */
-			scan = div64_u64(scan * fraction[file],
-					 denominator);
+			scan = DIV64_U64_ROUND_UP(scan * fraction[file],
+						  denominator);
 			break;
 		case SCAN_FILE:
 		case SCAN_ANON:
-- 
cgit v1.2.3


From 33490af3f5c15757448b6c454ca93b48a333aa1b Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 26 Oct 2018 15:03:35 -0700
Subject: mm, mmu_notifier: be explicit about range invalition non-blocking
 mode

If invalidate_range_start() is called for !blocking mode then all
callbacks have to guarantee they will no block/sleep.  The same obviously
applies to invalidate_range_end because this operation pairs with the
former and they are called from the same context.  Make sure this is
appropriately documented.

Link: http://lkml.kernel.org/r/20180827112623.8992-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Jerome Glisse <jglisse@redhat.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmu_notifier.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 133ba78820ee..698e371aafe3 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -153,7 +153,9 @@ struct mmu_notifier_ops {
 	 *
 	 * If blockable argument is set to false then the callback cannot
 	 * sleep and has to return with -EAGAIN. 0 should be returned
-	 * otherwise.
+	 * otherwise. Please note that if invalidate_range_start approves
+	 * a non-blocking behavior then the same applies to
+	 * invalidate_range_end.
 	 *
 	 */
 	int (*invalidate_range_start)(struct mmu_notifier *mn,
-- 
cgit v1.2.3


From 4e15a073a168b62311db911a55c4d4f1500c2821 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 26 Oct 2018 15:03:39 -0700
Subject: Revert "mm, mmu_notifier: annotate mmu notifiers with blockable
 invalidate callbacks"

Revert 5ff7091f5a2ca ("mm, mmu_notifier: annotate mmu notifiers with
blockable invalidate callbacks").

MMU_INVALIDATE_DOES_NOT_BLOCK flags was the only one used and it is no
longer needed since 93065ac753e4 ("mm, oom: distinguish blockable mode for
mmu notifiers").  We now have a full support for per range !blocking
behavior so we can drop the stop gap workaround which the per notifier
flag was used for.

Link: http://lkml.kernel.org/r/20180827112623.8992-4-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/infiniband/hw/hfi1/mmu_rb.c |  1 -
 drivers/iommu/amd_iommu_v2.c        |  1 -
 drivers/iommu/intel-svm.c           |  1 -
 drivers/misc/sgi-gru/grutlbpurge.c  |  1 -
 include/linux/mmu_notifier.h        | 23 -----------------------
 mm/mmu_notifier.c                   | 31 -------------------------------
 virt/kvm/kvm_main.c                 |  1 -
 7 files changed, 59 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/hfi1/mmu_rb.c b/drivers/infiniband/hw/hfi1/mmu_rb.c
index e1c7996c018e..475b769e120c 100644
--- a/drivers/infiniband/hw/hfi1/mmu_rb.c
+++ b/drivers/infiniband/hw/hfi1/mmu_rb.c
@@ -77,7 +77,6 @@ static void do_remove(struct mmu_rb_handler *handler,
 static void handle_remove(struct work_struct *work);
 
 static const struct mmu_notifier_ops mn_opts = {
-	.flags = MMU_INVALIDATE_DOES_NOT_BLOCK,
 	.invalidate_range_start = mmu_notifier_range_start,
 };
 
diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c
index 58da65df03f5..fd552235bd13 100644
--- a/drivers/iommu/amd_iommu_v2.c
+++ b/drivers/iommu/amd_iommu_v2.c
@@ -427,7 +427,6 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
 }
 
 static const struct mmu_notifier_ops iommu_mn = {
-	.flags			= MMU_INVALIDATE_DOES_NOT_BLOCK,
 	.release		= mn_release,
 	.clear_flush_young      = mn_clear_flush_young,
 	.invalidate_range       = mn_invalidate_range,
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 4a03e5090952..db301efe126d 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -273,7 +273,6 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
 }
 
 static const struct mmu_notifier_ops intel_mmuops = {
-	.flags = MMU_INVALIDATE_DOES_NOT_BLOCK,
 	.release = intel_mm_release,
 	.change_pte = intel_change_pte,
 	.invalidate_range = intel_invalidate_range,
diff --git a/drivers/misc/sgi-gru/grutlbpurge.c b/drivers/misc/sgi-gru/grutlbpurge.c
index be28f05bfafa..03b49d52092e 100644
--- a/drivers/misc/sgi-gru/grutlbpurge.c
+++ b/drivers/misc/sgi-gru/grutlbpurge.c
@@ -261,7 +261,6 @@ static void gru_release(struct mmu_notifier *mn, struct mm_struct *mm)
 
 
 static const struct mmu_notifier_ops gru_mmuops = {
-	.flags			= MMU_INVALIDATE_DOES_NOT_BLOCK,
 	.invalidate_range_start	= gru_invalidate_range_start,
 	.invalidate_range_end	= gru_invalidate_range_end,
 	.release		= gru_release,
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 698e371aafe3..9893a6432adf 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -2,7 +2,6 @@
 #ifndef _LINUX_MMU_NOTIFIER_H
 #define _LINUX_MMU_NOTIFIER_H
 
-#include <linux/types.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/mm_types.h>
@@ -11,9 +10,6 @@
 struct mmu_notifier;
 struct mmu_notifier_ops;
 
-/* mmu_notifier_ops flags */
-#define MMU_INVALIDATE_DOES_NOT_BLOCK	(0x01)
-
 #ifdef CONFIG_MMU_NOTIFIER
 
 /*
@@ -30,15 +26,6 @@ struct mmu_notifier_mm {
 };
 
 struct mmu_notifier_ops {
-	/*
-	 * Flags to specify behavior of callbacks for this MMU notifier.
-	 * Used to determine which context an operation may be called.
-	 *
-	 * MMU_INVALIDATE_DOES_NOT_BLOCK: invalidate_range_* callbacks do not
-	 *	block
-	 */
-	int flags;
-
 	/*
 	 * Called either by mmu_notifier_unregister or when the mm is
 	 * being destroyed by exit_mmap, always before all pages are
@@ -183,10 +170,6 @@ struct mmu_notifier_ops {
 	 * Note that this function might be called with just a sub-range
 	 * of what was passed to invalidate_range_start()/end(), if
 	 * called between those functions.
-	 *
-	 * If this callback cannot block, and invalidate_range_{start,end}
-	 * cannot block, mmu_notifier_ops.flags should have
-	 * MMU_INVALIDATE_DOES_NOT_BLOCK set.
 	 */
 	void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm,
 				 unsigned long start, unsigned long end);
@@ -241,7 +224,6 @@ extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
 				  bool only_end);
 extern void __mmu_notifier_invalidate_range(struct mm_struct *mm,
 				  unsigned long start, unsigned long end);
-extern bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm);
 
 static inline void mmu_notifier_release(struct mm_struct *mm)
 {
@@ -495,11 +477,6 @@ static inline void mmu_notifier_invalidate_range(struct mm_struct *mm,
 {
 }
 
-static inline bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm)
-{
-	return false;
-}
-
 static inline void mmu_notifier_mm_init(struct mm_struct *mm)
 {
 }
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 82bb1a939c0e..5119ff846769 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -247,37 +247,6 @@ void __mmu_notifier_invalidate_range(struct mm_struct *mm,
 }
 EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range);
 
-/*
- * Must be called while holding mm->mmap_sem for either read or write.
- * The result is guaranteed to be valid until mm->mmap_sem is dropped.
- */
-bool mm_has_blockable_invalidate_notifiers(struct mm_struct *mm)
-{
-	struct mmu_notifier *mn;
-	int id;
-	bool ret = false;
-
-	WARN_ON_ONCE(!rwsem_is_locked(&mm->mmap_sem));
-
-	if (!mm_has_notifiers(mm))
-		return ret;
-
-	id = srcu_read_lock(&srcu);
-	hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) {
-		if (!mn->ops->invalidate_range &&
-		    !mn->ops->invalidate_range_start &&
-		    !mn->ops->invalidate_range_end)
-				continue;
-
-		if (!(mn->ops->flags & MMU_INVALIDATE_DOES_NOT_BLOCK)) {
-			ret = true;
-			break;
-		}
-	}
-	srcu_read_unlock(&srcu, id);
-	return ret;
-}
-
 static int do_mmu_notifier_register(struct mmu_notifier *mn,
 				    struct mm_struct *mm,
 				    int take_mmap_sem)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 786ade1843a2..2679e476b6c3 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -497,7 +497,6 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
 }
 
 static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
-	.flags			= MMU_INVALIDATE_DOES_NOT_BLOCK,
 	.invalidate_range_start	= kvm_mmu_notifier_invalidate_range_start,
 	.invalidate_range_end	= kvm_mmu_notifier_invalidate_range_end,
 	.clear_flush_young	= kvm_mmu_notifier_clear_flush_young,
-- 
cgit v1.2.3


From 5d7476374564645b1a2d299e242ad7b17b1104ee Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 26 Oct 2018 15:04:10 -0700
Subject: mm: remove vm_insert_mixed()

All callers are now converted to vmf_insert_mixed() so convert
vmf_insert_mixed() from being a compatibility wrapper into the real
function.

Link: http://lkml.kernel.org/r/20180828145728.11873-3-willy@infradead.org
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 15 +--------------
 mm/memory.c        | 14 ++++++++++----
 2 files changed, 11 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index daa2b8f1e9a8..ecc6f9347756 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2506,7 +2506,7 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn);
 int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn, pgprot_t pgprot);
-int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 			pfn_t pfn);
 vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
 		unsigned long addr, pfn_t pfn);
@@ -2525,19 +2525,6 @@ static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma,
 	return VM_FAULT_NOPAGE;
 }
 
-static inline vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma,
-				unsigned long addr, pfn_t pfn)
-{
-	int err = vm_insert_mixed(vma, addr, pfn);
-
-	if (err == -ENOMEM)
-		return VM_FAULT_OOM;
-	if (err < 0 && err != -EBUSY)
-		return VM_FAULT_SIGBUS;
-
-	return VM_FAULT_NOPAGE;
-}
-
 static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma,
 			unsigned long addr, unsigned long pfn)
 {
diff --git a/mm/memory.c b/mm/memory.c
index 21a5e6e4758b..200aaf291e98 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1693,13 +1693,19 @@ static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 	return insert_pfn(vma, addr, pfn, pgprot, mkwrite);
 }
 
-int vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
-			pfn_t pfn)
+vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
+		pfn_t pfn)
 {
-	return __vm_insert_mixed(vma, addr, pfn, false);
+	int err = __vm_insert_mixed(vma, addr, pfn, false);
 
+	if (err == -ENOMEM)
+		return VM_FAULT_OOM;
+	if (err < 0 && err != -EBUSY)
+		return VM_FAULT_SIGBUS;
+
+	return VM_FAULT_NOPAGE;
 }
-EXPORT_SYMBOL(vm_insert_mixed);
+EXPORT_SYMBOL(vmf_insert_mixed);
 
 /*
  *  If the insertion of PTE failed because someone else already added a
-- 
cgit v1.2.3


From f5e6d1d5f8f3080aa7a51acea1f77085f45abe9c Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 26 Oct 2018 15:04:13 -0700
Subject: mm: introduce vmf_insert_pfn_prot()

Like vm_insert_pfn_prot(), but returns a vm_fault_t instead of an errno.
Also unexport vm_insert_pfn_prot as it has no modular users.

Link: http://lkml.kernel.org/r/20180828145728.11873-4-willy@infradead.org
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  2 ++
 mm/memory.c        | 47 +++++++++++++++++++++++++++++++----------------
 2 files changed, 33 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index ecc6f9347756..f1293bdc6de2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2506,6 +2506,8 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn);
 int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn, pgprot_t pgprot);
+vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+			unsigned long pfn, pgprot_t pgprot);
 vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 			pfn_t pfn);
 vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
diff --git a/mm/memory.c b/mm/memory.c
index 200aaf291e98..b3eecb3aa65f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1596,21 +1596,6 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 }
 EXPORT_SYMBOL(vm_insert_pfn);
 
-/**
- * vm_insert_pfn_prot - insert single pfn into user vma with specified pgprot
- * @vma: user vma to map to
- * @addr: target user address of this page
- * @pfn: source kernel pfn
- * @pgprot: pgprot flags for the inserted page
- *
- * This is exactly like vm_insert_pfn, except that it allows drivers to
- * to override pgprot on a per-page basis.
- *
- * This only makes sense for IO mappings, and it makes no sense for
- * cow mappings.  In general, using multiple vmas is preferable;
- * vm_insert_pfn_prot should only be used if using multiple VMAs is
- * impractical.
- */
 int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn, pgprot_t pgprot)
 {
@@ -1640,7 +1625,37 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 
 	return ret;
 }
-EXPORT_SYMBOL(vm_insert_pfn_prot);
+
+/**
+ * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @pfn: source kernel pfn
+ * @pgprot: pgprot flags for the inserted page
+ *
+ * This is exactly like vmf_insert_pfn(), except that it allows drivers to
+ * to override pgprot on a per-page basis.
+ *
+ * This only makes sense for IO mappings, and it makes no sense for
+ * COW mappings.  In general, using multiple vmas is preferable;
+ * vm_insert_pfn_prot should only be used if using multiple VMAs is
+ * impractical.
+ *
+ * Return: vm_fault_t value.
+ */
+vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+			unsigned long pfn, pgprot_t pgprot)
+{
+	int err = vm_insert_pfn_prot(vma, addr, pfn, pgprot);
+
+	if (err == -ENOMEM)
+		return VM_FAULT_OOM;
+	if (err < 0 && err != -EBUSY)
+		return VM_FAULT_SIGBUS;
+
+	return VM_FAULT_NOPAGE;
+}
+EXPORT_SYMBOL(vmf_insert_pfn_prot);
 
 static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn)
 {
-- 
cgit v1.2.3


From bc12e6ad9617831727e4201e7cbf5c3b868cc8fd Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 26 Oct 2018 15:04:21 -0700
Subject: mm: make vm_insert_pfn_prot() static

Now this is no longer used outside mm/memory.c, make it static.

Link: http://lkml.kernel.org/r/20180828145728.11873-6-willy@infradead.org
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  2 --
 mm/memory.c        | 50 +++++++++++++++++++++++++-------------------------
 2 files changed, 25 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f1293bdc6de2..0f5db0455e61 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2504,8 +2504,6 @@ int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
 int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn);
-int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
-			unsigned long pfn, pgprot_t pgprot);
 vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn, pgprot_t pgprot);
 vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
diff --git a/mm/memory.c b/mm/memory.c
index b3eecb3aa65f..6365144f8267 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1572,31 +1572,7 @@ out:
 	return retval;
 }
 
-/**
- * vm_insert_pfn - insert single pfn into user vma
- * @vma: user vma to map to
- * @addr: target user address of this page
- * @pfn: source kernel pfn
- *
- * Similar to vm_insert_page, this allows drivers to insert individual pages
- * they've allocated into a user vma. Same comments apply.
- *
- * This function should only be called from a vm_ops->fault handler, and
- * in that case the handler should return NULL.
- *
- * vma cannot be a COW mapping.
- *
- * As this is called only for pages that do not currently exist, we
- * do not need to flush old virtual caches or the TLB.
- */
-int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
-			unsigned long pfn)
-{
-	return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot);
-}
-EXPORT_SYMBOL(vm_insert_pfn);
-
-int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
+static int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn, pgprot_t pgprot)
 {
 	int ret;
@@ -1626,6 +1602,30 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 	return ret;
 }
 
+/**
+ * vm_insert_pfn - insert single pfn into user vma
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @pfn: source kernel pfn
+ *
+ * Similar to vm_insert_page, this allows drivers to insert individual pages
+ * they've allocated into a user vma. Same comments apply.
+ *
+ * This function should only be called from a vm_ops->fault handler, and
+ * in that case the handler should return NULL.
+ *
+ * vma cannot be a COW mapping.
+ *
+ * As this is called only for pages that do not currently exist, we
+ * do not need to flush old virtual caches or the TLB.
+ */
+int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+			unsigned long pfn)
+{
+	return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_insert_pfn);
+
 /**
  * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot
  * @vma: user vma to map to
-- 
cgit v1.2.3


From 67fa1666223d7c825f6651add97f0011fe155f36 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 26 Oct 2018 15:04:26 -0700
Subject: mm: remove references to vm_insert_pfn()

Documentation and comments.

Link: http://lkml.kernel.org/r/20180828145728.11873-7-willy@infradead.org
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/x86/pat.txt     | 4 ++--
 include/asm-generic/pgtable.h | 4 ++--
 include/linux/hmm.h           | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt
index 2a4ee6302122..481d8d8536ac 100644
--- a/Documentation/x86/pat.txt
+++ b/Documentation/x86/pat.txt
@@ -90,12 +90,12 @@ pci proc               |    --    |    --      |       WC         |
 Advanced APIs for drivers
 -------------------------
 A. Exporting pages to users with remap_pfn_range, io_remap_pfn_range,
-vm_insert_pfn
+vmf_insert_pfn
 
 Drivers wanting to export some pages to userspace do it by using mmap
 interface and a combination of
 1) pgprot_noncached()
-2) io_remap_pfn_range() or remap_pfn_range() or vm_insert_pfn()
+2) io_remap_pfn_range() or remap_pfn_range() or vmf_insert_pfn()
 
 With PAT support, a new API pgprot_writecombine is being added. So, drivers can
 continue to use the above sequence, with either pgprot_noncached() or
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 88ebc6102c7c..5657a20e0c59 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -757,7 +757,7 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
 /*
  * Interfaces that can be used by architecture code to keep track of
  * memory type of pfn mappings specified by the remap_pfn_range,
- * vm_insert_pfn.
+ * vmf_insert_pfn.
  */
 
 /*
@@ -773,7 +773,7 @@ static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
 
 /*
  * track_pfn_insert is called when a _new_ single pfn is established
- * by vm_insert_pfn().
+ * by vmf_insert_pfn().
  */
 static inline void track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
 				    pfn_t pfn)
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 4c92e3ba3e16..dde947083d4e 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -107,7 +107,7 @@ enum hmm_pfn_flag_e {
  * HMM_PFN_ERROR: corresponding CPU page table entry points to poisoned memory
  * HMM_PFN_NONE: corresponding CPU page table entry is pte_none()
  * HMM_PFN_SPECIAL: corresponding CPU page table entry is special; i.e., the
- *      result of vm_insert_pfn() or vm_insert_page(). Therefore, it should not
+ *      result of vmf_insert_pfn() or vm_insert_page(). Therefore, it should not
  *      be mirrored by a device, because the entry will never have HMM_PFN_VALID
  *      set and the pfn value is undefined.
  *
-- 
cgit v1.2.3


From ae2b01f37044c10e975d22116755df56252b09d8 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 26 Oct 2018 15:04:29 -0700
Subject: mm: remove vm_insert_pfn()

All callers are now converted to vmf_insert_pfn() so convert
vmf_insert_pfn() from being a compatibility wrapper around vm_insert_pfn()
to being a compatibility wrapper around vmf_insert_pfn_prot().

Link: http://lkml.kernel.org/r/20180828145728.11873-8-willy@infradead.org
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 15 +--------------
 mm/memory.c        | 54 +++++++++++++++++++++++++++++-------------------------
 2 files changed, 30 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0f5db0455e61..737279bb479c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2502,7 +2502,7 @@ struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
 int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
 			unsigned long pfn, unsigned long size, pgprot_t);
 int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
-int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn);
 vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn, pgprot_t pgprot);
@@ -2525,19 +2525,6 @@ static inline vm_fault_t vmf_insert_page(struct vm_area_struct *vma,
 	return VM_FAULT_NOPAGE;
 }
 
-static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma,
-			unsigned long addr, unsigned long pfn)
-{
-	int err = vm_insert_pfn(vma, addr, pfn);
-
-	if (err == -ENOMEM)
-		return VM_FAULT_OOM;
-	if (err < 0 && err != -EBUSY)
-		return VM_FAULT_SIGBUS;
-
-	return VM_FAULT_NOPAGE;
-}
-
 static inline vm_fault_t vmf_error(int err)
 {
 	if (err == -ENOMEM)
diff --git a/mm/memory.c b/mm/memory.c
index 6365144f8267..08653d0a795a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1602,30 +1602,6 @@ static int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 	return ret;
 }
 
-/**
- * vm_insert_pfn - insert single pfn into user vma
- * @vma: user vma to map to
- * @addr: target user address of this page
- * @pfn: source kernel pfn
- *
- * Similar to vm_insert_page, this allows drivers to insert individual pages
- * they've allocated into a user vma. Same comments apply.
- *
- * This function should only be called from a vm_ops->fault handler, and
- * in that case the handler should return NULL.
- *
- * vma cannot be a COW mapping.
- *
- * As this is called only for pages that do not currently exist, we
- * do not need to flush old virtual caches or the TLB.
- */
-int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
-			unsigned long pfn)
-{
-	return vm_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot);
-}
-EXPORT_SYMBOL(vm_insert_pfn);
-
 /**
  * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot
  * @vma: user vma to map to
@@ -1638,9 +1614,10 @@ EXPORT_SYMBOL(vm_insert_pfn);
  *
  * This only makes sense for IO mappings, and it makes no sense for
  * COW mappings.  In general, using multiple vmas is preferable;
- * vm_insert_pfn_prot should only be used if using multiple VMAs is
+ * vmf_insert_pfn_prot should only be used if using multiple VMAs is
  * impractical.
  *
+ * Context: Process context.  May allocate using %GFP_KERNEL.
  * Return: vm_fault_t value.
  */
 vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
@@ -1657,6 +1634,33 @@ vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 }
 EXPORT_SYMBOL(vmf_insert_pfn_prot);
 
+/**
+ * vmf_insert_pfn - insert single pfn into user vma
+ * @vma: user vma to map to
+ * @addr: target user address of this page
+ * @pfn: source kernel pfn
+ *
+ * Similar to vm_insert_page, this allows drivers to insert individual pages
+ * they've allocated into a user vma. Same comments apply.
+ *
+ * This function should only be called from a vm_ops->fault handler, and
+ * in that case the handler should return the result of this function.
+ *
+ * vma cannot be a COW mapping.
+ *
+ * As this is called only for pages that do not currently exist, we
+ * do not need to flush old virtual caches or the TLB.
+ *
+ * Context: Process context.  May allocate using %GFP_KERNEL.
+ * Return: vm_fault_t value.
+ */
+vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
+			unsigned long pfn)
+{
+	return vmf_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vmf_insert_pfn);
+
 static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn)
 {
 	/* these checks mirror the abort conditions in vm_normal_page */
-- 
cgit v1.2.3


From cc252eae85e09552f9c1e7ac0c3227f835efdf2d Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Fri, 26 Oct 2018 15:05:34 -0700
Subject: mm, slab: combine kmalloc_caches and kmalloc_dma_caches

Patch series "kmalloc-reclaimable caches", v4.

As discussed at LSF/MM [1] here's a patchset that introduces
kmalloc-reclaimable caches (more details in the second patch) and uses
them for dcache external names.  That allows us to repurpose the
NR_INDIRECTLY_RECLAIMABLE_BYTES counter later in the series.

With patch 3/6, dcache external names are allocated from kmalloc-rcl-*
caches, eliminating the need for manual accounting.  More importantly, it
also ensures the reclaimable kmalloc allocations are grouped in pages
separate from the regular kmalloc allocations.  The need for proper
accounting of dcache external names has shown it's easy for misbehaving
process to allocate lots of them, causing premature OOMs.  Without the
added grouping, it's likely that a similar workload can interleave the
dcache external names allocations with regular kmalloc allocations (note:
I haven't searched myself for an example of such regular kmalloc
allocation, but I would be very surprised if there wasn't some).  A
pathological case would be e.g.  one 64byte regular allocations with 63
external dcache names in a page (64x64=4096), which means the page is not
freed even after reclaiming after all dcache names, and the process can
thus "steal" the whole page with single 64byte allocation.

If other kmalloc users similar to dcache external names become identified,
they can also benefit from the new functionality simply by adding
__GFP_RECLAIMABLE to the kmalloc calls.

Side benefits of the patchset (that could be also merged separately)
include removed branch for detecting __GFP_DMA kmalloc(), and shortening
kmalloc cache names in /proc/slabinfo output.  The latter is potentially
an ABI break in case there are tools parsing the names and expecting the
values to be in bytes.

This is how /proc/slabinfo looks like after booting in virtme:

...
kmalloc-rcl-4M         0      0 4194304    1 1024 : tunables    1    1    0 : slabdata      0      0      0
...
kmalloc-rcl-96         7     32    128   32    1 : tunables  120   60    8 : slabdata      1      1      0
kmalloc-rcl-64        25    128     64   64    1 : tunables  120   60    8 : slabdata      2      2      0
kmalloc-rcl-32         0      0     32  124    1 : tunables  120   60    8 : slabdata      0      0      0
kmalloc-4M             0      0 4194304    1 1024 : tunables    1    1    0 : slabdata      0      0      0
kmalloc-2M             0      0 2097152    1  512 : tunables    1    1    0 : slabdata      0      0      0
kmalloc-1M             0      0 1048576    1  256 : tunables    1    1    0 : slabdata      0      0      0
...

/proc/vmstat with renamed nr_indirectly_reclaimable_bytes counter:

...
nr_slab_reclaimable 2817
nr_slab_unreclaimable 1781
...
nr_kernel_misc_reclaimable 0
...

/proc/meminfo with new KReclaimable counter:

...
Shmem:               564 kB
KReclaimable:      11260 kB
Slab:              18368 kB
SReclaimable:      11260 kB
SUnreclaim:         7108 kB
KernelStack:        1248 kB
...

This patch (of 6):

The kmalloc caches currently mainain separate (optional) array
kmalloc_dma_caches for __GFP_DMA allocations.  There are tests for
__GFP_DMA in the allocation hotpaths.  We can avoid the branches by
combining kmalloc_caches and kmalloc_dma_caches into a single
two-dimensional array where the outer dimension is cache "type".  This
will also allow to add kmalloc-reclaimable caches as a third type.

Link: http://lkml.kernel.org/r/20180731090649.16028-2-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Christoph Lameter <cl@linux.com>
Acked-by: Roman Gushchin <guro@fb.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Vijayanand Jitta <vjitta@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 42 +++++++++++++++++++++++++++++++-----------
 mm/slab.c            |  4 ++--
 mm/slab_common.c     | 31 ++++++++++++-------------------
 mm/slub.c            | 13 +++++++------
 4 files changed, 52 insertions(+), 38 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index ed9cbddeb4a6..2a7137043e91 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -295,12 +295,29 @@ static inline void __check_heap_object(const void *ptr, unsigned long n,
 #define SLAB_OBJ_MIN_SIZE      (KMALLOC_MIN_SIZE < 16 ? \
                                (KMALLOC_MIN_SIZE) : 16)
 
+enum kmalloc_cache_type {
+	KMALLOC_NORMAL = 0,
+#ifdef CONFIG_ZONE_DMA
+	KMALLOC_DMA,
+#endif
+	NR_KMALLOC_TYPES
+};
+
 #ifndef CONFIG_SLOB
-extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
+extern struct kmem_cache *
+kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
+
+static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags)
+{
+	int is_dma = 0;
+
 #ifdef CONFIG_ZONE_DMA
-extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
+	is_dma = !!(flags & __GFP_DMA);
 #endif
 
+	return is_dma;
+}
+
 /*
  * Figure out which kmalloc slab an allocation of a certain size
  * belongs to.
@@ -501,18 +518,20 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size)) {
+#ifndef CONFIG_SLOB
+		unsigned int index;
+#endif
 		if (size > KMALLOC_MAX_CACHE_SIZE)
 			return kmalloc_large(size, flags);
 #ifndef CONFIG_SLOB
-		if (!(flags & GFP_DMA)) {
-			unsigned int index = kmalloc_index(size);
+		index = kmalloc_index(size);
 
-			if (!index)
-				return ZERO_SIZE_PTR;
+		if (!index)
+			return ZERO_SIZE_PTR;
 
-			return kmem_cache_alloc_trace(kmalloc_caches[index],
-					flags, size);
-		}
+		return kmem_cache_alloc_trace(
+				kmalloc_caches[kmalloc_type(flags)][index],
+				flags, size);
 #endif
 	}
 	return __kmalloc(size, flags);
@@ -542,13 +561,14 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
 #ifndef CONFIG_SLOB
 	if (__builtin_constant_p(size) &&
-		size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) {
+		size <= KMALLOC_MAX_CACHE_SIZE) {
 		unsigned int i = kmalloc_index(size);
 
 		if (!i)
 			return ZERO_SIZE_PTR;
 
-		return kmem_cache_alloc_node_trace(kmalloc_caches[i],
+		return kmem_cache_alloc_node_trace(
+				kmalloc_caches[kmalloc_type(flags)][i],
 						flags, node, size);
 	}
 #endif
diff --git a/mm/slab.c b/mm/slab.c
index d73c7a4820a4..2a5654bb3b3f 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1288,7 +1288,7 @@ void __init kmem_cache_init(void)
 	 * Initialize the caches that provide memory for the  kmem_cache_node
 	 * structures first.  Without this, further allocations will bug.
 	 */
-	kmalloc_caches[INDEX_NODE] = create_kmalloc_cache(
+	kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE] = create_kmalloc_cache(
 				kmalloc_info[INDEX_NODE].name,
 				kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS,
 				0, kmalloc_size(INDEX_NODE));
@@ -1304,7 +1304,7 @@ void __init kmem_cache_init(void)
 		for_each_online_node(nid) {
 			init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);
 
-			init_list(kmalloc_caches[INDEX_NODE],
+			init_list(kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE],
 					  &init_kmem_cache_node[SIZE_NODE + nid], nid);
 		}
 	}
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 3a7ac4f15194..d880b2a3c81b 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -973,14 +973,10 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name,
 	return s;
 }
 
-struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
+struct kmem_cache *
+kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
 EXPORT_SYMBOL(kmalloc_caches);
 
-#ifdef CONFIG_ZONE_DMA
-struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1] __ro_after_init;
-EXPORT_SYMBOL(kmalloc_dma_caches);
-#endif
-
 /*
  * Conversion table for small slabs sizes / 8 to the index in the
  * kmalloc array. This is necessary for slabs < 192 since we have non power
@@ -1040,12 +1036,7 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
 		index = fls(size - 1);
 	}
 
-#ifdef CONFIG_ZONE_DMA
-	if (unlikely((flags & GFP_DMA)))
-		return kmalloc_dma_caches[index];
-
-#endif
-	return kmalloc_caches[index];
+	return kmalloc_caches[kmalloc_type(flags)][index];
 }
 
 /*
@@ -1119,7 +1110,8 @@ void __init setup_kmalloc_cache_index_table(void)
 
 static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
 {
-	kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
+	kmalloc_caches[KMALLOC_NORMAL][idx] = create_kmalloc_cache(
+					kmalloc_info[idx].name,
 					kmalloc_info[idx].size, flags, 0,
 					kmalloc_info[idx].size);
 }
@@ -1132,9 +1124,10 @@ static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
 void __init create_kmalloc_caches(slab_flags_t flags)
 {
 	int i;
+	int type = KMALLOC_NORMAL;
 
 	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
-		if (!kmalloc_caches[i])
+		if (!kmalloc_caches[type][i])
 			new_kmalloc_cache(i, flags);
 
 		/*
@@ -1142,9 +1135,9 @@ void __init create_kmalloc_caches(slab_flags_t flags)
 		 * These have to be created immediately after the
 		 * earlier power of two caches
 		 */
-		if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
+		if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[type][1] && i == 6)
 			new_kmalloc_cache(1, flags);
-		if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
+		if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[type][2] && i == 7)
 			new_kmalloc_cache(2, flags);
 	}
 
@@ -1153,7 +1146,7 @@ void __init create_kmalloc_caches(slab_flags_t flags)
 
 #ifdef CONFIG_ZONE_DMA
 	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
-		struct kmem_cache *s = kmalloc_caches[i];
+		struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i];
 
 		if (s) {
 			unsigned int size = kmalloc_size(i);
@@ -1161,8 +1154,8 @@ void __init create_kmalloc_caches(slab_flags_t flags)
 				 "dma-kmalloc-%u", size);
 
 			BUG_ON(!n);
-			kmalloc_dma_caches[i] = create_kmalloc_cache(n,
-				size, SLAB_CACHE_DMA | flags, 0, 0);
+			kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache(
+				n, size, SLAB_CACHE_DMA | flags, 0, 0);
 		}
 	}
 #endif
diff --git a/mm/slub.c b/mm/slub.c
index 18bd07daf4e4..e3629cd7aff1 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4689,6 +4689,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
 static void __init resiliency_test(void)
 {
 	u8 *p;
+	int type = KMALLOC_NORMAL;
 
 	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
 
@@ -4701,7 +4702,7 @@ static void __init resiliency_test(void)
 	pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n",
 	       p + 16);
 
-	validate_slab_cache(kmalloc_caches[4]);
+	validate_slab_cache(kmalloc_caches[type][4]);
 
 	/* Hmmm... The next two are dangerous */
 	p = kzalloc(32, GFP_KERNEL);
@@ -4710,33 +4711,33 @@ static void __init resiliency_test(void)
 	       p);
 	pr_err("If allocated object is overwritten then not detectable\n\n");
 
-	validate_slab_cache(kmalloc_caches[5]);
+	validate_slab_cache(kmalloc_caches[type][5]);
 	p = kzalloc(64, GFP_KERNEL);
 	p += 64 + (get_cycles() & 0xff) * sizeof(void *);
 	*p = 0x56;
 	pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
 	       p);
 	pr_err("If allocated object is overwritten then not detectable\n\n");
-	validate_slab_cache(kmalloc_caches[6]);
+	validate_slab_cache(kmalloc_caches[type][6]);
 
 	pr_err("\nB. Corruption after free\n");
 	p = kzalloc(128, GFP_KERNEL);
 	kfree(p);
 	*p = 0x78;
 	pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p);
-	validate_slab_cache(kmalloc_caches[7]);
+	validate_slab_cache(kmalloc_caches[type][7]);
 
 	p = kzalloc(256, GFP_KERNEL);
 	kfree(p);
 	p[50] = 0x9a;
 	pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
-	validate_slab_cache(kmalloc_caches[8]);
+	validate_slab_cache(kmalloc_caches[type][8]);
 
 	p = kzalloc(512, GFP_KERNEL);
 	kfree(p);
 	p[512] = 0xab;
 	pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p);
-	validate_slab_cache(kmalloc_caches[9]);
+	validate_slab_cache(kmalloc_caches[type][9]);
 }
 #else
 #ifdef CONFIG_SYSFS
-- 
cgit v1.2.3


From 1291523f2c1d631fea34102fd241fb54a4e8f7a0 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Fri, 26 Oct 2018 15:05:38 -0700
Subject: mm, slab/slub: introduce kmalloc-reclaimable caches

Kmem caches can be created with a SLAB_RECLAIM_ACCOUNT flag, which
indicates they contain objects which can be reclaimed under memory
pressure (typically through a shrinker).  This makes the slab pages
accounted as NR_SLAB_RECLAIMABLE in vmstat, which is reflected also the
MemAvailable meminfo counter and in overcommit decisions.  The slab pages
are also allocated with __GFP_RECLAIMABLE, which is good for
anti-fragmentation through grouping pages by mobility.

The generic kmalloc-X caches are created without this flag, but sometimes
are used also for objects that can be reclaimed, which due to varying size
cannot have a dedicated kmem cache with SLAB_RECLAIM_ACCOUNT flag.  A
prominent example are dcache external names, which prompted the creation
of a new, manually managed vmstat counter NR_INDIRECTLY_RECLAIMABLE_BYTES
in commit f1782c9bc547 ("dcache: account external names as indirectly
reclaimable memory").

To better handle this and any other similar cases, this patch introduces
SLAB_RECLAIM_ACCOUNT variants of kmalloc caches, named kmalloc-rcl-X.
They are used whenever the kmalloc() call passes __GFP_RECLAIMABLE among
gfp flags.  They are added to the kmalloc_caches array as a new type.
Allocations with both __GFP_DMA and __GFP_RECLAIMABLE will use a dma type
cache.

This change only applies to SLAB and SLUB, not SLOB.  This is fine, since
SLOB's target are tiny system and this patch does add some overhead of
kmem management objects.

Link: http://lkml.kernel.org/r/20180731090649.16028-3-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Christoph Lameter <cl@linux.com>
Acked-by: Roman Gushchin <guro@fb.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Vijayanand Jitta <vjitta@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 16 +++++++++++++++-
 mm/slab_common.c     | 48 +++++++++++++++++++++++++++++++-----------------
 2 files changed, 46 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 2a7137043e91..918f374e7156 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -295,8 +295,13 @@ static inline void __check_heap_object(const void *ptr, unsigned long n,
 #define SLAB_OBJ_MIN_SIZE      (KMALLOC_MIN_SIZE < 16 ? \
                                (KMALLOC_MIN_SIZE) : 16)
 
+/*
+ * Whenever changing this, take care of that kmalloc_type() and
+ * create_kmalloc_caches() still work as intended.
+ */
 enum kmalloc_cache_type {
 	KMALLOC_NORMAL = 0,
+	KMALLOC_RECLAIM,
 #ifdef CONFIG_ZONE_DMA
 	KMALLOC_DMA,
 #endif
@@ -310,12 +315,21 @@ kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
 static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags)
 {
 	int is_dma = 0;
+	int type_dma = 0;
+	int is_reclaimable;
 
 #ifdef CONFIG_ZONE_DMA
 	is_dma = !!(flags & __GFP_DMA);
+	type_dma = is_dma * KMALLOC_DMA;
 #endif
 
-	return is_dma;
+	is_reclaimable = !!(flags & __GFP_RECLAIMABLE);
+
+	/*
+	 * If an allocation is both __GFP_DMA and __GFP_RECLAIMABLE, return
+	 * KMALLOC_DMA and effectively ignore __GFP_RECLAIMABLE
+	 */
+	return type_dma + (is_reclaimable & !is_dma) * KMALLOC_RECLAIM;
 }
 
 /*
diff --git a/mm/slab_common.c b/mm/slab_common.c
index d880b2a3c81b..5b19439fd862 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1108,10 +1108,21 @@ void __init setup_kmalloc_cache_index_table(void)
 	}
 }
 
-static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
+static void __init
+new_kmalloc_cache(int idx, int type, slab_flags_t flags)
 {
-	kmalloc_caches[KMALLOC_NORMAL][idx] = create_kmalloc_cache(
-					kmalloc_info[idx].name,
+	const char *name;
+
+	if (type == KMALLOC_RECLAIM) {
+		flags |= SLAB_RECLAIM_ACCOUNT;
+		name = kasprintf(GFP_NOWAIT, "kmalloc-rcl-%u",
+						kmalloc_info[idx].size);
+		BUG_ON(!name);
+	} else {
+		name = kmalloc_info[idx].name;
+	}
+
+	kmalloc_caches[type][idx] = create_kmalloc_cache(name,
 					kmalloc_info[idx].size, flags, 0,
 					kmalloc_info[idx].size);
 }
@@ -1123,22 +1134,25 @@ static void __init new_kmalloc_cache(int idx, slab_flags_t flags)
  */
 void __init create_kmalloc_caches(slab_flags_t flags)
 {
-	int i;
-	int type = KMALLOC_NORMAL;
+	int i, type;
 
-	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
-		if (!kmalloc_caches[type][i])
-			new_kmalloc_cache(i, flags);
+	for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) {
+		for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
+			if (!kmalloc_caches[type][i])
+				new_kmalloc_cache(i, type, flags);
 
-		/*
-		 * Caches that are not of the two-to-the-power-of size.
-		 * These have to be created immediately after the
-		 * earlier power of two caches
-		 */
-		if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[type][1] && i == 6)
-			new_kmalloc_cache(1, flags);
-		if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[type][2] && i == 7)
-			new_kmalloc_cache(2, flags);
+			/*
+			 * Caches that are not of the two-to-the-power-of size.
+			 * These have to be created immediately after the
+			 * earlier power of two caches
+			 */
+			if (KMALLOC_MIN_SIZE <= 32 && i == 6 &&
+					!kmalloc_caches[type][1])
+				new_kmalloc_cache(1, type, flags);
+			if (KMALLOC_MIN_SIZE <= 64 && i == 7 &&
+					!kmalloc_caches[type][2])
+				new_kmalloc_cache(2, type, flags);
+		}
 	}
 
 	/* Kmalloc array is now usable */
-- 
cgit v1.2.3


From b29940c1abd7a4c3abeb926df0a5ec84d6902d47 Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Fri, 26 Oct 2018 15:05:46 -0700
Subject: mm: rename and change semantics of nr_indirectly_reclaimable_bytes

The vmstat counter NR_INDIRECTLY_RECLAIMABLE_BYTES was introduced by
commit eb59254608bc ("mm: introduce NR_INDIRECTLY_RECLAIMABLE_BYTES") with
the goal of accounting objects that can be reclaimed, but cannot be
allocated via a SLAB_RECLAIM_ACCOUNT cache.  This is now possible via
kmalloc() with __GFP_RECLAIMABLE flag, and the dcache external names user
is converted.

The counter is however still useful for accounting direct page allocations
(i.e.  not slab) with a shrinker, such as the ION page pool.  So keep it,
and:

- change granularity to pages to be more like other counters; sub-page
  allocations should be able to use kmalloc
- rename the counter to NR_KERNEL_MISC_RECLAIMABLE
- expose the counter again in vmstat as "nr_kernel_misc_reclaimable"; we can
  again remove the check for not printing "hidden" counters

Link: http://lkml.kernel.org/r/20180731090649.16028-5-vbabka@suse.cz
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Christoph Lameter <cl@linux.com>
Acked-by: Roman Gushchin <guro@fb.com>
Cc: Vijayanand Jitta <vjitta@codeaurora.org>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/staging/android/ion/ion_page_pool.c |  8 ++++----
 include/linux/mmzone.h                      |  2 +-
 mm/page_alloc.c                             | 19 +++++++------------
 mm/util.c                                   |  3 +--
 mm/vmstat.c                                 |  6 +-----
 5 files changed, 14 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/staging/android/ion/ion_page_pool.c b/drivers/staging/android/ion/ion_page_pool.c
index 9bc56eb48d2a..0d2a95957ee8 100644
--- a/drivers/staging/android/ion/ion_page_pool.c
+++ b/drivers/staging/android/ion/ion_page_pool.c
@@ -33,8 +33,8 @@ static void ion_page_pool_add(struct ion_page_pool *pool, struct page *page)
 		pool->low_count++;
 	}
 
-	mod_node_page_state(page_pgdat(page), NR_INDIRECTLY_RECLAIMABLE_BYTES,
-			    (1 << (PAGE_SHIFT + pool->order)));
+	mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE,
+							1 << pool->order);
 	mutex_unlock(&pool->mutex);
 }
 
@@ -53,8 +53,8 @@ static struct page *ion_page_pool_remove(struct ion_page_pool *pool, bool high)
 	}
 
 	list_del(&page->lru);
-	mod_node_page_state(page_pgdat(page), NR_INDIRECTLY_RECLAIMABLE_BYTES,
-			    -(1 << (PAGE_SHIFT + pool->order)));
+	mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE,
+							-(1 << pool->order));
 	return page;
 }
 
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d4b0c79d2924..7bbeba21f6a3 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -180,7 +180,7 @@ enum node_stat_item {
 	NR_VMSCAN_IMMEDIATE,	/* Prioritise for reclaim when writeback ends */
 	NR_DIRTIED,		/* page dirtyings since bootup */
 	NR_WRITTEN,		/* page writings since bootup */
-	NR_INDIRECTLY_RECLAIMABLE_BYTES, /* measured in bytes */
+	NR_KERNEL_MISC_RECLAIMABLE,	/* reclaimable non-slab kernel pages */
 	NR_VM_NODE_STAT_ITEMS
 };
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 747031c2352d..20f25d06c00c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4701,6 +4701,7 @@ long si_mem_available(void)
 	unsigned long pagecache;
 	unsigned long wmark_low = 0;
 	unsigned long pages[NR_LRU_LISTS];
+	unsigned long reclaimable;
 	struct zone *zone;
 	int lru;
 
@@ -4726,19 +4727,13 @@ long si_mem_available(void)
 	available += pagecache;
 
 	/*
-	 * Part of the reclaimable slab consists of items that are in use,
-	 * and cannot be freed. Cap this estimate at the low watermark.
+	 * Part of the reclaimable slab and other kernel memory consists of
+	 * items that are in use, and cannot be freed. Cap this estimate at the
+	 * low watermark.
 	 */
-	available += global_node_page_state(NR_SLAB_RECLAIMABLE) -
-		     min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2,
-			 wmark_low);
-
-	/*
-	 * Part of the kernel memory, which can be released under memory
-	 * pressure.
-	 */
-	available += global_node_page_state(NR_INDIRECTLY_RECLAIMABLE_BYTES) >>
-		PAGE_SHIFT;
+	reclaimable = global_node_page_state(NR_SLAB_RECLAIMABLE) +
+			global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
+	available += reclaimable - min(reclaimable / 2, wmark_low);
 
 	if (available < 0)
 		available = 0;
diff --git a/mm/util.c b/mm/util.c
index 470f5cd80b64..f740754f5012 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -678,8 +678,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 		 * Part of the kernel memory, which can be released
 		 * under memory pressure.
 		 */
-		free += global_node_page_state(
-			NR_INDIRECTLY_RECLAIMABLE_BYTES) >> PAGE_SHIFT;
+		free += global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE);
 
 		/*
 		 * Leave reserved pages. The pages are not for anonymous pages.
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 7878da76abf2..2cec2fa4c8ae 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1161,7 +1161,7 @@ const char * const vmstat_text[] = {
 	"nr_vmscan_immediate_reclaim",
 	"nr_dirtied",
 	"nr_written",
-	"", /* nr_indirectly_reclaimable */
+	"nr_kernel_misc_reclaimable",
 
 	/* enum writeback_stat_item counters */
 	"nr_dirty_threshold",
@@ -1706,10 +1706,6 @@ static int vmstat_show(struct seq_file *m, void *arg)
 	unsigned long *l = arg;
 	unsigned long off = l - (unsigned long *)m->private;
 
-	/* Skip hidden vmstat items. */
-	if (*vmstat_text[off] == '\0')
-		return 0;
-
 	seq_puts(m, vmstat_text[off]);
 	seq_put_decimal_ull(m, " ", *l);
 	seq_putc(m, '\n');
-- 
cgit v1.2.3


From 1899ad18c6072d689896badafb81267b0a1092a4 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 26 Oct 2018 15:06:04 -0700
Subject: mm: workingset: tell cache transitions from workingset thrashing

Refaults happen during transitions between workingsets as well as in-place
thrashing.  Knowing the difference between the two has a range of
applications, including measuring the impact of memory shortage on the
system performance, as well as the ability to smarter balance pressure
between the filesystem cache and the swap-backed workingset.

During workingset transitions, inactive cache refaults and pushes out
established active cache.  When that active cache isn't stale, however,
and also ends up refaulting, that's bonafide thrashing.

Introduce a new page flag that tells on eviction whether the page has been
active or not in its lifetime.  This bit is then stored in the shadow
entry, to classify refaults as transitioning or thrashing.

How many page->flags does this leave us with on 32-bit?

	20 bits are always page flags

	21 if you have an MMU

	23 with the zone bits for DMA, Normal, HighMem, Movable

	29 with the sparsemem section bits

	30 if PAE is enabled

	31 with this patch.

So on 32-bit PAE, that leaves 1 bit for distinguishing two NUMA nodes.  If
that's not enough, the system can switch to discontigmem and re-gain the 6
or 7 sparsemem section bits.

Link: http://lkml.kernel.org/r/20180828172258.3185-3-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Daniel Drake <drake@endlessm.com>
Tested-by: Suren Baghdasaryan <surenb@google.com>
Cc: Christopher Lameter <cl@linux.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <jweiner@fb.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Enderborg <peter.enderborg@sony.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h         |  1 +
 include/linux/page-flags.h     |  5 ++-
 include/linux/swap.h           |  2 +-
 include/trace/events/mmflags.h |  1 +
 mm/filemap.c                   |  9 ++--
 mm/huge_memory.c               |  1 +
 mm/migrate.c                   |  2 +
 mm/swap_state.c                |  1 +
 mm/vmscan.c                    |  1 +
 mm/vmstat.c                    |  1 +
 mm/workingset.c                | 95 +++++++++++++++++++++++++++---------------
 11 files changed, 77 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7bbeba21f6a3..ba51d5bf7af1 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -163,6 +163,7 @@ enum node_stat_item {
 	NR_ISOLATED_FILE,	/* Temporary isolated pages from file lru */
 	WORKINGSET_REFAULT,
 	WORKINGSET_ACTIVATE,
+	WORKINGSET_RESTORE,
 	WORKINGSET_NODERECLAIM,
 	NR_ANON_MAPPED,	/* Mapped anonymous pages */
 	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 74bee8cecf4c..4d99504f6496 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -69,13 +69,14 @@
  */
 enum pageflags {
 	PG_locked,		/* Page is locked. Don't touch. */
-	PG_error,
 	PG_referenced,
 	PG_uptodate,
 	PG_dirty,
 	PG_lru,
 	PG_active,
+	PG_workingset,
 	PG_waiters,		/* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */
+	PG_error,
 	PG_slab,
 	PG_owner_priv_1,	/* Owner use. If pagecache, fs may use*/
 	PG_arch_1,
@@ -280,6 +281,8 @@ PAGEFLAG(Dirty, dirty, PF_HEAD) TESTSCFLAG(Dirty, dirty, PF_HEAD)
 PAGEFLAG(LRU, lru, PF_HEAD) __CLEARPAGEFLAG(LRU, lru, PF_HEAD)
 PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
 	TESTCLEARFLAG(Active, active, PF_HEAD)
+PAGEFLAG(Workingset, workingset, PF_HEAD)
+	TESTCLEARFLAG(Workingset, workingset, PF_HEAD)
 __PAGEFLAG(Slab, slab, PF_NO_TAIL)
 __PAGEFLAG(SlobFree, slob_free, PF_NO_TAIL)
 PAGEFLAG(Checked, checked, PF_NO_COMPOUND)	   /* Used by some filesystems */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 8e2c11e692ba..b93740d72e78 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -296,7 +296,7 @@ struct vma_swap_readahead {
 
 /* linux/mm/workingset.c */
 void *workingset_eviction(struct address_space *mapping, struct page *page);
-bool workingset_refault(void *shadow);
+void workingset_refault(struct page *page, void *shadow);
 void workingset_activation(struct page *page);
 
 /* Do not use directly, use workingset_lookup_update */
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index a81cffb76d89..a1675d43777e 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -88,6 +88,7 @@
 	{1UL << PG_dirty,		"dirty"		},		\
 	{1UL << PG_lru,			"lru"		},		\
 	{1UL << PG_active,		"active"	},		\
+	{1UL << PG_workingset,		"workingset"	},		\
 	{1UL << PG_slab,		"slab"		},		\
 	{1UL << PG_owner_priv_1,	"owner_priv_1"	},		\
 	{1UL << PG_arch_1,		"arch_1"	},		\
diff --git a/mm/filemap.c b/mm/filemap.c
index de6fed2a0815..7997adce5a29 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -915,12 +915,9 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 		 * data from the working set, only to cache data that will
 		 * get overwritten with something else, is a waste of memory.
 		 */
-		if (!(gfp_mask & __GFP_WRITE) &&
-		    shadow && workingset_refault(shadow)) {
-			SetPageActive(page);
-			workingset_activation(page);
-		} else
-			ClearPageActive(page);
+		WARN_ON_ONCE(PageActive(page));
+		if (!(gfp_mask & __GFP_WRITE) && shadow)
+			workingset_refault(page, shadow);
 		lru_cache_add(page);
 	}
 	return ret;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index deed97fba979..8ea1b36bd452 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2369,6 +2369,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
 			 (1L << PG_mlocked) |
 			 (1L << PG_uptodate) |
 			 (1L << PG_active) |
+			 (1L << PG_workingset) |
 			 (1L << PG_locked) |
 			 (1L << PG_unevictable) |
 			 (1L << PG_dirty)));
diff --git a/mm/migrate.c b/mm/migrate.c
index 84381b55b2bd..1ea27b343ccd 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -685,6 +685,8 @@ void migrate_page_states(struct page *newpage, struct page *page)
 		SetPageActive(newpage);
 	} else if (TestClearPageUnevictable(page))
 		SetPageUnevictable(newpage);
+	if (PageWorkingset(page))
+		SetPageWorkingset(newpage);
 	if (PageChecked(page))
 		SetPageChecked(newpage);
 	if (PageMappedToDisk(page))
diff --git a/mm/swap_state.c b/mm/swap_state.c
index ecee9c6c4cc1..0d6a7f268d2e 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -448,6 +448,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 			/*
 			 * Initiate read into locked page and return.
 			 */
+			SetPageWorkingset(new_page);
 			lru_cache_add_anon(new_page);
 			*new_page_allocated = true;
 			return new_page;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 961401c46334..87e9fef341d2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2145,6 +2145,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
 		}
 
 		ClearPageActive(page);	/* we are de-activating */
+		SetPageWorkingset(page);
 		list_add(&page->lru, &l_inactive);
 	}
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 2cec2fa4c8ae..d918f6192d15 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1145,6 +1145,7 @@ const char * const vmstat_text[] = {
 	"nr_isolated_file",
 	"workingset_refault",
 	"workingset_activate",
+	"workingset_restore",
 	"workingset_nodereclaim",
 	"nr_anon_pages",
 	"nr_mapped",
diff --git a/mm/workingset.c b/mm/workingset.c
index 7d5fa0dd2b38..99b7f7c09b13 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -121,7 +121,7 @@
  * the only thing eating into inactive list space is active pages.
  *
  *
- *		Activating refaulting pages
+ *		Refaulting inactive pages
  *
  * All that is known about the active list is that the pages have been
  * accessed more than once in the past.  This means that at any given
@@ -134,6 +134,10 @@
  * used less frequently than the refaulting page - or even not used at
  * all anymore.
  *
+ * That means if inactive cache is refaulting with a suitable refault
+ * distance, we assume the cache workingset is transitioning and put
+ * pressure on the current active list.
+ *
  * If this is wrong and demotion kicks in, the pages which are truly
  * used more frequently will be reactivated while the less frequently
  * used once will be evicted from memory.
@@ -141,6 +145,14 @@
  * But if this is right, the stale pages will be pushed out of memory
  * and the used pages get to stay in cache.
  *
+ *		Refaulting active pages
+ *
+ * If on the other hand the refaulting pages have recently been
+ * deactivated, it means that the active list is no longer protecting
+ * actively used cache from reclaim. The cache is NOT transitioning to
+ * a different workingset; the existing workingset is thrashing in the
+ * space allocated to the page cache.
+ *
  *
  *		Implementation
  *
@@ -156,8 +168,7 @@
  */
 
 #define EVICTION_SHIFT	(RADIX_TREE_EXCEPTIONAL_ENTRY + \
-			 NODES_SHIFT +	\
-			 MEM_CGROUP_ID_SHIFT)
+			 1 + NODES_SHIFT + MEM_CGROUP_ID_SHIFT)
 #define EVICTION_MASK	(~0UL >> EVICTION_SHIFT)
 
 /*
@@ -170,23 +181,28 @@
  */
 static unsigned int bucket_order __read_mostly;
 
-static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction)
+static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction,
+			 bool workingset)
 {
 	eviction >>= bucket_order;
 	eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid;
 	eviction = (eviction << NODES_SHIFT) | pgdat->node_id;
+	eviction = (eviction << 1) | workingset;
 	eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT);
 
 	return (void *)(eviction | RADIX_TREE_EXCEPTIONAL_ENTRY);
 }
 
 static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
-			  unsigned long *evictionp)
+			  unsigned long *evictionp, bool *workingsetp)
 {
 	unsigned long entry = (unsigned long)shadow;
 	int memcgid, nid;
+	bool workingset;
 
 	entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT;
+	workingset = entry & 1;
+	entry >>= 1;
 	nid = entry & ((1UL << NODES_SHIFT) - 1);
 	entry >>= NODES_SHIFT;
 	memcgid = entry & ((1UL << MEM_CGROUP_ID_SHIFT) - 1);
@@ -195,6 +211,7 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
 	*memcgidp = memcgid;
 	*pgdat = NODE_DATA(nid);
 	*evictionp = entry << bucket_order;
+	*workingsetp = workingset;
 }
 
 /**
@@ -207,8 +224,8 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat,
  */
 void *workingset_eviction(struct address_space *mapping, struct page *page)
 {
-	struct mem_cgroup *memcg = page_memcg(page);
 	struct pglist_data *pgdat = page_pgdat(page);
+	struct mem_cgroup *memcg = page_memcg(page);
 	int memcgid = mem_cgroup_id(memcg);
 	unsigned long eviction;
 	struct lruvec *lruvec;
@@ -220,30 +237,30 @@ void *workingset_eviction(struct address_space *mapping, struct page *page)
 
 	lruvec = mem_cgroup_lruvec(pgdat, memcg);
 	eviction = atomic_long_inc_return(&lruvec->inactive_age);
-	return pack_shadow(memcgid, pgdat, eviction);
+	return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
 }
 
 /**
  * workingset_refault - evaluate the refault of a previously evicted page
+ * @page: the freshly allocated replacement page
  * @shadow: shadow entry of the evicted page
  *
  * Calculates and evaluates the refault distance of the previously
  * evicted page in the context of the node it was allocated in.
- *
- * Returns %true if the page should be activated, %false otherwise.
  */
-bool workingset_refault(void *shadow)
+void workingset_refault(struct page *page, void *shadow)
 {
 	unsigned long refault_distance;
+	struct pglist_data *pgdat;
 	unsigned long active_file;
 	struct mem_cgroup *memcg;
 	unsigned long eviction;
 	struct lruvec *lruvec;
 	unsigned long refault;
-	struct pglist_data *pgdat;
+	bool workingset;
 	int memcgid;
 
-	unpack_shadow(shadow, &memcgid, &pgdat, &eviction);
+	unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset);
 
 	rcu_read_lock();
 	/*
@@ -263,41 +280,51 @@ bool workingset_refault(void *shadow)
 	 * configurations instead.
 	 */
 	memcg = mem_cgroup_from_id(memcgid);
-	if (!mem_cgroup_disabled() && !memcg) {
-		rcu_read_unlock();
-		return false;
-	}
+	if (!mem_cgroup_disabled() && !memcg)
+		goto out;
 	lruvec = mem_cgroup_lruvec(pgdat, memcg);
 	refault = atomic_long_read(&lruvec->inactive_age);
 	active_file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, MAX_NR_ZONES);
 
 	/*
-	 * The unsigned subtraction here gives an accurate distance
-	 * across inactive_age overflows in most cases.
+	 * Calculate the refault distance
 	 *
-	 * There is a special case: usually, shadow entries have a
-	 * short lifetime and are either refaulted or reclaimed along
-	 * with the inode before they get too old.  But it is not
-	 * impossible for the inactive_age to lap a shadow entry in
-	 * the field, which can then can result in a false small
-	 * refault distance, leading to a false activation should this
-	 * old entry actually refault again.  However, earlier kernels
-	 * used to deactivate unconditionally with *every* reclaim
-	 * invocation for the longest time, so the occasional
-	 * inappropriate activation leading to pressure on the active
-	 * list is not a problem.
+	 * The unsigned subtraction here gives an accurate distance
+	 * across inactive_age overflows in most cases. There is a
+	 * special case: usually, shadow entries have a short lifetime
+	 * and are either refaulted or reclaimed along with the inode
+	 * before they get too old.  But it is not impossible for the
+	 * inactive_age to lap a shadow entry in the field, which can
+	 * then result in a false small refault distance, leading to a
+	 * false activation should this old entry actually refault
+	 * again.  However, earlier kernels used to deactivate
+	 * unconditionally with *every* reclaim invocation for the
+	 * longest time, so the occasional inappropriate activation
+	 * leading to pressure on the active list is not a problem.
 	 */
 	refault_distance = (refault - eviction) & EVICTION_MASK;
 
 	inc_lruvec_state(lruvec, WORKINGSET_REFAULT);
 
-	if (refault_distance <= active_file) {
-		inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE);
-		rcu_read_unlock();
-		return true;
+	/*
+	 * Compare the distance to the existing workingset size. We
+	 * don't act on pages that couldn't stay resident even if all
+	 * the memory was available to the page cache.
+	 */
+	if (refault_distance > active_file)
+		goto out;
+
+	SetPageActive(page);
+	atomic_long_inc(&lruvec->inactive_age);
+	inc_lruvec_state(lruvec, WORKINGSET_ACTIVATE);
+
+	/* Page was active prior to eviction */
+	if (workingset) {
+		SetPageWorkingset(page);
+		inc_lruvec_state(lruvec, WORKINGSET_RESTORE);
 	}
+out:
 	rcu_read_unlock();
-	return false;
 }
 
 /**
-- 
cgit v1.2.3


From b1d29ba82cf2bc784f4c963ddd6a2cf29e229b33 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 26 Oct 2018 15:06:08 -0700
Subject: delayacct: track delays from thrashing cache pages

Delay accounting already measures the time a task spends in direct reclaim
and waiting for swapin, but in low memory situations tasks spend can spend
a significant amount of their time waiting on thrashing page cache.  This
isn't tracked right now.

To know the full impact of memory contention on an individual task,
measure the delay when waiting for a recently evicted active cache page to
read back into memory.

Also update tools/accounting/getdelays.c:

     [hannes@computer accounting]$ sudo ./getdelays -d -p 1
     print delayacct stats ON
     PID     1

     CPU             count     real total  virtual total    delay total  delay average
                     50318      745000000      847346785      400533713          0.008ms
     IO              count    delay total  delay average
                       435      122601218              0ms
     SWAP            count    delay total  delay average
                         0              0              0ms
     RECLAIM         count    delay total  delay average
                         0              0              0ms
     THRASHING       count    delay total  delay average
                        19       12621439              0ms

Link: http://lkml.kernel.org/r/20180828172258.3185-4-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Daniel Drake <drake@endlessm.com>
Tested-by: Suren Baghdasaryan <surenb@google.com>
Cc: Christopher Lameter <cl@linux.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <jweiner@fb.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Enderborg <peter.enderborg@sony.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/delayacct.h      | 23 +++++++++++++++++++++++
 include/uapi/linux/taskstats.h |  6 +++++-
 kernel/delayacct.c             | 15 +++++++++++++++
 mm/filemap.c                   | 11 +++++++++++
 tools/accounting/getdelays.c   |  8 +++++++-
 5 files changed, 61 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h
index 31c865d1842e..577d1b25fccd 100644
--- a/include/linux/delayacct.h
+++ b/include/linux/delayacct.h
@@ -57,7 +57,12 @@ struct task_delay_info {
 
 	u64 freepages_start;
 	u64 freepages_delay;	/* wait for memory reclaim */
+
+	u64 thrashing_start;
+	u64 thrashing_delay;	/* wait for thrashing page */
+
 	u32 freepages_count;	/* total count of memory reclaim */
+	u32 thrashing_count;	/* total count of thrash waits */
 };
 #endif
 
@@ -76,6 +81,8 @@ extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
 extern __u64 __delayacct_blkio_ticks(struct task_struct *);
 extern void __delayacct_freepages_start(void);
 extern void __delayacct_freepages_end(void);
+extern void __delayacct_thrashing_start(void);
+extern void __delayacct_thrashing_end(void);
 
 static inline int delayacct_is_task_waiting_on_io(struct task_struct *p)
 {
@@ -156,6 +163,18 @@ static inline void delayacct_freepages_end(void)
 		__delayacct_freepages_end();
 }
 
+static inline void delayacct_thrashing_start(void)
+{
+	if (current->delays)
+		__delayacct_thrashing_start();
+}
+
+static inline void delayacct_thrashing_end(void)
+{
+	if (current->delays)
+		__delayacct_thrashing_end();
+}
+
 #else
 static inline void delayacct_set_flag(int flag)
 {}
@@ -182,6 +201,10 @@ static inline void delayacct_freepages_start(void)
 {}
 static inline void delayacct_freepages_end(void)
 {}
+static inline void delayacct_thrashing_start(void)
+{}
+static inline void delayacct_thrashing_end(void)
+{}
 
 #endif /* CONFIG_TASK_DELAY_ACCT */
 
diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h
index b7aa7bb2349f..5e8ca16a9079 100644
--- a/include/uapi/linux/taskstats.h
+++ b/include/uapi/linux/taskstats.h
@@ -34,7 +34,7 @@
  */
 
 
-#define TASKSTATS_VERSION	8
+#define TASKSTATS_VERSION	9
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
@@ -164,6 +164,10 @@ struct taskstats {
 	/* Delay waiting for memory reclaim */
 	__u64	freepages_count;
 	__u64	freepages_delay_total;
+
+	/* Delay waiting for thrashing page */
+	__u64	thrashing_count;
+	__u64	thrashing_delay_total;
 };
 
 
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index ca8ac2824f0b..2a12b988c717 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -135,9 +135,12 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
 	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
 	tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
 	d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
+	tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
+	d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
 	d->blkio_count += tsk->delays->blkio_count;
 	d->swapin_count += tsk->delays->swapin_count;
 	d->freepages_count += tsk->delays->freepages_count;
+	d->thrashing_count += tsk->delays->thrashing_count;
 	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);
 
 	return 0;
@@ -169,3 +172,15 @@ void __delayacct_freepages_end(void)
 		&current->delays->freepages_count);
 }
 
+void __delayacct_thrashing_start(void)
+{
+	current->delays->thrashing_start = ktime_get_ns();
+}
+
+void __delayacct_thrashing_end(void)
+{
+	delayacct_end(&current->delays->lock,
+		      &current->delays->thrashing_start,
+		      &current->delays->thrashing_delay,
+		      &current->delays->thrashing_count);
+}
diff --git a/mm/filemap.c b/mm/filemap.c
index 7997adce5a29..01a841f17bf4 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -36,6 +36,7 @@
 #include <linux/cleancache.h>
 #include <linux/shmem_fs.h>
 #include <linux/rmap.h>
+#include <linux/delayacct.h>
 #include "internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -1073,8 +1074,15 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 {
 	struct wait_page_queue wait_page;
 	wait_queue_entry_t *wait = &wait_page.wait;
+	bool thrashing = false;
 	int ret = 0;
 
+	if (bit_nr == PG_locked && !PageSwapBacked(page) &&
+	    !PageUptodate(page) && PageWorkingset(page)) {
+		delayacct_thrashing_start();
+		thrashing = true;
+	}
+
 	init_wait(wait);
 	wait->flags = lock ? WQ_FLAG_EXCLUSIVE : 0;
 	wait->func = wake_page_function;
@@ -1113,6 +1121,9 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 
 	finish_wait(q, wait);
 
+	if (thrashing)
+		delayacct_thrashing_end();
+
 	/*
 	 * A signal could leave PageWaiters set. Clearing it here if
 	 * !waitqueue_active would be possible (by open-coding finish_wait),
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
index 9f420d98b5fb..8cb504d30384 100644
--- a/tools/accounting/getdelays.c
+++ b/tools/accounting/getdelays.c
@@ -203,6 +203,8 @@ static void print_delayacct(struct taskstats *t)
 	       "SWAP  %15s%15s%15s\n"
 	       "      %15llu%15llu%15llums\n"
 	       "RECLAIM  %12s%15s%15s\n"
+	       "      %15llu%15llu%15llums\n"
+	       "THRASHING%12s%15s%15s\n"
 	       "      %15llu%15llu%15llums\n",
 	       "count", "real total", "virtual total",
 	       "delay total", "delay average",
@@ -222,7 +224,11 @@ static void print_delayacct(struct taskstats *t)
 	       "count", "delay total", "delay average",
 	       (unsigned long long)t->freepages_count,
 	       (unsigned long long)t->freepages_delay_total,
-	       average_ms(t->freepages_delay_total, t->freepages_count));
+	       average_ms(t->freepages_delay_total, t->freepages_count),
+	       "count", "delay total", "delay average",
+	       (unsigned long long)t->thrashing_count,
+	       (unsigned long long)t->thrashing_delay_total,
+	       average_ms(t->thrashing_delay_total, t->thrashing_count));
 }
 
 static void task_context_switch_counts(struct taskstats *t)
-- 
cgit v1.2.3


From 8508cf3ffad4defa202b303e5b6379efc4cd9054 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 26 Oct 2018 15:06:11 -0700
Subject: sched: loadavg: consolidate LOAD_INT, LOAD_FRAC, CALC_LOAD

There are several definitions of those functions/macros in places that
mess with fixed-point load averages.  Provide an official version.

[akpm@linux-foundation.org: fix missed conversion in block/blk-iolatency.c]
Link: http://lkml.kernel.org/r/20180828172258.3185-5-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Suren Baghdasaryan <surenb@google.com>
Tested-by: Daniel Drake <drake@endlessm.com>
Cc: Christopher Lameter <cl@linux.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <jweiner@fb.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Enderborg <peter.enderborg@sony.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/platforms/cell/cpufreq_spudemand.c |  2 +-
 arch/powerpc/platforms/cell/spufs/sched.c       |  9 +++------
 arch/s390/appldata/appldata_os.c                |  4 ----
 block/blk-iolatency.c                           |  8 +++++---
 drivers/cpuidle/governors/menu.c                |  4 ----
 fs/proc/loadavg.c                               |  3 ---
 include/linux/sched/loadavg.h                   | 21 +++++++++++++++++----
 kernel/debug/kdb/kdb_main.c                     |  7 +------
 kernel/sched/loadavg.c                          | 15 ---------------
 9 files changed, 27 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/cell/cpufreq_spudemand.c b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
index 882944c36ef5..5d8e8b6bb1cc 100644
--- a/arch/powerpc/platforms/cell/cpufreq_spudemand.c
+++ b/arch/powerpc/platforms/cell/cpufreq_spudemand.c
@@ -49,7 +49,7 @@ static int calc_freq(struct spu_gov_info_struct *info)
 	cpu = info->policy->cpu;
 	busy_spus = atomic_read(&cbe_spu_info[cpu_to_node(cpu)].busy_spus);
 
-	CALC_LOAD(info->busy_spus, EXP, busy_spus * FIXED_1);
+	info->busy_spus = calc_load(info->busy_spus, EXP, busy_spus * FIXED_1);
 	pr_debug("cpu %d: busy_spus=%d, info->busy_spus=%ld\n",
 			cpu, busy_spus, info->busy_spus);
 
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index c9ef3c532169..9fcccb4490b9 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -987,9 +987,9 @@ static void spu_calc_load(void)
 	unsigned long active_tasks; /* fixed-point */
 
 	active_tasks = count_active_contexts() * FIXED_1;
-	CALC_LOAD(spu_avenrun[0], EXP_1, active_tasks);
-	CALC_LOAD(spu_avenrun[1], EXP_5, active_tasks);
-	CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks);
+	spu_avenrun[0] = calc_load(spu_avenrun[0], EXP_1, active_tasks);
+	spu_avenrun[1] = calc_load(spu_avenrun[1], EXP_5, active_tasks);
+	spu_avenrun[2] = calc_load(spu_avenrun[2], EXP_15, active_tasks);
 }
 
 static void spusched_wake(struct timer_list *unused)
@@ -1071,9 +1071,6 @@ void spuctx_switch_state(struct spu_context *ctx,
 	}
 }
 
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-
 static int show_spu_loadavg(struct seq_file *s, void *private)
 {
 	int a, b, c;
diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c
index 433a994b1a89..54f375627532 100644
--- a/arch/s390/appldata/appldata_os.c
+++ b/arch/s390/appldata/appldata_os.c
@@ -25,10 +25,6 @@
 
 #include "appldata.h"
 
-
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-
 /*
  * OS data
  *
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 35c48d7b8f78..28f80d227528 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -153,7 +153,7 @@ struct iolatency_grp {
 #define BLKIOLATENCY_MAX_WIN_SIZE NSEC_PER_SEC
 /*
  * These are the constants used to fake the fixed-point moving average
- * calculation just like load average.  The call to CALC_LOAD folds
+ * calculation just like load average.  The call to calc_load() folds
  * (FIXED_1 (2048) - exp_factor) * new_sample into lat_avg.  The sampling
  * window size is bucketed to try to approximately calculate average
  * latency such that 1/exp (decay rate) is [1 min, 2.5 min) when windows
@@ -248,7 +248,7 @@ static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat,
 		return;
 
 	/*
-	 * CALC_LOAD takes in a number stored in fixed point representation.
+	 * calc_load() takes in a number stored in fixed point representation.
 	 * Because we are using this for IO time in ns, the values stored
 	 * are significantly larger than the FIXED_1 denominator (2048).
 	 * Therefore, rounding errors in the calculation are negligible and
@@ -257,7 +257,9 @@ static inline void iolat_update_total_lat_avg(struct iolatency_grp *iolat,
 	exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1,
 			div64_u64(iolat->cur_win_nsec,
 				  BLKIOLATENCY_EXP_BUCKET_SIZE));
-	CALC_LOAD(iolat->lat_avg, iolatency_exp_factors[exp_idx], stat->rqs.mean);
+	iolat->lat_avg = calc_load(iolat->lat_avg,
+				   iolatency_exp_factors[exp_idx],
+				   stat->rqs.mean);
 }
 
 static inline bool iolatency_may_queue(struct iolatency_grp *iolat,
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 575a68f31761..71979605246e 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -130,10 +130,6 @@ struct menu_device {
 	int		interval_ptr;
 };
 
-
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-
 static inline int get_loadavg(unsigned long load)
 {
 	return LOAD_INT(load) * 10 + LOAD_FRAC(load) / 10;
diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c
index d06694757201..8468baee951d 100644
--- a/fs/proc/loadavg.c
+++ b/fs/proc/loadavg.c
@@ -10,9 +10,6 @@
 #include <linux/seqlock.h>
 #include <linux/time.h>
 
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
-
 static int loadavg_proc_show(struct seq_file *m, void *v)
 {
 	unsigned long avnrun[3];
diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h
index 80bc84ba5d2a..cc9cc62bb1f8 100644
--- a/include/linux/sched/loadavg.h
+++ b/include/linux/sched/loadavg.h
@@ -22,10 +22,23 @@ extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift);
 #define EXP_5		2014		/* 1/exp(5sec/5min) */
 #define EXP_15		2037		/* 1/exp(5sec/15min) */
 
-#define CALC_LOAD(load,exp,n) \
-	load *= exp; \
-	load += n*(FIXED_1-exp); \
-	load >>= FSHIFT;
+/*
+ * a1 = a0 * e + a * (1 - e)
+ */
+static inline unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+	unsigned long newload;
+
+	newload = load * exp + active * (FIXED_1 - exp);
+	if (active >= load)
+		newload += FIXED_1-1;
+
+	return newload / FIXED_1;
+}
+
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
 
 extern void calc_global_load(unsigned long ticks);
 
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 2ddfce8f1e8f..bb4fe4e1a601 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2556,16 +2556,11 @@ static int kdb_summary(int argc, const char **argv)
 	}
 	kdb_printf("%02ld:%02ld\n", val.uptime/(60*60), (val.uptime/60)%60);
 
-	/* lifted from fs/proc/proc_misc.c::loadavg_read_proc() */
-
-#define LOAD_INT(x) ((x) >> FSHIFT)
-#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
 	kdb_printf("load avg   %ld.%02ld %ld.%02ld %ld.%02ld\n",
 		LOAD_INT(val.loads[0]), LOAD_FRAC(val.loads[0]),
 		LOAD_INT(val.loads[1]), LOAD_FRAC(val.loads[1]),
 		LOAD_INT(val.loads[2]), LOAD_FRAC(val.loads[2]));
-#undef LOAD_INT
-#undef LOAD_FRAC
+
 	/* Display in kilobytes */
 #define K(x) ((x) << (PAGE_SHIFT - 10))
 	kdb_printf("\nMemTotal:       %8lu kB\nMemFree:        %8lu kB\n"
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index a171c1258109..54fbdfb2d86c 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -91,21 +91,6 @@ long calc_load_fold_active(struct rq *this_rq, long adjust)
 	return delta;
 }
 
-/*
- * a1 = a0 * e + a * (1 - e)
- */
-static unsigned long
-calc_load(unsigned long load, unsigned long exp, unsigned long active)
-{
-	unsigned long newload;
-
-	newload = load * exp + active * (FIXED_1 - exp);
-	if (active >= load)
-		newload += FIXED_1-1;
-
-	return newload / FIXED_1;
-}
-
 #ifdef CONFIG_NO_HZ_COMMON
 /*
  * Handle NO_HZ for the global load-average.
-- 
cgit v1.2.3


From 5c54f5b9edb1aa2eabbb1091c458f1b6776a1896 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 26 Oct 2018 15:06:16 -0700
Subject: sched: loadavg: make calc_load_n() public

It's going to be used in a later patch. Keep the churn separate.

Link: http://lkml.kernel.org/r/20180828172258.3185-6-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Suren Baghdasaryan <surenb@google.com>
Tested-by: Daniel Drake <drake@endlessm.com>
Cc: Christopher Lameter <cl@linux.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <jweiner@fb.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Enderborg <peter.enderborg@sony.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched/loadavg.h |   3 +
 kernel/sched/loadavg.c        | 138 +++++++++++++++++++++---------------------
 2 files changed, 72 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched/loadavg.h b/include/linux/sched/loadavg.h
index cc9cc62bb1f8..4859bea47a7b 100644
--- a/include/linux/sched/loadavg.h
+++ b/include/linux/sched/loadavg.h
@@ -37,6 +37,9 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
 	return newload / FIXED_1;
 }
 
+extern unsigned long calc_load_n(unsigned long load, unsigned long exp,
+				 unsigned long active, unsigned int n);
+
 #define LOAD_INT(x) ((x) >> FSHIFT)
 #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
 
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index 54fbdfb2d86c..28a516575c18 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -91,6 +91,75 @@ long calc_load_fold_active(struct rq *this_rq, long adjust)
 	return delta;
 }
 
+/**
+ * fixed_power_int - compute: x^n, in O(log n) time
+ *
+ * @x:         base of the power
+ * @frac_bits: fractional bits of @x
+ * @n:         power to raise @x to.
+ *
+ * By exploiting the relation between the definition of the natural power
+ * function: x^n := x*x*...*x (x multiplied by itself for n times), and
+ * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
+ * (where: n_i \elem {0, 1}, the binary vector representing n),
+ * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
+ * of course trivially computable in O(log_2 n), the length of our binary
+ * vector.
+ */
+static unsigned long
+fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
+{
+	unsigned long result = 1UL << frac_bits;
+
+	if (n) {
+		for (;;) {
+			if (n & 1) {
+				result *= x;
+				result += 1UL << (frac_bits - 1);
+				result >>= frac_bits;
+			}
+			n >>= 1;
+			if (!n)
+				break;
+			x *= x;
+			x += 1UL << (frac_bits - 1);
+			x >>= frac_bits;
+		}
+	}
+
+	return result;
+}
+
+/*
+ * a1 = a0 * e + a * (1 - e)
+ *
+ * a2 = a1 * e + a * (1 - e)
+ *    = (a0 * e + a * (1 - e)) * e + a * (1 - e)
+ *    = a0 * e^2 + a * (1 - e) * (1 + e)
+ *
+ * a3 = a2 * e + a * (1 - e)
+ *    = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
+ *    = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
+ *
+ *  ...
+ *
+ * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1]
+ *    = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
+ *    = a0 * e^n + a * (1 - e^n)
+ *
+ * [1] application of the geometric series:
+ *
+ *              n         1 - x^(n+1)
+ *     S_n := \Sum x^i = -------------
+ *             i=0          1 - x
+ */
+unsigned long
+calc_load_n(unsigned long load, unsigned long exp,
+	    unsigned long active, unsigned int n)
+{
+	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
+}
+
 #ifdef CONFIG_NO_HZ_COMMON
 /*
  * Handle NO_HZ for the global load-average.
@@ -210,75 +279,6 @@ static long calc_load_nohz_fold(void)
 	return delta;
 }
 
-/**
- * fixed_power_int - compute: x^n, in O(log n) time
- *
- * @x:         base of the power
- * @frac_bits: fractional bits of @x
- * @n:         power to raise @x to.
- *
- * By exploiting the relation between the definition of the natural power
- * function: x^n := x*x*...*x (x multiplied by itself for n times), and
- * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
- * (where: n_i \elem {0, 1}, the binary vector representing n),
- * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
- * of course trivially computable in O(log_2 n), the length of our binary
- * vector.
- */
-static unsigned long
-fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
-{
-	unsigned long result = 1UL << frac_bits;
-
-	if (n) {
-		for (;;) {
-			if (n & 1) {
-				result *= x;
-				result += 1UL << (frac_bits - 1);
-				result >>= frac_bits;
-			}
-			n >>= 1;
-			if (!n)
-				break;
-			x *= x;
-			x += 1UL << (frac_bits - 1);
-			x >>= frac_bits;
-		}
-	}
-
-	return result;
-}
-
-/*
- * a1 = a0 * e + a * (1 - e)
- *
- * a2 = a1 * e + a * (1 - e)
- *    = (a0 * e + a * (1 - e)) * e + a * (1 - e)
- *    = a0 * e^2 + a * (1 - e) * (1 + e)
- *
- * a3 = a2 * e + a * (1 - e)
- *    = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
- *    = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
- *
- *  ...
- *
- * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1]
- *    = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
- *    = a0 * e^n + a * (1 - e^n)
- *
- * [1] application of the geometric series:
- *
- *              n         1 - x^(n+1)
- *     S_n := \Sum x^i = -------------
- *             i=0          1 - x
- */
-static unsigned long
-calc_load_n(unsigned long load, unsigned long exp,
-	    unsigned long active, unsigned int n)
-{
-	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
-}
-
 /*
  * NO_HZ can leave us missing all per-CPU ticks calling
  * calc_load_fold_active(), but since a NO_HZ CPU folds its delta into
-- 
cgit v1.2.3


From eb414681d5a07d28d2ff90dc05f69ec6b232ebd2 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 26 Oct 2018 15:06:27 -0700
Subject: psi: pressure stall information for CPU, memory, and IO

When systems are overcommitted and resources become contended, it's hard
to tell exactly the impact this has on workload productivity, or how close
the system is to lockups and OOM kills.  In particular, when machines work
multiple jobs concurrently, the impact of overcommit in terms of latency
and throughput on the individual job can be enormous.

In order to maximize hardware utilization without sacrificing individual
job health or risk complete machine lockups, this patch implements a way
to quantify resource pressure in the system.

A kernel built with CONFIG_PSI=y creates files in /proc/pressure/ that
expose the percentage of time the system is stalled on CPU, memory, or IO,
respectively.  Stall states are aggregate versions of the per-task delay
accounting delays:

       cpu: some tasks are runnable but not executing on a CPU
       memory: tasks are reclaiming, or waiting for swapin or thrashing cache
       io: tasks are waiting for io completions

These percentages of walltime can be thought of as pressure percentages,
and they give a general sense of system health and productivity loss
incurred by resource overcommit.  They can also indicate when the system
is approaching lockup scenarios and OOMs.

To do this, psi keeps track of the task states associated with each CPU
and samples the time they spend in stall states.  Every 2 seconds, the
samples are averaged across CPUs - weighted by the CPUs' non-idle time to
eliminate artifacts from unused CPUs - and translated into percentages of
walltime.  A running average of those percentages is maintained over 10s,
1m, and 5m periods (similar to the loadaverage).

[hannes@cmpxchg.org: doc fixlet, per Randy]
  Link: http://lkml.kernel.org/r/20180828205625.GA14030@cmpxchg.org
[hannes@cmpxchg.org: code optimization]
  Link: http://lkml.kernel.org/r/20180907175015.GA8479@cmpxchg.org
[hannes@cmpxchg.org: rename psi_clock() to psi_update_work(), per Peter]
  Link: http://lkml.kernel.org/r/20180907145404.GB11088@cmpxchg.org
[hannes@cmpxchg.org: fix build]
  Link: http://lkml.kernel.org/r/20180913014222.GA2370@cmpxchg.org
Link: http://lkml.kernel.org/r/20180828172258.3185-9-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Daniel Drake <drake@endlessm.com>
Tested-by: Suren Baghdasaryan <surenb@google.com>
Cc: Christopher Lameter <cl@linux.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <jweiner@fb.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Enderborg <peter.enderborg@sony.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/accounting/psi.txt |  64 ++++
 include/linux/psi.h              |  28 ++
 include/linux/psi_types.h        |  92 ++++++
 include/linux/sched.h            |  10 +
 init/Kconfig                     |  15 +
 kernel/fork.c                    |   4 +
 kernel/sched/Makefile            |   1 +
 kernel/sched/core.c              |  12 +-
 kernel/sched/psi.c               | 657 +++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h             |   2 +
 kernel/sched/stats.h             |  86 +++++
 mm/compaction.c                  |   5 +
 mm/filemap.c                     |  15 +-
 mm/page_alloc.c                  |   9 +
 mm/vmscan.c                      |   9 +
 15 files changed, 1003 insertions(+), 6 deletions(-)
 create mode 100644 Documentation/accounting/psi.txt
 create mode 100644 include/linux/psi.h
 create mode 100644 include/linux/psi_types.h
 create mode 100644 kernel/sched/psi.c

(limited to 'include/linux')

diff --git a/Documentation/accounting/psi.txt b/Documentation/accounting/psi.txt
new file mode 100644
index 000000000000..3753a82f1cf5
--- /dev/null
+++ b/Documentation/accounting/psi.txt
@@ -0,0 +1,64 @@
+================================
+PSI - Pressure Stall Information
+================================
+
+:Date: April, 2018
+:Author: Johannes Weiner <hannes@cmpxchg.org>
+
+When CPU, memory or IO devices are contended, workloads experience
+latency spikes, throughput losses, and run the risk of OOM kills.
+
+Without an accurate measure of such contention, users are forced to
+either play it safe and under-utilize their hardware resources, or
+roll the dice and frequently suffer the disruptions resulting from
+excessive overcommit.
+
+The psi feature identifies and quantifies the disruptions caused by
+such resource crunches and the time impact it has on complex workloads
+or even entire systems.
+
+Having an accurate measure of productivity losses caused by resource
+scarcity aids users in sizing workloads to hardware--or provisioning
+hardware according to workload demand.
+
+As psi aggregates this information in realtime, systems can be managed
+dynamically using techniques such as load shedding, migrating jobs to
+other systems or data centers, or strategically pausing or killing low
+priority or restartable batch jobs.
+
+This allows maximizing hardware utilization without sacrificing
+workload health or risking major disruptions such as OOM kills.
+
+Pressure interface
+==================
+
+Pressure information for each resource is exported through the
+respective file in /proc/pressure/ -- cpu, memory, and io.
+
+The format for CPU is as such:
+
+some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+
+and for memory and IO:
+
+some avg10=0.00 avg60=0.00 avg300=0.00 total=0
+full avg10=0.00 avg60=0.00 avg300=0.00 total=0
+
+The "some" line indicates the share of time in which at least some
+tasks are stalled on a given resource.
+
+The "full" line indicates the share of time in which all non-idle
+tasks are stalled on a given resource simultaneously. In this state
+actual CPU cycles are going to waste, and a workload that spends
+extended time in this state is considered to be thrashing. This has
+severe impact on performance, and it's useful to distinguish this
+situation from a state where some tasks are stalled but the CPU is
+still doing productive work. As such, time spent in this subset of the
+stall state is tracked separately and exported in the "full" averages.
+
+The ratios are tracked as recent trends over ten, sixty, and three
+hundred second windows, which gives insight into short term events as
+well as medium and long term trends. The total absolute stall time is
+tracked and exported as well, to allow detection of latency spikes
+which wouldn't necessarily make a dent in the time averages, or to
+average trends over custom time frames.
diff --git a/include/linux/psi.h b/include/linux/psi.h
new file mode 100644
index 000000000000..b0daf050de58
--- /dev/null
+++ b/include/linux/psi.h
@@ -0,0 +1,28 @@
+#ifndef _LINUX_PSI_H
+#define _LINUX_PSI_H
+
+#include <linux/psi_types.h>
+#include <linux/sched.h>
+
+#ifdef CONFIG_PSI
+
+extern bool psi_disabled;
+
+void psi_init(void);
+
+void psi_task_change(struct task_struct *task, int clear, int set);
+
+void psi_memstall_tick(struct task_struct *task, int cpu);
+void psi_memstall_enter(unsigned long *flags);
+void psi_memstall_leave(unsigned long *flags);
+
+#else /* CONFIG_PSI */
+
+static inline void psi_init(void) {}
+
+static inline void psi_memstall_enter(unsigned long *flags) {}
+static inline void psi_memstall_leave(unsigned long *flags) {}
+
+#endif /* CONFIG_PSI */
+
+#endif /* _LINUX_PSI_H */
diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
new file mode 100644
index 000000000000..2cf422db5d18
--- /dev/null
+++ b/include/linux/psi_types.h
@@ -0,0 +1,92 @@
+#ifndef _LINUX_PSI_TYPES_H
+#define _LINUX_PSI_TYPES_H
+
+#include <linux/seqlock.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_PSI
+
+/* Tracked task states */
+enum psi_task_count {
+	NR_IOWAIT,
+	NR_MEMSTALL,
+	NR_RUNNING,
+	NR_PSI_TASK_COUNTS,
+};
+
+/* Task state bitmasks */
+#define TSK_IOWAIT	(1 << NR_IOWAIT)
+#define TSK_MEMSTALL	(1 << NR_MEMSTALL)
+#define TSK_RUNNING	(1 << NR_RUNNING)
+
+/* Resources that workloads could be stalled on */
+enum psi_res {
+	PSI_IO,
+	PSI_MEM,
+	PSI_CPU,
+	NR_PSI_RESOURCES,
+};
+
+/*
+ * Pressure states for each resource:
+ *
+ * SOME: Stalled tasks & working tasks
+ * FULL: Stalled tasks & no working tasks
+ */
+enum psi_states {
+	PSI_IO_SOME,
+	PSI_IO_FULL,
+	PSI_MEM_SOME,
+	PSI_MEM_FULL,
+	PSI_CPU_SOME,
+	/* Only per-CPU, to weigh the CPU in the global average: */
+	PSI_NONIDLE,
+	NR_PSI_STATES,
+};
+
+struct psi_group_cpu {
+	/* 1st cacheline updated by the scheduler */
+
+	/* Aggregator needs to know of concurrent changes */
+	seqcount_t seq ____cacheline_aligned_in_smp;
+
+	/* States of the tasks belonging to this group */
+	unsigned int tasks[NR_PSI_TASK_COUNTS];
+
+	/* Period time sampling buckets for each state of interest (ns) */
+	u32 times[NR_PSI_STATES];
+
+	/* Time of last task change in this group (rq_clock) */
+	u64 state_start;
+
+	/* 2nd cacheline updated by the aggregator */
+
+	/* Delta detection against the sampling buckets */
+	u32 times_prev[NR_PSI_STATES] ____cacheline_aligned_in_smp;
+};
+
+struct psi_group {
+	/* Protects data updated during an aggregation */
+	struct mutex stat_lock;
+
+	/* Per-cpu task state & time tracking */
+	struct psi_group_cpu __percpu *pcpu;
+
+	/* Periodic aggregation state */
+	u64 total_prev[NR_PSI_STATES - 1];
+	u64 last_update;
+	u64 next_update;
+	struct delayed_work clock_work;
+
+	/* Total stall times and sampled pressure averages */
+	u64 total[NR_PSI_STATES - 1];
+	unsigned long avg[NR_PSI_STATES - 1][3];
+};
+
+#else /* CONFIG_PSI */
+
+struct psi_group { };
+
+#endif /* CONFIG_PSI */
+
+#endif /* _LINUX_PSI_TYPES_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index adfb3f9a7597..b8fcc6b3080c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -25,6 +25,7 @@
 #include <linux/latencytop.h>
 #include <linux/sched/prio.h>
 #include <linux/signal_types.h>
+#include <linux/psi_types.h>
 #include <linux/mm_types_task.h>
 #include <linux/task_io_accounting.h>
 #include <linux/rseq.h>
@@ -706,6 +707,10 @@ struct task_struct {
 	unsigned			sched_contributes_to_load:1;
 	unsigned			sched_migrated:1;
 	unsigned			sched_remote_wakeup:1;
+#ifdef CONFIG_PSI
+	unsigned			sched_psi_wake_requeue:1;
+#endif
+
 	/* Force alignment to the next boundary: */
 	unsigned			:0;
 
@@ -965,6 +970,10 @@ struct task_struct {
 	kernel_siginfo_t		*last_siginfo;
 
 	struct task_io_accounting	ioac;
+#ifdef CONFIG_PSI
+	/* Pressure stall state */
+	unsigned int			psi_flags;
+#endif
 #ifdef CONFIG_TASK_XACCT
 	/* Accumulated RSS usage: */
 	u64				acct_rss_mem1;
@@ -1391,6 +1400,7 @@ extern struct pid *cad_pid;
 #define PF_KTHREAD		0x00200000	/* I am a kernel thread */
 #define PF_RANDOMIZE		0x00400000	/* Randomize virtual address space */
 #define PF_SWAPWRITE		0x00800000	/* Allowed to write to swap */
+#define PF_MEMSTALL		0x01000000	/* Stalled due to lack of memory */
 #define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_allowed */
 #define PF_MCE_EARLY		0x08000000      /* Early kill for mce process policy */
 #define PF_MUTEX_TESTER		0x20000000	/* Thread belongs to the rt mutex tester */
diff --git a/init/Kconfig b/init/Kconfig
index 317d5ccb5191..26e639df5517 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -490,6 +490,21 @@ config TASK_IO_ACCOUNTING
 
 	  Say N if unsure.
 
+config PSI
+	bool "Pressure stall information tracking"
+	help
+	  Collect metrics that indicate how overcommitted the CPU, memory,
+	  and IO capacity are in the system.
+
+	  If you say Y here, the kernel will create /proc/pressure/ with the
+	  pressure statistics files cpu, memory, and io. These will indicate
+	  the share of walltime in which some or all tasks in the system are
+	  delayed due to contention of the respective resource.
+
+	  For more details see Documentation/accounting/psi.txt.
+
+	  Say N if unsure.
+
 endmenu # "CPU/Task time and stats accounting"
 
 config CPU_ISOLATION
diff --git a/kernel/fork.c b/kernel/fork.c
index 3c719fec46c5..8f82a3bdcb8f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1822,6 +1822,10 @@ static __latent_entropy struct task_struct *copy_process(
 
 	p->default_timer_slack_ns = current->timer_slack_ns;
 
+#ifdef CONFIG_PSI
+	p->psi_flags = 0;
+#endif
+
 	task_io_accounting_init(&p->ioac);
 	acct_clear_integrals(p);
 
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index 7fe183404c38..21fb5a5662b5 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -29,3 +29,4 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq.o
 obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
 obj-$(CONFIG_MEMBARRIER) += membarrier.o
 obj-$(CONFIG_CPU_ISOLATION) += isolation.o
+obj-$(CONFIG_PSI) += psi.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f3efef387797..fd2fce8a001b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -722,8 +722,10 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 	if (!(flags & ENQUEUE_NOCLOCK))
 		update_rq_clock(rq);
 
-	if (!(flags & ENQUEUE_RESTORE))
+	if (!(flags & ENQUEUE_RESTORE)) {
 		sched_info_queued(rq, p);
+		psi_enqueue(p, flags & ENQUEUE_WAKEUP);
+	}
 
 	p->sched_class->enqueue_task(rq, p, flags);
 }
@@ -733,8 +735,10 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 	if (!(flags & DEQUEUE_NOCLOCK))
 		update_rq_clock(rq);
 
-	if (!(flags & DEQUEUE_SAVE))
+	if (!(flags & DEQUEUE_SAVE)) {
 		sched_info_dequeued(rq, p);
+		psi_dequeue(p, flags & DEQUEUE_SLEEP);
+	}
 
 	p->sched_class->dequeue_task(rq, p, flags);
 }
@@ -2037,6 +2041,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	cpu = select_task_rq(p, p->wake_cpu, SD_BALANCE_WAKE, wake_flags);
 	if (task_cpu(p) != cpu) {
 		wake_flags |= WF_MIGRATED;
+		psi_ttwu_dequeue(p);
 		set_task_cpu(p, cpu);
 	}
 
@@ -3051,6 +3056,7 @@ void scheduler_tick(void)
 	curr->sched_class->task_tick(rq, curr, 0);
 	cpu_load_update_active(rq);
 	calc_global_load_tick(rq);
+	psi_task_tick(rq);
 
 	rq_unlock(rq, &rf);
 
@@ -6067,6 +6073,8 @@ void __init sched_init(void)
 
 	init_schedstats();
 
+	psi_init();
+
 	scheduler_running = 1;
 }
 
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
new file mode 100644
index 000000000000..595414599b98
--- /dev/null
+++ b/kernel/sched/psi.c
@@ -0,0 +1,657 @@
+/*
+ * Pressure stall information for CPU, memory and IO
+ *
+ * Copyright (c) 2018 Facebook, Inc.
+ * Author: Johannes Weiner <hannes@cmpxchg.org>
+ *
+ * When CPU, memory and IO are contended, tasks experience delays that
+ * reduce throughput and introduce latencies into the workload. Memory
+ * and IO contention, in addition, can cause a full loss of forward
+ * progress in which the CPU goes idle.
+ *
+ * This code aggregates individual task delays into resource pressure
+ * metrics that indicate problems with both workload health and
+ * resource utilization.
+ *
+ *			Model
+ *
+ * The time in which a task can execute on a CPU is our baseline for
+ * productivity. Pressure expresses the amount of time in which this
+ * potential cannot be realized due to resource contention.
+ *
+ * This concept of productivity has two components: the workload and
+ * the CPU. To measure the impact of pressure on both, we define two
+ * contention states for a resource: SOME and FULL.
+ *
+ * In the SOME state of a given resource, one or more tasks are
+ * delayed on that resource. This affects the workload's ability to
+ * perform work, but the CPU may still be executing other tasks.
+ *
+ * In the FULL state of a given resource, all non-idle tasks are
+ * delayed on that resource such that nobody is advancing and the CPU
+ * goes idle. This leaves both workload and CPU unproductive.
+ *
+ * (Naturally, the FULL state doesn't exist for the CPU resource.)
+ *
+ *	SOME = nr_delayed_tasks != 0
+ *	FULL = nr_delayed_tasks != 0 && nr_running_tasks == 0
+ *
+ * The percentage of wallclock time spent in those compound stall
+ * states gives pressure numbers between 0 and 100 for each resource,
+ * where the SOME percentage indicates workload slowdowns and the FULL
+ * percentage indicates reduced CPU utilization:
+ *
+ *	%SOME = time(SOME) / period
+ *	%FULL = time(FULL) / period
+ *
+ *			Multiple CPUs
+ *
+ * The more tasks and available CPUs there are, the more work can be
+ * performed concurrently. This means that the potential that can go
+ * unrealized due to resource contention *also* scales with non-idle
+ * tasks and CPUs.
+ *
+ * Consider a scenario where 257 number crunching tasks are trying to
+ * run concurrently on 256 CPUs. If we simply aggregated the task
+ * states, we would have to conclude a CPU SOME pressure number of
+ * 100%, since *somebody* is waiting on a runqueue at all
+ * times. However, that is clearly not the amount of contention the
+ * workload is experiencing: only one out of 256 possible exceution
+ * threads will be contended at any given time, or about 0.4%.
+ *
+ * Conversely, consider a scenario of 4 tasks and 4 CPUs where at any
+ * given time *one* of the tasks is delayed due to a lack of memory.
+ * Again, looking purely at the task state would yield a memory FULL
+ * pressure number of 0%, since *somebody* is always making forward
+ * progress. But again this wouldn't capture the amount of execution
+ * potential lost, which is 1 out of 4 CPUs, or 25%.
+ *
+ * To calculate wasted potential (pressure) with multiple processors,
+ * we have to base our calculation on the number of non-idle tasks in
+ * conjunction with the number of available CPUs, which is the number
+ * of potential execution threads. SOME becomes then the proportion of
+ * delayed tasks to possibe threads, and FULL is the share of possible
+ * threads that are unproductive due to delays:
+ *
+ *	threads = min(nr_nonidle_tasks, nr_cpus)
+ *	   SOME = min(nr_delayed_tasks / threads, 1)
+ *	   FULL = (threads - min(nr_running_tasks, threads)) / threads
+ *
+ * For the 257 number crunchers on 256 CPUs, this yields:
+ *
+ *	threads = min(257, 256)
+ *	   SOME = min(1 / 256, 1)             = 0.4%
+ *	   FULL = (256 - min(257, 256)) / 256 = 0%
+ *
+ * For the 1 out of 4 memory-delayed tasks, this yields:
+ *
+ *	threads = min(4, 4)
+ *	   SOME = min(1 / 4, 1)               = 25%
+ *	   FULL = (4 - min(3, 4)) / 4         = 25%
+ *
+ * [ Substitute nr_cpus with 1, and you can see that it's a natural
+ *   extension of the single-CPU model. ]
+ *
+ *			Implementation
+ *
+ * To assess the precise time spent in each such state, we would have
+ * to freeze the system on task changes and start/stop the state
+ * clocks accordingly. Obviously that doesn't scale in practice.
+ *
+ * Because the scheduler aims to distribute the compute load evenly
+ * among the available CPUs, we can track task state locally to each
+ * CPU and, at much lower frequency, extrapolate the global state for
+ * the cumulative stall times and the running averages.
+ *
+ * For each runqueue, we track:
+ *
+ *	   tSOME[cpu] = time(nr_delayed_tasks[cpu] != 0)
+ *	   tFULL[cpu] = time(nr_delayed_tasks[cpu] && !nr_running_tasks[cpu])
+ *	tNONIDLE[cpu] = time(nr_nonidle_tasks[cpu] != 0)
+ *
+ * and then periodically aggregate:
+ *
+ *	tNONIDLE = sum(tNONIDLE[i])
+ *
+ *	   tSOME = sum(tSOME[i] * tNONIDLE[i]) / tNONIDLE
+ *	   tFULL = sum(tFULL[i] * tNONIDLE[i]) / tNONIDLE
+ *
+ *	   %SOME = tSOME / period
+ *	   %FULL = tFULL / period
+ *
+ * This gives us an approximation of pressure that is practical
+ * cost-wise, yet way more sensitive and accurate than periodic
+ * sampling of the aggregate task states would be.
+ */
+
+#include <linux/sched/loadavg.h>
+#include <linux/seq_file.h>
+#include <linux/proc_fs.h>
+#include <linux/seqlock.h>
+#include <linux/cgroup.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/psi.h>
+#include "sched.h"
+
+static int psi_bug __read_mostly;
+
+bool psi_disabled __read_mostly;
+core_param(psi_disabled, psi_disabled, bool, 0644);
+
+/* Running averages - we need to be higher-res than loadavg */
+#define PSI_FREQ	(2*HZ+1)	/* 2 sec intervals */
+#define EXP_10s		1677		/* 1/exp(2s/10s) as fixed-point */
+#define EXP_60s		1981		/* 1/exp(2s/60s) */
+#define EXP_300s	2034		/* 1/exp(2s/300s) */
+
+/* Sampling frequency in nanoseconds */
+static u64 psi_period __read_mostly;
+
+/* System-level pressure and stall tracking */
+static DEFINE_PER_CPU(struct psi_group_cpu, system_group_pcpu);
+static struct psi_group psi_system = {
+	.pcpu = &system_group_pcpu,
+};
+
+static void psi_update_work(struct work_struct *work);
+
+static void group_init(struct psi_group *group)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq);
+	group->next_update = sched_clock() + psi_period;
+	INIT_DELAYED_WORK(&group->clock_work, psi_update_work);
+	mutex_init(&group->stat_lock);
+}
+
+void __init psi_init(void)
+{
+	if (psi_disabled)
+		return;
+
+	psi_period = jiffies_to_nsecs(PSI_FREQ);
+	group_init(&psi_system);
+}
+
+static bool test_state(unsigned int *tasks, enum psi_states state)
+{
+	switch (state) {
+	case PSI_IO_SOME:
+		return tasks[NR_IOWAIT];
+	case PSI_IO_FULL:
+		return tasks[NR_IOWAIT] && !tasks[NR_RUNNING];
+	case PSI_MEM_SOME:
+		return tasks[NR_MEMSTALL];
+	case PSI_MEM_FULL:
+		return tasks[NR_MEMSTALL] && !tasks[NR_RUNNING];
+	case PSI_CPU_SOME:
+		return tasks[NR_RUNNING] > 1;
+	case PSI_NONIDLE:
+		return tasks[NR_IOWAIT] || tasks[NR_MEMSTALL] ||
+			tasks[NR_RUNNING];
+	default:
+		return false;
+	}
+}
+
+static void get_recent_times(struct psi_group *group, int cpu, u32 *times)
+{
+	struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu);
+	unsigned int tasks[NR_PSI_TASK_COUNTS];
+	u64 now, state_start;
+	unsigned int seq;
+	int s;
+
+	/* Snapshot a coherent view of the CPU state */
+	do {
+		seq = read_seqcount_begin(&groupc->seq);
+		now = cpu_clock(cpu);
+		memcpy(times, groupc->times, sizeof(groupc->times));
+		memcpy(tasks, groupc->tasks, sizeof(groupc->tasks));
+		state_start = groupc->state_start;
+	} while (read_seqcount_retry(&groupc->seq, seq));
+
+	/* Calculate state time deltas against the previous snapshot */
+	for (s = 0; s < NR_PSI_STATES; s++) {
+		u32 delta;
+		/*
+		 * In addition to already concluded states, we also
+		 * incorporate currently active states on the CPU,
+		 * since states may last for many sampling periods.
+		 *
+		 * This way we keep our delta sampling buckets small
+		 * (u32) and our reported pressure close to what's
+		 * actually happening.
+		 */
+		if (test_state(tasks, s))
+			times[s] += now - state_start;
+
+		delta = times[s] - groupc->times_prev[s];
+		groupc->times_prev[s] = times[s];
+
+		times[s] = delta;
+	}
+}
+
+static void calc_avgs(unsigned long avg[3], int missed_periods,
+		      u64 time, u64 period)
+{
+	unsigned long pct;
+
+	/* Fill in zeroes for periods of no activity */
+	if (missed_periods) {
+		avg[0] = calc_load_n(avg[0], EXP_10s, 0, missed_periods);
+		avg[1] = calc_load_n(avg[1], EXP_60s, 0, missed_periods);
+		avg[2] = calc_load_n(avg[2], EXP_300s, 0, missed_periods);
+	}
+
+	/* Sample the most recent active period */
+	pct = div_u64(time * 100, period);
+	pct *= FIXED_1;
+	avg[0] = calc_load(avg[0], EXP_10s, pct);
+	avg[1] = calc_load(avg[1], EXP_60s, pct);
+	avg[2] = calc_load(avg[2], EXP_300s, pct);
+}
+
+static bool update_stats(struct psi_group *group)
+{
+	u64 deltas[NR_PSI_STATES - 1] = { 0, };
+	unsigned long missed_periods = 0;
+	unsigned long nonidle_total = 0;
+	u64 now, expires, period;
+	int cpu;
+	int s;
+
+	mutex_lock(&group->stat_lock);
+
+	/*
+	 * Collect the per-cpu time buckets and average them into a
+	 * single time sample that is normalized to wallclock time.
+	 *
+	 * For averaging, each CPU is weighted by its non-idle time in
+	 * the sampling period. This eliminates artifacts from uneven
+	 * loading, or even entirely idle CPUs.
+	 */
+	for_each_possible_cpu(cpu) {
+		u32 times[NR_PSI_STATES];
+		u32 nonidle;
+
+		get_recent_times(group, cpu, times);
+
+		nonidle = nsecs_to_jiffies(times[PSI_NONIDLE]);
+		nonidle_total += nonidle;
+
+		for (s = 0; s < PSI_NONIDLE; s++)
+			deltas[s] += (u64)times[s] * nonidle;
+	}
+
+	/*
+	 * Integrate the sample into the running statistics that are
+	 * reported to userspace: the cumulative stall times and the
+	 * decaying averages.
+	 *
+	 * Pressure percentages are sampled at PSI_FREQ. We might be
+	 * called more often when the user polls more frequently than
+	 * that; we might be called less often when there is no task
+	 * activity, thus no data, and clock ticks are sporadic. The
+	 * below handles both.
+	 */
+
+	/* total= */
+	for (s = 0; s < NR_PSI_STATES - 1; s++)
+		group->total[s] += div_u64(deltas[s], max(nonidle_total, 1UL));
+
+	/* avgX= */
+	now = sched_clock();
+	expires = group->next_update;
+	if (now < expires)
+		goto out;
+	if (now - expires > psi_period)
+		missed_periods = div_u64(now - expires, psi_period);
+
+	/*
+	 * The periodic clock tick can get delayed for various
+	 * reasons, especially on loaded systems. To avoid clock
+	 * drift, we schedule the clock in fixed psi_period intervals.
+	 * But the deltas we sample out of the per-cpu buckets above
+	 * are based on the actual time elapsing between clock ticks.
+	 */
+	group->next_update = expires + ((1 + missed_periods) * psi_period);
+	period = now - (group->last_update + (missed_periods * psi_period));
+	group->last_update = now;
+
+	for (s = 0; s < NR_PSI_STATES - 1; s++) {
+		u32 sample;
+
+		sample = group->total[s] - group->total_prev[s];
+		/*
+		 * Due to the lockless sampling of the time buckets,
+		 * recorded time deltas can slip into the next period,
+		 * which under full pressure can result in samples in
+		 * excess of the period length.
+		 *
+		 * We don't want to report non-sensical pressures in
+		 * excess of 100%, nor do we want to drop such events
+		 * on the floor. Instead we punt any overage into the
+		 * future until pressure subsides. By doing this we
+		 * don't underreport the occurring pressure curve, we
+		 * just report it delayed by one period length.
+		 *
+		 * The error isn't cumulative. As soon as another
+		 * delta slips from a period P to P+1, by definition
+		 * it frees up its time T in P.
+		 */
+		if (sample > period)
+			sample = period;
+		group->total_prev[s] += sample;
+		calc_avgs(group->avg[s], missed_periods, sample, period);
+	}
+out:
+	mutex_unlock(&group->stat_lock);
+	return nonidle_total;
+}
+
+static void psi_update_work(struct work_struct *work)
+{
+	struct delayed_work *dwork;
+	struct psi_group *group;
+	bool nonidle;
+
+	dwork = to_delayed_work(work);
+	group = container_of(dwork, struct psi_group, clock_work);
+
+	/*
+	 * If there is task activity, periodically fold the per-cpu
+	 * times and feed samples into the running averages. If things
+	 * are idle and there is no data to process, stop the clock.
+	 * Once restarted, we'll catch up the running averages in one
+	 * go - see calc_avgs() and missed_periods.
+	 */
+
+	nonidle = update_stats(group);
+
+	if (nonidle) {
+		unsigned long delay = 0;
+		u64 now;
+
+		now = sched_clock();
+		if (group->next_update > now)
+			delay = nsecs_to_jiffies(group->next_update - now) + 1;
+		schedule_delayed_work(dwork, delay);
+	}
+}
+
+static void record_times(struct psi_group_cpu *groupc, int cpu,
+			 bool memstall_tick)
+{
+	u32 delta;
+	u64 now;
+
+	now = cpu_clock(cpu);
+	delta = now - groupc->state_start;
+	groupc->state_start = now;
+
+	if (test_state(groupc->tasks, PSI_IO_SOME)) {
+		groupc->times[PSI_IO_SOME] += delta;
+		if (test_state(groupc->tasks, PSI_IO_FULL))
+			groupc->times[PSI_IO_FULL] += delta;
+	}
+
+	if (test_state(groupc->tasks, PSI_MEM_SOME)) {
+		groupc->times[PSI_MEM_SOME] += delta;
+		if (test_state(groupc->tasks, PSI_MEM_FULL))
+			groupc->times[PSI_MEM_FULL] += delta;
+		else if (memstall_tick) {
+			u32 sample;
+			/*
+			 * Since we care about lost potential, a
+			 * memstall is FULL when there are no other
+			 * working tasks, but also when the CPU is
+			 * actively reclaiming and nothing productive
+			 * could run even if it were runnable.
+			 *
+			 * When the timer tick sees a reclaiming CPU,
+			 * regardless of runnable tasks, sample a FULL
+			 * tick (or less if it hasn't been a full tick
+			 * since the last state change).
+			 */
+			sample = min(delta, (u32)jiffies_to_nsecs(1));
+			groupc->times[PSI_MEM_FULL] += sample;
+		}
+	}
+
+	if (test_state(groupc->tasks, PSI_CPU_SOME))
+		groupc->times[PSI_CPU_SOME] += delta;
+
+	if (test_state(groupc->tasks, PSI_NONIDLE))
+		groupc->times[PSI_NONIDLE] += delta;
+}
+
+static void psi_group_change(struct psi_group *group, int cpu,
+			     unsigned int clear, unsigned int set)
+{
+	struct psi_group_cpu *groupc;
+	unsigned int t, m;
+
+	groupc = per_cpu_ptr(group->pcpu, cpu);
+
+	/*
+	 * First we assess the aggregate resource states this CPU's
+	 * tasks have been in since the last change, and account any
+	 * SOME and FULL time these may have resulted in.
+	 *
+	 * Then we update the task counts according to the state
+	 * change requested through the @clear and @set bits.
+	 */
+	write_seqcount_begin(&groupc->seq);
+
+	record_times(groupc, cpu, false);
+
+	for (t = 0, m = clear; m; m &= ~(1 << t), t++) {
+		if (!(m & (1 << t)))
+			continue;
+		if (groupc->tasks[t] == 0 && !psi_bug) {
+			printk_deferred(KERN_ERR "psi: task underflow! cpu=%d t=%d tasks=[%u %u %u] clear=%x set=%x\n",
+					cpu, t, groupc->tasks[0],
+					groupc->tasks[1], groupc->tasks[2],
+					clear, set);
+			psi_bug = 1;
+		}
+		groupc->tasks[t]--;
+	}
+
+	for (t = 0; set; set &= ~(1 << t), t++)
+		if (set & (1 << t))
+			groupc->tasks[t]++;
+
+	write_seqcount_end(&groupc->seq);
+
+	if (!delayed_work_pending(&group->clock_work))
+		schedule_delayed_work(&group->clock_work, PSI_FREQ);
+}
+
+void psi_task_change(struct task_struct *task, int clear, int set)
+{
+	int cpu = task_cpu(task);
+
+	if (!task->pid)
+		return;
+
+	if (((task->psi_flags & set) ||
+	     (task->psi_flags & clear) != clear) &&
+	    !psi_bug) {
+		printk_deferred(KERN_ERR "psi: inconsistent task state! task=%d:%s cpu=%d psi_flags=%x clear=%x set=%x\n",
+				task->pid, task->comm, cpu,
+				task->psi_flags, clear, set);
+		psi_bug = 1;
+	}
+
+	task->psi_flags &= ~clear;
+	task->psi_flags |= set;
+
+	psi_group_change(&psi_system, cpu, clear, set);
+}
+
+void psi_memstall_tick(struct task_struct *task, int cpu)
+{
+	struct psi_group_cpu *groupc;
+
+	groupc = per_cpu_ptr(psi_system.pcpu, cpu);
+	write_seqcount_begin(&groupc->seq);
+	record_times(groupc, cpu, true);
+	write_seqcount_end(&groupc->seq);
+}
+
+/**
+ * psi_memstall_enter - mark the beginning of a memory stall section
+ * @flags: flags to handle nested sections
+ *
+ * Marks the calling task as being stalled due to a lack of memory,
+ * such as waiting for a refault or performing reclaim.
+ */
+void psi_memstall_enter(unsigned long *flags)
+{
+	struct rq_flags rf;
+	struct rq *rq;
+
+	if (psi_disabled)
+		return;
+
+	*flags = current->flags & PF_MEMSTALL;
+	if (*flags)
+		return;
+	/*
+	 * PF_MEMSTALL setting & accounting needs to be atomic wrt
+	 * changes to the task's scheduling state, otherwise we can
+	 * race with CPU migration.
+	 */
+	rq = this_rq_lock_irq(&rf);
+
+	current->flags |= PF_MEMSTALL;
+	psi_task_change(current, 0, TSK_MEMSTALL);
+
+	rq_unlock_irq(rq, &rf);
+}
+
+/**
+ * psi_memstall_leave - mark the end of an memory stall section
+ * @flags: flags to handle nested memdelay sections
+ *
+ * Marks the calling task as no longer stalled due to lack of memory.
+ */
+void psi_memstall_leave(unsigned long *flags)
+{
+	struct rq_flags rf;
+	struct rq *rq;
+
+	if (psi_disabled)
+		return;
+
+	if (*flags)
+		return;
+	/*
+	 * PF_MEMSTALL clearing & accounting needs to be atomic wrt
+	 * changes to the task's scheduling state, otherwise we could
+	 * race with CPU migration.
+	 */
+	rq = this_rq_lock_irq(&rf);
+
+	current->flags &= ~PF_MEMSTALL;
+	psi_task_change(current, TSK_MEMSTALL, 0);
+
+	rq_unlock_irq(rq, &rf);
+}
+
+static int psi_show(struct seq_file *m, struct psi_group *group,
+		    enum psi_res res)
+{
+	int full;
+
+	if (psi_disabled)
+		return -EOPNOTSUPP;
+
+	update_stats(group);
+
+	for (full = 0; full < 2 - (res == PSI_CPU); full++) {
+		unsigned long avg[3];
+		u64 total;
+		int w;
+
+		for (w = 0; w < 3; w++)
+			avg[w] = group->avg[res * 2 + full][w];
+		total = div_u64(group->total[res * 2 + full], NSEC_PER_USEC);
+
+		seq_printf(m, "%s avg10=%lu.%02lu avg60=%lu.%02lu avg300=%lu.%02lu total=%llu\n",
+			   full ? "full" : "some",
+			   LOAD_INT(avg[0]), LOAD_FRAC(avg[0]),
+			   LOAD_INT(avg[1]), LOAD_FRAC(avg[1]),
+			   LOAD_INT(avg[2]), LOAD_FRAC(avg[2]),
+			   total);
+	}
+
+	return 0;
+}
+
+static int psi_io_show(struct seq_file *m, void *v)
+{
+	return psi_show(m, &psi_system, PSI_IO);
+}
+
+static int psi_memory_show(struct seq_file *m, void *v)
+{
+	return psi_show(m, &psi_system, PSI_MEM);
+}
+
+static int psi_cpu_show(struct seq_file *m, void *v)
+{
+	return psi_show(m, &psi_system, PSI_CPU);
+}
+
+static int psi_io_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, psi_io_show, NULL);
+}
+
+static int psi_memory_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, psi_memory_show, NULL);
+}
+
+static int psi_cpu_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, psi_cpu_show, NULL);
+}
+
+static const struct file_operations psi_io_fops = {
+	.open           = psi_io_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = single_release,
+};
+
+static const struct file_operations psi_memory_fops = {
+	.open           = psi_memory_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = single_release,
+};
+
+static const struct file_operations psi_cpu_fops = {
+	.open           = psi_cpu_open,
+	.read           = seq_read,
+	.llseek         = seq_lseek,
+	.release        = single_release,
+};
+
+static int __init psi_proc_init(void)
+{
+	proc_mkdir("pressure", NULL);
+	proc_create("pressure/io", 0, NULL, &psi_io_fops);
+	proc_create("pressure/memory", 0, NULL, &psi_memory_fops);
+	proc_create("pressure/cpu", 0, NULL, &psi_cpu_fops);
+	return 0;
+}
+module_init(psi_proc_init);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 1de189bb9209..618577fc9aa8 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -54,6 +54,7 @@
 #include <linux/proc_fs.h>
 #include <linux/prefetch.h>
 #include <linux/profile.h>
+#include <linux/psi.h>
 #include <linux/rcupdate_wait.h>
 #include <linux/security.h>
 #include <linux/stop_machine.h>
@@ -319,6 +320,7 @@ extern bool dl_cpu_busy(unsigned int cpu);
 #ifdef CONFIG_CGROUP_SCHED
 
 #include <linux/cgroup.h>
+#include <linux/psi.h>
 
 struct cfs_rq;
 struct rt_rq;
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 8aea199a39b4..4904c4677000 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -55,6 +55,92 @@ static inline void rq_sched_info_depart  (struct rq *rq, unsigned long long delt
 # define   schedstat_val_or_zero(var)	0
 #endif /* CONFIG_SCHEDSTATS */
 
+#ifdef CONFIG_PSI
+/*
+ * PSI tracks state that persists across sleeps, such as iowaits and
+ * memory stalls. As a result, it has to distinguish between sleeps,
+ * where a task's runnable state changes, and requeues, where a task
+ * and its state are being moved between CPUs and runqueues.
+ */
+static inline void psi_enqueue(struct task_struct *p, bool wakeup)
+{
+	int clear = 0, set = TSK_RUNNING;
+
+	if (psi_disabled)
+		return;
+
+	if (!wakeup || p->sched_psi_wake_requeue) {
+		if (p->flags & PF_MEMSTALL)
+			set |= TSK_MEMSTALL;
+		if (p->sched_psi_wake_requeue)
+			p->sched_psi_wake_requeue = 0;
+	} else {
+		if (p->in_iowait)
+			clear |= TSK_IOWAIT;
+	}
+
+	psi_task_change(p, clear, set);
+}
+
+static inline void psi_dequeue(struct task_struct *p, bool sleep)
+{
+	int clear = TSK_RUNNING, set = 0;
+
+	if (psi_disabled)
+		return;
+
+	if (!sleep) {
+		if (p->flags & PF_MEMSTALL)
+			clear |= TSK_MEMSTALL;
+	} else {
+		if (p->in_iowait)
+			set |= TSK_IOWAIT;
+	}
+
+	psi_task_change(p, clear, set);
+}
+
+static inline void psi_ttwu_dequeue(struct task_struct *p)
+{
+	if (psi_disabled)
+		return;
+	/*
+	 * Is the task being migrated during a wakeup? Make sure to
+	 * deregister its sleep-persistent psi states from the old
+	 * queue, and let psi_enqueue() know it has to requeue.
+	 */
+	if (unlikely(p->in_iowait || (p->flags & PF_MEMSTALL))) {
+		struct rq_flags rf;
+		struct rq *rq;
+		int clear = 0;
+
+		if (p->in_iowait)
+			clear |= TSK_IOWAIT;
+		if (p->flags & PF_MEMSTALL)
+			clear |= TSK_MEMSTALL;
+
+		rq = __task_rq_lock(p, &rf);
+		psi_task_change(p, clear, 0);
+		p->sched_psi_wake_requeue = 1;
+		__task_rq_unlock(rq, &rf);
+	}
+}
+
+static inline void psi_task_tick(struct rq *rq)
+{
+	if (psi_disabled)
+		return;
+
+	if (unlikely(rq->curr->flags & PF_MEMSTALL))
+		psi_memstall_tick(rq->curr, cpu_of(rq));
+}
+#else /* CONFIG_PSI */
+static inline void psi_enqueue(struct task_struct *p, bool wakeup) {}
+static inline void psi_dequeue(struct task_struct *p, bool sleep) {}
+static inline void psi_ttwu_dequeue(struct task_struct *p) {}
+static inline void psi_task_tick(struct rq *rq) {}
+#endif /* CONFIG_PSI */
+
 #ifdef CONFIG_SCHED_INFO
 static inline void sched_info_reset_dequeued(struct task_struct *t)
 {
diff --git a/mm/compaction.c b/mm/compaction.c
index faca45ebe62d..7c607479de4a 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -22,6 +22,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/page_owner.h>
+#include <linux/psi.h>
 #include "internal.h"
 
 #ifdef CONFIG_COMPACTION
@@ -2068,11 +2069,15 @@ static int kcompactd(void *p)
 	pgdat->kcompactd_classzone_idx = pgdat->nr_zones - 1;
 
 	while (!kthread_should_stop()) {
+		unsigned long pflags;
+
 		trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
 		wait_event_freezable(pgdat->kcompactd_wait,
 				kcompactd_work_requested(pgdat));
 
+		psi_memstall_enter(&pflags);
 		kcompactd_do_work(pgdat);
+		psi_memstall_leave(&pflags);
 	}
 
 	return 0;
diff --git a/mm/filemap.c b/mm/filemap.c
index 01a841f17bf4..41586009fa42 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -37,6 +37,7 @@
 #include <linux/shmem_fs.h>
 #include <linux/rmap.h>
 #include <linux/delayacct.h>
+#include <linux/psi.h>
 #include "internal.h"
 
 #define CREATE_TRACE_POINTS
@@ -1075,11 +1076,14 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 	struct wait_page_queue wait_page;
 	wait_queue_entry_t *wait = &wait_page.wait;
 	bool thrashing = false;
+	unsigned long pflags;
 	int ret = 0;
 
-	if (bit_nr == PG_locked && !PageSwapBacked(page) &&
+	if (bit_nr == PG_locked &&
 	    !PageUptodate(page) && PageWorkingset(page)) {
-		delayacct_thrashing_start();
+		if (!PageSwapBacked(page))
+			delayacct_thrashing_start();
+		psi_memstall_enter(&pflags);
 		thrashing = true;
 	}
 
@@ -1121,8 +1125,11 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
 
 	finish_wait(q, wait);
 
-	if (thrashing)
-		delayacct_thrashing_end();
+	if (thrashing) {
+		if (!PageSwapBacked(page))
+			delayacct_thrashing_end();
+		psi_memstall_leave(&pflags);
+	}
 
 	/*
 	 * A signal could leave PageWaiters set. Clearing it here if
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 20f25d06c00c..f97b5a1700a4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -66,6 +66,7 @@
 #include <linux/ftrace.h>
 #include <linux/lockdep.h>
 #include <linux/nmi.h>
+#include <linux/psi.h>
 
 #include <asm/sections.h>
 #include <asm/tlbflush.h>
@@ -3549,15 +3550,20 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
 		enum compact_priority prio, enum compact_result *compact_result)
 {
 	struct page *page;
+	unsigned long pflags;
 	unsigned int noreclaim_flag;
 
 	if (!order)
 		return NULL;
 
+	psi_memstall_enter(&pflags);
 	noreclaim_flag = memalloc_noreclaim_save();
+
 	*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
 									prio);
+
 	memalloc_noreclaim_restore(noreclaim_flag);
+	psi_memstall_leave(&pflags);
 
 	if (*compact_result <= COMPACT_INACTIVE)
 		return NULL;
@@ -3756,11 +3762,13 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
 	struct reclaim_state reclaim_state;
 	int progress;
 	unsigned int noreclaim_flag;
+	unsigned long pflags;
 
 	cond_resched();
 
 	/* We now go into synchronous reclaim */
 	cpuset_memory_pressure_bump();
+	psi_memstall_enter(&pflags);
 	fs_reclaim_acquire(gfp_mask);
 	noreclaim_flag = memalloc_noreclaim_save();
 	reclaim_state.reclaimed_slab = 0;
@@ -3772,6 +3780,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
 	current->reclaim_state = NULL;
 	memalloc_noreclaim_restore(noreclaim_flag);
 	fs_reclaim_release(gfp_mask);
+	psi_memstall_leave(&pflags);
 
 	cond_resched();
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 87e9fef341d2..8ea87586925e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -49,6 +49,7 @@
 #include <linux/prefetch.h>
 #include <linux/printk.h>
 #include <linux/dax.h>
+#include <linux/psi.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -3305,6 +3306,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 {
 	struct zonelist *zonelist;
 	unsigned long nr_reclaimed;
+	unsigned long pflags;
 	int nid;
 	unsigned int noreclaim_flag;
 	struct scan_control sc = {
@@ -3333,9 +3335,13 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 					    sc.gfp_mask,
 					    sc.reclaim_idx);
 
+	psi_memstall_enter(&pflags);
 	noreclaim_flag = memalloc_noreclaim_save();
+
 	nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+
 	memalloc_noreclaim_restore(noreclaim_flag);
+	psi_memstall_leave(&pflags);
 
 	trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
 
@@ -3500,6 +3506,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 	int i;
 	unsigned long nr_soft_reclaimed;
 	unsigned long nr_soft_scanned;
+	unsigned long pflags;
 	struct zone *zone;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
@@ -3510,6 +3517,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 		.may_swap = 1,
 	};
 
+	psi_memstall_enter(&pflags);
 	__fs_reclaim_acquire();
 
 	count_vm_event(PAGEOUTRUN);
@@ -3611,6 +3619,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
 out:
 	snapshot_refaults(NULL, pgdat);
 	__fs_reclaim_release();
+	psi_memstall_leave(&pflags);
 	/*
 	 * Return the order kswapd stopped reclaiming at as
 	 * prepare_kswapd_sleep() takes it into account. If another caller
-- 
cgit v1.2.3


From 2ce7135adc9ad081aa3c49744144376ac74fea60 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 26 Oct 2018 15:06:31 -0700
Subject: psi: cgroup support

On a system that executes multiple cgrouped jobs and independent
workloads, we don't just care about the health of the overall system, but
also that of individual jobs, so that we can ensure individual job health,
fairness between jobs, or prioritize some jobs over others.

This patch implements pressure stall tracking for cgroups.  In kernels
with CONFIG_PSI=y, cgroup2 groups will have cpu.pressure, memory.pressure,
and io.pressure files that track aggregate pressure stall times for only
the tasks inside the cgroup.

Link: http://lkml.kernel.org/r/20180828172258.3185-10-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Daniel Drake <drake@endlessm.com>
Tested-by: Suren Baghdasaryan <surenb@google.com>
Cc: Christopher Lameter <cl@linux.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <jweiner@fb.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Enderborg <peter.enderborg@sony.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Vinayak Menon <vinmenon@codeaurora.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/accounting/psi.txt        |   9 +++
 Documentation/admin-guide/cgroup-v2.rst |  18 +++++
 include/linux/cgroup-defs.h             |   4 ++
 include/linux/cgroup.h                  |  15 ++++
 include/linux/psi.h                     |  25 +++++++
 init/Kconfig                            |   4 ++
 kernel/cgroup/cgroup.c                  |  45 +++++++++++-
 kernel/sched/psi.c                      | 118 +++++++++++++++++++++++++++++---
 8 files changed, 228 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/accounting/psi.txt b/Documentation/accounting/psi.txt
index 3753a82f1cf5..b8ca28b60215 100644
--- a/Documentation/accounting/psi.txt
+++ b/Documentation/accounting/psi.txt
@@ -62,3 +62,12 @@ well as medium and long term trends. The total absolute stall time is
 tracked and exported as well, to allow detection of latency spikes
 which wouldn't necessarily make a dent in the time averages, or to
 average trends over custom time frames.
+
+Cgroup2 interface
+=================
+
+In a system with a CONFIG_CGROUP=y kernel and the cgroup2 filesystem
+mounted, pressure stall information is also tracked for tasks grouped
+into cgroups. Each subdirectory in the cgroupfs mountpoint contains
+cpu.pressure, memory.pressure, and io.pressure files; the format is
+the same as the /proc/pressure/ files.
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index caf36105a1c7..8389d6f72a77 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -966,6 +966,12 @@ All time durations are in microseconds.
 	$PERIOD duration.  "max" for $MAX indicates no limit.  If only
 	one number is written, $MAX is updated.
 
+  cpu.pressure
+	A read-only nested-key file which exists on non-root cgroups.
+
+	Shows pressure stall information for CPU. See
+	Documentation/accounting/psi.txt for details.
+
 
 Memory
 ------
@@ -1271,6 +1277,12 @@ PAGE_SIZE multiple when read back.
 	higher than the limit for an extended period of time.  This
 	reduces the impact on the workload and memory management.
 
+  memory.pressure
+	A read-only nested-key file which exists on non-root cgroups.
+
+	Shows pressure stall information for memory. See
+	Documentation/accounting/psi.txt for details.
+
 
 Usage Guidelines
 ~~~~~~~~~~~~~~~~
@@ -1408,6 +1420,12 @@ IO Interface Files
 
 	  8:16 rbps=2097152 wbps=max riops=max wiops=max
 
+  io.pressure
+	A read-only nested-key file which exists on non-root cgroups.
+
+	Shows pressure stall information for IO. See
+	Documentation/accounting/psi.txt for details.
+
 
 Writeback
 ~~~~~~~~~
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 22254c1fe1c5..5e1694fe035b 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -20,6 +20,7 @@
 #include <linux/u64_stats_sync.h>
 #include <linux/workqueue.h>
 #include <linux/bpf-cgroup.h>
+#include <linux/psi_types.h>
 
 #ifdef CONFIG_CGROUPS
 
@@ -436,6 +437,9 @@ struct cgroup {
 	/* used to schedule release agent */
 	struct work_struct release_agent_work;
 
+	/* used to track pressure stalls */
+	struct psi_group psi;
+
 	/* used to store eBPF programs */
 	struct cgroup_bpf bpf;
 
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b622d6608605..9968332cceed 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -650,6 +650,11 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
 	pr_cont_kernfs_path(cgrp->kn);
 }
 
+static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
+{
+	return &cgrp->psi;
+}
+
 static inline void cgroup_init_kthreadd(void)
 {
 	/*
@@ -703,6 +708,16 @@ static inline union kernfs_node_id *cgroup_get_kernfs_id(struct cgroup *cgrp)
 	return NULL;
 }
 
+static inline struct cgroup *cgroup_parent(struct cgroup *cgrp)
+{
+	return NULL;
+}
+
+static inline struct psi_group *cgroup_psi(struct cgroup *cgrp)
+{
+	return NULL;
+}
+
 static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
 					       struct cgroup *ancestor)
 {
diff --git a/include/linux/psi.h b/include/linux/psi.h
index b0daf050de58..8e0725aac0aa 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -4,6 +4,9 @@
 #include <linux/psi_types.h>
 #include <linux/sched.h>
 
+struct seq_file;
+struct css_set;
+
 #ifdef CONFIG_PSI
 
 extern bool psi_disabled;
@@ -16,6 +19,14 @@ void psi_memstall_tick(struct task_struct *task, int cpu);
 void psi_memstall_enter(unsigned long *flags);
 void psi_memstall_leave(unsigned long *flags);
 
+int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
+
+#ifdef CONFIG_CGROUPS
+int psi_cgroup_alloc(struct cgroup *cgrp);
+void psi_cgroup_free(struct cgroup *cgrp);
+void cgroup_move_task(struct task_struct *p, struct css_set *to);
+#endif
+
 #else /* CONFIG_PSI */
 
 static inline void psi_init(void) {}
@@ -23,6 +34,20 @@ static inline void psi_init(void) {}
 static inline void psi_memstall_enter(unsigned long *flags) {}
 static inline void psi_memstall_leave(unsigned long *flags) {}
 
+#ifdef CONFIG_CGROUPS
+static inline int psi_cgroup_alloc(struct cgroup *cgrp)
+{
+	return 0;
+}
+static inline void psi_cgroup_free(struct cgroup *cgrp)
+{
+}
+static inline void cgroup_move_task(struct task_struct *p, struct css_set *to)
+{
+	rcu_assign_pointer(p->cgroups, to);
+}
+#endif
+
 #endif /* CONFIG_PSI */
 
 #endif /* _LINUX_PSI_H */
diff --git a/init/Kconfig b/init/Kconfig
index 26e639df5517..a4112e95724a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -501,6 +501,10 @@ config PSI
 	  the share of walltime in which some or all tasks in the system are
 	  delayed due to contention of the respective resource.
 
+	  In kernels with cgroup support, cgroups (cgroup2 only) will
+	  have cpu.pressure, memory.pressure, and io.pressure files,
+	  which aggregate pressure stalls for the grouped tasks only.
+
 	  For more details see Documentation/accounting/psi.txt.
 
 	  Say N if unsure.
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 4c1cf0969a80..8b79318810ad 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -55,6 +55,7 @@
 #include <linux/nsproxy.h>
 #include <linux/file.h>
 #include <linux/sched/cputime.h>
+#include <linux/psi.h>
 #include <net/sock.h>
 
 #define CREATE_TRACE_POINTS
@@ -862,7 +863,7 @@ static void css_set_move_task(struct task_struct *task,
 		 */
 		WARN_ON_ONCE(task->flags & PF_EXITING);
 
-		rcu_assign_pointer(task->cgroups, to_cset);
+		cgroup_move_task(task, to_cset);
 		list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks :
 							     &to_cset->tasks);
 	}
@@ -3446,6 +3447,21 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
 	return ret;
 }
 
+#ifdef CONFIG_PSI
+static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
+{
+	return psi_show(seq, &seq_css(seq)->cgroup->psi, PSI_IO);
+}
+static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
+{
+	return psi_show(seq, &seq_css(seq)->cgroup->psi, PSI_MEM);
+}
+static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
+{
+	return psi_show(seq, &seq_css(seq)->cgroup->psi, PSI_CPU);
+}
+#endif
+
 static int cgroup_file_open(struct kernfs_open_file *of)
 {
 	struct cftype *cft = of->kn->priv;
@@ -4576,6 +4592,23 @@ static struct cftype cgroup_base_files[] = {
 		.flags = CFTYPE_NOT_ON_ROOT,
 		.seq_show = cpu_stat_show,
 	},
+#ifdef CONFIG_PSI
+	{
+		.name = "io.pressure",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = cgroup_io_pressure_show,
+	},
+	{
+		.name = "memory.pressure",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = cgroup_memory_pressure_show,
+	},
+	{
+		.name = "cpu.pressure",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = cgroup_cpu_pressure_show,
+	},
+#endif
 	{ }	/* terminate */
 };
 
@@ -4636,6 +4669,7 @@ static void css_free_rwork_fn(struct work_struct *work)
 			 */
 			cgroup_put(cgroup_parent(cgrp));
 			kernfs_put(cgrp->kn);
+			psi_cgroup_free(cgrp);
 			if (cgroup_on_dfl(cgrp))
 				cgroup_rstat_exit(cgrp);
 			kfree(cgrp);
@@ -4892,10 +4926,15 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 	cgrp->self.parent = &parent->self;
 	cgrp->root = root;
 	cgrp->level = level;
-	ret = cgroup_bpf_inherit(cgrp);
+
+	ret = psi_cgroup_alloc(cgrp);
 	if (ret)
 		goto out_idr_free;
 
+	ret = cgroup_bpf_inherit(cgrp);
+	if (ret)
+		goto out_psi_free;
+
 	for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
 		cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
 
@@ -4933,6 +4972,8 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
 
 	return cgrp;
 
+out_psi_free:
+	psi_cgroup_free(cgrp);
 out_idr_free:
 	cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
 out_stat_exit:
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 595414599b98..7cdecfc010af 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -473,9 +473,35 @@ static void psi_group_change(struct psi_group *group, int cpu,
 		schedule_delayed_work(&group->clock_work, PSI_FREQ);
 }
 
+static struct psi_group *iterate_groups(struct task_struct *task, void **iter)
+{
+#ifdef CONFIG_CGROUPS
+	struct cgroup *cgroup = NULL;
+
+	if (!*iter)
+		cgroup = task->cgroups->dfl_cgrp;
+	else if (*iter == &psi_system)
+		return NULL;
+	else
+		cgroup = cgroup_parent(*iter);
+
+	if (cgroup && cgroup_parent(cgroup)) {
+		*iter = cgroup;
+		return cgroup_psi(cgroup);
+	}
+#else
+	if (*iter)
+		return NULL;
+#endif
+	*iter = &psi_system;
+	return &psi_system;
+}
+
 void psi_task_change(struct task_struct *task, int clear, int set)
 {
 	int cpu = task_cpu(task);
+	struct psi_group *group;
+	void *iter = NULL;
 
 	if (!task->pid)
 		return;
@@ -492,17 +518,23 @@ void psi_task_change(struct task_struct *task, int clear, int set)
 	task->psi_flags &= ~clear;
 	task->psi_flags |= set;
 
-	psi_group_change(&psi_system, cpu, clear, set);
+	while ((group = iterate_groups(task, &iter)))
+		psi_group_change(group, cpu, clear, set);
 }
 
 void psi_memstall_tick(struct task_struct *task, int cpu)
 {
-	struct psi_group_cpu *groupc;
+	struct psi_group *group;
+	void *iter = NULL;
 
-	groupc = per_cpu_ptr(psi_system.pcpu, cpu);
-	write_seqcount_begin(&groupc->seq);
-	record_times(groupc, cpu, true);
-	write_seqcount_end(&groupc->seq);
+	while ((group = iterate_groups(task, &iter))) {
+		struct psi_group_cpu *groupc;
+
+		groupc = per_cpu_ptr(group->pcpu, cpu);
+		write_seqcount_begin(&groupc->seq);
+		record_times(groupc, cpu, true);
+		write_seqcount_end(&groupc->seq);
+	}
 }
 
 /**
@@ -565,8 +597,78 @@ void psi_memstall_leave(unsigned long *flags)
 	rq_unlock_irq(rq, &rf);
 }
 
-static int psi_show(struct seq_file *m, struct psi_group *group,
-		    enum psi_res res)
+#ifdef CONFIG_CGROUPS
+int psi_cgroup_alloc(struct cgroup *cgroup)
+{
+	if (psi_disabled)
+		return 0;
+
+	cgroup->psi.pcpu = alloc_percpu(struct psi_group_cpu);
+	if (!cgroup->psi.pcpu)
+		return -ENOMEM;
+	group_init(&cgroup->psi);
+	return 0;
+}
+
+void psi_cgroup_free(struct cgroup *cgroup)
+{
+	if (psi_disabled)
+		return;
+
+	cancel_delayed_work_sync(&cgroup->psi.clock_work);
+	free_percpu(cgroup->psi.pcpu);
+}
+
+/**
+ * cgroup_move_task - move task to a different cgroup
+ * @task: the task
+ * @to: the target css_set
+ *
+ * Move task to a new cgroup and safely migrate its associated stall
+ * state between the different groups.
+ *
+ * This function acquires the task's rq lock to lock out concurrent
+ * changes to the task's scheduling state and - in case the task is
+ * running - concurrent changes to its stall state.
+ */
+void cgroup_move_task(struct task_struct *task, struct css_set *to)
+{
+	bool move_psi = !psi_disabled;
+	unsigned int task_flags = 0;
+	struct rq_flags rf;
+	struct rq *rq;
+
+	if (move_psi) {
+		rq = task_rq_lock(task, &rf);
+
+		if (task_on_rq_queued(task))
+			task_flags = TSK_RUNNING;
+		else if (task->in_iowait)
+			task_flags = TSK_IOWAIT;
+
+		if (task->flags & PF_MEMSTALL)
+			task_flags |= TSK_MEMSTALL;
+
+		if (task_flags)
+			psi_task_change(task, task_flags, 0);
+	}
+
+	/*
+	 * Lame to do this here, but the scheduler cannot be locked
+	 * from the outside, so we move cgroups from inside sched/.
+	 */
+	rcu_assign_pointer(task->cgroups, to);
+
+	if (move_psi) {
+		if (task_flags)
+			psi_task_change(task, 0, task_flags);
+
+		task_rq_unlock(rq, task, &rf);
+	}
+}
+#endif /* CONFIG_CGROUPS */
+
+int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
 {
 	int full;
 
-- 
cgit v1.2.3


From 68d48e6a2df575b935edd420396c3cb8b6aa6ad3 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Fri, 26 Oct 2018 15:06:39 -0700
Subject: mm: workingset: add vmstat counter for shadow nodes

Make it easier to catch bugs in the shadow node shrinker by adding a
counter for the shadow nodes in circulation.

[akpm@linux-foundation.org: assert that irqs are disabled, for __inc_lruvec_page_state()]
[akpm@linux-foundation.org: s/WARN_ON_ONCE/VM_WARN_ON_ONCE/, per Johannes]
Link: http://lkml.kernel.org/r/20181009184732.762-4-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mmzone.h |  1 +
 mm/vmstat.c            |  1 +
 mm/workingset.c        | 14 ++++++++++++--
 3 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ba51d5bf7af1..9f0caccd5833 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -161,6 +161,7 @@ enum node_stat_item {
 	NR_SLAB_UNRECLAIMABLE,
 	NR_ISOLATED_ANON,	/* Temporary isolated pages from anon lru */
 	NR_ISOLATED_FILE,	/* Temporary isolated pages from file lru */
+	WORKINGSET_NODES,
 	WORKINGSET_REFAULT,
 	WORKINGSET_ACTIVATE,
 	WORKINGSET_RESTORE,
diff --git a/mm/vmstat.c b/mm/vmstat.c
index d918f6192d15..dab53430f63c 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1143,6 +1143,7 @@ const char * const vmstat_text[] = {
 	"nr_slab_unreclaimable",
 	"nr_isolated_anon",
 	"nr_isolated_file",
+	"workingset_nodes",
 	"workingset_refault",
 	"workingset_activate",
 	"workingset_restore",
diff --git a/mm/workingset.c b/mm/workingset.c
index 5a72c9d5e195..7e6ef312cea5 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -377,12 +377,20 @@ void workingset_update_node(struct radix_tree_node *node)
 	 * already where they should be. The list_empty() test is safe
 	 * as node->private_list is protected by the i_pages lock.
 	 */
+	VM_WARN_ON_ONCE(!irqs_disabled());  /* For __inc_lruvec_page_state */
+
 	if (node->count && node->count == node->exceptional) {
-		if (list_empty(&node->private_list))
+		if (list_empty(&node->private_list)) {
 			list_lru_add(&shadow_nodes, &node->private_list);
+			__inc_lruvec_page_state(virt_to_page(node),
+						WORKINGSET_NODES);
+		}
 	} else {
-		if (!list_empty(&node->private_list))
+		if (!list_empty(&node->private_list)) {
 			list_lru_del(&shadow_nodes, &node->private_list);
+			__dec_lruvec_page_state(virt_to_page(node),
+						WORKINGSET_NODES);
+		}
 	}
 }
 
@@ -473,6 +481,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 	}
 
 	list_lru_isolate(lru, item);
+	__dec_lruvec_page_state(virt_to_page(node), WORKINGSET_NODES);
+
 	spin_unlock(lru_lock);
 
 	/*
-- 
cgit v1.2.3


From 85cfb245060e45640fa3447f8b0bad5e8bd3bdaf Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Fri, 26 Oct 2018 15:07:41 -0700
Subject: memcg: remove memcg_kmem_skip_account

The flag memcg_kmem_skip_account was added during the era of opt-out kmem
accounting.  There is no need for such flag in the opt-in world as there
aren't any __GFP_ACCOUNT allocations within memcg_create_cache_enqueue().

Link: http://lkml.kernel.org/r/20180919004501.178023-1-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Greg Thelen <gthelen@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  3 ---
 mm/memcontrol.c       | 24 +-----------------------
 2 files changed, 1 insertion(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b8fcc6b3080c..8f8a5418b627 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -724,9 +724,6 @@ struct task_struct {
 #endif
 #ifdef CONFIG_MEMCG
 	unsigned			in_user_fault:1;
-#ifdef CONFIG_MEMCG_KMEM
-	unsigned			memcg_kmem_skip_account:1;
-#endif
 #endif
 #ifdef CONFIG_COMPAT_BRK
 	unsigned			brk_randomized:1;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0e9ede617b89..645ede7ad1b2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2460,7 +2460,7 @@ static void memcg_kmem_cache_create_func(struct work_struct *w)
 /*
  * Enqueue the creation of a per-memcg kmem_cache.
  */
-static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
+static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
 					       struct kmem_cache *cachep)
 {
 	struct memcg_kmem_cache_create_work *cw;
@@ -2478,25 +2478,6 @@ static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
 	queue_work(memcg_kmem_cache_wq, &cw->work);
 }
 
-static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
-					     struct kmem_cache *cachep)
-{
-	/*
-	 * We need to stop accounting when we kmalloc, because if the
-	 * corresponding kmalloc cache is not yet created, the first allocation
-	 * in __memcg_schedule_kmem_cache_create will recurse.
-	 *
-	 * However, it is better to enclose the whole function. Depending on
-	 * the debugging options enabled, INIT_WORK(), for instance, can
-	 * trigger an allocation. This too, will make us recurse. Because at
-	 * this point we can't allow ourselves back into memcg_kmem_get_cache,
-	 * the safest choice is to do it like this, wrapping the whole function.
-	 */
-	current->memcg_kmem_skip_account = 1;
-	__memcg_schedule_kmem_cache_create(memcg, cachep);
-	current->memcg_kmem_skip_account = 0;
-}
-
 static inline bool memcg_kmem_bypass(void)
 {
 	if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
@@ -2531,9 +2512,6 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
 	if (memcg_kmem_bypass())
 		return cachep;
 
-	if (current->memcg_kmem_skip_account)
-		return cachep;
-
 	memcg = get_mem_cgroup_from_current();
 	kmemcg_id = READ_ONCE(memcg->kmemcg_id);
 	if (kmemcg_id < 0)
-- 
cgit v1.2.3


From f682a97a00591def7cefbb5003dc04045028e405 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Date: Fri, 26 Oct 2018 15:07:45 -0700
Subject: mm: provide kernel parameter to allow disabling page init poisoning

Patch series "Address issues slowing persistent memory initialization", v5.

The main thing this patch set achieves is that it allows us to initialize
each node worth of persistent memory independently.  As a result we reduce
page init time by about 2 minutes because instead of taking 30 to 40
seconds per node and going through each node one at a time, we process all
4 nodes in parallel in the case of a 12TB persistent memory setup spread
evenly over 4 nodes.

This patch (of 3):

On systems with a large amount of memory it can take a significant amount
of time to initialize all of the page structs with the PAGE_POISON_PATTERN
value.  I have seen it take over 2 minutes to initialize a system with
over 12TB of RAM.

In order to work around the issue I had to disable CONFIG_DEBUG_VM and
then the boot time returned to something much more reasonable as the
arch_add_memory call completed in milliseconds versus seconds.  However in
doing that I had to disable all of the other VM debugging on the system.

In order to work around a kernel that might have CONFIG_DEBUG_VM enabled
on a system that has a large amount of memory I have added a new kernel
parameter named "vm_debug" that can be set to "-" in order to disable it.

Link: http://lkml.kernel.org/r/20180925201921.3576.84239.stgit@localhost.localdomain
Reviewed-by: Pavel Tatashin <pavel.tatashin@microsoft.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 12 +++++++
 include/linux/page-flags.h                      |  8 +++++
 mm/debug.c                                      | 46 +++++++++++++++++++++++++
 mm/memblock.c                                   |  5 ++-
 mm/sparse.c                                     |  4 +--
 5 files changed, 69 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 8022d902e770..dcd082576e79 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4839,6 +4839,18 @@
 			This is actually a boot loader parameter; the value is
 			passed to the kernel using a special protocol.
 
+	vm_debug[=options]	[KNL] Available with CONFIG_DEBUG_VM=y.
+			May slow down system boot speed, especially when
+			enabled on systems with a large amount of memory.
+			All options are enabled by default, and this
+			interface is meant to allow for selectively
+			enabling or disabling specific virtual memory
+			debugging features.
+
+			Available options are:
+			  P	Enable page structure init time poisoning
+			  -	Disable all of the above options
+
 	vmalloc=nn[KMG]	[KNL,BOOT] Forces the vmalloc area to have an exact
 			size of <nn>. This can be used to increase the
 			minimum size (128MB on x86). It can also be used to
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 4d99504f6496..934f91ef3f54 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -163,6 +163,14 @@ static inline int PagePoisoned(const struct page *page)
 	return page->flags == PAGE_POISON_PATTERN;
 }
 
+#ifdef CONFIG_DEBUG_VM
+void page_init_poison(struct page *page, size_t size);
+#else
+static inline void page_init_poison(struct page *page, size_t size)
+{
+}
+#endif
+
 /*
  * Page flags policies wrt compound pages
  *
diff --git a/mm/debug.c b/mm/debug.c
index bd10aad8539a..cdacba12e09a 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -13,6 +13,7 @@
 #include <trace/events/mmflags.h>
 #include <linux/migrate.h>
 #include <linux/page_owner.h>
+#include <linux/ctype.h>
 
 #include "internal.h"
 
@@ -175,4 +176,49 @@ void dump_mm(const struct mm_struct *mm)
 	);
 }
 
+static bool page_init_poisoning __read_mostly = true;
+
+static int __init setup_vm_debug(char *str)
+{
+	bool __page_init_poisoning = true;
+
+	/*
+	 * Calling vm_debug with no arguments is equivalent to requesting
+	 * to enable all debugging options we can control.
+	 */
+	if (*str++ != '=' || !*str)
+		goto out;
+
+	__page_init_poisoning = false;
+	if (*str == '-')
+		goto out;
+
+	while (*str) {
+		switch (tolower(*str)) {
+		case'p':
+			__page_init_poisoning = true;
+			break;
+		default:
+			pr_err("vm_debug option '%c' unknown. skipped\n",
+			       *str);
+		}
+
+		str++;
+	}
+out:
+	if (page_init_poisoning && !__page_init_poisoning)
+		pr_warn("Page struct poisoning disabled by kernel command line option 'vm_debug'\n");
+
+	page_init_poisoning = __page_init_poisoning;
+
+	return 1;
+}
+__setup("vm_debug", setup_vm_debug);
+
+void page_init_poison(struct page *page, size_t size)
+{
+	if (page_init_poisoning)
+		memset(page, PAGE_POISON_PATTERN, size);
+}
+EXPORT_SYMBOL_GPL(page_init_poison);
 #endif		/* CONFIG_DEBUG_VM */
diff --git a/mm/memblock.c b/mm/memblock.c
index 237944479d25..a85315083b5a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1444,10 +1444,9 @@ void * __init memblock_virt_alloc_try_nid_raw(
 
 	ptr = memblock_virt_alloc_internal(size, align,
 					   min_addr, max_addr, nid);
-#ifdef CONFIG_DEBUG_VM
 	if (ptr && size > 0)
-		memset(ptr, PAGE_POISON_PATTERN, size);
-#endif
+		page_init_poison(ptr, size);
+
 	return ptr;
 }
 
diff --git a/mm/sparse.c b/mm/sparse.c
index 10b07eea9a6e..67ad061f7fb8 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -696,13 +696,11 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat,
 		goto out;
 	}
 
-#ifdef CONFIG_DEBUG_VM
 	/*
 	 * Poison uninitialized struct pages in order to catch invalid flags
 	 * combinations.
 	 */
-	memset(memmap, PAGE_POISON_PATTERN, sizeof(struct page) * PAGES_PER_SECTION);
-#endif
+	page_init_poison(memmap, sizeof(struct page) * PAGES_PER_SECTION);
 
 	section_mark_present(ms);
 	sparse_init_one_section(ms, section_nr, memmap, usemap);
-- 
cgit v1.2.3


From d483da5bc78b86fe4200d2947f193a745f711713 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Date: Fri, 26 Oct 2018 15:07:48 -0700
Subject: mm: create non-atomic version of SetPageReserved for init use

It doesn't make much sense to use the atomic SetPageReserved at init time
when we are using memset to clear the memory and manipulating the page
flags via simple "&=" and "|=" operations in __init_single_page.

This patch adds a non-atomic version __SetPageReserved that can be used
during page init and shows about a 10% improvement in initialization times
on the systems I have available for testing.  On those systems I saw
initialization times drop from around 35 seconds to around 32 seconds to
initialize a 3TB block of persistent memory.  I believe the main advantage
of this is that it allows for more compiler optimization as the __set_bit
operation can be reordered whereas the atomic version cannot.

I tried adding a bit of documentation based on f1dd2cd13c4 ("mm,
memory_hotplug: do not associate hotadded memory to zones until online").

Ideally the reserved flag should be set earlier since there is a brief
window where the page is initialization via __init_single_page and we have
not set the PG_Reserved flag.  I'm leaving that for a future patch set as
that will require a more significant refactor.

Link: http://lkml.kernel.org/r/20180925202018.3576.11607.stgit@localhost.localdomain
Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Reviewed-by: Pavel Tatashin <pavel.tatashin@microsoft.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-flags.h | 1 +
 mm/page_alloc.c            | 9 +++++++--
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 934f91ef3f54..50ce1bddaf56 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -303,6 +303,7 @@ PAGEFLAG(Foreign, foreign, PF_NO_COMPOUND);
 
 PAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
 	__CLEARPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
+	__SETPAGEFLAG(Reserved, reserved, PF_NO_COMPOUND)
 PAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL)
 	__CLEARPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL)
 	__SETPAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index eb6c50cc8880..cee1abf85d72 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1232,7 +1232,12 @@ void __meminit reserve_bootmem_region(phys_addr_t start, phys_addr_t end)
 			/* Avoid false-positive PageTail() */
 			INIT_LIST_HEAD(&page->lru);
 
-			SetPageReserved(page);
+			/*
+			 * no need for atomic set_bit because the struct
+			 * page is not visible yet so nobody should
+			 * access it yet.
+			 */
+			__SetPageReserved(page);
 		}
 	}
 }
@@ -5508,7 +5513,7 @@ not_early:
 		page = pfn_to_page(pfn);
 		__init_single_page(page, pfn, zone, nid);
 		if (context == MEMMAP_HOTPLUG)
-			SetPageReserved(page);
+			__SetPageReserved(page);
 
 		/*
 		 * Mark the block movable so that blocks are reserved for
-- 
cgit v1.2.3


From 966cf44f637e6aeea7e3d01ba004bf8b5beac78f Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Date: Fri, 26 Oct 2018 15:07:52 -0700
Subject: mm: defer ZONE_DEVICE page initialization to the point where we init
 pgmap

The ZONE_DEVICE pages were being initialized in two locations.  One was
with the memory_hotplug lock held and another was outside of that lock.
The problem with this is that it was nearly doubling the memory
initialization time.  Instead of doing this twice, once while holding a
global lock and once without, I am opting to defer the initialization to
the one outside of the lock.  This allows us to avoid serializing the
overhead for memory init and we can instead focus on per-node init times.

One issue I encountered is that devm_memremap_pages and
hmm_devmmem_pages_create were initializing only the pgmap field the same
way.  One wasn't initializing hmm_data, and the other was initializing it
to a poison value.  Since this is something that is exposed to the driver
in the case of hmm I am opting for a third option and just initializing
hmm_data to 0 since this is going to be exposed to unknown third party
drivers.

[alexander.h.duyck@linux.intel.com: fix reference count for pgmap in devm_memremap_pages]
  Link: http://lkml.kernel.org/r/20181008233404.1909.37302.stgit@localhost.localdomain
Link: http://lkml.kernel.org/r/20180925202053.3576.66039.stgit@localhost.localdomain
Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Reviewed-by: Pavel Tatashin <pavel.tatashin@microsoft.com>
Tested-by: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  2 ++
 kernel/memremap.c  | 25 ++++++---------
 mm/hmm.c           | 12 ++++---
 mm/page_alloc.c    | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 108 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 737279bb479c..33228a49d7d2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -848,6 +848,8 @@ static inline bool is_zone_device_page(const struct page *page)
 {
 	return page_zonenum(page) == ZONE_DEVICE;
 }
+extern void memmap_init_zone_device(struct zone *, unsigned long,
+				    unsigned long, struct dev_pagemap *);
 #else
 static inline bool is_zone_device_page(const struct page *page)
 {
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 5b8600d39931..620fc4d2559a 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -175,10 +175,10 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	struct vmem_altmap *altmap = pgmap->altmap_valid ?
 			&pgmap->altmap : NULL;
 	struct resource *res = &pgmap->res;
-	unsigned long pfn, pgoff, order;
+	struct dev_pagemap *conflict_pgmap;
 	pgprot_t pgprot = PAGE_KERNEL;
+	unsigned long pgoff, order;
 	int error, nid, is_ram;
-	struct dev_pagemap *conflict_pgmap;
 
 	align_start = res->start & ~(SECTION_SIZE - 1);
 	align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
@@ -256,19 +256,14 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 	if (error)
 		goto err_add_memory;
 
-	for_each_device_pfn(pfn, pgmap) {
-		struct page *page = pfn_to_page(pfn);
-
-		/*
-		 * ZONE_DEVICE pages union ->lru with a ->pgmap back
-		 * pointer.  It is a bug if a ZONE_DEVICE page is ever
-		 * freed or placed on a driver-private list.  Seed the
-		 * storage with LIST_POISON* values.
-		 */
-		list_del(&page->lru);
-		page->pgmap = pgmap;
-		percpu_ref_get(pgmap->ref);
-	}
+	/*
+	 * Initialization of the pages has been deferred until now in order
+	 * to allow us to do the work while not holding the hotplug lock.
+	 */
+	memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
+				align_start >> PAGE_SHIFT,
+				align_size >> PAGE_SHIFT, pgmap);
+	percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap));
 
 	devm_add_action(dev, devm_memremap_pages_release, pgmap);
 
diff --git a/mm/hmm.c b/mm/hmm.c
index c968e49f7a0c..774d684fa2b4 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -1024,7 +1024,6 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
 	resource_size_t key, align_start, align_size, align_end;
 	struct device *device = devmem->device;
 	int ret, nid, is_ram;
-	unsigned long pfn;
 
 	align_start = devmem->resource->start & ~(PA_SECTION_SIZE - 1);
 	align_size = ALIGN(devmem->resource->start +
@@ -1109,11 +1108,14 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
 				align_size >> PAGE_SHIFT, NULL);
 	mem_hotplug_done();
 
-	for (pfn = devmem->pfn_first; pfn < devmem->pfn_last; pfn++) {
-		struct page *page = pfn_to_page(pfn);
+	/*
+	 * Initialization of the pages has been deferred until now in order
+	 * to allow us to do the work while not holding the hotplug lock.
+	 */
+	memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
+				align_start >> PAGE_SHIFT,
+				align_size >> PAGE_SHIFT, &devmem->pagemap);
 
-		page->pgmap = &devmem->pagemap;
-	}
 	return 0;
 
 error_add_memory:
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cee1abf85d72..d73ff2188d72 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5465,12 +5465,23 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 	if (highest_memmap_pfn < end_pfn - 1)
 		highest_memmap_pfn = end_pfn - 1;
 
+#ifdef CONFIG_ZONE_DEVICE
 	/*
 	 * Honor reservation requested by the driver for this ZONE_DEVICE
-	 * memory
+	 * memory. We limit the total number of pages to initialize to just
+	 * those that might contain the memory mapping. We will defer the
+	 * ZONE_DEVICE page initialization until after we have released
+	 * the hotplug lock.
 	 */
-	if (altmap && start_pfn == altmap->base_pfn)
-		start_pfn += altmap->reserve;
+	if (zone == ZONE_DEVICE) {
+		if (!altmap)
+			return;
+
+		if (start_pfn == altmap->base_pfn)
+			start_pfn += altmap->reserve;
+		end_pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
+	}
+#endif
 
 	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
 		/*
@@ -5537,6 +5548,81 @@ not_early:
 	}
 }
 
+#ifdef CONFIG_ZONE_DEVICE
+void __ref memmap_init_zone_device(struct zone *zone,
+				   unsigned long start_pfn,
+				   unsigned long size,
+				   struct dev_pagemap *pgmap)
+{
+	unsigned long pfn, end_pfn = start_pfn + size;
+	struct pglist_data *pgdat = zone->zone_pgdat;
+	unsigned long zone_idx = zone_idx(zone);
+	unsigned long start = jiffies;
+	int nid = pgdat->node_id;
+
+	if (WARN_ON_ONCE(!pgmap || !is_dev_zone(zone)))
+		return;
+
+	/*
+	 * The call to memmap_init_zone should have already taken care
+	 * of the pages reserved for the memmap, so we can just jump to
+	 * the end of that region and start processing the device pages.
+	 */
+	if (pgmap->altmap_valid) {
+		struct vmem_altmap *altmap = &pgmap->altmap;
+
+		start_pfn = altmap->base_pfn + vmem_altmap_offset(altmap);
+		size = end_pfn - start_pfn;
+	}
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+		struct page *page = pfn_to_page(pfn);
+
+		__init_single_page(page, pfn, zone_idx, nid);
+
+		/*
+		 * Mark page reserved as it will need to wait for onlining
+		 * phase for it to be fully associated with a zone.
+		 *
+		 * We can use the non-atomic __set_bit operation for setting
+		 * the flag as we are still initializing the pages.
+		 */
+		__SetPageReserved(page);
+
+		/*
+		 * ZONE_DEVICE pages union ->lru with a ->pgmap back
+		 * pointer and hmm_data.  It is a bug if a ZONE_DEVICE
+		 * page is ever freed or placed on a driver-private list.
+		 */
+		page->pgmap = pgmap;
+		page->hmm_data = 0;
+
+		/*
+		 * Mark the block movable so that blocks are reserved for
+		 * movable at startup. This will force kernel allocations
+		 * to reserve their blocks rather than leaking throughout
+		 * the address space during boot when many long-lived
+		 * kernel allocations are made.
+		 *
+		 * bitmap is created for zone's valid pfn range. but memmap
+		 * can be created for invalid pages (for alignment)
+		 * check here not to call set_pageblock_migratetype() against
+		 * pfn out of zone.
+		 *
+		 * Please note that MEMMAP_HOTPLUG path doesn't clear memmap
+		 * because this is done early in sparse_add_one_section
+		 */
+		if (!(pfn & (pageblock_nr_pages - 1))) {
+			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+			cond_resched();
+		}
+	}
+
+	pr_info("%s initialised, %lu pages in %ums\n", dev_name(pgmap->dev),
+		size, jiffies_to_msecs(jiffies - start));
+}
+
+#endif
 static void __meminit zone_init_free_lists(struct zone *zone)
 {
 	unsigned int order, t;
-- 
cgit v1.2.3


From 85a06835f6f1ba79f0f00838ccd5ad840dd1eafb Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linux.alibaba.com>
Date: Fri, 26 Oct 2018 15:08:50 -0700
Subject: mm: mremap: downgrade mmap_sem to read when shrinking

Other than munmap, mremap might be used to shrink memory mapping too.
So, it may hold write mmap_sem for long time when shrinking large
mapping, as what commit ("mm: mmap: zap pages with read mmap_sem in
munmap") described.

The mremap() will not manipulate vmas anymore after __do_munmap() call for
the mapping shrink use case, so it is safe to downgrade to read mmap_sem.

So, the same optimization, which downgrades mmap_sem to read for zapping
pages, is also feasible and reasonable to this case.

The period of holding exclusive mmap_sem for shrinking large mapping
would be reduced significantly with this optimization.

MREMAP_FIXED and MREMAP_MAYMOVE are more complicated to adopt this
optimization since they need manipulate vmas after do_munmap(),
downgrading mmap_sem may create race window.

Simple mapping shrink is the low hanging fruit, and it may cover the
most cases of unmap with munmap together.

[akpm@linux-foundation.org: tweak comment]
[yang.shi@linux.alibaba.com: fix unsigned compare against 0 issue]
  Link: http://lkml.kernel.org/r/1538687672-17795-2-git-send-email-yang.shi@linux.alibaba.com
Link: http://lkml.kernel.org/r/1538067582-60038-1-git-send-email-yang.shi@linux.alibaba.com
Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Cc: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h |  2 ++
 mm/mmap.c          |  4 ++--
 mm/mremap.c        | 20 ++++++++++++++++----
 3 files changed, 20 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 33228a49d7d2..a023c5ce71fa 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2306,6 +2306,8 @@ extern unsigned long do_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot, unsigned long flags,
 	vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
 	struct list_head *uf);
+extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
+		       struct list_head *uf, bool downgrade);
 extern int do_munmap(struct mm_struct *, unsigned long, size_t,
 		     struct list_head *uf);
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 58e323c92c8e..1bfd12032664 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2687,8 +2687,8 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
  * work.  This now handles partial unmappings.
  * Jeremy Fitzhardinge <jeremy@goop.org>
  */
-static int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
-		       struct list_head *uf, bool downgrade)
+int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
+		struct list_head *uf, bool downgrade)
 {
 	unsigned long end;
 	struct vm_area_struct *vma, *prev, *last;
diff --git a/mm/mremap.c b/mm/mremap.c
index a9617e72e6b7..7f9f9180e401 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -521,6 +521,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	unsigned long ret = -EINVAL;
 	unsigned long charged = 0;
 	bool locked = false;
+	bool downgraded = false;
 	struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
 	LIST_HEAD(uf_unmap_early);
 	LIST_HEAD(uf_unmap);
@@ -557,12 +558,20 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	/*
 	 * Always allow a shrinking remap: that just unmaps
 	 * the unnecessary pages..
-	 * do_munmap does all the needed commit accounting
+	 * __do_munmap does all the needed commit accounting, and
+	 * downgrades mmap_sem to read if so directed.
 	 */
 	if (old_len >= new_len) {
-		ret = do_munmap(mm, addr+new_len, old_len - new_len, &uf_unmap);
-		if (ret && old_len != new_len)
+		int retval;
+
+		retval = __do_munmap(mm, addr+new_len, old_len - new_len,
+				  &uf_unmap, true);
+		if (retval < 0 && old_len != new_len) {
+			ret = retval;
 			goto out;
+		/* Returning 1 indicates mmap_sem is downgraded to read. */
+		} else if (retval == 1)
+			downgraded = true;
 		ret = addr;
 		goto out;
 	}
@@ -627,7 +636,10 @@ out:
 		vm_unacct_memory(charged);
 		locked = 0;
 	}
-	up_write(&current->mm->mmap_sem);
+	if (downgraded)
+		up_read(&current->mm->mmap_sem);
+	else
+		up_write(&current->mm->mmap_sem);
 	if (locked && new_len > old_len)
 		mm_populate(new_addr + old_len, new_len - old_len);
 	userfaultfd_unmap_complete(mm, &uf_unmap_early);
-- 
cgit v1.2.3


From cc4b8c794f476076c9ce19f43eb4d98dc4b5e155 Mon Sep 17 00:00:00 2001
From: Yang Shi <yang.shi@linux.alibaba.com>
Date: Fri, 26 Oct 2018 15:08:57 -0700
Subject: mm: dax: add comment for PFN_SPECIAL

The comment for PFN_SPECIAL is missed in pfn_t.h. Add comment to get
consistent with other pfn flags.

Link: http://lkml.kernel.org/r/1538086549-100536-1-git-send-email-yang.shi@linux.alibaba.com
Signed-off-by: Yang Shi <yang.shi@linux.alibaba.com>
Suggested-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/pfn_t.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h
index 673546ba7342..7bb77850c65a 100644
--- a/include/linux/pfn_t.h
+++ b/include/linux/pfn_t.h
@@ -9,6 +9,8 @@
  * PFN_SG_LAST - pfn references a page and is the last scatterlist entry
  * PFN_DEV - pfn is not covered by system memmap by default
  * PFN_MAP - pfn has a dynamic page mapping established by a device driver
+ * PFN_SPECIAL - for CONFIG_FS_DAX_LIMITED builds to allow XIP, but not
+ *		 get_user_pages
  */
 #define PFN_FLAGS_MASK (((u64) (~PAGE_MASK)) << (BITS_PER_LONG_LONG - PAGE_SHIFT))
 #define PFN_SG_CHAIN (1ULL << (BITS_PER_LONG_LONG - 1))
-- 
cgit v1.2.3


From 1c2d479a119b84feacbe4de782016f1bf1ad16dc Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Fri, 26 Oct 2018 15:09:28 -0700
Subject: mm/memcontrol.c: convert mem_cgroup_id::ref to refcount_t type

This will allow to use generic refcount_t interfaces to check counters
overflow instead of currently existing VM_BUG_ON().  The only difference
after the patch is VM_BUG_ON() may cause BUG(), while refcount_t fires
with WARN().  But this seems not to be significant here, since such the
problems are usually caught by syzbot with panic-on-warn enabled.

Link: http://lkml.kernel.org/r/153910718919.7006.13400779039257185427.stgit@localhost.localdomain
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Andrea Parri <andrea.parri@amarulasolutions.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  2 +-
 mm/memcontrol.c            | 10 ++++------
 2 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 4399cc3f00e4..7ab2120155a4 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -78,7 +78,7 @@ struct mem_cgroup_reclaim_cookie {
 
 struct mem_cgroup_id {
 	int id;
-	atomic_t ref;
+	refcount_t ref;
 };
 
 /*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 645ede7ad1b2..92d38c88250f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4299,14 +4299,12 @@ static void mem_cgroup_id_remove(struct mem_cgroup *memcg)
 
 static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
 {
-	VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0);
-	atomic_add(n, &memcg->id.ref);
+	refcount_add(n, &memcg->id.ref);
 }
 
 static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
 {
-	VM_BUG_ON(atomic_read(&memcg->id.ref) < n);
-	if (atomic_sub_and_test(n, &memcg->id.ref)) {
+	if (refcount_sub_and_test(n, &memcg->id.ref)) {
 		mem_cgroup_id_remove(memcg);
 
 		/* Memcg ID pins CSS */
@@ -4523,7 +4521,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	}
 
 	/* Online state pins memcg ID, memcg ID pins CSS */
-	atomic_set(&memcg->id.ref, 1);
+	refcount_set(&memcg->id.ref, 1);
 	css_get(css);
 	return 0;
 }
@@ -6357,7 +6355,7 @@ subsys_initcall(mem_cgroup_init);
 #ifdef CONFIG_MEMCG_SWAP
 static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
 {
-	while (!atomic_inc_not_zero(&memcg->id.ref)) {
+	while (!refcount_inc_not_zero(&memcg->id.ref)) {
 		/*
 		 * The root cgroup cannot be destroyed, so it's refcount must
 		 * always be >= 1.
-- 
cgit v1.2.3


From 907ec5fca3dc38d37737de826f06f25b063aa08e Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Fri, 26 Oct 2018 15:10:15 -0700
Subject: mm: zero remaining unavailable struct pages

Patch series "mm: Fix for movable_node boot option", v3.

This patch series contains a fix for the movable_node boot option issue
which was introduced by commit 124049decbb1 ("x86/e820: put !E820_TYPE_RAM
regions into memblock.reserved").

The commit breaks the option because it changed the memory gap range to
reserved memblock.  So, the node is marked as Normal zone even if the SRAT
has Hot pluggable affinity.

First and second patch fix the original issue which the commit tried to
fix, then revert the commit.

This patch (of 3):

There is a kernel panic that is triggered when reading /proc/kpageflags on
the kernel booted with kernel parameter 'memmap=nn[KMG]!ss[KMG]':

  BUG: unable to handle kernel paging request at fffffffffffffffe
  PGD 9b20e067 P4D 9b20e067 PUD 9b210067 PMD 0
  Oops: 0000 [#1] SMP PTI
  CPU: 2 PID: 1728 Comm: page-types Not tainted 4.17.0-rc6-mm1-v4.17-rc6-180605-0816-00236-g2dfb086ef02c+ #160
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.fc28 04/01/2014
  RIP: 0010:stable_page_flags+0x27/0x3c0
  Code: 00 00 00 0f 1f 44 00 00 48 85 ff 0f 84 a0 03 00 00 41 54 55 49 89 fc 53 48 8b 57 08 48 8b 2f 48 8d 42 ff 83 e2 01 48 0f 44 c7 <48> 8b 00 f6 c4 01 0f 84 10 03 00 00 31 db 49 8b 54 24 08 4c 89 e7
  RSP: 0018:ffffbbd44111fde0 EFLAGS: 00010202
  RAX: fffffffffffffffe RBX: 00007fffffffeff9 RCX: 0000000000000000
  RDX: 0000000000000001 RSI: 0000000000000202 RDI: ffffed1182fff5c0
  RBP: ffffffffffffffff R08: 0000000000000001 R09: 0000000000000001
  R10: ffffbbd44111fed8 R11: 0000000000000000 R12: ffffed1182fff5c0
  R13: 00000000000bffd7 R14: 0000000002fff5c0 R15: ffffbbd44111ff10
  FS:  00007efc4335a500(0000) GS:ffff93a5bfc00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: fffffffffffffffe CR3: 00000000b2a58000 CR4: 00000000001406e0
  Call Trace:
   kpageflags_read+0xc7/0x120
   proc_reg_read+0x3c/0x60
   __vfs_read+0x36/0x170
   vfs_read+0x89/0x130
   ksys_pread64+0x71/0x90
   do_syscall_64+0x5b/0x160
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  RIP: 0033:0x7efc42e75e23
  Code: 09 00 ba 9f 01 00 00 e8 ab 81 f4 ff 66 2e 0f 1f 84 00 00 00 00 00 90 83 3d 29 0a 2d 00 00 75 13 49 89 ca b8 11 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 db d3 01 00 48 89 04 24

According to kernel bisection, this problem became visible due to commit
f7f99100d8d9 which changes how struct pages are initialized.

Memblock layout affects the pfn ranges covered by node/zone.  Consider
that we have a VM with 2 NUMA nodes and each node has 4GB memory, and the
default (no memmap= given) memblock layout is like below:

  MEMBLOCK configuration:
   memory size = 0x00000001fff75c00 reserved size = 0x000000000300c000
   memory.cnt  = 0x4
   memory[0x0]     [0x0000000000001000-0x000000000009efff], 0x000000000009e000 bytes on node 0 flags: 0x0
   memory[0x1]     [0x0000000000100000-0x00000000bffd6fff], 0x00000000bfed7000 bytes on node 0 flags: 0x0
   memory[0x2]     [0x0000000100000000-0x000000013fffffff], 0x0000000040000000 bytes on node 0 flags: 0x0
   memory[0x3]     [0x0000000140000000-0x000000023fffffff], 0x0000000100000000 bytes on node 1 flags: 0x0
   ...

If you give memmap=1G!4G (so it just covers memory[0x2]),
the range [0x100000000-0x13fffffff] is gone:

  MEMBLOCK configuration:
   memory size = 0x00000001bff75c00 reserved size = 0x000000000300c000
   memory.cnt  = 0x3
   memory[0x0]     [0x0000000000001000-0x000000000009efff], 0x000000000009e000 bytes on node 0 flags: 0x0
   memory[0x1]     [0x0000000000100000-0x00000000bffd6fff], 0x00000000bfed7000 bytes on node 0 flags: 0x0
   memory[0x2]     [0x0000000140000000-0x000000023fffffff], 0x0000000100000000 bytes on node 1 flags: 0x0
   ...

This causes shrinking node 0's pfn range because it is calculated by the
address range of memblock.memory.  So some of struct pages in the gap
range are left uninitialized.

We have a function zero_resv_unavail() which does zeroing the struct pages
outside memblock.memory, but currently it covers only the reserved
unavailable range (i.e.  memblock.memory && !memblock.reserved).  This
patch extends it to cover all unavailable range, which fixes the reported
issue.

Link: http://lkml.kernel.org/r/20181002143821.5112-2-msys.mizuma@gmail.com
Fixes: f7f99100d8d9 ("mm: stop zeroing memory during allocation in vmemmap")
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Tested-by: Oscar Salvador <osalvador@suse.de>
Tested-by: Masayoshi Mizuma <m.mizuma@jp.fujitsu.com>
Reviewed-by: Pavel Tatashin <pavel.tatashin@microsoft.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memblock.h | 15 ---------------
 mm/page_alloc.c          | 36 +++++++++++++++++++++++++-----------
 2 files changed, 25 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 516920549378..2acdd046df2d 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -265,21 +265,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
 	for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved,	\
 			       nid, flags, p_start, p_end, p_nid)
 
-/**
- * for_each_resv_unavail_range - iterate through reserved and unavailable memory
- * @i: u64 used as loop variable
- * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
- * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
- *
- * Walks over unavailable but reserved (reserved && !memory) areas of memblock.
- * Available as soon as memblock is initialized.
- * Note: because this memory does not belong to any physical node, flags and
- * nid arguments do not make sense and thus not exported as arguments.
- */
-#define for_each_resv_unavail_range(i, p_start, p_end)			\
-	for_each_mem_range(i, &memblock.reserved, &memblock.memory,	\
-			   NUMA_NO_NODE, MEMBLOCK_NONE, p_start, p_end, NULL)
-
 static inline void memblock_set_region_flags(struct memblock_region *r,
 					     enum memblock_flags flags)
 {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c26d3152f9ba..6d863c5afa08 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6515,29 +6515,42 @@ void __init free_area_init_node(int nid, unsigned long *zones_size,
  * struct pages which are reserved in memblock allocator and their fields
  * may be accessed (for example page_to_pfn() on some configuration accesses
  * flags). We must explicitly zero those struct pages.
+ *
+ * This function also addresses a similar issue where struct pages are left
+ * uninitialized because the physical address range is not covered by
+ * memblock.memory or memblock.reserved. That could happen when memblock
+ * layout is manually configured via memmap=.
  */
 void __init zero_resv_unavail(void)
 {
 	phys_addr_t start, end;
 	unsigned long pfn;
 	u64 i, pgcnt;
+	phys_addr_t next = 0;
 
 	/*
-	 * Loop through ranges that are reserved, but do not have reported
-	 * physical memory backing.
+	 * Loop through unavailable ranges not covered by memblock.memory.
 	 */
 	pgcnt = 0;
-	for_each_resv_unavail_range(i, &start, &end) {
-		for (pfn = PFN_DOWN(start); pfn < PFN_UP(end); pfn++) {
-			if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
-				pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
-					+ pageblock_nr_pages - 1;
-				continue;
+	for_each_mem_range(i, &memblock.memory, NULL,
+			NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end, NULL) {
+		if (next < start) {
+			for (pfn = PFN_DOWN(next); pfn < PFN_UP(start); pfn++) {
+				if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages)))
+					continue;
+				mm_zero_struct_page(pfn_to_page(pfn));
+				pgcnt++;
 			}
-			mm_zero_struct_page(pfn_to_page(pfn));
-			pgcnt++;
 		}
+		next = end;
 	}
+	for (pfn = PFN_DOWN(next); pfn < max_pfn; pfn++) {
+		if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages)))
+			continue;
+		mm_zero_struct_page(pfn_to_page(pfn));
+		pgcnt++;
+	}
+
 
 	/*
 	 * Struct pages that do not have backing memory. This could be because
@@ -6547,7 +6560,8 @@ void __init zero_resv_unavail(void)
 	 * this code can be removed.
 	 */
 	if (pgcnt)
-		pr_info("Reserved but unavailable: %lld pages", pgcnt);
+		pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt);
+
 }
 #endif /* CONFIG_HAVE_MEMBLOCK && !CONFIG_FLAT_NODE_MEM_MAP */
 
-- 
cgit v1.2.3


From df06b37ffe5a442503b7095b77b0a970df515459 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Fri, 26 Oct 2018 15:10:28 -0700
Subject: mm/gup: cache dev_pagemap while pinning pages

Getting pages from ZONE_DEVICE memory needs to check the backing device's
live-ness, which is tracked in the device's dev_pagemap metadata.  This
metadata is stored in a radix tree and looking it up adds measurable
software overhead.

This patch avoids repeating this relatively costly operation when
dev_pagemap is used by caching the last dev_pagemap while getting user
pages.  The gup_benchmark kernel self test reports this reduces time to
get user pages to as low as 1/3 of the previous time.

Link: http://lkml.kernel.org/r/20181012173040.15669-1-keith.busch@intel.com
Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/huge_mm.h |   8 ++--
 include/linux/mm.h      |  12 +-----
 mm/gup.c                | 110 ++++++++++++++++++++++++++++--------------------
 mm/huge_memory.c        |  16 +++----
 mm/nommu.c              |   6 +--
 5 files changed, 79 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index fdcb45999b26..4663ee96cf59 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -213,9 +213,9 @@ static inline int hpage_nr_pages(struct page *page)
 }
 
 struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
-		pmd_t *pmd, int flags);
+		pmd_t *pmd, int flags, struct dev_pagemap **pgmap);
 struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
-		pud_t *pud, int flags);
+		pud_t *pud, int flags, struct dev_pagemap **pgmap);
 
 extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
 
@@ -344,13 +344,13 @@ static inline void mm_put_huge_zero_page(struct mm_struct *mm)
 }
 
 static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
-		unsigned long addr, pmd_t *pmd, int flags)
+	unsigned long addr, pmd_t *pmd, int flags, struct dev_pagemap **pgmap)
 {
 	return NULL;
 }
 
 static inline struct page *follow_devmap_pud(struct vm_area_struct *vma,
-		unsigned long addr, pud_t *pud, int flags)
+	unsigned long addr, pud_t *pud, int flags, struct dev_pagemap **pgmap)
 {
 	return NULL;
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a023c5ce71fa..1e52b8fd1685 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2536,16 +2536,8 @@ static inline vm_fault_t vmf_error(int err)
 	return VM_FAULT_SIGBUS;
 }
 
-struct page *follow_page_mask(struct vm_area_struct *vma,
-			      unsigned long address, unsigned int foll_flags,
-			      unsigned int *page_mask);
-
-static inline struct page *follow_page(struct vm_area_struct *vma,
-		unsigned long address, unsigned int foll_flags)
-{
-	unsigned int unused_page_mask;
-	return follow_page_mask(vma, address, foll_flags, &unused_page_mask);
-}
+struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
+			 unsigned int foll_flags);
 
 #define FOLL_WRITE	0x01	/* check pte is writable */
 #define FOLL_TOUCH	0x02	/* mark page accessed */
diff --git a/mm/gup.c b/mm/gup.c
index 08eb350e0f35..841d7ef53591 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -20,6 +20,11 @@
 
 #include "internal.h"
 
+struct follow_page_context {
+	struct dev_pagemap *pgmap;
+	unsigned int page_mask;
+};
+
 static struct page *no_page_table(struct vm_area_struct *vma,
 		unsigned int flags)
 {
@@ -71,10 +76,10 @@ static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
 }
 
 static struct page *follow_page_pte(struct vm_area_struct *vma,
-		unsigned long address, pmd_t *pmd, unsigned int flags)
+		unsigned long address, pmd_t *pmd, unsigned int flags,
+		struct dev_pagemap **pgmap)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	struct dev_pagemap *pgmap = NULL;
 	struct page *page;
 	spinlock_t *ptl;
 	pte_t *ptep, pte;
@@ -116,8 +121,8 @@ retry:
 		 * Only return device mapping pages in the FOLL_GET case since
 		 * they are only valid while holding the pgmap reference.
 		 */
-		pgmap = get_dev_pagemap(pte_pfn(pte), NULL);
-		if (pgmap)
+		*pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap);
+		if (*pgmap)
 			page = pte_page(pte);
 		else
 			goto no_page;
@@ -152,15 +157,8 @@ retry:
 		goto retry;
 	}
 
-	if (flags & FOLL_GET) {
+	if (flags & FOLL_GET)
 		get_page(page);
-
-		/* drop the pgmap reference now that we hold the page */
-		if (pgmap) {
-			put_dev_pagemap(pgmap);
-			pgmap = NULL;
-		}
-	}
 	if (flags & FOLL_TOUCH) {
 		if ((flags & FOLL_WRITE) &&
 		    !pte_dirty(pte) && !PageDirty(page))
@@ -210,7 +208,8 @@ no_page:
 
 static struct page *follow_pmd_mask(struct vm_area_struct *vma,
 				    unsigned long address, pud_t *pudp,
-				    unsigned int flags, unsigned int *page_mask)
+				    unsigned int flags,
+				    struct follow_page_context *ctx)
 {
 	pmd_t *pmd, pmdval;
 	spinlock_t *ptl;
@@ -258,13 +257,13 @@ retry:
 	}
 	if (pmd_devmap(pmdval)) {
 		ptl = pmd_lock(mm, pmd);
-		page = follow_devmap_pmd(vma, address, pmd, flags);
+		page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
 		spin_unlock(ptl);
 		if (page)
 			return page;
 	}
 	if (likely(!pmd_trans_huge(pmdval)))
-		return follow_page_pte(vma, address, pmd, flags);
+		return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
 
 	if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
 		return no_page_table(vma, flags);
@@ -284,7 +283,7 @@ retry_locked:
 	}
 	if (unlikely(!pmd_trans_huge(*pmd))) {
 		spin_unlock(ptl);
-		return follow_page_pte(vma, address, pmd, flags);
+		return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
 	}
 	if (flags & FOLL_SPLIT) {
 		int ret;
@@ -307,18 +306,18 @@ retry_locked:
 		}
 
 		return ret ? ERR_PTR(ret) :
-			follow_page_pte(vma, address, pmd, flags);
+			follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
 	}
 	page = follow_trans_huge_pmd(vma, address, pmd, flags);
 	spin_unlock(ptl);
-	*page_mask = HPAGE_PMD_NR - 1;
+	ctx->page_mask = HPAGE_PMD_NR - 1;
 	return page;
 }
 
-
 static struct page *follow_pud_mask(struct vm_area_struct *vma,
 				    unsigned long address, p4d_t *p4dp,
-				    unsigned int flags, unsigned int *page_mask)
+				    unsigned int flags,
+				    struct follow_page_context *ctx)
 {
 	pud_t *pud;
 	spinlock_t *ptl;
@@ -344,7 +343,7 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma,
 	}
 	if (pud_devmap(*pud)) {
 		ptl = pud_lock(mm, pud);
-		page = follow_devmap_pud(vma, address, pud, flags);
+		page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
 		spin_unlock(ptl);
 		if (page)
 			return page;
@@ -352,13 +351,13 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma,
 	if (unlikely(pud_bad(*pud)))
 		return no_page_table(vma, flags);
 
-	return follow_pmd_mask(vma, address, pud, flags, page_mask);
+	return follow_pmd_mask(vma, address, pud, flags, ctx);
 }
 
-
 static struct page *follow_p4d_mask(struct vm_area_struct *vma,
 				    unsigned long address, pgd_t *pgdp,
-				    unsigned int flags, unsigned int *page_mask)
+				    unsigned int flags,
+				    struct follow_page_context *ctx)
 {
 	p4d_t *p4d;
 	struct page *page;
@@ -378,7 +377,7 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
 			return page;
 		return no_page_table(vma, flags);
 	}
-	return follow_pud_mask(vma, address, p4d, flags, page_mask);
+	return follow_pud_mask(vma, address, p4d, flags, ctx);
 }
 
 /**
@@ -396,13 +395,13 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
  */
 struct page *follow_page_mask(struct vm_area_struct *vma,
 			      unsigned long address, unsigned int flags,
-			      unsigned int *page_mask)
+			      struct follow_page_context *ctx)
 {
 	pgd_t *pgd;
 	struct page *page;
 	struct mm_struct *mm = vma->vm_mm;
 
-	*page_mask = 0;
+	ctx->page_mask = 0;
 
 	/* make this handle hugepd */
 	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
@@ -431,7 +430,19 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
 		return no_page_table(vma, flags);
 	}
 
-	return follow_p4d_mask(vma, address, pgd, flags, page_mask);
+	return follow_p4d_mask(vma, address, pgd, flags, ctx);
+}
+
+struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
+			 unsigned int foll_flags)
+{
+	struct follow_page_context ctx = { NULL };
+	struct page *page;
+
+	page = follow_page_mask(vma, address, foll_flags, &ctx);
+	if (ctx.pgmap)
+		put_dev_pagemap(ctx.pgmap);
+	return page;
 }
 
 static int get_gate_page(struct mm_struct *mm, unsigned long address,
@@ -659,9 +670,9 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		unsigned int gup_flags, struct page **pages,
 		struct vm_area_struct **vmas, int *nonblocking)
 {
-	long i = 0;
-	unsigned int page_mask;
+	long ret = 0, i = 0;
 	struct vm_area_struct *vma = NULL;
+	struct follow_page_context ctx = { NULL };
 
 	if (!nr_pages)
 		return 0;
@@ -691,12 +702,14 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 						pages ? &pages[i] : NULL);
 				if (ret)
 					return i ? : ret;
-				page_mask = 0;
+				ctx.page_mask = 0;
 				goto next_page;
 			}
 
-			if (!vma || check_vma_flags(vma, gup_flags))
-				return i ? : -EFAULT;
+			if (!vma || check_vma_flags(vma, gup_flags)) {
+				ret = -EFAULT;
+				goto out;
+			}
 			if (is_vm_hugetlb_page(vma)) {
 				i = follow_hugetlb_page(mm, vma, pages, vmas,
 						&start, &nr_pages, i,
@@ -709,23 +722,26 @@ retry:
 		 * If we have a pending SIGKILL, don't keep faulting pages and
 		 * potentially allocating memory.
 		 */
-		if (unlikely(fatal_signal_pending(current)))
-			return i ? i : -ERESTARTSYS;
+		if (unlikely(fatal_signal_pending(current))) {
+			ret = -ERESTARTSYS;
+			goto out;
+		}
 		cond_resched();
-		page = follow_page_mask(vma, start, foll_flags, &page_mask);
+
+		page = follow_page_mask(vma, start, foll_flags, &ctx);
 		if (!page) {
-			int ret;
 			ret = faultin_page(tsk, vma, start, &foll_flags,
 					nonblocking);
 			switch (ret) {
 			case 0:
 				goto retry;
+			case -EBUSY:
+				ret = 0;
+				/* FALLTHRU */
 			case -EFAULT:
 			case -ENOMEM:
 			case -EHWPOISON:
-				return i ? i : ret;
-			case -EBUSY:
-				return i;
+				goto out;
 			case -ENOENT:
 				goto next_page;
 			}
@@ -737,27 +753,31 @@ retry:
 			 */
 			goto next_page;
 		} else if (IS_ERR(page)) {
-			return i ? i : PTR_ERR(page);
+			ret = PTR_ERR(page);
+			goto out;
 		}
 		if (pages) {
 			pages[i] = page;
 			flush_anon_page(vma, page, start);
 			flush_dcache_page(page);
-			page_mask = 0;
+			ctx.page_mask = 0;
 		}
 next_page:
 		if (vmas) {
 			vmas[i] = vma;
-			page_mask = 0;
+			ctx.page_mask = 0;
 		}
-		page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
+		page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask);
 		if (page_increm > nr_pages)
 			page_increm = nr_pages;
 		i += page_increm;
 		start += page_increm * PAGE_SIZE;
 		nr_pages -= page_increm;
 	} while (nr_pages);
-	return i;
+out:
+	if (ctx.pgmap)
+		put_dev_pagemap(ctx.pgmap);
+	return i ? i : ret;
 }
 
 static bool vma_permits_fault(struct vm_area_struct *vma,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8ea1b36bd452..25c7d7509cf4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -852,11 +852,10 @@ static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
 }
 
 struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
-		pmd_t *pmd, int flags)
+		pmd_t *pmd, int flags, struct dev_pagemap **pgmap)
 {
 	unsigned long pfn = pmd_pfn(*pmd);
 	struct mm_struct *mm = vma->vm_mm;
-	struct dev_pagemap *pgmap;
 	struct page *page;
 
 	assert_spin_locked(pmd_lockptr(mm, pmd));
@@ -886,12 +885,11 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
 		return ERR_PTR(-EEXIST);
 
 	pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
-	pgmap = get_dev_pagemap(pfn, NULL);
-	if (!pgmap)
+	*pgmap = get_dev_pagemap(pfn, *pgmap);
+	if (!*pgmap)
 		return ERR_PTR(-EFAULT);
 	page = pfn_to_page(pfn);
 	get_page(page);
-	put_dev_pagemap(pgmap);
 
 	return page;
 }
@@ -1000,11 +998,10 @@ static void touch_pud(struct vm_area_struct *vma, unsigned long addr,
 }
 
 struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
-		pud_t *pud, int flags)
+		pud_t *pud, int flags, struct dev_pagemap **pgmap)
 {
 	unsigned long pfn = pud_pfn(*pud);
 	struct mm_struct *mm = vma->vm_mm;
-	struct dev_pagemap *pgmap;
 	struct page *page;
 
 	assert_spin_locked(pud_lockptr(mm, pud));
@@ -1028,12 +1025,11 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
 		return ERR_PTR(-EEXIST);
 
 	pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT;
-	pgmap = get_dev_pagemap(pfn, NULL);
-	if (!pgmap)
+	*pgmap = get_dev_pagemap(pfn, *pgmap);
+	if (!*pgmap)
 		return ERR_PTR(-EFAULT);
 	page = pfn_to_page(pfn);
 	get_page(page);
-	put_dev_pagemap(pgmap);
 
 	return page;
 }
diff --git a/mm/nommu.c b/mm/nommu.c
index e4aac33216ae..749276beb109 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1709,11 +1709,9 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	return ret;
 }
 
-struct page *follow_page_mask(struct vm_area_struct *vma,
-			      unsigned long address, unsigned int flags,
-			      unsigned int *page_mask)
+struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
+			 unsigned int foll_flags)
 {
-	*page_mask = 0;
 	return NULL;
 }
 
-- 
cgit v1.2.3


From bc4ae27d817a4e92071ef67cb6368120cfabe7ec Mon Sep 17 00:00:00 2001
From: Omar Sandoval <osandov@fb.com>
Date: Fri, 26 Oct 2018 15:10:51 -0700
Subject: mm: split SWP_FILE into SWP_ACTIVATED and SWP_FS

The SWP_FILE flag serves two purposes: to make swap_{read,write}page() go
through the filesystem, and to make swapoff() call ->swap_deactivate().
For Btrfs, we want the latter but not the former, so split this flag into
two.  This makes us always call ->swap_deactivate() if ->swap_activate()
succeeded, not just if it didn't add any swap extents itself.

This also resolves the issue of the very misleading name of SWP_FILE,
which is only used for swap files over NFS.

Link: http://lkml.kernel.org/r/6d63d8668c4287a4f6d203d65696e96f80abdfc7.1536704650.git.osandov@fb.com
Signed-off-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: David Sterba <dsterba@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swap.h | 13 +++++++------
 mm/page_io.c         |  6 +++---
 mm/swapfile.c        | 13 ++++++++-----
 3 files changed, 18 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index b93740d72e78..38195f5c96b1 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -167,13 +167,14 @@ enum {
 	SWP_SOLIDSTATE	= (1 << 4),	/* blkdev seeks are cheap */
 	SWP_CONTINUED	= (1 << 5),	/* swap_map has count continuation */
 	SWP_BLKDEV	= (1 << 6),	/* its a block device */
-	SWP_FILE	= (1 << 7),	/* set after swap_activate success */
-	SWP_AREA_DISCARD = (1 << 8),	/* single-time swap area discards */
-	SWP_PAGE_DISCARD = (1 << 9),	/* freed swap page-cluster discards */
-	SWP_STABLE_WRITES = (1 << 10),	/* no overwrite PG_writeback pages */
-	SWP_SYNCHRONOUS_IO = (1 << 11),	/* synchronous IO is efficient */
+	SWP_ACTIVATED	= (1 << 7),	/* set after swap_activate success */
+	SWP_FS		= (1 << 8),	/* swap file goes through fs */
+	SWP_AREA_DISCARD = (1 << 9),	/* single-time swap area discards */
+	SWP_PAGE_DISCARD = (1 << 10),	/* freed swap page-cluster discards */
+	SWP_STABLE_WRITES = (1 << 11),	/* no overwrite PG_writeback pages */
+	SWP_SYNCHRONOUS_IO = (1 << 12),	/* synchronous IO is efficient */
 					/* add others here before... */
-	SWP_SCANNING	= (1 << 12),	/* refcount in scan_swap_map */
+	SWP_SCANNING	= (1 << 13),	/* refcount in scan_swap_map */
 };
 
 #define SWAP_CLUSTER_MAX 32UL
diff --git a/mm/page_io.c b/mm/page_io.c
index 573d3663d846..a451ffa9491c 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -283,7 +283,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 	struct swap_info_struct *sis = page_swap_info(page);
 
 	VM_BUG_ON_PAGE(!PageSwapCache(page), page);
-	if (sis->flags & SWP_FILE) {
+	if (sis->flags & SWP_FS) {
 		struct kiocb kiocb;
 		struct file *swap_file = sis->swap_file;
 		struct address_space *mapping = swap_file->f_mapping;
@@ -365,7 +365,7 @@ int swap_readpage(struct page *page, bool synchronous)
 		goto out;
 	}
 
-	if (sis->flags & SWP_FILE) {
+	if (sis->flags & SWP_FS) {
 		struct file *swap_file = sis->swap_file;
 		struct address_space *mapping = swap_file->f_mapping;
 
@@ -423,7 +423,7 @@ int swap_set_page_dirty(struct page *page)
 {
 	struct swap_info_struct *sis = page_swap_info(page);
 
-	if (sis->flags & SWP_FILE) {
+	if (sis->flags & SWP_FS) {
 		struct address_space *mapping = sis->swap_file->f_mapping;
 
 		VM_BUG_ON_PAGE(!PageSwapCache(page), page);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 2681e50592c5..f0c7e4c11bab 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1003,7 +1003,7 @@ start_over:
 			goto nextsi;
 		}
 		if (size == SWAPFILE_CLUSTER) {
-			if (!(si->flags & SWP_FILE))
+			if (!(si->flags & SWP_FS))
 				n_ret = swap_alloc_cluster(si, swp_entries);
 		} else
 			n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
@@ -2299,12 +2299,13 @@ static void destroy_swap_extents(struct swap_info_struct *sis)
 		kfree(se);
 	}
 
-	if (sis->flags & SWP_FILE) {
+	if (sis->flags & SWP_ACTIVATED) {
 		struct file *swap_file = sis->swap_file;
 		struct address_space *mapping = swap_file->f_mapping;
 
-		sis->flags &= ~SWP_FILE;
-		mapping->a_ops->swap_deactivate(swap_file);
+		sis->flags &= ~SWP_ACTIVATED;
+		if (mapping->a_ops->swap_deactivate)
+			mapping->a_ops->swap_deactivate(swap_file);
 	}
 }
 
@@ -2400,8 +2401,10 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
 
 	if (mapping->a_ops->swap_activate) {
 		ret = mapping->a_ops->swap_activate(sis, swap_file, span);
+		if (ret >= 0)
+			sis->flags |= SWP_ACTIVATED;
 		if (!ret) {
-			sis->flags |= SWP_FILE;
+			sis->flags |= SWP_FS;
 			ret = add_swap_extent(sis, 0, sis->max, 0);
 			*span = sis->pages;
 		}
-- 
cgit v1.2.3


From ae74136b4bb64440a55117e12065b8c282ab6c1a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trondmy@gmail.com>
Date: Wed, 3 Oct 2018 12:01:22 -0400
Subject: SUNRPC: Allow cache lookups to use RCU protection rather than the r/w
 spinlock

Instead of the reader/writer spinlock, allow cache lookups to use RCU
for looking up entries. This is more efficient since modifications can
occur while other entries are being looked up.

Note that for now, we keep the reader/writer spinlock until all users
have been converted to use RCU-safe freeing of their cache entries.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/cache.h | 12 ++++++
 net/sunrpc/cache.c           | 93 +++++++++++++++++++++++++++++++++++++-------
 2 files changed, 91 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index 40d2822f0e2f..cf3e17ee2786 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -167,6 +167,9 @@ extern const struct file_operations cache_file_operations_pipefs;
 extern const struct file_operations content_file_operations_pipefs;
 extern const struct file_operations cache_flush_operations_pipefs;
 
+extern struct cache_head *
+sunrpc_cache_lookup_rcu(struct cache_detail *detail,
+			struct cache_head *key, int hash);
 extern struct cache_head *
 sunrpc_cache_lookup(struct cache_detail *detail,
 		    struct cache_head *key, int hash);
@@ -186,6 +189,12 @@ static inline struct cache_head  *cache_get(struct cache_head *h)
 	return h;
 }
 
+static inline struct cache_head  *cache_get_rcu(struct cache_head *h)
+{
+	if (kref_get_unless_zero(&h->ref))
+		return h;
+	return NULL;
+}
 
 static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
 {
@@ -227,6 +236,9 @@ extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *);
 extern void *cache_seq_start(struct seq_file *file, loff_t *pos);
 extern void *cache_seq_next(struct seq_file *file, void *p, loff_t *pos);
 extern void cache_seq_stop(struct seq_file *file, void *p);
+extern void *cache_seq_start_rcu(struct seq_file *file, loff_t *pos);
+extern void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos);
+extern void cache_seq_stop_rcu(struct seq_file *file, void *p);
 
 extern void qword_add(char **bpp, int *lp, char *str);
 extern void qword_addhex(char **bpp, int *lp, char *buf, int blen);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index aa8e62d61f4d..7593afed9036 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -54,6 +54,27 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail)
 	h->last_refresh = now;
 }
 
+static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail,
+						struct cache_head *key,
+						int hash)
+{
+	struct hlist_head *head = &detail->hash_table[hash];
+	struct cache_head *tmp;
+
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(tmp, head, cache_list) {
+		if (detail->match(tmp, key)) {
+			if (cache_is_expired(detail, tmp))
+				continue;
+			tmp = cache_get_rcu(tmp);
+			rcu_read_unlock();
+			return tmp;
+		}
+	}
+	rcu_read_unlock();
+	return NULL;
+}
+
 static struct cache_head *sunrpc_cache_find(struct cache_detail *detail,
 					    struct cache_head *key, int hash)
 {
@@ -61,7 +82,6 @@ static struct cache_head *sunrpc_cache_find(struct cache_detail *detail,
 	struct cache_head *tmp;
 
 	read_lock(&detail->hash_lock);
-
 	hlist_for_each_entry(tmp, head, cache_list) {
 		if (detail->match(tmp, key)) {
 			if (cache_is_expired(detail, tmp))
@@ -96,10 +116,10 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
 	write_lock(&detail->hash_lock);
 
 	/* check if entry appeared while we slept */
-	hlist_for_each_entry(tmp, head, cache_list) {
+	hlist_for_each_entry_rcu(tmp, head, cache_list) {
 		if (detail->match(tmp, key)) {
 			if (cache_is_expired(detail, tmp)) {
-				hlist_del_init(&tmp->cache_list);
+				hlist_del_init_rcu(&tmp->cache_list);
 				detail->entries --;
 				freeme = tmp;
 				break;
@@ -111,7 +131,7 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
 		}
 	}
 
-	hlist_add_head(&new->cache_list, head);
+	hlist_add_head_rcu(&new->cache_list, head);
 	detail->entries++;
 	cache_get(new);
 	write_unlock(&detail->hash_lock);
@@ -121,6 +141,19 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
 	return new;
 }
 
+struct cache_head *sunrpc_cache_lookup_rcu(struct cache_detail *detail,
+					   struct cache_head *key, int hash)
+{
+	struct cache_head *ret;
+
+	ret = sunrpc_cache_find_rcu(detail, key, hash);
+	if (ret)
+		return ret;
+	/* Didn't find anything, insert an empty entry */
+	return sunrpc_cache_add_entry(detail, key, hash);
+}
+EXPORT_SYMBOL_GPL(sunrpc_cache_lookup_rcu);
+
 struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
 				       struct cache_head *key, int hash)
 {
@@ -134,6 +167,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
 }
 EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
 
+
 static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
 
 static void cache_fresh_locked(struct cache_head *head, time_t expiry,
@@ -450,7 +484,7 @@ static int cache_clean(void)
 			if (!cache_is_expired(current_detail, ch))
 				continue;
 
-			hlist_del_init(&ch->cache_list);
+			hlist_del_init_rcu(&ch->cache_list);
 			current_detail->entries--;
 			rv = 1;
 			break;
@@ -521,7 +555,7 @@ void cache_purge(struct cache_detail *detail)
 	for (i = 0; i < detail->hash_size; i++) {
 		head = &detail->hash_table[i];
 		hlist_for_each_entry_safe(ch, tmp, head, cache_list) {
-			hlist_del_init(&ch->cache_list);
+			hlist_del_init_rcu(&ch->cache_list);
 			detail->entries--;
 
 			set_bit(CACHE_CLEANED, &ch->flags);
@@ -1306,21 +1340,19 @@ EXPORT_SYMBOL_GPL(qword_get);
  * get a header, then pass each real item in the cache
  */
 
-void *cache_seq_start(struct seq_file *m, loff_t *pos)
-	__acquires(cd->hash_lock)
+static void *__cache_seq_start(struct seq_file *m, loff_t *pos)
 {
 	loff_t n = *pos;
 	unsigned int hash, entry;
 	struct cache_head *ch;
 	struct cache_detail *cd = m->private;
 
-	read_lock(&cd->hash_lock);
 	if (!n--)
 		return SEQ_START_TOKEN;
 	hash = n >> 32;
 	entry = n & ((1LL<<32) - 1);
 
-	hlist_for_each_entry(ch, &cd->hash_table[hash], cache_list)
+	hlist_for_each_entry_rcu(ch, &cd->hash_table[hash], cache_list)
 		if (!entry--)
 			return ch;
 	n &= ~((1LL<<32) - 1);
@@ -1332,9 +1364,19 @@ void *cache_seq_start(struct seq_file *m, loff_t *pos)
 	if (hash >= cd->hash_size)
 		return NULL;
 	*pos = n+1;
-	return hlist_entry_safe(cd->hash_table[hash].first,
+	return hlist_entry_safe(rcu_dereference_raw(
+				hlist_first_rcu(&cd->hash_table[hash])),
 				struct cache_head, cache_list);
 }
+
+void *cache_seq_start(struct seq_file *m, loff_t *pos)
+	__acquires(cd->hash_lock)
+{
+	struct cache_detail *cd = m->private;
+
+	read_lock(&cd->hash_lock);
+	return __cache_seq_start(m, pos);
+}
 EXPORT_SYMBOL_GPL(cache_seq_start);
 
 void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
@@ -1350,7 +1392,8 @@ void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
 		*pos += 1LL<<32;
 	} else {
 		++*pos;
-		return hlist_entry_safe(ch->cache_list.next,
+		return hlist_entry_safe(rcu_dereference_raw(
+					hlist_next_rcu(&ch->cache_list)),
 					struct cache_head, cache_list);
 	}
 	*pos &= ~((1LL<<32) - 1);
@@ -1362,7 +1405,8 @@ void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
 	if (hash >= cd->hash_size)
 		return NULL;
 	++*pos;
-	return hlist_entry_safe(cd->hash_table[hash].first,
+	return hlist_entry_safe(rcu_dereference_raw(
+				hlist_first_rcu(&cd->hash_table[hash])),
 				struct cache_head, cache_list);
 }
 EXPORT_SYMBOL_GPL(cache_seq_next);
@@ -1375,6 +1419,27 @@ void cache_seq_stop(struct seq_file *m, void *p)
 }
 EXPORT_SYMBOL_GPL(cache_seq_stop);
 
+void *cache_seq_start_rcu(struct seq_file *m, loff_t *pos)
+	__acquires(RCU)
+{
+	rcu_read_lock();
+	return __cache_seq_start(m, pos);
+}
+EXPORT_SYMBOL_GPL(cache_seq_start_rcu);
+
+void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos)
+{
+	return cache_seq_next(file, p, pos);
+}
+EXPORT_SYMBOL_GPL(cache_seq_next_rcu);
+
+void cache_seq_stop_rcu(struct seq_file *m, void *p)
+	__releases(RCU)
+{
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(cache_seq_stop_rcu);
+
 static int c_show(struct seq_file *m, void *p)
 {
 	struct cache_head *cp = p;
@@ -1863,7 +1928,7 @@ void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h)
 {
 	write_lock(&cd->hash_lock);
 	if (!hlist_unhashed(&h->cache_list)){
-		hlist_del_init(&h->cache_list);
+		hlist_del_init_rcu(&h->cache_list);
 		cd->entries--;
 		write_unlock(&cd->hash_lock);
 		cache_put(h, cd);
-- 
cgit v1.2.3


From d48cf356a13073853f19be6ca5ebbecfc2762ebe Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trondmy@gmail.com>
Date: Mon, 1 Oct 2018 10:41:51 -0400
Subject: SUNRPC: Remove non-RCU protected lookup

Clean up the cache code by removing the non-RCU protected lookup.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 Documentation/filesystems/nfs/rpc-cache.txt |  6 +--
 include/linux/sunrpc/cache.h                |  6 ---
 net/sunrpc/cache.c                          | 61 ++---------------------------
 3 files changed, 7 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/nfs/rpc-cache.txt b/Documentation/filesystems/nfs/rpc-cache.txt
index ebcaaee21616..c4dac829db0f 100644
--- a/Documentation/filesystems/nfs/rpc-cache.txt
+++ b/Documentation/filesystems/nfs/rpc-cache.txt
@@ -84,7 +84,7 @@ Creating a Cache
 		A message from user space has arrived to fill out a
 		cache entry.  It is in 'buf' of length 'len'.
 		cache_parse should parse this, find the item in the
-		cache with sunrpc_cache_lookup, and update the item
+		cache with sunrpc_cache_lookup_rcu, and update the item
 		with sunrpc_cache_update.
 
 
@@ -95,7 +95,7 @@ Creating a Cache
 Using a cache
 -------------
 
-To find a value in a cache, call sunrpc_cache_lookup passing a pointer
+To find a value in a cache, call sunrpc_cache_lookup_rcu passing a pointer
 to the cache_head in a sample item with the 'key' fields filled in.
 This will be passed to ->match to identify the target entry.  If no
 entry is found, a new entry will be create, added to the cache, and
@@ -116,7 +116,7 @@ item does become valid, the deferred copy of the request will be
 revisited (->revisit).  It is expected that this method will
 reschedule the request for processing.
 
-The value returned by sunrpc_cache_lookup can also be passed to
+The value returned by sunrpc_cache_lookup_rcu can also be passed to
 sunrpc_cache_update to set the content for the item.  A second item is
 passed which should hold the content.  If the item found by _lookup
 has valid data, then it is discarded and a new item is created.  This
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index cf3e17ee2786..c3d67e893430 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -171,9 +171,6 @@ extern struct cache_head *
 sunrpc_cache_lookup_rcu(struct cache_detail *detail,
 			struct cache_head *key, int hash);
 extern struct cache_head *
-sunrpc_cache_lookup(struct cache_detail *detail,
-		    struct cache_head *key, int hash);
-extern struct cache_head *
 sunrpc_cache_update(struct cache_detail *detail,
 		    struct cache_head *new, struct cache_head *old, int hash);
 
@@ -233,9 +230,6 @@ extern void sunrpc_cache_unregister_pipefs(struct cache_detail *);
 extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *);
 
 /* Must store cache_detail in seq_file->private if using next three functions */
-extern void *cache_seq_start(struct seq_file *file, loff_t *pos);
-extern void *cache_seq_next(struct seq_file *file, void *p, loff_t *pos);
-extern void cache_seq_stop(struct seq_file *file, void *p);
 extern void *cache_seq_start_rcu(struct seq_file *file, loff_t *pos);
 extern void *cache_seq_next_rcu(struct seq_file *file, void *p, loff_t *pos);
 extern void cache_seq_stop_rcu(struct seq_file *file, void *p);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 7593afed9036..593cf8607414 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -75,27 +75,6 @@ static struct cache_head *sunrpc_cache_find_rcu(struct cache_detail *detail,
 	return NULL;
 }
 
-static struct cache_head *sunrpc_cache_find(struct cache_detail *detail,
-					    struct cache_head *key, int hash)
-{
-	struct hlist_head *head = &detail->hash_table[hash];
-	struct cache_head *tmp;
-
-	read_lock(&detail->hash_lock);
-	hlist_for_each_entry(tmp, head, cache_list) {
-		if (detail->match(tmp, key)) {
-			if (cache_is_expired(detail, tmp))
-				/* This entry is expired, we will discard it. */
-				break;
-			cache_get(tmp);
-			read_unlock(&detail->hash_lock);
-			return tmp;
-		}
-	}
-	read_unlock(&detail->hash_lock);
-	return NULL;
-}
-
 static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
 						 struct cache_head *key,
 						 int hash)
@@ -154,20 +133,6 @@ struct cache_head *sunrpc_cache_lookup_rcu(struct cache_detail *detail,
 }
 EXPORT_SYMBOL_GPL(sunrpc_cache_lookup_rcu);
 
-struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
-				       struct cache_head *key, int hash)
-{
-	struct cache_head *ret;
-
-	ret = sunrpc_cache_find(detail, key, hash);
-	if (ret)
-		return ret;
-	/* Didn't find anything, insert an empty entry */
-	return sunrpc_cache_add_entry(detail, key, hash);
-}
-EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
-
-
 static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
 
 static void cache_fresh_locked(struct cache_head *head, time_t expiry,
@@ -1369,17 +1334,7 @@ static void *__cache_seq_start(struct seq_file *m, loff_t *pos)
 				struct cache_head, cache_list);
 }
 
-void *cache_seq_start(struct seq_file *m, loff_t *pos)
-	__acquires(cd->hash_lock)
-{
-	struct cache_detail *cd = m->private;
-
-	read_lock(&cd->hash_lock);
-	return __cache_seq_start(m, pos);
-}
-EXPORT_SYMBOL_GPL(cache_seq_start);
-
-void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
+static void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
 {
 	struct cache_head *ch = p;
 	int hash = (*pos >> 32);
@@ -1411,14 +1366,6 @@ void *cache_seq_next(struct seq_file *m, void *p, loff_t *pos)
 }
 EXPORT_SYMBOL_GPL(cache_seq_next);
 
-void cache_seq_stop(struct seq_file *m, void *p)
-	__releases(cd->hash_lock)
-{
-	struct cache_detail *cd = m->private;
-	read_unlock(&cd->hash_lock);
-}
-EXPORT_SYMBOL_GPL(cache_seq_stop);
-
 void *cache_seq_start_rcu(struct seq_file *m, loff_t *pos)
 	__acquires(RCU)
 {
@@ -1466,9 +1413,9 @@ static int c_show(struct seq_file *m, void *p)
 }
 
 static const struct seq_operations cache_content_op = {
-	.start	= cache_seq_start,
-	.next	= cache_seq_next,
-	.stop	= cache_seq_stop,
+	.start	= cache_seq_start_rcu,
+	.next	= cache_seq_next_rcu,
+	.stop	= cache_seq_stop_rcu,
 	.show	= c_show,
 };
 
-- 
cgit v1.2.3


From 1863d77f15da0addcd293a1719fa5d3ef8cde3ca Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trondmy@gmail.com>
Date: Mon, 1 Oct 2018 10:41:52 -0400
Subject: SUNRPC: Replace the cache_detail->hash_lock with a regular spinlock

Now that the reader functions are all RCU protected, use a regular
spinlock rather than a reader/writer lock.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/cache.h |  2 +-
 net/sunrpc/cache.c           | 46 ++++++++++++++++++++++----------------------
 2 files changed, 24 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index c3d67e893430..5a3e95017fc6 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -67,7 +67,7 @@ struct cache_detail {
 	struct module *		owner;
 	int			hash_size;
 	struct hlist_head *	hash_table;
-	rwlock_t		hash_lock;
+	spinlock_t		hash_lock;
 
 	char			*name;
 	void			(*cache_put)(struct kref *);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 593cf8607414..f96345b1180e 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -92,7 +92,7 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
 	cache_init(new, detail);
 	detail->init(new, key);
 
-	write_lock(&detail->hash_lock);
+	spin_lock(&detail->hash_lock);
 
 	/* check if entry appeared while we slept */
 	hlist_for_each_entry_rcu(tmp, head, cache_list) {
@@ -104,7 +104,7 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
 				break;
 			}
 			cache_get(tmp);
-			write_unlock(&detail->hash_lock);
+			spin_unlock(&detail->hash_lock);
 			cache_put(new, detail);
 			return tmp;
 		}
@@ -113,7 +113,7 @@ static struct cache_head *sunrpc_cache_add_entry(struct cache_detail *detail,
 	hlist_add_head_rcu(&new->cache_list, head);
 	detail->entries++;
 	cache_get(new);
-	write_unlock(&detail->hash_lock);
+	spin_unlock(&detail->hash_lock);
 
 	if (freeme)
 		cache_put(freeme, detail);
@@ -167,18 +167,18 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
 	struct cache_head *tmp;
 
 	if (!test_bit(CACHE_VALID, &old->flags)) {
-		write_lock(&detail->hash_lock);
+		spin_lock(&detail->hash_lock);
 		if (!test_bit(CACHE_VALID, &old->flags)) {
 			if (test_bit(CACHE_NEGATIVE, &new->flags))
 				set_bit(CACHE_NEGATIVE, &old->flags);
 			else
 				detail->update(old, new);
 			cache_fresh_locked(old, new->expiry_time, detail);
-			write_unlock(&detail->hash_lock);
+			spin_unlock(&detail->hash_lock);
 			cache_fresh_unlocked(old, detail);
 			return old;
 		}
-		write_unlock(&detail->hash_lock);
+		spin_unlock(&detail->hash_lock);
 	}
 	/* We need to insert a new entry */
 	tmp = detail->alloc();
@@ -189,7 +189,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
 	cache_init(tmp, detail);
 	detail->init(tmp, old);
 
-	write_lock(&detail->hash_lock);
+	spin_lock(&detail->hash_lock);
 	if (test_bit(CACHE_NEGATIVE, &new->flags))
 		set_bit(CACHE_NEGATIVE, &tmp->flags);
 	else
@@ -199,7 +199,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
 	cache_get(tmp);
 	cache_fresh_locked(tmp, new->expiry_time, detail);
 	cache_fresh_locked(old, 0, detail);
-	write_unlock(&detail->hash_lock);
+	spin_unlock(&detail->hash_lock);
 	cache_fresh_unlocked(tmp, detail);
 	cache_fresh_unlocked(old, detail);
 	cache_put(old, detail);
@@ -239,7 +239,7 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
 {
 	int rv;
 
-	write_lock(&detail->hash_lock);
+	spin_lock(&detail->hash_lock);
 	rv = cache_is_valid(h);
 	if (rv == -EAGAIN) {
 		set_bit(CACHE_NEGATIVE, &h->flags);
@@ -247,7 +247,7 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
 				   detail);
 		rv = -ENOENT;
 	}
-	write_unlock(&detail->hash_lock);
+	spin_unlock(&detail->hash_lock);
 	cache_fresh_unlocked(h, detail);
 	return rv;
 }
@@ -357,7 +357,7 @@ static struct delayed_work cache_cleaner;
 
 void sunrpc_init_cache_detail(struct cache_detail *cd)
 {
-	rwlock_init(&cd->hash_lock);
+	spin_lock_init(&cd->hash_lock);
 	INIT_LIST_HEAD(&cd->queue);
 	spin_lock(&cache_list_lock);
 	cd->nextcheck = 0;
@@ -377,11 +377,11 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
 {
 	cache_purge(cd);
 	spin_lock(&cache_list_lock);
-	write_lock(&cd->hash_lock);
+	spin_lock(&cd->hash_lock);
 	if (current_detail == cd)
 		current_detail = NULL;
 	list_del_init(&cd->others);
-	write_unlock(&cd->hash_lock);
+	spin_unlock(&cd->hash_lock);
 	spin_unlock(&cache_list_lock);
 	if (list_empty(&cache_list)) {
 		/* module must be being unloaded so its safe to kill the worker */
@@ -438,7 +438,7 @@ static int cache_clean(void)
 		struct hlist_head *head;
 		struct hlist_node *tmp;
 
-		write_lock(&current_detail->hash_lock);
+		spin_lock(&current_detail->hash_lock);
 
 		/* Ok, now to clean this strand */
 
@@ -455,7 +455,7 @@ static int cache_clean(void)
 			break;
 		}
 
-		write_unlock(&current_detail->hash_lock);
+		spin_unlock(&current_detail->hash_lock);
 		d = current_detail;
 		if (!ch)
 			current_index ++;
@@ -510,9 +510,9 @@ void cache_purge(struct cache_detail *detail)
 	struct hlist_node *tmp = NULL;
 	int i = 0;
 
-	write_lock(&detail->hash_lock);
+	spin_lock(&detail->hash_lock);
 	if (!detail->entries) {
-		write_unlock(&detail->hash_lock);
+		spin_unlock(&detail->hash_lock);
 		return;
 	}
 
@@ -524,13 +524,13 @@ void cache_purge(struct cache_detail *detail)
 			detail->entries--;
 
 			set_bit(CACHE_CLEANED, &ch->flags);
-			write_unlock(&detail->hash_lock);
+			spin_unlock(&detail->hash_lock);
 			cache_fresh_unlocked(ch, detail);
 			cache_put(ch, detail);
-			write_lock(&detail->hash_lock);
+			spin_lock(&detail->hash_lock);
 		}
 	}
-	write_unlock(&detail->hash_lock);
+	spin_unlock(&detail->hash_lock);
 }
 EXPORT_SYMBOL_GPL(cache_purge);
 
@@ -1873,13 +1873,13 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs);
 
 void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h)
 {
-	write_lock(&cd->hash_lock);
+	spin_lock(&cd->hash_lock);
 	if (!hlist_unhashed(&h->cache_list)){
 		hlist_del_init_rcu(&h->cache_list);
 		cd->entries--;
-		write_unlock(&cd->hash_lock);
+		spin_unlock(&cd->hash_lock);
 		cache_put(h, cd);
 	} else
-		write_unlock(&cd->hash_lock);
+		spin_unlock(&cd->hash_lock);
 }
 EXPORT_SYMBOL_GPL(sunrpc_cache_unhash);
-- 
cgit v1.2.3


From 3ae2cefb613b00d613677c05ffa384b4f660f468 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 1 Oct 2018 14:16:11 -0400
Subject: svcrdma: Increase the default connection credit limit

Reduce queuing on clients by allowing more credits by default.

64 is the default NFSv4.1 slot table size on Linux clients. This
size prevents the credit limit from putting RPC requests to sleep
again after they have already slept waiting for a session slot.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/sunrpc/svc_rdma.h | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index fd78f78df5c6..e6e26918504c 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -113,13 +113,14 @@ struct svcxprt_rdma {
 /* sc_flags */
 #define RDMAXPRT_CONN_PENDING	3
 
-#define RPCRDMA_LISTEN_BACKLOG  10
-#define RPCRDMA_MAX_REQUESTS    32
-
-/* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our
- * current NFSv4.1 implementation supports one backchannel slot.
+/*
+ * Default connection parameters
  */
-#define RPCRDMA_MAX_BC_REQUESTS	2
+enum {
+	RPCRDMA_LISTEN_BACKLOG	= 10,
+	RPCRDMA_MAX_REQUESTS	= 64,
+	RPCRDMA_MAX_BC_REQUESTS	= 2,
+};
 
 #define RPCSVC_MAXPAYLOAD_RDMA	RPCSVC_MAXPAYLOAD
 
-- 
cgit v1.2.3


From 1383a7ed67490fb00d793e36c7a4d599ff88a64d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:40:31 +1100
Subject: vfs: check file ranges before cloning files

Move the file range checks from vfs_clone_file_prep into a separate
generic_remap_checks function so that all the checks are collected in a
central location.  This forms the basis for adding more checks from
generic_write_checks that will make cloning's input checking more
consistent with write input checking.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/ocfs2/refcounttree.c |  2 +-
 fs/read_write.c         | 55 ++++++++++-----------------------------
 fs/xfs/xfs_reflink.c    |  2 +-
 include/linux/fs.h      |  9 ++++---
 mm/filemap.c            | 69 +++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 90 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 7a5ee145c733..19e03936c5e1 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4850,7 +4850,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 	    (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
 		goto out_unlock;
 
-	ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
+	ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out,
 			&len, is_dedupe);
 	if (ret <= 0)
 		goto out_unlock;
diff --git a/fs/read_write.c b/fs/read_write.c
index 260797b01851..d6e8e242a15f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1717,13 +1717,12 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
  * Returns: 0 for "nothing to clone", 1 for "something to clone", or
  * the usual negative error code.
  */
-int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
-			       struct inode *inode_out, loff_t pos_out,
-			       u64 *len, bool is_dedupe)
+int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
+			struct file *file_out, loff_t pos_out,
+			u64 *len, bool is_dedupe)
 {
-	loff_t bs = inode_out->i_sb->s_blocksize;
-	loff_t blen;
-	loff_t isize;
+	struct inode *inode_in = file_inode(file_in);
+	struct inode *inode_out = file_inode(file_out);
 	bool same_inode = (inode_in == inode_out);
 	int ret;
 
@@ -1740,10 +1739,10 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
 		return -EINVAL;
 
-	isize = i_size_read(inode_in);
-
 	/* Zero length dedupe exits immediately; reflink goes to EOF. */
 	if (*len == 0) {
+		loff_t isize = i_size_read(inode_in);
+
 		if (is_dedupe || pos_in == isize)
 			return 0;
 		if (pos_in > isize)
@@ -1751,36 +1750,11 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 		*len = isize - pos_in;
 	}
 
-	/* Ensure offsets don't wrap and the input is inside i_size */
-	if (pos_in + *len < pos_in || pos_out + *len < pos_out ||
-	    pos_in + *len > isize)
-		return -EINVAL;
-
-	/* Don't allow dedupe past EOF in the dest file */
-	if (is_dedupe) {
-		loff_t	disize;
-
-		disize = i_size_read(inode_out);
-		if (pos_out >= disize || pos_out + *len > disize)
-			return -EINVAL;
-	}
-
-	/* If we're linking to EOF, continue to the block boundary. */
-	if (pos_in + *len == isize)
-		blen = ALIGN(isize, bs) - pos_in;
-	else
-		blen = *len;
-
-	/* Only reflink if we're aligned to block boundaries */
-	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
-	    !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
-		return -EINVAL;
-
-	/* Don't allow overlapped reflink within the same file */
-	if (same_inode) {
-		if (pos_out + blen > pos_in && pos_out < pos_in + blen)
-			return -EINVAL;
-	}
+	/* Check that we don't violate system file offset limits. */
+	ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
+			is_dedupe);
+	if (ret)
+		return ret;
 
 	/* Wait for the completion of any pending IOs on both files */
 	inode_dio_wait(inode_in);
@@ -1813,7 +1787,7 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
 
 	return 1;
 }
-EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
+EXPORT_SYMBOL(vfs_clone_file_prep);
 
 int do_clone_file_range(struct file *file_in, loff_t pos_in,
 			struct file *file_out, loff_t pos_out, u64 len)
@@ -1851,9 +1825,6 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in,
 	if (ret)
 		return ret;
 
-	if (pos_in + len > i_size_read(inode_in))
-		return -EINVAL;
-
 	ret = file_in->f_op->clone_file_range(file_in, pos_in,
 			file_out, pos_out, len);
 	if (!ret) {
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 42ea7bab9144..281d5f53f2ec 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1326,7 +1326,7 @@ xfs_reflink_remap_prep(
 	if (IS_DAX(inode_in) || IS_DAX(inode_out))
 		goto out_unlock;
 
-	ret = vfs_clone_file_prep_inodes(inode_in, pos_in, inode_out, pos_out,
+	ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out,
 			len, is_dedupe);
 	if (ret <= 0)
 		goto out_unlock;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 897eae8faee1..ba93a6e7dac4 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1825,9 +1825,9 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
 		unsigned long, loff_t *, rwf_t);
 extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
 				   loff_t, size_t, unsigned int);
-extern int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
-				      struct inode *inode_out, loff_t pos_out,
-				      u64 *len, bool is_dedupe);
+extern int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
+			       struct file *file_out, loff_t pos_out,
+			       u64 *count, bool is_dedupe);
 extern int do_clone_file_range(struct file *file_in, loff_t pos_in,
 			       struct file *file_out, loff_t pos_out, u64 len);
 extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
@@ -2967,6 +2967,9 @@ extern int sb_min_blocksize(struct super_block *, int);
 extern int generic_file_mmap(struct file *, struct vm_area_struct *);
 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
 extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
+extern int generic_remap_checks(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out,
+				uint64_t *count, bool is_dedupe);
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/mm/filemap.c b/mm/filemap.c
index 52517f28e6f4..47e6bfd45a91 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2974,6 +2974,75 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 }
 EXPORT_SYMBOL(generic_write_checks);
 
+/*
+ * Performs necessary checks before doing a clone.
+ *
+ * Can adjust amount of bytes to clone.
+ * Returns appropriate error code that caller should return or
+ * zero in case the clone should be allowed.
+ */
+int generic_remap_checks(struct file *file_in, loff_t pos_in,
+			 struct file *file_out, loff_t pos_out,
+			 uint64_t *req_count, bool is_dedupe)
+{
+	struct inode *inode_in = file_in->f_mapping->host;
+	struct inode *inode_out = file_out->f_mapping->host;
+	uint64_t count = *req_count;
+	uint64_t bcount;
+	loff_t size_in, size_out;
+	loff_t bs = inode_out->i_sb->s_blocksize;
+
+	/* The start of both ranges must be aligned to an fs block. */
+	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_out, bs))
+		return -EINVAL;
+
+	/* Ensure offsets don't wrap. */
+	if (pos_in + count < pos_in || pos_out + count < pos_out)
+		return -EINVAL;
+
+	size_in = i_size_read(inode_in);
+	size_out = i_size_read(inode_out);
+
+	/* Dedupe requires both ranges to be within EOF. */
+	if (is_dedupe &&
+	    (pos_in >= size_in || pos_in + count > size_in ||
+	     pos_out >= size_out || pos_out + count > size_out))
+		return -EINVAL;
+
+	/* Ensure the infile range is within the infile. */
+	if (pos_in >= size_in)
+		return -EINVAL;
+	count = min(count, size_in - (uint64_t)pos_in);
+
+	/*
+	 * If the user wanted us to link to the infile's EOF, round up to the
+	 * next block boundary for this check.
+	 *
+	 * Otherwise, make sure the count is also block-aligned, having
+	 * already confirmed the starting offsets' block alignment.
+	 */
+	if (pos_in + count == size_in) {
+		bcount = ALIGN(size_in, bs) - pos_in;
+	} else {
+		if (!IS_ALIGNED(count, bs))
+			return -EINVAL;
+
+		bcount = count;
+	}
+
+	/* Don't allow overlapped cloning within the same file. */
+	if (inode_in == inode_out &&
+	    pos_out + bcount > pos_in &&
+	    pos_out < pos_in + bcount)
+		return -EINVAL;
+
+	/* For now we don't support changing the length. */
+	if (*req_count != count)
+		return -EINVAL;
+
+	return 0;
+}
+
 int pagecache_write_begin(struct file *file, struct address_space *mapping,
 				loff_t pos, unsigned len, unsigned flags,
 				struct page **pagep, void **fsdata)
-- 
cgit v1.2.3


From a83ab01a62e61616ebb8b97f90f568c1214dc10d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:41:08 +1100
Subject: vfs: rename vfs_clone_file_prep to be more descriptive

The vfs_clone_file_prep is a generic function to be called by filesystem
implementations only.  Rename the prefix to generic_ and make it more
clear that it applies to remap operations, not just clones.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/ocfs2/refcounttree.c | 2 +-
 fs/read_write.c         | 8 ++++----
 fs/xfs/xfs_reflink.c    | 2 +-
 include/linux/fs.h      | 6 +++---
 4 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 19e03936c5e1..36c56dfbe485 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4850,7 +4850,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 	    (OCFS2_I(inode_out)->ip_flags & OCFS2_INODE_SYSTEM_FILE))
 		goto out_unlock;
 
-	ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out,
+	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
 			&len, is_dedupe);
 	if (ret <= 0)
 		goto out_unlock;
diff --git a/fs/read_write.c b/fs/read_write.c
index f5395d8da741..aca75a97a695 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1745,9 +1745,9 @@ static int generic_remap_check_len(struct inode *inode_in,
  * Returns: 0 for "nothing to clone", 1 for "something to clone", or
  * the usual negative error code.
  */
-int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
-			struct file *file_out, loff_t pos_out,
-			u64 *len, bool is_dedupe)
+int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
+				  struct file *file_out, loff_t pos_out,
+				  u64 *len, bool is_dedupe)
 {
 	struct inode *inode_in = file_inode(file_in);
 	struct inode *inode_out = file_inode(file_out);
@@ -1822,7 +1822,7 @@ int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
 
 	return 1;
 }
-EXPORT_SYMBOL(vfs_clone_file_prep);
+EXPORT_SYMBOL(generic_remap_file_range_prep);
 
 int do_clone_file_range(struct file *file_in, loff_t pos_in,
 			struct file *file_out, loff_t pos_out, u64 len)
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 281d5f53f2ec..a7757a128a78 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1326,7 +1326,7 @@ xfs_reflink_remap_prep(
 	if (IS_DAX(inode_in) || IS_DAX(inode_out))
 		goto out_unlock;
 
-	ret = vfs_clone_file_prep(file_in, pos_in, file_out, pos_out,
+	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
 			len, is_dedupe);
 	if (ret <= 0)
 		goto out_unlock;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ba93a6e7dac4..55729e1c2e75 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1825,9 +1825,9 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
 		unsigned long, loff_t *, rwf_t);
 extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
 				   loff_t, size_t, unsigned int);
-extern int vfs_clone_file_prep(struct file *file_in, loff_t pos_in,
-			       struct file *file_out, loff_t pos_out,
-			       u64 *count, bool is_dedupe);
+extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
+					 struct file *file_out, loff_t pos_out,
+					 u64 *count, bool is_dedupe);
 extern int do_clone_file_range(struct file *file_in, loff_t pos_in,
 			       struct file *file_out, loff_t pos_out, u64 len);
 extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-- 
cgit v1.2.3


From 2e5dfc99f2e61c42083ba742395e7a7b353513d1 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:41:21 +1100
Subject: vfs: combine the clone and dedupe into a single remap_file_range

Combine the clone_file_range and dedupe_file_range operations into a
single remap_file_range file operation dispatch since they're
fundamentally the same operation.  The differences between the two can
be made in the prep functions.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 Documentation/filesystems/porting |  5 +++++
 Documentation/filesystems/vfs.txt | 20 +++++++++++------
 fs/btrfs/ctree.h                  |  8 +++----
 fs/btrfs/file.c                   |  3 +--
 fs/btrfs/ioctl.c                  | 45 ++++++++++++++++++++-------------------
 fs/cifs/cifsfs.c                  | 22 +++++++++++--------
 fs/nfs/nfs4file.c                 | 10 ++++++---
 fs/ocfs2/file.c                   | 24 +++++++--------------
 fs/overlayfs/file.c               | 30 +++++++++++++++-----------
 fs/read_write.c                   | 18 ++++++++--------
 fs/xfs/xfs_file.c                 | 23 ++++++--------------
 include/linux/fs.h                | 25 ++++++++++++++++++----
 12 files changed, 127 insertions(+), 106 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index 7b7b845c490a..e6d4466268dd 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -622,3 +622,8 @@ in your dentry operations instead.
 	alloc_file_clone(file, flags, ops) does not affect any caller's references.
 	On success you get a new struct file sharing the mount/dentry with the
 	original, on failure - ERR_PTR().
+--
+[mandatory]
+	->clone_file_range() and ->dedupe_file_range have been replaced with
+	->remap_file_range().  See Documentation/filesystems/vfs.txt for more
+	information.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index a6c6a8af48a2..6f5babfee27b 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -883,8 +883,9 @@ struct file_operations {
 	unsigned (*mmap_capabilities)(struct file *);
 #endif
 	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
-	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
-	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t, u64);
+	int (*remap_file_range)(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out,
+				u64 len, unsigned int remap_flags);
 	int (*fadvise)(struct file *, loff_t, loff_t, int);
 };
 
@@ -960,11 +961,16 @@ otherwise noted.
 
   copy_file_range: called by the copy_file_range(2) system call.
 
-  clone_file_range: called by the ioctl(2) system call for FICLONERANGE and
-	FICLONE commands.
-
-  dedupe_file_range: called by the ioctl(2) system call for FIDEDUPERANGE
-	command.
+  remap_file_range: called by the ioctl(2) system call for FICLONERANGE and
+	FICLONE and FIDEDUPERANGE commands to remap file ranges.  An
+	implementation should remap len bytes at pos_in of the source file into
+	the dest file at pos_out.  Implementations must handle callers passing
+	in len == 0; this means "remap to the end of the source file".  The
+	return value should be zero if all bytes were remapped, or the usual
+	negative error code if the remapping did not succeed completely.
+	The remap_flags parameter accepts REMAP_FILE_* flags.  If
+	REMAP_FILE_DEDUP is set then the implementation must only remap if the
+	requested file ranges have identical contents.
 
   fadvise: possibly called by the fadvise64() system call.
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 2cddfe7806a4..124a05662fc2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3218,9 +3218,6 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
 				struct btrfs_ioctl_space_info *space);
 void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
 			       struct btrfs_ioctl_balance_args *bargs);
-int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
-			    struct file *dst_file, loff_t dst_loff,
-			    u64 olen);
 
 /* file.c */
 int __init btrfs_auto_defrag_init(void);
@@ -3250,8 +3247,9 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
 		      size_t num_pages, loff_t pos, size_t write_bytes,
 		      struct extent_state **cached);
 int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
-int btrfs_clone_file_range(struct file *file_in, loff_t pos_in,
-			   struct file *file_out, loff_t pos_out, u64 len);
+int btrfs_remap_file_range(struct file *file_in, loff_t pos_in,
+			   struct file *file_out, loff_t pos_out, u64 len,
+			   unsigned int remap_flags);
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 2be00e873e92..9a963f061393 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3269,8 +3269,7 @@ const struct file_operations btrfs_file_operations = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= btrfs_compat_ioctl,
 #endif
-	.clone_file_range = btrfs_clone_file_range,
-	.dedupe_file_range = btrfs_dedupe_file_range,
+	.remap_file_range = btrfs_remap_file_range,
 };
 
 void __cold btrfs_auto_defrag_exit(void)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d60b6caf09e8..bfd99c66723e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3627,26 +3627,6 @@ out_unlock:
 	return ret;
 }
 
-int btrfs_dedupe_file_range(struct file *src_file, loff_t src_loff,
-			    struct file *dst_file, loff_t dst_loff,
-			    u64 olen)
-{
-	struct inode *src = file_inode(src_file);
-	struct inode *dst = file_inode(dst_file);
-	u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
-
-	if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
-		/*
-		 * Btrfs does not support blocksize < page_size. As a
-		 * result, btrfs_cmp_data() won't correctly handle
-		 * this situation without an update.
-		 */
-		return -EINVAL;
-	}
-
-	return btrfs_extent_same(src, src_loff, olen, dst, dst_loff);
-}
-
 static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
 				     struct inode *inode,
 				     u64 endoff,
@@ -4348,9 +4328,30 @@ out_unlock:
 	return ret;
 }
 
-int btrfs_clone_file_range(struct file *src_file, loff_t off,
-		struct file *dst_file, loff_t destoff, u64 len)
+int btrfs_remap_file_range(struct file *src_file, loff_t off,
+		struct file *dst_file, loff_t destoff, u64 len,
+		unsigned int remap_flags)
 {
+	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+		return -EINVAL;
+
+	if (remap_flags & REMAP_FILE_DEDUP) {
+		struct inode *src = file_inode(src_file);
+		struct inode *dst = file_inode(dst_file);
+		u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
+
+		if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
+			/*
+			 * Btrfs does not support blocksize < page_size. As a
+			 * result, btrfs_cmp_data() won't correctly handle
+			 * this situation without an update.
+			 */
+			return -EINVAL;
+		}
+
+		return btrfs_extent_same(src, off, len, dst, destoff);
+	}
+
 	return btrfs_clone_files(dst_file, src_file, off, len, destoff);
 }
 
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 7065426b3280..e8144d0dcde2 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -975,8 +975,9 @@ const struct inode_operations cifs_symlink_inode_ops = {
 	.listxattr = cifs_listxattr,
 };
 
-static int cifs_clone_file_range(struct file *src_file, loff_t off,
-		struct file *dst_file, loff_t destoff, u64 len)
+static int cifs_remap_file_range(struct file *src_file, loff_t off,
+		struct file *dst_file, loff_t destoff, u64 len,
+		unsigned int remap_flags)
 {
 	struct inode *src_inode = file_inode(src_file);
 	struct inode *target_inode = file_inode(dst_file);
@@ -986,6 +987,9 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off,
 	unsigned int xid;
 	int rc;
 
+	if (remap_flags & ~REMAP_FILE_ADVISORY)
+		return -EINVAL;
+
 	cifs_dbg(FYI, "clone range\n");
 
 	xid = get_xid();
@@ -1134,7 +1138,7 @@ const struct file_operations cifs_file_ops = {
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
 };
@@ -1153,7 +1157,7 @@ const struct file_operations cifs_file_strict_ops = {
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
 };
@@ -1172,7 +1176,7 @@ const struct file_operations cifs_file_direct_ops = {
 	.splice_write = iter_file_splice_write,
 	.unlocked_ioctl  = cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.llseek = cifs_llseek,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
@@ -1191,7 +1195,7 @@ const struct file_operations cifs_file_nobrl_ops = {
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
 };
@@ -1209,7 +1213,7 @@ const struct file_operations cifs_file_strict_nobrl_ops = {
 	.llseek = cifs_llseek,
 	.unlocked_ioctl	= cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
 };
@@ -1227,7 +1231,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
 	.splice_write = iter_file_splice_write,
 	.unlocked_ioctl  = cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.llseek = cifs_llseek,
 	.setlease = cifs_setlease,
 	.fallocate = cifs_fallocate,
@@ -1239,7 +1243,7 @@ const struct file_operations cifs_dir_ops = {
 	.read    = generic_read_dir,
 	.unlocked_ioctl  = cifs_ioctl,
 	.copy_file_range = cifs_copy_file_range,
-	.clone_file_range = cifs_clone_file_range,
+	.remap_file_range = cifs_remap_file_range,
 	.llseek = generic_file_llseek,
 	.fsync = cifs_dir_fsync,
 };
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 4288a6ecaf75..ae5780ce41dc 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -180,8 +180,9 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
 	return nfs42_proc_allocate(filep, offset, len);
 }
 
-static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
-		struct file *dst_file, loff_t dst_off, u64 count)
+static int nfs42_remap_file_range(struct file *src_file, loff_t src_off,
+		struct file *dst_file, loff_t dst_off, u64 count,
+		unsigned int remap_flags)
 {
 	struct inode *dst_inode = file_inode(dst_file);
 	struct nfs_server *server = NFS_SERVER(dst_inode);
@@ -190,6 +191,9 @@ static int nfs42_clone_file_range(struct file *src_file, loff_t src_off,
 	bool same_inode = false;
 	int ret;
 
+	if (remap_flags & ~REMAP_FILE_ADVISORY)
+		return -EINVAL;
+
 	/* check alignment w.r.t. clone_blksize */
 	ret = -EINVAL;
 	if (bs) {
@@ -262,7 +266,7 @@ const struct file_operations nfs4_file_operations = {
 	.copy_file_range = nfs4_copy_file_range,
 	.llseek		= nfs4_file_llseek,
 	.fallocate	= nfs42_fallocate,
-	.clone_file_range = nfs42_clone_file_range,
+	.remap_file_range = nfs42_remap_file_range,
 #else
 	.llseek		= nfs_file_llseek,
 #endif
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9fa35cb6f6e0..0b757a24567c 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2527,24 +2527,18 @@ out:
 	return offset;
 }
 
-static int ocfs2_file_clone_range(struct file *file_in,
+static int ocfs2_remap_file_range(struct file *file_in,
 				  loff_t pos_in,
 				  struct file *file_out,
 				  loff_t pos_out,
-				  u64 len)
+				  u64 len,
+				  unsigned int remap_flags)
 {
-	return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-					 len, false);
-}
+	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+		return -EINVAL;
 
-static int ocfs2_file_dedupe_range(struct file *file_in,
-				   loff_t pos_in,
-				   struct file *file_out,
-				   loff_t pos_out,
-				   u64 len)
-{
 	return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-					  len, true);
+					 len, remap_flags & REMAP_FILE_DEDUP);
 }
 
 const struct inode_operations ocfs2_file_iops = {
@@ -2586,8 +2580,7 @@ const struct file_operations ocfs2_fops = {
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.fallocate	= ocfs2_fallocate,
-	.clone_file_range = ocfs2_file_clone_range,
-	.dedupe_file_range = ocfs2_file_dedupe_range,
+	.remap_file_range = ocfs2_remap_file_range,
 };
 
 const struct file_operations ocfs2_dops = {
@@ -2633,8 +2626,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
 	.splice_read	= generic_file_splice_read,
 	.splice_write	= iter_file_splice_write,
 	.fallocate	= ocfs2_fallocate,
-	.clone_file_range = ocfs2_file_clone_range,
-	.dedupe_file_range = ocfs2_file_dedupe_range,
+	.remap_file_range = ocfs2_remap_file_range,
 };
 
 const struct file_operations ocfs2_dops_no_plocks = {
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 986313da0c88..fffb36fd5920 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -489,26 +489,31 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
 			    OVL_COPY);
 }
 
-static int ovl_clone_file_range(struct file *file_in, loff_t pos_in,
-				struct file *file_out, loff_t pos_out, u64 len)
+static int ovl_remap_file_range(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out,
+				u64 len, unsigned int remap_flags)
 {
-	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
-			    OVL_CLONE);
-}
+	enum ovl_copyop op;
+
+	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+		return -EINVAL;
+
+	if (remap_flags & REMAP_FILE_DEDUP)
+		op = OVL_DEDUPE;
+	else
+		op = OVL_CLONE;
 
-static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in,
-				 struct file *file_out, loff_t pos_out, u64 len)
-{
 	/*
 	 * Don't copy up because of a dedupe request, this wouldn't make sense
 	 * most of the time (data would be duplicated instead of deduplicated).
 	 */
-	if (!ovl_inode_upper(file_inode(file_in)) ||
-	    !ovl_inode_upper(file_inode(file_out)))
+	if (op == OVL_DEDUPE &&
+	    (!ovl_inode_upper(file_inode(file_in)) ||
+	     !ovl_inode_upper(file_inode(file_out))))
 		return -EPERM;
 
 	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
-			    OVL_DEDUPE);
+			    op);
 }
 
 const struct file_operations ovl_file_operations = {
@@ -525,6 +530,5 @@ const struct file_operations ovl_file_operations = {
 	.compat_ioctl	= ovl_compat_ioctl,
 
 	.copy_file_range	= ovl_copy_file_range,
-	.clone_file_range	= ovl_clone_file_range,
-	.dedupe_file_range	= ovl_dedupe_file_range,
+	.remap_file_range	= ovl_remap_file_range,
 };
diff --git a/fs/read_write.c b/fs/read_write.c
index 734c5661fb69..766bdcb381f3 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1588,9 +1588,9 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 	 * Try cloning first, this is supported by more file systems, and
 	 * more efficient if both clone and copy are supported (e.g. NFS).
 	 */
-	if (file_in->f_op->clone_file_range) {
-		ret = file_in->f_op->clone_file_range(file_in, pos_in,
-				file_out, pos_out, len);
+	if (file_in->f_op->remap_file_range) {
+		ret = file_in->f_op->remap_file_range(file_in, pos_in,
+				file_out, pos_out, len, 0);
 		if (ret == 0) {
 			ret = len;
 			goto done;
@@ -1849,7 +1849,7 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in,
 	    (file_out->f_flags & O_APPEND))
 		return -EBADF;
 
-	if (!file_in->f_op->clone_file_range)
+	if (!file_in->f_op->remap_file_range)
 		return -EOPNOTSUPP;
 
 	ret = remap_verify_area(file_in, pos_in, len, false);
@@ -1860,8 +1860,8 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in,
 	if (ret)
 		return ret;
 
-	ret = file_in->f_op->clone_file_range(file_in, pos_in,
-			file_out, pos_out, len);
+	ret = file_in->f_op->remap_file_range(file_in, pos_in,
+			file_out, pos_out, len, 0);
 	if (!ret) {
 		fsnotify_access(file_in);
 		fsnotify_modify(file_out);
@@ -2006,7 +2006,7 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 		goto out_drop_write;
 
 	ret = -EINVAL;
-	if (!dst_file->f_op->dedupe_file_range)
+	if (!dst_file->f_op->remap_file_range)
 		goto out_drop_write;
 
 	if (len == 0) {
@@ -2014,8 +2014,8 @@ int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 		goto out_drop_write;
 	}
 
-	ret = dst_file->f_op->dedupe_file_range(src_file, src_pos,
-						dst_file, dst_pos, len);
+	ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file,
+			dst_pos, len, REMAP_FILE_DEDUP);
 out_drop_write:
 	mnt_drop_write_file(dst_file);
 
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 61a5ad2600e8..2ad94d508f80 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -920,27 +920,19 @@ out_unlock:
 }
 
 STATIC int
-xfs_file_clone_range(
+xfs_file_remap_range(
 	struct file	*file_in,
 	loff_t		pos_in,
 	struct file	*file_out,
 	loff_t		pos_out,
-	u64		len)
+	u64		len,
+	unsigned int	remap_flags)
 {
-	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-				     len, false);
-}
+	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
+		return -EINVAL;
 
-STATIC int
-xfs_file_dedupe_range(
-	struct file	*file_in,
-	loff_t		pos_in,
-	struct file	*file_out,
-	loff_t		pos_out,
-	u64		len)
-{
 	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-				     len, true);
+			len, remap_flags & REMAP_FILE_DEDUP);
 }
 
 STATIC int
@@ -1175,8 +1167,7 @@ const struct file_operations xfs_file_operations = {
 	.fsync		= xfs_file_fsync,
 	.get_unmapped_area = thp_get_unmapped_area,
 	.fallocate	= xfs_file_fallocate,
-	.clone_file_range = xfs_file_clone_range,
-	.dedupe_file_range = xfs_file_dedupe_range,
+	.remap_file_range = xfs_file_remap_range,
 };
 
 const struct file_operations xfs_dir_file_operations = {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 55729e1c2e75..888cef35c7d7 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1721,6 +1721,24 @@ struct block_device_operations;
 #define NOMMU_VMFLAGS \
 	(NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC)
 
+/*
+ * These flags control the behavior of the remap_file_range function pointer.
+ * If it is called with len == 0 that means "remap to end of source file".
+ * See Documentation/filesystems/vfs.txt for more details about this call.
+ *
+ * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate)
+ */
+#define REMAP_FILE_DEDUP		(1 << 0)
+
+/*
+ * These flags signal that the caller is ok with altering various aspects of
+ * the behavior of the remap operation.  The changes must be made by the
+ * implementation; the vfs remap helper functions can take advantage of them.
+ * Flags in this category exist to preserve the quirky behavior of the hoisted
+ * btrfs clone/dedupe ioctls.
+ * There are no flags yet, but subsequent commits will add some.
+ */
+#define REMAP_FILE_ADVISORY		(0)
 
 struct iov_iter;
 
@@ -1759,10 +1777,9 @@ struct file_operations {
 #endif
 	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
 			loff_t, size_t, unsigned int);
-	int (*clone_file_range)(struct file *, loff_t, struct file *, loff_t,
-			u64);
-	int (*dedupe_file_range)(struct file *, loff_t, struct file *, loff_t,
-			u64);
+	int (*remap_file_range)(struct file *file_in, loff_t pos_in,
+				struct file *file_out, loff_t pos_out,
+				u64 len, unsigned int remap_flags);
 	int (*fadvise)(struct file *, loff_t, loff_t, int);
 } __randomize_layout;
 
-- 
cgit v1.2.3


From a91ae49bbaf43910edb09e03fedf26b23875bd52 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:41:28 +1100
Subject: vfs: pass remap flags to generic_remap_file_range_prep

Plumb the remap flags through the filesystem from the vfs function
dispatcher all the way to the prep function to prepare for behavior
changes in subsequent patches.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/ocfs2/file.c         |  2 +-
 fs/ocfs2/refcounttree.c |  4 ++--
 fs/ocfs2/refcounttree.h |  2 +-
 fs/read_write.c         | 14 +++++++-------
 fs/xfs/xfs_file.c       |  2 +-
 fs/xfs/xfs_reflink.c    | 21 +++++++++++----------
 fs/xfs/xfs_reflink.h    |  3 ++-
 include/linux/fs.h      |  2 +-
 8 files changed, 26 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 0b757a24567c..9809b0e5746f 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2538,7 +2538,7 @@ static int ocfs2_remap_file_range(struct file *file_in,
 		return -EINVAL;
 
 	return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-					 len, remap_flags & REMAP_FILE_DEDUP);
+					 len, remap_flags);
 }
 
 const struct inode_operations ocfs2_file_iops = {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 36c56dfbe485..df9781567ec0 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4825,7 +4825,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 			      struct file *file_out,
 			      loff_t pos_out,
 			      u64 len,
-			      bool is_dedupe)
+			      unsigned int remap_flags)
 {
 	struct inode *inode_in = file_inode(file_in);
 	struct inode *inode_out = file_inode(file_out);
@@ -4851,7 +4851,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 		goto out_unlock;
 
 	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
-			&len, is_dedupe);
+			&len, remap_flags);
 	if (ret <= 0)
 		goto out_unlock;
 
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 4af55bf4b35b..d2c5f526edff 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -120,6 +120,6 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 			      struct file *file_out,
 			      loff_t pos_out,
 			      u64 len,
-			      bool is_dedupe);
+			      unsigned int remap_flags);
 
 #endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/read_write.c b/fs/read_write.c
index 766bdcb381f3..201381689284 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1722,14 +1722,14 @@ static int generic_remap_check_len(struct inode *inode_in,
 				   struct inode *inode_out,
 				   loff_t pos_out,
 				   u64 *len,
-				   bool is_dedupe)
+				   unsigned int remap_flags)
 {
 	u64 blkmask = i_blocksize(inode_in) - 1;
 
 	if ((*len & blkmask) == 0)
 		return 0;
 
-	if (is_dedupe)
+	if (remap_flags & REMAP_FILE_DEDUP)
 		*len &= ~blkmask;
 	else if (pos_out + *len < i_size_read(inode_out))
 		return -EINVAL;
@@ -1747,7 +1747,7 @@ static int generic_remap_check_len(struct inode *inode_in,
  */
 int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 				  struct file *file_out, loff_t pos_out,
-				  u64 *len, bool is_dedupe)
+				  u64 *len, unsigned int remap_flags)
 {
 	struct inode *inode_in = file_inode(file_in);
 	struct inode *inode_out = file_inode(file_out);
@@ -1771,7 +1771,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 	if (*len == 0) {
 		loff_t isize = i_size_read(inode_in);
 
-		if (is_dedupe || pos_in == isize)
+		if ((remap_flags & REMAP_FILE_DEDUP) || pos_in == isize)
 			return 0;
 		if (pos_in > isize)
 			return -EINVAL;
@@ -1782,7 +1782,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 
 	/* Check that we don't violate system file offset limits. */
 	ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
-			is_dedupe);
+			(remap_flags & REMAP_FILE_DEDUP));
 	if (ret)
 		return ret;
 
@@ -1804,7 +1804,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 	/*
 	 * Check that the extents are the same.
 	 */
-	if (is_dedupe) {
+	if (remap_flags & REMAP_FILE_DEDUP) {
 		bool		is_same = false;
 
 		ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
@@ -1816,7 +1816,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 	}
 
 	ret = generic_remap_check_len(inode_in, inode_out, pos_out, len,
-			is_dedupe);
+			remap_flags);
 	if (ret)
 		return ret;
 
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 2ad94d508f80..20314eb4677a 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -932,7 +932,7 @@ xfs_file_remap_range(
 		return -EINVAL;
 
 	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-			len, remap_flags & REMAP_FILE_DEDUP);
+			len, remap_flags);
 }
 
 STATIC int
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index a7757a128a78..29aab196ce7e 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -921,13 +921,14 @@ xfs_reflink_update_dest(
 	struct xfs_inode	*dest,
 	xfs_off_t		newlen,
 	xfs_extlen_t		cowextsize,
-	bool			is_dedupe)
+	unsigned int		remap_flags)
 {
 	struct xfs_mount	*mp = dest->i_mount;
 	struct xfs_trans	*tp;
 	int			error;
 
-	if (is_dedupe && newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
+	if ((remap_flags & REMAP_FILE_DEDUP) &&
+	    newlen <= i_size_read(VFS_I(dest)) && cowextsize == 0)
 		return 0;
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
@@ -948,7 +949,7 @@ xfs_reflink_update_dest(
 		dest->i_d.di_flags2 |= XFS_DIFLAG2_COWEXTSIZE;
 	}
 
-	if (!is_dedupe) {
+	if (!(remap_flags & REMAP_FILE_DEDUP)) {
 		xfs_trans_ichgtime(tp, dest,
 				   XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 	}
@@ -1296,7 +1297,7 @@ xfs_reflink_remap_prep(
 	struct file		*file_out,
 	loff_t			pos_out,
 	u64			*len,
-	bool			is_dedupe)
+	unsigned int		remap_flags)
 {
 	struct inode		*inode_in = file_inode(file_in);
 	struct xfs_inode	*src = XFS_I(inode_in);
@@ -1327,7 +1328,7 @@ xfs_reflink_remap_prep(
 		goto out_unlock;
 
 	ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
-			len, is_dedupe);
+			len, remap_flags);
 	if (ret <= 0)
 		goto out_unlock;
 
@@ -1336,7 +1337,7 @@ xfs_reflink_remap_prep(
 	 * from the source file so we don't try to dedupe the partial
 	 * EOF block.
 	 */
-	if (is_dedupe) {
+	if (remap_flags & REMAP_FILE_DEDUP) {
 		*len &= ~blkmask;
 	} else if (*len & blkmask) {
 		/*
@@ -1372,7 +1373,7 @@ xfs_reflink_remap_prep(
 				   PAGE_ALIGN(pos_out + *len) - 1);
 
 	/* If we're altering the file contents... */
-	if (!is_dedupe) {
+	if (!(remap_flags & REMAP_FILE_DEDUP)) {
 		/*
 		 * ...update the timestamps (which will grab the ilock again
 		 * from xfs_fs_dirty_inode, so we have to call it before we
@@ -1410,7 +1411,7 @@ xfs_reflink_remap_range(
 	struct file		*file_out,
 	loff_t			pos_out,
 	u64			len,
-	bool			is_dedupe)
+	unsigned int		remap_flags)
 {
 	struct inode		*inode_in = file_inode(file_in);
 	struct xfs_inode	*src = XFS_I(inode_in);
@@ -1430,7 +1431,7 @@ xfs_reflink_remap_range(
 
 	/* Prepare and then clone file data. */
 	ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
-			&len, is_dedupe);
+			&len, remap_flags);
 	if (ret <= 0)
 		return ret;
 
@@ -1457,7 +1458,7 @@ xfs_reflink_remap_range(
 		cowextsize = src->i_d.di_cowextsize;
 
 	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
-			is_dedupe);
+			remap_flags);
 
 out_unlock:
 	xfs_reflink_remap_unlock(file_in, file_out);
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index c585ad9552b2..6f82d628bf17 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -28,7 +28,8 @@ extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t count);
 extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
 extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
-		struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe);
+		struct file *file_out, loff_t pos_out, u64 len,
+		unsigned int remap_flags);
 extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp,
 		struct xfs_inode *ip, bool *has_shared);
 extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 888cef35c7d7..631c28ce1436 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1844,7 +1844,7 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
 				   loff_t, size_t, unsigned int);
 extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 					 struct file *file_out, loff_t pos_out,
-					 u64 *count, bool is_dedupe);
+					 u64 *count, unsigned int remap_flags);
 extern int do_clone_file_range(struct file *file_in, loff_t pos_in,
 			       struct file *file_out, loff_t pos_out, u64 len);
 extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-- 
cgit v1.2.3


From 3d28193e1df043764deb7abdaba5e3a6660bc393 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:41:34 +1100
Subject: vfs: pass remap flags to generic_remap_checks

Pass the same remap flags to generic_remap_checks for consistency.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/read_write.c    | 2 +-
 include/linux/fs.h | 2 +-
 mm/filemap.c       | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/read_write.c b/fs/read_write.c
index 201381689284..ebcbfc4f2907 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1782,7 +1782,7 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 
 	/* Check that we don't violate system file offset limits. */
 	ret = generic_remap_checks(file_in, pos_in, file_out, pos_out, len,
-			(remap_flags & REMAP_FILE_DEDUP));
+			remap_flags);
 	if (ret)
 		return ret;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 631c28ce1436..c5435ca81132 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2986,7 +2986,7 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
 extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
 extern int generic_remap_checks(struct file *file_in, loff_t pos_in,
 				struct file *file_out, loff_t pos_out,
-				uint64_t *count, bool is_dedupe);
+				uint64_t *count, unsigned int remap_flags);
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/mm/filemap.c b/mm/filemap.c
index 84b7301e41a0..410dc58f7b16 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2994,7 +2994,7 @@ EXPORT_SYMBOL(generic_write_checks);
  */
 int generic_remap_checks(struct file *file_in, loff_t pos_in,
 			 struct file *file_out, loff_t pos_out,
-			 uint64_t *req_count, bool is_dedupe)
+			 uint64_t *req_count, unsigned int remap_flags)
 {
 	struct inode *inode_in = file_in->f_mapping->host;
 	struct inode *inode_out = file_out->f_mapping->host;
@@ -3016,7 +3016,7 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
 	size_out = i_size_read(inode_out);
 
 	/* Dedupe requires both ranges to be within EOF. */
-	if (is_dedupe &&
+	if ((remap_flags & REMAP_FILE_DEDUP) &&
 	    (pos_in >= size_in || pos_in + count > size_in ||
 	     pos_out >= size_out || pos_out + count > size_out))
 		return -EINVAL;
-- 
cgit v1.2.3


From 42ec3d4c02187a18e27ff94b409ec27234bf2ffd Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:41:49 +1100
Subject: vfs: make remap_file_range functions take and return bytes completed

Change the remap_file_range functions to take a number of bytes to
operate upon and return the number of bytes they operated on.  This is a
requirement for allowing fs implementations to return short clone/dedupe
results to the user, which will enable us to obey resource limits in a
graceful manner.

A subsequent patch will enable copy_file_range to signal to the
->clone_file_range implementation that it can handle a short length,
which will be returned in the function's return value.  For now the
short return is not implemented anywhere so the behavior won't change --
either copy_file_range manages to clone the entire range or it tries an
alternative.

Neither clone ioctl can take advantage of this, alas.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 Documentation/filesystems/vfs.txt | 10 ++++----
 fs/btrfs/ctree.h                  |  6 ++---
 fs/btrfs/ioctl.c                  | 13 +++++++----
 fs/cifs/cifsfs.c                  |  6 ++---
 fs/ioctl.c                        | 10 +++++++-
 fs/nfs/nfs4file.c                 |  6 ++---
 fs/nfsd/vfs.c                     |  8 +++++--
 fs/ocfs2/file.c                   | 16 ++++++-------
 fs/ocfs2/refcounttree.c           |  2 +-
 fs/ocfs2/refcounttree.h           |  2 +-
 fs/overlayfs/copy_up.c            |  6 ++---
 fs/overlayfs/file.c               | 12 +++++-----
 fs/read_write.c                   | 49 +++++++++++++++++++++------------------
 fs/xfs/xfs_file.c                 |  9 ++++---
 fs/xfs/xfs_reflink.c              |  4 ++--
 fs/xfs/xfs_reflink.h              |  2 +-
 include/linux/fs.h                | 27 +++++++++++----------
 mm/filemap.c                      |  2 +-
 18 files changed, 108 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 6f5babfee27b..1bd2919deaca 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -883,9 +883,9 @@ struct file_operations {
 	unsigned (*mmap_capabilities)(struct file *);
 #endif
 	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
-	int (*remap_file_range)(struct file *file_in, loff_t pos_in,
-				struct file *file_out, loff_t pos_out,
-				u64 len, unsigned int remap_flags);
+	loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
+				   struct file *file_out, loff_t pos_out,
+				   loff_t len, unsigned int remap_flags);
 	int (*fadvise)(struct file *, loff_t, loff_t, int);
 };
 
@@ -966,8 +966,8 @@ otherwise noted.
 	implementation should remap len bytes at pos_in of the source file into
 	the dest file at pos_out.  Implementations must handle callers passing
 	in len == 0; this means "remap to the end of the source file".  The
-	return value should be zero if all bytes were remapped, or the usual
-	negative error code if the remapping did not succeed completely.
+	return value should the number of bytes remapped, or the usual
+	negative error code if errors occurred before any bytes were remapped.
 	The remap_flags parameter accepts REMAP_FILE_* flags.  If
 	REMAP_FILE_DEDUP is set then the implementation must only remap if the
 	requested file ranges have identical contents.
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 124a05662fc2..771a961d77ad 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3247,9 +3247,9 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
 		      size_t num_pages, loff_t pos, size_t write_bytes,
 		      struct extent_state **cached);
 int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
-int btrfs_remap_file_range(struct file *file_in, loff_t pos_in,
-			   struct file *file_out, loff_t pos_out, u64 len,
-			   unsigned int remap_flags);
+loff_t btrfs_remap_file_range(struct file *file_in, loff_t pos_in,
+			      struct file *file_out, loff_t pos_out,
+			      loff_t len, unsigned int remap_flags);
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index bfd99c66723e..b0c513e10977 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4328,10 +4328,12 @@ out_unlock:
 	return ret;
 }
 
-int btrfs_remap_file_range(struct file *src_file, loff_t off,
-		struct file *dst_file, loff_t destoff, u64 len,
+loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
+		struct file *dst_file, loff_t destoff, loff_t len,
 		unsigned int remap_flags)
 {
+	int ret;
+
 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
 		return -EINVAL;
 
@@ -4349,10 +4351,11 @@ int btrfs_remap_file_range(struct file *src_file, loff_t off,
 			return -EINVAL;
 		}
 
-		return btrfs_extent_same(src, off, len, dst, destoff);
+		ret = btrfs_extent_same(src, off, len, dst, destoff);
+	} else {
+		ret = btrfs_clone_files(dst_file, src_file, off, len, destoff);
 	}
-
-	return btrfs_clone_files(dst_file, src_file, off, len, destoff);
+	return ret < 0 ? ret : len;
 }
 
 static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index e8144d0dcde2..5ca71c6c8be2 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -975,8 +975,8 @@ const struct inode_operations cifs_symlink_inode_ops = {
 	.listxattr = cifs_listxattr,
 };
 
-static int cifs_remap_file_range(struct file *src_file, loff_t off,
-		struct file *dst_file, loff_t destoff, u64 len,
+static loff_t cifs_remap_file_range(struct file *src_file, loff_t off,
+		struct file *dst_file, loff_t destoff, loff_t len,
 		unsigned int remap_flags)
 {
 	struct inode *src_inode = file_inode(src_file);
@@ -1029,7 +1029,7 @@ static int cifs_remap_file_range(struct file *src_file, loff_t off,
 	unlock_two_nondirectories(src_inode, target_inode);
 out:
 	free_xid(xid);
-	return rc;
+	return rc < 0 ? rc : len;
 }
 
 ssize_t cifs_file_copychunk_range(unsigned int xid,
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 2005529af560..72537b68c272 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -223,6 +223,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
 			     u64 off, u64 olen, u64 destoff)
 {
 	struct fd src_file = fdget(srcfd);
+	loff_t cloned;
 	int ret;
 
 	if (!src_file.file)
@@ -230,7 +231,14 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
 	ret = -EXDEV;
 	if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
 		goto fdput;
-	ret = vfs_clone_file_range(src_file.file, off, dst_file, destoff, olen);
+	cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff,
+				      olen);
+	if (cloned < 0)
+		ret = cloned;
+	else if (olen && cloned != olen)
+		ret = -EINVAL;
+	else
+		ret = 0;
 fdput:
 	fdput(src_file);
 	return ret;
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index ae5780ce41dc..46d691ba04bc 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -180,8 +180,8 @@ static long nfs42_fallocate(struct file *filep, int mode, loff_t offset, loff_t
 	return nfs42_proc_allocate(filep, offset, len);
 }
 
-static int nfs42_remap_file_range(struct file *src_file, loff_t src_off,
-		struct file *dst_file, loff_t dst_off, u64 count,
+static loff_t nfs42_remap_file_range(struct file *src_file, loff_t src_off,
+		struct file *dst_file, loff_t dst_off, loff_t count,
 		unsigned int remap_flags)
 {
 	struct inode *dst_inode = file_inode(dst_file);
@@ -244,7 +244,7 @@ out_unlock:
 		inode_unlock(src_inode);
 	}
 out:
-	return ret;
+	return ret < 0 ? ret : count;
 }
 #endif /* CONFIG_NFS_V4_2 */
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b53e76391e52..ac6cb6101cbe 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -541,8 +541,12 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
 __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
 		u64 dst_pos, u64 count)
 {
-	return nfserrno(vfs_clone_file_range(src, src_pos, dst, dst_pos,
-					     count));
+	loff_t cloned;
+
+	cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count);
+	if (count && cloned != count)
+		cloned = -EINVAL;
+	return nfserrno(cloned < 0 ? cloned : 0);
 }
 
 ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9809b0e5746f..fbaeafe44b5f 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2527,18 +2527,18 @@ out:
 	return offset;
 }
 
-static int ocfs2_remap_file_range(struct file *file_in,
-				  loff_t pos_in,
-				  struct file *file_out,
-				  loff_t pos_out,
-				  u64 len,
-				  unsigned int remap_flags)
+static loff_t ocfs2_remap_file_range(struct file *file_in, loff_t pos_in,
+				     struct file *file_out, loff_t pos_out,
+				     loff_t len, unsigned int remap_flags)
 {
+	int ret;
+
 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
 		return -EINVAL;
 
-	return ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
-					 len, remap_flags);
+	ret = ocfs2_reflink_remap_range(file_in, pos_in, file_out, pos_out,
+					len, remap_flags);
+	return ret < 0 ? ret : len;
 }
 
 const struct inode_operations ocfs2_file_iops = {
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index df9781567ec0..6a42c04ac0ab 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4824,7 +4824,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 			      loff_t pos_in,
 			      struct file *file_out,
 			      loff_t pos_out,
-			      u64 len,
+			      loff_t len,
 			      unsigned int remap_flags)
 {
 	struct inode *inode_in = file_inode(file_in);
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index d2c5f526edff..eb65c1d0843c 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -119,7 +119,7 @@ int ocfs2_reflink_remap_range(struct file *file_in,
 			      loff_t pos_in,
 			      struct file *file_out,
 			      loff_t pos_out,
-			      u64 len,
+			      loff_t len,
 			      unsigned int remap_flags);
 
 #endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 1cc797a08a5b..8750b7235516 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -125,6 +125,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 	struct file *new_file;
 	loff_t old_pos = 0;
 	loff_t new_pos = 0;
+	loff_t cloned;
 	int error = 0;
 
 	if (len == 0)
@@ -141,11 +142,10 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 	}
 
 	/* Try to use clone_file_range to clone up within the same fs */
-	error = do_clone_file_range(old_file, 0, new_file, 0, len);
-	if (!error)
+	cloned = do_clone_file_range(old_file, 0, new_file, 0, len);
+	if (cloned == len)
 		goto out;
 	/* Couldn't clone, so now we try to copy the data */
-	error = 0;
 
 	/* FIXME: copy up sparse files efficiently */
 	while (len) {
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index fffb36fd5920..6c3fec6168e9 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -434,14 +434,14 @@ enum ovl_copyop {
 	OVL_DEDUPE,
 };
 
-static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in,
+static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
 			    struct file *file_out, loff_t pos_out,
-			    u64 len, unsigned int flags, enum ovl_copyop op)
+			    loff_t len, unsigned int flags, enum ovl_copyop op)
 {
 	struct inode *inode_out = file_inode(file_out);
 	struct fd real_in, real_out;
 	const struct cred *old_cred;
-	ssize_t ret;
+	loff_t ret;
 
 	ret = ovl_real_fdget(file_out, &real_out);
 	if (ret)
@@ -489,9 +489,9 @@ static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
 			    OVL_COPY);
 }
 
-static int ovl_remap_file_range(struct file *file_in, loff_t pos_in,
-				struct file *file_out, loff_t pos_out,
-				u64 len, unsigned int remap_flags)
+static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
+				   struct file *file_out, loff_t pos_out,
+				   loff_t len, unsigned int remap_flags)
 {
 	enum ovl_copyop op;
 
diff --git a/fs/read_write.c b/fs/read_write.c
index b61bd3fc7154..356641afa487 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1589,10 +1589,13 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 	 * more efficient if both clone and copy are supported (e.g. NFS).
 	 */
 	if (file_in->f_op->remap_file_range) {
-		ret = file_in->f_op->remap_file_range(file_in, pos_in,
-				file_out, pos_out, len, 0);
-		if (ret == 0) {
-			ret = len;
+		loff_t cloned;
+
+		cloned = file_in->f_op->remap_file_range(file_in, pos_in,
+				file_out, pos_out,
+				min_t(loff_t, MAX_RW_COUNT, len), 0);
+		if (cloned > 0) {
+			ret = cloned;
 			goto done;
 		}
 	}
@@ -1686,11 +1689,12 @@ out2:
 	return ret;
 }
 
-static int remap_verify_area(struct file *file, loff_t pos, u64 len, bool write)
+static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
+			     bool write)
 {
 	struct inode *inode = file_inode(file);
 
-	if (unlikely(pos < 0))
+	if (unlikely(pos < 0 || len < 0))
 		return -EINVAL;
 
 	 if (unlikely((loff_t) (pos + len) < 0))
@@ -1721,7 +1725,7 @@ static int remap_verify_area(struct file *file, loff_t pos, u64 len, bool write)
 static int generic_remap_check_len(struct inode *inode_in,
 				   struct inode *inode_out,
 				   loff_t pos_out,
-				   u64 *len,
+				   loff_t *len,
 				   unsigned int remap_flags)
 {
 	u64 blkmask = i_blocksize(inode_in) - 1;
@@ -1747,7 +1751,7 @@ static int generic_remap_check_len(struct inode *inode_in,
  */
 int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 				  struct file *file_out, loff_t pos_out,
-				  u64 *len, unsigned int remap_flags)
+				  loff_t *len, unsigned int remap_flags)
 {
 	struct inode *inode_in = file_inode(file_in);
 	struct inode *inode_out = file_inode(file_out);
@@ -1843,12 +1847,12 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 }
 EXPORT_SYMBOL(generic_remap_file_range_prep);
 
-int do_clone_file_range(struct file *file_in, loff_t pos_in,
-			struct file *file_out, loff_t pos_out, u64 len)
+loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
+			   struct file *file_out, loff_t pos_out, loff_t len)
 {
 	struct inode *inode_in = file_inode(file_in);
 	struct inode *inode_out = file_inode(file_out);
-	int ret;
+	loff_t ret;
 
 	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
 		return -EISDIR;
@@ -1881,19 +1885,19 @@ int do_clone_file_range(struct file *file_in, loff_t pos_in,
 
 	ret = file_in->f_op->remap_file_range(file_in, pos_in,
 			file_out, pos_out, len, 0);
-	if (!ret) {
-		fsnotify_access(file_in);
-		fsnotify_modify(file_out);
-	}
+	if (ret < 0)
+		return ret;
 
+	fsnotify_access(file_in);
+	fsnotify_modify(file_out);
 	return ret;
 }
 EXPORT_SYMBOL(do_clone_file_range);
 
-int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-			 struct file *file_out, loff_t pos_out, u64 len)
+loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+			    struct file *file_out, loff_t pos_out, loff_t len)
 {
-	int ret;
+	loff_t ret;
 
 	file_start_write(file_out);
 	ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len);
@@ -1999,10 +2003,11 @@ out_error:
 }
 EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
 
-int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
-			      struct file *dst_file, loff_t dst_pos, u64 len)
+loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+				 struct file *dst_file, loff_t dst_pos,
+				 loff_t len)
 {
-	s64 ret;
+	loff_t ret;
 
 	ret = mnt_want_write_file(dst_file);
 	if (ret)
@@ -2051,7 +2056,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 	int i;
 	int ret;
 	u16 count = same->dest_count;
-	int deduped;
+	loff_t deduped;
 
 	if (!(file->f_mode & FMODE_READ))
 		return -EINVAL;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 20314eb4677a..38fde4e11714 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -919,20 +919,23 @@ out_unlock:
 	return error;
 }
 
-STATIC int
+STATIC loff_t
 xfs_file_remap_range(
 	struct file	*file_in,
 	loff_t		pos_in,
 	struct file	*file_out,
 	loff_t		pos_out,
-	u64		len,
+	loff_t		len,
 	unsigned int	remap_flags)
 {
+	int		ret;
+
 	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
 		return -EINVAL;
 
-	return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
+	ret = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
 			len, remap_flags);
+	return ret < 0 ? ret : len;
 }
 
 STATIC int
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 2d7dd8b28d7c..3dbe5fb7e9c0 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1296,7 +1296,7 @@ xfs_reflink_remap_prep(
 	loff_t			pos_in,
 	struct file		*file_out,
 	loff_t			pos_out,
-	u64			*len,
+	loff_t			*len,
 	unsigned int		remap_flags)
 {
 	struct inode		*inode_in = file_inode(file_in);
@@ -1387,7 +1387,7 @@ xfs_reflink_remap_range(
 	loff_t			pos_in,
 	struct file		*file_out,
 	loff_t			pos_out,
-	u64			len,
+	loff_t			len,
 	unsigned int		remap_flags)
 {
 	struct inode		*inode_in = file_inode(file_in);
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 6f82d628bf17..c3c46c276fe1 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -28,7 +28,7 @@ extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
 		xfs_off_t count);
 extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
 extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
-		struct file *file_out, loff_t pos_out, u64 len,
+		struct file *file_out, loff_t pos_out, loff_t len,
 		unsigned int remap_flags);
 extern int xfs_reflink_inode_has_shared_extents(struct xfs_trans *tp,
 		struct xfs_inode *ip, bool *has_shared);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c5435ca81132..c72d8c3c065a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1777,9 +1777,9 @@ struct file_operations {
 #endif
 	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
 			loff_t, size_t, unsigned int);
-	int (*remap_file_range)(struct file *file_in, loff_t pos_in,
-				struct file *file_out, loff_t pos_out,
-				u64 len, unsigned int remap_flags);
+	loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
+				   struct file *file_out, loff_t pos_out,
+				   loff_t len, unsigned int remap_flags);
 	int (*fadvise)(struct file *, loff_t, loff_t, int);
 } __randomize_layout;
 
@@ -1844,19 +1844,22 @@ extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
 				   loff_t, size_t, unsigned int);
 extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 					 struct file *file_out, loff_t pos_out,
-					 u64 *count, unsigned int remap_flags);
-extern int do_clone_file_range(struct file *file_in, loff_t pos_in,
-			       struct file *file_out, loff_t pos_out, u64 len);
-extern int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-				struct file *file_out, loff_t pos_out, u64 len);
+					 loff_t *count,
+					 unsigned int remap_flags);
+extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
+				  struct file *file_out, loff_t pos_out,
+				  loff_t len);
+extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
+				   struct file *file_out, loff_t pos_out,
+				   loff_t len);
 extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 					 struct inode *dest, loff_t destoff,
 					 loff_t len, bool *is_same);
 extern int vfs_dedupe_file_range(struct file *file,
 				 struct file_dedupe_range *same);
-extern int vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
-				     struct file *dst_file, loff_t dst_pos,
-				     u64 len);
+extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
+					struct file *dst_file, loff_t dst_pos,
+					loff_t len);
 
 
 struct super_operations {
@@ -2986,7 +2989,7 @@ extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
 extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
 extern int generic_remap_checks(struct file *file_in, loff_t pos_in,
 				struct file *file_out, loff_t pos_out,
-				uint64_t *count, unsigned int remap_flags);
+				loff_t *count, unsigned int remap_flags);
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/mm/filemap.c b/mm/filemap.c
index 410dc58f7b16..e9091d731f84 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2994,7 +2994,7 @@ EXPORT_SYMBOL(generic_write_checks);
  */
 int generic_remap_checks(struct file *file_in, loff_t pos_in,
 			 struct file *file_out, loff_t pos_out,
-			 uint64_t *req_count, unsigned int remap_flags)
+			 loff_t *req_count, unsigned int remap_flags)
 {
 	struct inode *inode_in = file_in->f_mapping->host;
 	struct inode *inode_out = file_out->f_mapping->host;
-- 
cgit v1.2.3


From 452ce65951a2f0719e4e119ecca134c06cfe22ee Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:41:56 +1100
Subject: vfs: plumb remap flags through the vfs clone functions

Plumb a remap_flags argument through the {do,vfs}_clone_file_range
functions so that clone can take advantage of it.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/ioctl.c             |  2 +-
 fs/nfsd/vfs.c          |  2 +-
 fs/overlayfs/copy_up.c |  2 +-
 fs/overlayfs/file.c    |  6 +++---
 fs/read_write.c        | 13 +++++++++----
 include/linux/fs.h     |  4 ++--
 6 files changed, 17 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ioctl.c b/fs/ioctl.c
index 72537b68c272..505275ec5596 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -232,7 +232,7 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
 	if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
 		goto fdput;
 	cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff,
-				      olen);
+				      olen, 0);
 	if (cloned < 0)
 		ret = cloned;
 	else if (olen && cloned != olen)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index ac6cb6101cbe..726fc5b2b27a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -543,7 +543,7 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
 {
 	loff_t cloned;
 
-	cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count);
+	cloned = vfs_clone_file_range(src, src_pos, dst, dst_pos, count, 0);
 	if (count && cloned != count)
 		cloned = -EINVAL;
 	return nfserrno(cloned < 0 ? cloned : 0);
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 8750b7235516..5f82fece64a0 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -142,7 +142,7 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
 	}
 
 	/* Try to use clone_file_range to clone up within the same fs */
-	cloned = do_clone_file_range(old_file, 0, new_file, 0, len);
+	cloned = do_clone_file_range(old_file, 0, new_file, 0, len, 0);
 	if (cloned == len)
 		goto out;
 	/* Couldn't clone, so now we try to copy the data */
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 6c3fec6168e9..0393815c8971 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -462,7 +462,7 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
 
 	case OVL_CLONE:
 		ret = vfs_clone_file_range(real_in.file, pos_in,
-					   real_out.file, pos_out, len);
+					   real_out.file, pos_out, len, flags);
 		break;
 
 	case OVL_DEDUPE:
@@ -512,8 +512,8 @@ static loff_t ovl_remap_file_range(struct file *file_in, loff_t pos_in,
 	     !ovl_inode_upper(file_inode(file_out))))
 		return -EPERM;
 
-	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
-			    op);
+	return ovl_copyfile(file_in, pos_in, file_out, pos_out, len,
+			    remap_flags, op);
 }
 
 const struct file_operations ovl_file_operations = {
diff --git a/fs/read_write.c b/fs/read_write.c
index 356641afa487..0d1ac1b9bc22 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1848,12 +1848,15 @@ int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 EXPORT_SYMBOL(generic_remap_file_range_prep);
 
 loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
-			   struct file *file_out, loff_t pos_out, loff_t len)
+			   struct file *file_out, loff_t pos_out,
+			   loff_t len, unsigned int remap_flags)
 {
 	struct inode *inode_in = file_inode(file_in);
 	struct inode *inode_out = file_inode(file_out);
 	loff_t ret;
 
+	WARN_ON_ONCE(remap_flags);
+
 	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
 		return -EISDIR;
 	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
@@ -1884,7 +1887,7 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
 		return ret;
 
 	ret = file_in->f_op->remap_file_range(file_in, pos_in,
-			file_out, pos_out, len, 0);
+			file_out, pos_out, len, remap_flags);
 	if (ret < 0)
 		return ret;
 
@@ -1895,12 +1898,14 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
 EXPORT_SYMBOL(do_clone_file_range);
 
 loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
-			    struct file *file_out, loff_t pos_out, loff_t len)
+			    struct file *file_out, loff_t pos_out,
+			    loff_t len, unsigned int remap_flags)
 {
 	loff_t ret;
 
 	file_start_write(file_out);
-	ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len);
+	ret = do_clone_file_range(file_in, pos_in, file_out, pos_out, len,
+				  remap_flags);
 	file_end_write(file_out);
 
 	return ret;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c72d8c3c065a..1c5e55d2a67d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1848,10 +1848,10 @@ extern int generic_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 					 unsigned int remap_flags);
 extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
 				  struct file *file_out, loff_t pos_out,
-				  loff_t len);
+				  loff_t len, unsigned int remap_flags);
 extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
 				   struct file *file_out, loff_t pos_out,
-				   loff_t len);
+				   loff_t len, unsigned int remap_flags);
 extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 					 struct inode *dest, loff_t destoff,
 					 loff_t len, bool *is_same);
-- 
cgit v1.2.3


From df3658361951e17364f1e1c3fa92862a990ad8bd Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:42:03 +1100
Subject: vfs: plumb remap flags through the vfs dedupe functions

Plumb a remap_flags argument through the vfs_dedupe_file_range_one
functions so that dedupe can take advantage of it.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/overlayfs/file.c | 3 ++-
 fs/read_write.c     | 9 ++++++---
 include/linux/fs.h  | 2 +-
 3 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 0393815c8971..84dd957efa24 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -467,7 +467,8 @@ static loff_t ovl_copyfile(struct file *file_in, loff_t pos_in,
 
 	case OVL_DEDUPE:
 		ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
-						real_out.file, pos_out, len);
+						real_out.file, pos_out, len,
+						flags);
 		break;
 	}
 	revert_creds(old_cred);
diff --git a/fs/read_write.c b/fs/read_write.c
index 0d1ac1b9bc22..ea30666013b0 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -2010,10 +2010,12 @@ EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
 
 loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 				 struct file *dst_file, loff_t dst_pos,
-				 loff_t len)
+				 loff_t len, unsigned int remap_flags)
 {
 	loff_t ret;
 
+	WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP));
+
 	ret = mnt_want_write_file(dst_file);
 	if (ret)
 		return ret;
@@ -2044,7 +2046,7 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 	}
 
 	ret = dst_file->f_op->remap_file_range(src_file, src_pos, dst_file,
-			dst_pos, len, REMAP_FILE_DEDUP);
+			dst_pos, len, remap_flags | REMAP_FILE_DEDUP);
 out_drop_write:
 	mnt_drop_write_file(dst_file);
 
@@ -2112,7 +2114,8 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 		}
 
 		deduped = vfs_dedupe_file_range_one(file, off, dst_file,
-						    info->dest_offset, len);
+						    info->dest_offset, len,
+						    0);
 		if (deduped == -EBADE)
 			info->status = FILE_DEDUPE_RANGE_DIFFERS;
 		else if (deduped < 0)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1c5e55d2a67d..544ab5083b48 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1859,7 +1859,7 @@ extern int vfs_dedupe_file_range(struct file *file,
 				 struct file_dedupe_range *same);
 extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 					struct file *dst_file, loff_t dst_pos,
-					loff_t len);
+					loff_t len, unsigned int remap_flags);
 
 
 struct super_operations {
-- 
cgit v1.2.3


From eca3654e3cc7d93e9734d0fa96cfb15c7f356244 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:42:10 +1100
Subject: vfs: enable remap callers that can handle short operations

Plumb in a remap flag that enables the filesystem remap handler to
shorten remapping requests for callers that can handle it.  Now
copy_file_range can report partial success (in case we run up against
alignment problems, resource limits, etc.).

We also enable CAN_SHORTEN for fideduperange to maintain existing
userspace-visible behavior where xfs/btrfs shorten the dedupe range to
avoid stale post-eof data exposure.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 Documentation/filesystems/vfs.txt |  4 +++-
 fs/read_write.c                   | 28 ++++++++++++++++++++--------
 include/linux/fs.h                |  5 +++--
 mm/filemap.c                      | 11 +++++++----
 4 files changed, 33 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 1bd2919deaca..5f71a252e2e0 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -970,7 +970,9 @@ otherwise noted.
 	negative error code if errors occurred before any bytes were remapped.
 	The remap_flags parameter accepts REMAP_FILE_* flags.  If
 	REMAP_FILE_DEDUP is set then the implementation must only remap if the
-	requested file ranges have identical contents.
+	requested file ranges have identical contents.  If REMAP_CAN_SHORTEN is
+	set, the caller is ok with the implementation shortening the request
+	length to satisfy alignment or EOF requirements (or any other reason).
 
   fadvise: possibly called by the fadvise64() system call.
 
diff --git a/fs/read_write.c b/fs/read_write.c
index ea30666013b0..c0bcc1a20650 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1593,7 +1593,8 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
 
 		cloned = file_in->f_op->remap_file_range(file_in, pos_in,
 				file_out, pos_out,
-				min_t(loff_t, MAX_RW_COUNT, len), 0);
+				min_t(loff_t, MAX_RW_COUNT, len),
+				REMAP_FILE_CAN_SHORTEN);
 		if (cloned > 0) {
 			ret = cloned;
 			goto done;
@@ -1721,6 +1722,8 @@ static int remap_verify_area(struct file *file, loff_t pos, loff_t len,
  * can't meaningfully compare post-EOF contents.
  *
  * For clone we only link a partial EOF block above the destination file's EOF.
+ *
+ * Shorten the request if possible.
  */
 static int generic_remap_check_len(struct inode *inode_in,
 				   struct inode *inode_out,
@@ -1729,16 +1732,24 @@ static int generic_remap_check_len(struct inode *inode_in,
 				   unsigned int remap_flags)
 {
 	u64 blkmask = i_blocksize(inode_in) - 1;
+	loff_t new_len = *len;
 
 	if ((*len & blkmask) == 0)
 		return 0;
 
-	if (remap_flags & REMAP_FILE_DEDUP)
-		*len &= ~blkmask;
-	else if (pos_out + *len < i_size_read(inode_out))
-		return -EINVAL;
+	if ((remap_flags & REMAP_FILE_DEDUP) ||
+	    pos_out + *len < i_size_read(inode_out))
+		new_len &= ~blkmask;
 
-	return 0;
+	if (new_len == *len)
+		return 0;
+
+	if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
+		*len = new_len;
+		return 0;
+	}
+
+	return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
 }
 
 /*
@@ -2014,7 +2025,8 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 {
 	loff_t ret;
 
-	WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP));
+	WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
+				     REMAP_FILE_CAN_SHORTEN));
 
 	ret = mnt_want_write_file(dst_file);
 	if (ret)
@@ -2115,7 +2127,7 @@ int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
 
 		deduped = vfs_dedupe_file_range_one(file, off, dst_file,
 						    info->dest_offset, len,
-						    0);
+						    REMAP_FILE_CAN_SHORTEN);
 		if (deduped == -EBADE)
 			info->status = FILE_DEDUPE_RANGE_DIFFERS;
 		else if (deduped < 0)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 544ab5083b48..34c22d695011 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1727,8 +1727,10 @@ struct block_device_operations;
  * See Documentation/filesystems/vfs.txt for more details about this call.
  *
  * REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate)
+ * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request
  */
 #define REMAP_FILE_DEDUP		(1 << 0)
+#define REMAP_FILE_CAN_SHORTEN		(1 << 1)
 
 /*
  * These flags signal that the caller is ok with altering various aspects of
@@ -1736,9 +1738,8 @@ struct block_device_operations;
  * implementation; the vfs remap helper functions can take advantage of them.
  * Flags in this category exist to preserve the quirky behavior of the hoisted
  * btrfs clone/dedupe ioctls.
- * There are no flags yet, but subsequent commits will add some.
  */
-#define REMAP_FILE_ADVISORY		(0)
+#define REMAP_FILE_ADVISORY		(REMAP_FILE_CAN_SHORTEN)
 
 struct iov_iter;
 
diff --git a/mm/filemap.c b/mm/filemap.c
index e9091d731f84..1775d4ad3317 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3045,8 +3045,7 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
 		bcount = ALIGN(size_in, bs) - pos_in;
 	} else {
 		if (!IS_ALIGNED(count, bs))
-			return -EINVAL;
-
+			count = ALIGN_DOWN(count, bs);
 		bcount = count;
 	}
 
@@ -3056,10 +3055,14 @@ int generic_remap_checks(struct file *file_in, loff_t pos_in,
 	    pos_out < pos_in + bcount)
 		return -EINVAL;
 
-	/* For now we don't support changing the length. */
-	if (*req_count != count)
+	/*
+	 * We shortened the request but the caller can't deal with that, so
+	 * bounce the request back to userspace.
+	 */
+	if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
 		return -EINVAL;
 
+	*req_count = count;
 	return 0;
 }
 
-- 
cgit v1.2.3


From c32e5f39953fa6bbff35c655bdcb7b3128f1e79f Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Tue, 30 Oct 2018 10:42:17 +1100
Subject: vfs: hide file range comparison function

There are no callers of vfs_dedupe_file_range_compare, so we might as
well make it a static helper and remove the export.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/read_write.c    | 187 ++++++++++++++++++++++++++---------------------------
 include/linux/fs.h |   3 -
 2 files changed, 91 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/fs/read_write.c b/fs/read_write.c
index c0bcc1a20650..e4d295d0d236 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1752,6 +1752,97 @@ static int generic_remap_check_len(struct inode *inode_in,
 	return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
 }
 
+/*
+ * Read a page's worth of file data into the page cache.  Return the page
+ * locked.
+ */
+static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
+{
+	struct page *page;
+
+	page = read_mapping_page(inode->i_mapping, offset >> PAGE_SHIFT, NULL);
+	if (IS_ERR(page))
+		return page;
+	if (!PageUptodate(page)) {
+		put_page(page);
+		return ERR_PTR(-EIO);
+	}
+	lock_page(page);
+	return page;
+}
+
+/*
+ * Compare extents of two files to see if they are the same.
+ * Caller must have locked both inodes to prevent write races.
+ */
+static int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
+					 struct inode *dest, loff_t destoff,
+					 loff_t len, bool *is_same)
+{
+	loff_t src_poff;
+	loff_t dest_poff;
+	void *src_addr;
+	void *dest_addr;
+	struct page *src_page;
+	struct page *dest_page;
+	loff_t cmp_len;
+	bool same;
+	int error;
+
+	error = -EINVAL;
+	same = true;
+	while (len) {
+		src_poff = srcoff & (PAGE_SIZE - 1);
+		dest_poff = destoff & (PAGE_SIZE - 1);
+		cmp_len = min(PAGE_SIZE - src_poff,
+			      PAGE_SIZE - dest_poff);
+		cmp_len = min(cmp_len, len);
+		if (cmp_len <= 0)
+			goto out_error;
+
+		src_page = vfs_dedupe_get_page(src, srcoff);
+		if (IS_ERR(src_page)) {
+			error = PTR_ERR(src_page);
+			goto out_error;
+		}
+		dest_page = vfs_dedupe_get_page(dest, destoff);
+		if (IS_ERR(dest_page)) {
+			error = PTR_ERR(dest_page);
+			unlock_page(src_page);
+			put_page(src_page);
+			goto out_error;
+		}
+		src_addr = kmap_atomic(src_page);
+		dest_addr = kmap_atomic(dest_page);
+
+		flush_dcache_page(src_page);
+		flush_dcache_page(dest_page);
+
+		if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
+			same = false;
+
+		kunmap_atomic(dest_addr);
+		kunmap_atomic(src_addr);
+		unlock_page(dest_page);
+		unlock_page(src_page);
+		put_page(dest_page);
+		put_page(src_page);
+
+		if (!same)
+			break;
+
+		srcoff += cmp_len;
+		destoff += cmp_len;
+		len -= cmp_len;
+	}
+
+	*is_same = same;
+	return 0;
+
+out_error:
+	return error;
+}
+
 /*
  * Check that the two inodes are eligible for cloning, the ranges make
  * sense, and then flush all dirty data.  Caller must ensure that the
@@ -1923,102 +2014,6 @@ loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
 }
 EXPORT_SYMBOL(vfs_clone_file_range);
 
-/*
- * Read a page's worth of file data into the page cache.  Return the page
- * locked.
- */
-static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
-{
-	struct address_space *mapping;
-	struct page *page;
-	pgoff_t n;
-
-	n = offset >> PAGE_SHIFT;
-	mapping = inode->i_mapping;
-	page = read_mapping_page(mapping, n, NULL);
-	if (IS_ERR(page))
-		return page;
-	if (!PageUptodate(page)) {
-		put_page(page);
-		return ERR_PTR(-EIO);
-	}
-	lock_page(page);
-	return page;
-}
-
-/*
- * Compare extents of two files to see if they are the same.
- * Caller must have locked both inodes to prevent write races.
- */
-int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
-				  struct inode *dest, loff_t destoff,
-				  loff_t len, bool *is_same)
-{
-	loff_t src_poff;
-	loff_t dest_poff;
-	void *src_addr;
-	void *dest_addr;
-	struct page *src_page;
-	struct page *dest_page;
-	loff_t cmp_len;
-	bool same;
-	int error;
-
-	error = -EINVAL;
-	same = true;
-	while (len) {
-		src_poff = srcoff & (PAGE_SIZE - 1);
-		dest_poff = destoff & (PAGE_SIZE - 1);
-		cmp_len = min(PAGE_SIZE - src_poff,
-			      PAGE_SIZE - dest_poff);
-		cmp_len = min(cmp_len, len);
-		if (cmp_len <= 0)
-			goto out_error;
-
-		src_page = vfs_dedupe_get_page(src, srcoff);
-		if (IS_ERR(src_page)) {
-			error = PTR_ERR(src_page);
-			goto out_error;
-		}
-		dest_page = vfs_dedupe_get_page(dest, destoff);
-		if (IS_ERR(dest_page)) {
-			error = PTR_ERR(dest_page);
-			unlock_page(src_page);
-			put_page(src_page);
-			goto out_error;
-		}
-		src_addr = kmap_atomic(src_page);
-		dest_addr = kmap_atomic(dest_page);
-
-		flush_dcache_page(src_page);
-		flush_dcache_page(dest_page);
-
-		if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
-			same = false;
-
-		kunmap_atomic(dest_addr);
-		kunmap_atomic(src_addr);
-		unlock_page(dest_page);
-		unlock_page(src_page);
-		put_page(dest_page);
-		put_page(src_page);
-
-		if (!same)
-			break;
-
-		srcoff += cmp_len;
-		destoff += cmp_len;
-		len -= cmp_len;
-	}
-
-	*is_same = same;
-	return 0;
-
-out_error:
-	return error;
-}
-EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
-
 loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 				 struct file *dst_file, loff_t dst_pos,
 				 loff_t len, unsigned int remap_flags)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 34c22d695011..346036a84f18 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1853,9 +1853,6 @@ extern loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
 extern loff_t vfs_clone_file_range(struct file *file_in, loff_t pos_in,
 				   struct file *file_out, loff_t pos_out,
 				   loff_t len, unsigned int remap_flags);
-extern int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
-					 struct inode *dest, loff_t destoff,
-					 loff_t len, bool *is_same);
 extern int vfs_dedupe_file_range(struct file *file,
 				 struct file_dedupe_range *same);
 extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
-- 
cgit v1.2.3


From 966c37f2d77eb44d47af8e919267b1ba675b2eca Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Fri, 26 Oct 2018 11:30:35 +0800
Subject: ipv4/igmp: fix v1/v2 switchback timeout based on rfc3376, 8.12

Similiar with ipv6 mcast commit 89225d1ce6af3 ("net: ipv6: mld: fix v1/v2
switchback timeout to rfc3810, 9.12.")

i) RFC3376 8.12. Older Version Querier Present Timeout says:

   The Older Version Querier Interval is the time-out for transitioning
   a host back to IGMPv3 mode once an older version query is heard.
   When an older version query is received, hosts set their Older
   Version Querier Present Timer to Older Version Querier Interval.

   This value MUST be ((the Robustness Variable) times (the Query
   Interval in the last Query received)) plus (one Query Response
   Interval).

Currently we only use a hardcode value IGMP_V1/v2_ROUTER_PRESENT_TIMEOUT.
Fix it by adding two new items mr_qi(Query Interval) and mr_qri(Query Response
Interval) in struct in_device.

Now we can calculate the switchback time via (mr_qrv * mr_qi) + mr_qri.
We need update these values when receive IGMPv3 queries.

Reported-by: Ying Xu <yinxu@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h |  4 +++-
 net/ipv4/igmp.c            | 53 +++++++++++++++++++++++++++++++---------------
 2 files changed, 39 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index c759d1cbcedd..a64f21a97369 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -37,7 +37,9 @@ struct in_device {
 	unsigned long		mr_v1_seen;
 	unsigned long		mr_v2_seen;
 	unsigned long		mr_maxdelay;
-	unsigned char		mr_qrv;
+	unsigned long		mr_qi;		/* Query Interval */
+	unsigned long		mr_qri;		/* Query Response Interval */
+	unsigned char		mr_qrv;		/* Query Robustness Variable */
 	unsigned char		mr_gq_running;
 	unsigned char		mr_ifc_count;
 	struct timer_list	mr_gq_timer;	/* general query timer */
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 4da39446da2d..765b2b32c4a4 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -111,13 +111,10 @@
 #ifdef CONFIG_IP_MULTICAST
 /* Parameter names and values are taken from igmp-v2-06 draft */
 
-#define IGMP_V1_ROUTER_PRESENT_TIMEOUT		(400*HZ)
-#define IGMP_V2_ROUTER_PRESENT_TIMEOUT		(400*HZ)
 #define IGMP_V2_UNSOLICITED_REPORT_INTERVAL	(10*HZ)
 #define IGMP_V3_UNSOLICITED_REPORT_INTERVAL	(1*HZ)
+#define IGMP_QUERY_INTERVAL			(125*HZ)
 #define IGMP_QUERY_RESPONSE_INTERVAL		(10*HZ)
-#define IGMP_QUERY_ROBUSTNESS_VARIABLE		2
-
 
 #define IGMP_INITIAL_REPORT_DELAY		(1)
 
@@ -935,13 +932,15 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 
 			max_delay = IGMP_QUERY_RESPONSE_INTERVAL;
 			in_dev->mr_v1_seen = jiffies +
-				IGMP_V1_ROUTER_PRESENT_TIMEOUT;
+				(in_dev->mr_qrv * in_dev->mr_qi) +
+				in_dev->mr_qri;
 			group = 0;
 		} else {
 			/* v2 router present */
 			max_delay = ih->code*(HZ/IGMP_TIMER_SCALE);
 			in_dev->mr_v2_seen = jiffies +
-				IGMP_V2_ROUTER_PRESENT_TIMEOUT;
+				(in_dev->mr_qrv * in_dev->mr_qi) +
+				in_dev->mr_qri;
 		}
 		/* cancel the interface change timer */
 		in_dev->mr_ifc_count = 0;
@@ -981,8 +980,21 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
 		if (!max_delay)
 			max_delay = 1;	/* can't mod w/ 0 */
 		in_dev->mr_maxdelay = max_delay;
-		if (ih3->qrv)
-			in_dev->mr_qrv = ih3->qrv;
+
+		/* RFC3376, 4.1.6. QRV and 4.1.7. QQIC, when the most recently
+		 * received value was zero, use the default or statically
+		 * configured value.
+		 */
+		in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
+		in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
+
+		/* RFC3376, 8.3. Query Response Interval:
+		 * The number of seconds represented by the [Query Response
+		 * Interval] must be less than the [Query Interval].
+		 */
+		if (in_dev->mr_qri >= in_dev->mr_qi)
+			in_dev->mr_qri = (in_dev->mr_qi/HZ - 1)*HZ;
+
 		if (!group) { /* general query */
 			if (ih3->nsrcs)
 				return true;	/* no sources allowed */
@@ -1723,18 +1735,30 @@ void ip_mc_down(struct in_device *in_dev)
 	ip_mc_dec_group(in_dev, IGMP_ALL_HOSTS);
 }
 
-void ip_mc_init_dev(struct in_device *in_dev)
-{
 #ifdef CONFIG_IP_MULTICAST
+static void ip_mc_reset(struct in_device *in_dev)
+{
 	struct net *net = dev_net(in_dev->dev);
+
+	in_dev->mr_qi = IGMP_QUERY_INTERVAL;
+	in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
+	in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
+}
+#else
+static void ip_mc_reset(struct in_device *in_dev)
+{
+}
 #endif
+
+void ip_mc_init_dev(struct in_device *in_dev)
+{
 	ASSERT_RTNL();
 
 #ifdef CONFIG_IP_MULTICAST
 	timer_setup(&in_dev->mr_gq_timer, igmp_gq_timer_expire, 0);
 	timer_setup(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, 0);
-	in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
 #endif
+	ip_mc_reset(in_dev);
 
 	spin_lock_init(&in_dev->mc_tomb_lock);
 }
@@ -1744,15 +1768,10 @@ void ip_mc_init_dev(struct in_device *in_dev)
 void ip_mc_up(struct in_device *in_dev)
 {
 	struct ip_mc_list *pmc;
-#ifdef CONFIG_IP_MULTICAST
-	struct net *net = dev_net(in_dev->dev);
-#endif
 
 	ASSERT_RTNL();
 
-#ifdef CONFIG_IP_MULTICAST
-	in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
-#endif
+	ip_mc_reset(in_dev);
 	ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS);
 
 	for_each_pmc_rtnl(in_dev, pmc) {
-- 
cgit v1.2.3


From ffb6ce7086ee2d68d8d6d987882f1c5e923fee7e Mon Sep 17 00:00:00 2001
From: Daniel Drake <drake@endlessm.com>
Date: Tue, 9 Oct 2018 14:40:55 +0800
Subject: platform/x86: asus-wmi: export function for evaluating WMI methods

Export asus_wmi_evaluate_method() and related headers for use by other
drivers.

hid-asus is going to use this to avoid advertising that it has a keyboard
backlight when the keyboard backlight is controlled via WMI.

Signed-off-by: Daniel Drake <drake@endlessm.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/platform/x86/asus-wmi.c            |  88 +------------------------
 include/linux/platform_data/x86/asus-wmi.h | 101 +++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+), 85 deletions(-)
 create mode 100644 include/linux/platform_data/x86/asus-wmi.h

(limited to 'include/linux')

diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index a805227b31e1..b52b192a4f16 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -43,6 +43,7 @@
 #include <linux/hwmon-sysfs.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
+#include <linux/platform_data/x86/asus-wmi.h>
 #include <linux/platform_device.h>
 #include <linux/thermal.h>
 #include <linux/acpi.h>
@@ -69,89 +70,6 @@ MODULE_LICENSE("GPL");
 #define NOTIFY_KBD_BRTDWN		0xc5
 #define NOTIFY_KBD_BRTTOGGLE		0xc7
 
-/* WMI Methods */
-#define ASUS_WMI_METHODID_SPEC	        0x43455053 /* BIOS SPECification */
-#define ASUS_WMI_METHODID_SFBD		0x44424653 /* Set First Boot Device */
-#define ASUS_WMI_METHODID_GLCD		0x44434C47 /* Get LCD status */
-#define ASUS_WMI_METHODID_GPID		0x44495047 /* Get Panel ID?? (Resol) */
-#define ASUS_WMI_METHODID_QMOD		0x444F4D51 /* Quiet MODe */
-#define ASUS_WMI_METHODID_SPLV		0x4C425053 /* Set Panel Light Value */
-#define ASUS_WMI_METHODID_AGFN		0x4E464741 /* FaN? */
-#define ASUS_WMI_METHODID_SFUN		0x4E554653 /* FUNCtionalities */
-#define ASUS_WMI_METHODID_SDSP		0x50534453 /* Set DiSPlay output */
-#define ASUS_WMI_METHODID_GDSP		0x50534447 /* Get DiSPlay output */
-#define ASUS_WMI_METHODID_DEVP		0x50564544 /* DEVice Policy */
-#define ASUS_WMI_METHODID_OSVR		0x5256534F /* OS VeRsion */
-#define ASUS_WMI_METHODID_DSTS		0x53544344 /* Device STatuS */
-#define ASUS_WMI_METHODID_DSTS2		0x53545344 /* Device STatuS #2*/
-#define ASUS_WMI_METHODID_BSTS		0x53545342 /* Bios STatuS ? */
-#define ASUS_WMI_METHODID_DEVS		0x53564544 /* DEVice Set */
-#define ASUS_WMI_METHODID_CFVS		0x53564643 /* CPU Frequency Volt Set */
-#define ASUS_WMI_METHODID_KBFT		0x5446424B /* KeyBoard FilTer */
-#define ASUS_WMI_METHODID_INIT		0x54494E49 /* INITialize */
-#define ASUS_WMI_METHODID_HKEY		0x59454B48 /* Hot KEY ?? */
-
-#define ASUS_WMI_UNSUPPORTED_METHOD	0xFFFFFFFE
-
-/* Wireless */
-#define ASUS_WMI_DEVID_HW_SWITCH	0x00010001
-#define ASUS_WMI_DEVID_WIRELESS_LED	0x00010002
-#define ASUS_WMI_DEVID_CWAP		0x00010003
-#define ASUS_WMI_DEVID_WLAN		0x00010011
-#define ASUS_WMI_DEVID_WLAN_LED		0x00010012
-#define ASUS_WMI_DEVID_BLUETOOTH	0x00010013
-#define ASUS_WMI_DEVID_GPS		0x00010015
-#define ASUS_WMI_DEVID_WIMAX		0x00010017
-#define ASUS_WMI_DEVID_WWAN3G		0x00010019
-#define ASUS_WMI_DEVID_UWB		0x00010021
-
-/* Leds */
-/* 0x000200XX and 0x000400XX */
-#define ASUS_WMI_DEVID_LED1		0x00020011
-#define ASUS_WMI_DEVID_LED2		0x00020012
-#define ASUS_WMI_DEVID_LED3		0x00020013
-#define ASUS_WMI_DEVID_LED4		0x00020014
-#define ASUS_WMI_DEVID_LED5		0x00020015
-#define ASUS_WMI_DEVID_LED6		0x00020016
-
-/* Backlight and Brightness */
-#define ASUS_WMI_DEVID_ALS_ENABLE	0x00050001 /* Ambient Light Sensor */
-#define ASUS_WMI_DEVID_BACKLIGHT	0x00050011
-#define ASUS_WMI_DEVID_BRIGHTNESS	0x00050012
-#define ASUS_WMI_DEVID_KBD_BACKLIGHT	0x00050021
-#define ASUS_WMI_DEVID_LIGHT_SENSOR	0x00050022 /* ?? */
-#define ASUS_WMI_DEVID_LIGHTBAR		0x00050025
-
-/* Misc */
-#define ASUS_WMI_DEVID_CAMERA		0x00060013
-
-/* Storage */
-#define ASUS_WMI_DEVID_CARDREADER	0x00080013
-
-/* Input */
-#define ASUS_WMI_DEVID_TOUCHPAD		0x00100011
-#define ASUS_WMI_DEVID_TOUCHPAD_LED	0x00100012
-
-/* Fan, Thermal */
-#define ASUS_WMI_DEVID_THERMAL_CTRL	0x00110011
-#define ASUS_WMI_DEVID_FAN_CTRL		0x00110012
-
-/* Power */
-#define ASUS_WMI_DEVID_PROCESSOR_STATE	0x00120012
-
-/* Deep S3 / Resume on LID open */
-#define ASUS_WMI_DEVID_LID_RESUME	0x00120031
-
-/* DSTS masks */
-#define ASUS_WMI_DSTS_STATUS_BIT	0x00000001
-#define ASUS_WMI_DSTS_UNKNOWN_BIT	0x00000002
-#define ASUS_WMI_DSTS_PRESENCE_BIT	0x00010000
-#define ASUS_WMI_DSTS_USER_BIT		0x00020000
-#define ASUS_WMI_DSTS_BIOS_BIT		0x00040000
-#define ASUS_WMI_DSTS_BRIGHTNESS_MASK	0x000000FF
-#define ASUS_WMI_DSTS_MAX_BRIGTH_MASK	0x0000FF00
-#define ASUS_WMI_DSTS_LIGHTBAR_MASK	0x0000000F
-
 #define ASUS_FAN_DESC			"cpu_fan"
 #define ASUS_FAN_MFUN			0x13
 #define ASUS_FAN_SFUN_READ		0x06
@@ -301,8 +219,7 @@ static void asus_wmi_input_exit(struct asus_wmi *asus)
 	asus->inputdev = NULL;
 }
 
-static int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1,
-				    u32 *retval)
+int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, u32 *retval)
 {
 	struct bios_args args = {
 		.arg0 = arg0,
@@ -338,6 +255,7 @@ exit:
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(asus_wmi_evaluate_method);
 
 static int asus_wmi_evaluate_method_agfn(const struct acpi_buffer args)
 {
diff --git a/include/linux/platform_data/x86/asus-wmi.h b/include/linux/platform_data/x86/asus-wmi.h
new file mode 100644
index 000000000000..53dfc2541960
--- /dev/null
+++ b/include/linux/platform_data/x86/asus-wmi.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PLATFORM_DATA_X86_ASUS_WMI_H
+#define __PLATFORM_DATA_X86_ASUS_WMI_H
+
+#include <linux/errno.h>
+#include <linux/types.h>
+
+/* WMI Methods */
+#define ASUS_WMI_METHODID_SPEC	        0x43455053 /* BIOS SPECification */
+#define ASUS_WMI_METHODID_SFBD		0x44424653 /* Set First Boot Device */
+#define ASUS_WMI_METHODID_GLCD		0x44434C47 /* Get LCD status */
+#define ASUS_WMI_METHODID_GPID		0x44495047 /* Get Panel ID?? (Resol) */
+#define ASUS_WMI_METHODID_QMOD		0x444F4D51 /* Quiet MODe */
+#define ASUS_WMI_METHODID_SPLV		0x4C425053 /* Set Panel Light Value */
+#define ASUS_WMI_METHODID_AGFN		0x4E464741 /* FaN? */
+#define ASUS_WMI_METHODID_SFUN		0x4E554653 /* FUNCtionalities */
+#define ASUS_WMI_METHODID_SDSP		0x50534453 /* Set DiSPlay output */
+#define ASUS_WMI_METHODID_GDSP		0x50534447 /* Get DiSPlay output */
+#define ASUS_WMI_METHODID_DEVP		0x50564544 /* DEVice Policy */
+#define ASUS_WMI_METHODID_OSVR		0x5256534F /* OS VeRsion */
+#define ASUS_WMI_METHODID_DSTS		0x53544344 /* Device STatuS */
+#define ASUS_WMI_METHODID_DSTS2		0x53545344 /* Device STatuS #2*/
+#define ASUS_WMI_METHODID_BSTS		0x53545342 /* Bios STatuS ? */
+#define ASUS_WMI_METHODID_DEVS		0x53564544 /* DEVice Set */
+#define ASUS_WMI_METHODID_CFVS		0x53564643 /* CPU Frequency Volt Set */
+#define ASUS_WMI_METHODID_KBFT		0x5446424B /* KeyBoard FilTer */
+#define ASUS_WMI_METHODID_INIT		0x54494E49 /* INITialize */
+#define ASUS_WMI_METHODID_HKEY		0x59454B48 /* Hot KEY ?? */
+
+#define ASUS_WMI_UNSUPPORTED_METHOD	0xFFFFFFFE
+
+/* Wireless */
+#define ASUS_WMI_DEVID_HW_SWITCH	0x00010001
+#define ASUS_WMI_DEVID_WIRELESS_LED	0x00010002
+#define ASUS_WMI_DEVID_CWAP		0x00010003
+#define ASUS_WMI_DEVID_WLAN		0x00010011
+#define ASUS_WMI_DEVID_WLAN_LED		0x00010012
+#define ASUS_WMI_DEVID_BLUETOOTH	0x00010013
+#define ASUS_WMI_DEVID_GPS		0x00010015
+#define ASUS_WMI_DEVID_WIMAX		0x00010017
+#define ASUS_WMI_DEVID_WWAN3G		0x00010019
+#define ASUS_WMI_DEVID_UWB		0x00010021
+
+/* Leds */
+/* 0x000200XX and 0x000400XX */
+#define ASUS_WMI_DEVID_LED1		0x00020011
+#define ASUS_WMI_DEVID_LED2		0x00020012
+#define ASUS_WMI_DEVID_LED3		0x00020013
+#define ASUS_WMI_DEVID_LED4		0x00020014
+#define ASUS_WMI_DEVID_LED5		0x00020015
+#define ASUS_WMI_DEVID_LED6		0x00020016
+
+/* Backlight and Brightness */
+#define ASUS_WMI_DEVID_ALS_ENABLE	0x00050001 /* Ambient Light Sensor */
+#define ASUS_WMI_DEVID_BACKLIGHT	0x00050011
+#define ASUS_WMI_DEVID_BRIGHTNESS	0x00050012
+#define ASUS_WMI_DEVID_KBD_BACKLIGHT	0x00050021
+#define ASUS_WMI_DEVID_LIGHT_SENSOR	0x00050022 /* ?? */
+#define ASUS_WMI_DEVID_LIGHTBAR		0x00050025
+
+/* Misc */
+#define ASUS_WMI_DEVID_CAMERA		0x00060013
+
+/* Storage */
+#define ASUS_WMI_DEVID_CARDREADER	0x00080013
+
+/* Input */
+#define ASUS_WMI_DEVID_TOUCHPAD		0x00100011
+#define ASUS_WMI_DEVID_TOUCHPAD_LED	0x00100012
+
+/* Fan, Thermal */
+#define ASUS_WMI_DEVID_THERMAL_CTRL	0x00110011
+#define ASUS_WMI_DEVID_FAN_CTRL		0x00110012
+
+/* Power */
+#define ASUS_WMI_DEVID_PROCESSOR_STATE	0x00120012
+
+/* Deep S3 / Resume on LID open */
+#define ASUS_WMI_DEVID_LID_RESUME	0x00120031
+
+/* DSTS masks */
+#define ASUS_WMI_DSTS_STATUS_BIT	0x00000001
+#define ASUS_WMI_DSTS_UNKNOWN_BIT	0x00000002
+#define ASUS_WMI_DSTS_PRESENCE_BIT	0x00010000
+#define ASUS_WMI_DSTS_USER_BIT		0x00020000
+#define ASUS_WMI_DSTS_BIOS_BIT		0x00040000
+#define ASUS_WMI_DSTS_BRIGHTNESS_MASK	0x000000FF
+#define ASUS_WMI_DSTS_MAX_BRIGTH_MASK	0x0000FF00
+#define ASUS_WMI_DSTS_LIGHTBAR_MASK	0x0000000F
+
+#if IS_REACHABLE(CONFIG_ASUS_WMI)
+int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1, u32 *retval);
+#else
+static inline int asus_wmi_evaluate_method(u32 method_id, u32 arg0, u32 arg1,
+					   u32 *retval)
+{
+	return -ENODEV;
+}
+#endif
+
+#endif	/* __PLATFORM_DATA_X86_ASUS_WMI_H */
-- 
cgit v1.2.3


From f813f21971b96f61a789dd48151f92220fdd2e0a Mon Sep 17 00:00:00 2001
From: Jérôme Glisse <jglisse@redhat.com>
Date: Tue, 30 Oct 2018 15:04:06 -0700
Subject: mm/hmm: fix utf8 ...
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "HMM updates, improvements and fixes", v2

Few fixes that only affect HMM users.  Improve the synchronization call
back so that we match was other mmu_notifier listener do and add proper
support to the new blockable flags in the process.

For curious folks here are branches to leverage HMM in various existing
device drivers:

https://cgit.freedesktop.org/~glisse/linux/log/?h=hmm-nouveau-v01
https://cgit.freedesktop.org/~glisse/linux/log/?h=hmm-radeon-v00
https://cgit.freedesktop.org/~glisse/linux/log/?h=hmm-intel-v00

More to come (amd gpu, Mellanox, ...)

I expect more of the preparatory work for nouveau will be merge in 4.20
(like we have been doing since 4.16) and i will wait until this patchset
is upstream before pushing the patches that actualy make use of HMM (to
avoid complex tree inter-dependency).

This patch (of 6):

Somehow utf=8 must have been broken.

Link: http://lkml.kernel.org/r/20181019160442.18723-2-jglisse@redhat.com
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hmm.h | 2 +-
 mm/hmm.c            | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index dde947083d4e..42b1ae915915 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -11,7 +11,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
- * Authors: JÃ©rÃ´me Glisse <jglisse@redhat.com>
+ * Authors: Jérôme Glisse <jglisse@redhat.com>
  */
 /*
  * Heterogeneous Memory Management (HMM)
diff --git a/mm/hmm.c b/mm/hmm.c
index 774d684fa2b4..de9840f60100 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -11,7 +11,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  *
- * Authors: JÃ©rÃ´me Glisse <jglisse@redhat.com>
+ * Authors: Jérôme Glisse <jglisse@redhat.com>
  */
 /*
  * Refer to include/linux/hmm.h for information about heterogeneous memory
-- 
cgit v1.2.3


From 44532d4c591c10d6907ac5030373bc306617d92b Mon Sep 17 00:00:00 2001
From: Jérôme Glisse <jglisse@redhat.com>
Date: Tue, 30 Oct 2018 15:04:24 -0700
Subject: mm/hmm: use a structure for update callback parameters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use a structure to gather all the parameters for the update callback.
This make it easier when adding new parameters by avoiding having to
update all callback function signature.

The hmm_update structure is always associated with a mmu_notifier
callbacks so we are not planing on grouping multiple updates together.
Nor do we care about page size for the range as range will over fully
cover the page being invalidated (this is a mmu_notifier property).

Link: http://lkml.kernel.org/r/20181019160442.18723-6-jglisse@redhat.com
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hmm.h | 31 ++++++++++++++++++++++---------
 mm/hmm.c            | 33 ++++++++++++++++++++++-----------
 2 files changed, 44 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 42b1ae915915..c6fb869a81c0 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -274,13 +274,28 @@ static inline uint64_t hmm_pfn_from_pfn(const struct hmm_range *range,
 struct hmm_mirror;
 
 /*
- * enum hmm_update_type - type of update
+ * enum hmm_update_event - type of update
  * @HMM_UPDATE_INVALIDATE: invalidate range (no indication as to why)
  */
-enum hmm_update_type {
+enum hmm_update_event {
 	HMM_UPDATE_INVALIDATE,
 };
 
+/*
+ * struct hmm_update - HMM update informations for callback
+ *
+ * @start: virtual start address of the range to update
+ * @end: virtual end address of the range to update
+ * @event: event triggering the update (what is happening)
+ * @blockable: can the callback block/sleep ?
+ */
+struct hmm_update {
+	unsigned long start;
+	unsigned long end;
+	enum hmm_update_event event;
+	bool blockable;
+};
+
 /*
  * struct hmm_mirror_ops - HMM mirror device operations callback
  *
@@ -300,9 +315,9 @@ struct hmm_mirror_ops {
 	/* sync_cpu_device_pagetables() - synchronize page tables
 	 *
 	 * @mirror: pointer to struct hmm_mirror
-	 * @update_type: type of update that occurred to the CPU page table
-	 * @start: virtual start address of the range to update
-	 * @end: virtual end address of the range to update
+	 * @update: update informations (see struct hmm_update)
+	 * Returns: -EAGAIN if update.blockable false and callback need to
+	 *          block, 0 otherwise.
 	 *
 	 * This callback ultimately originates from mmu_notifiers when the CPU
 	 * page table is updated. The device driver must update its page table
@@ -313,10 +328,8 @@ struct hmm_mirror_ops {
 	 * page tables are completely updated (TLBs flushed, etc); this is a
 	 * synchronous call.
 	 */
-	void (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
-					   enum hmm_update_type update_type,
-					   unsigned long start,
-					   unsigned long end);
+	int (*sync_cpu_device_pagetables)(struct hmm_mirror *mirror,
+					  const struct hmm_update *update);
 };
 
 /*
diff --git a/mm/hmm.c b/mm/hmm.c
index a3d532ff7c3d..b4e9afdc2181 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -126,10 +126,8 @@ void hmm_mm_destroy(struct mm_struct *mm)
 	kfree(mm->hmm);
 }
 
-static void hmm_invalidate_range(struct hmm *hmm,
-				 enum hmm_update_type action,
-				 unsigned long start,
-				 unsigned long end)
+static int hmm_invalidate_range(struct hmm *hmm,
+				const struct hmm_update *update)
 {
 	struct hmm_mirror *mirror;
 	struct hmm_range *range;
@@ -138,22 +136,30 @@ static void hmm_invalidate_range(struct hmm *hmm,
 	list_for_each_entry(range, &hmm->ranges, list) {
 		unsigned long addr, idx, npages;
 
-		if (end < range->start || start >= range->end)
+		if (update->end < range->start || update->start >= range->end)
 			continue;
 
 		range->valid = false;
-		addr = max(start, range->start);
+		addr = max(update->start, range->start);
 		idx = (addr - range->start) >> PAGE_SHIFT;
-		npages = (min(range->end, end) - addr) >> PAGE_SHIFT;
+		npages = (min(range->end, update->end) - addr) >> PAGE_SHIFT;
 		memset(&range->pfns[idx], 0, sizeof(*range->pfns) * npages);
 	}
 	spin_unlock(&hmm->lock);
 
 	down_read(&hmm->mirrors_sem);
-	list_for_each_entry(mirror, &hmm->mirrors, list)
-		mirror->ops->sync_cpu_device_pagetables(mirror, action,
-							start, end);
+	list_for_each_entry(mirror, &hmm->mirrors, list) {
+		int ret;
+
+		ret = mirror->ops->sync_cpu_device_pagetables(mirror, update);
+		if (!update->blockable && ret == -EAGAIN) {
+			up_read(&hmm->mirrors_sem);
+			return -EAGAIN;
+		}
+	}
 	up_read(&hmm->mirrors_sem);
+
+	return 0;
 }
 
 static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
@@ -202,11 +208,16 @@ static void hmm_invalidate_range_end(struct mmu_notifier *mn,
 				     unsigned long start,
 				     unsigned long end)
 {
+	struct hmm_update update;
 	struct hmm *hmm = mm->hmm;
 
 	VM_BUG_ON(!hmm);
 
-	hmm_invalidate_range(mm->hmm, HMM_UPDATE_INVALIDATE, start, end);
+	update.start = start;
+	update.end = end;
+	update.event = HMM_UPDATE_INVALIDATE;
+	update.blockable = true;
+	hmm_invalidate_range(hmm, &update);
 }
 
 static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
-- 
cgit v1.2.3


From 7275b097851a5e2e0dd4da039c7e96b59ac5314e Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Tue, 30 Oct 2018 15:04:59 -0700
Subject: linux/bitmap.h: handle constant zero-size bitmaps correctly

The static inlines in bitmap.h do not handle a compile-time constant
nbits==0 correctly (they dereference the passed src or dst pointers,
despite only 0 words being valid to access).  I had the 0-day buildbot
chew on a patch [1] that would cause build failures for such cases without
complaining, suggesting that we don't have any such users currently, at
least for the 70 .config/arch combinations that was built.  Should any
turn up, make sure they use the out-of-line versions, which do handle
nbits==0 correctly.

This is of course not the most efficient, but it's much less churn than
teaching all the static inlines an "if (zero_const_nbits())", and since we
don't have any current instances, this doesn't affect existing code at
all.

[1] lkml.kernel.org/r/20180815085539.27485-1-linux@rasmusvillemoes.dk

Link: http://lkml.kernel.org/r/20180818131623.8755-3-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Yury Norov <ynorov@caviumnetworks.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index acf5e8df3504..a9805bacbd7c 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -204,8 +204,13 @@ extern int bitmap_print_to_pagebuf(bool list, char *buf,
 #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
 #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))
 
+/*
+ * The static inlines below do not handle constant nbits==0 correctly,
+ * so make such users (should any ever turn up) call the out-of-line
+ * versions.
+ */
 #define small_const_nbits(nbits) \
-	(__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG)
+	(__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0)
 
 static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
 {
-- 
cgit v1.2.3


From c8cebc553368209426f7279736db4f88f1853396 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Tue, 30 Oct 2018 15:05:02 -0700
Subject: linux/bitmap.h: remove redundant uses of small_const_nbits()

In the _zero, _fill and _copy functions, the small_const_nbits branch is
redundant.  If nbits is small and const, gcc knows full well that
BITS_TO_LONGS(nbits) is 1, so len is also a compile-time constant
(sizeof(long)), and calling memset or memcpy with a length argument of
sizeof(long) makes gcc generate the expected code anyway:

#include <string.h>
void a(unsigned long *x) { memset(x, 0, 8); }
void b(unsigned long *x) { memset(x, 0xff, 8); }
void c(unsigned long *x, const unsigned long *y) { memcpy(x, y, 8); }

turns into

0000000000000000 <a>:
   0:   48 c7 07 00 00 00 00    movq   $0x0,(%rdi)
   7:   c3                      retq
   8:   0f 1f 84 00 00 00 00    nopl   0x0(%rax,%rax,1)
   f:   00

0000000000000010 <b>:
  10:   48 c7 07 ff ff ff ff    movq   $0xffffffffffffffff,(%rdi)
  17:   c3                      retq
  18:   0f 1f 84 00 00 00 00    nopl   0x0(%rax,%rax,1)
  1f:   00

0000000000000020 <c>:
  20:   48 8b 06                mov    (%rsi),%rax
  23:   48 89 07                mov    %rax,(%rdi)
  26:   c3                      retq

Link: http://lkml.kernel.org/r/20180818131623.8755-4-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Yury Norov <ynorov@caviumnetworks.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 24 ++++++------------------
 1 file changed, 6 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index a9805bacbd7c..004cd42a3c4d 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -214,33 +214,21 @@ extern int bitmap_print_to_pagebuf(bool list, char *buf,
 
 static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
 {
-	if (small_const_nbits(nbits))
-		*dst = 0UL;
-	else {
-		unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
-		memset(dst, 0, len);
-	}
+	unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+	memset(dst, 0, len);
 }
 
 static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
 {
-	if (small_const_nbits(nbits))
-		*dst = ~0UL;
-	else {
-		unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
-		memset(dst, 0xff, len);
-	}
+	unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+	memset(dst, 0xff, len);
 }
 
 static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
 			unsigned int nbits)
 {
-	if (small_const_nbits(nbits))
-		*dst = *src;
-	else {
-		unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
-		memcpy(dst, src, len);
-	}
+	unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+	memcpy(dst, src, len);
 }
 
 /*
-- 
cgit v1.2.3


From d9873969fa8725dc6a5a21ab788c057fd8719751 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Tue, 30 Oct 2018 15:05:07 -0700
Subject: linux/bitmap.h: fix type of nbits in bitmap_shift_right()

Most other bitmap API, including the OOL version __bitmap_shift_right,
take unsigned nbits.  This was accidentally left out from 2fbad29917c98.

Link: http://lkml.kernel.org/r/20180818131623.8755-5-linux@rasmusvillemoes.dk
Fixes: 2fbad29917c98 ("lib: bitmap: change bitmap_shift_right to take unsigned parameters")
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reported-by: Yury Norov <ynorov@caviumnetworks.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 004cd42a3c4d..4032680e629d 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -391,7 +391,7 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start,
 }
 
 static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src,
-				unsigned int shift, int nbits)
+				unsigned int shift, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
 		*dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift;
-- 
cgit v1.2.3


From 41e7b1661ffbf562d3aa2b7ce4ad283db50b711a Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Tue, 30 Oct 2018 15:05:10 -0700
Subject: linux/bitmap.h: relax comment on compile-time constant nbits

It's not clear what's so horrible about emitting a function call to handle
a run-time sized bitmap.  Moreover, gcc also emits a function call for a
compile-time-constant-but-huge nbits, so the comment isn't even accurate.

Link: http://lkml.kernel.org/r/20180818131623.8755-6-linux@rasmusvillemoes.dk
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Cc: Yury Norov <ynorov@caviumnetworks.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitmap.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 4032680e629d..f58e97446abc 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -28,8 +28,8 @@
  * The available bitmap operations and their rough meaning in the
  * case that the bitmap is a single unsigned long are thus:
  *
- * Note that nbits should be always a compile time evaluable constant.
- * Otherwise many inlines will generate horrible code.
+ * The generated code is more efficient when nbits is known at
+ * compile-time and at most BITS_PER_LONG.
  *
  * ::
  *
-- 
cgit v1.2.3


From 7e5ca363a5a1ec42c54dc1e0644b361a2daf984c Mon Sep 17 00:00:00 2001
From: Wei Yang <richard.weiyang@gmail.com>
Date: Tue, 30 Oct 2018 15:05:42 -0700
Subject: lib/rbtree.c: fix typo in comment of rb_insert_augmented()

The function name in the comment is not correct.

Link: http://lkml.kernel.org/r/20181010021344.60433-1-richard.weiyang@gmail.com
Signed-off-by: Wei Yang <richard.weiyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/rbtree_augmented.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index af8a61be2d8d..9510c677ac70 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -51,8 +51,8 @@ extern void __rb_insert_augmented(struct rb_node *node,
  *
  * On insertion, the user must update the augmented information on the path
  * leading to the inserted node, then call rb_link_node() as usual and
- * rb_augment_inserted() instead of the usual rb_insert_color() call.
- * If rb_augment_inserted() rebalances the rbtree, it will callback into
+ * rb_insert_augmented() instead of the usual rb_insert_color() call.
+ * If rb_insert_augmented() rebalances the rbtree, it will callback into
  * a user provided function to update the augmented information on the
  * affected subtrees.
  */
-- 
cgit v1.2.3


From 89976005536c47a788bdd6cef5a4e9fef3c0de32 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Tue, 30 Oct 2018 15:05:49 -0700
Subject: include/linux/compat.h: mark expected switch fall-throughs

In preparation to enabling -Wimplicit-fallthrough, mark switch cases where
we are expecting to fall through.

Link: http://lkml.kernel.org/r/20181013115048.GA3262@embeddedor.com
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compat.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index d30e4dbd4be2..06e77473f175 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -488,8 +488,11 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set,
 	compat_sigset_t v;
 	switch (_NSIG_WORDS) {
 	case 4: v.sig[7] = (set->sig[3] >> 32); v.sig[6] = set->sig[3];
+		/* fall through */
 	case 3: v.sig[5] = (set->sig[2] >> 32); v.sig[4] = set->sig[2];
+		/* fall through */
 	case 2: v.sig[3] = (set->sig[1] >> 32); v.sig[2] = set->sig[1];
+		/* fall through */
 	case 1: v.sig[1] = (set->sig[0] >> 32); v.sig[0] = set->sig[0];
 	}
 	return copy_to_user(compat, &v, size) ? -EFAULT : 0;
-- 
cgit v1.2.3


From 3819ddec1f8c8a5daf215bec4067f8fd1dc40d6d Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Tue, 30 Oct 2018 15:07:10 -0700
Subject: include/linux/signal.h: mark expected switch fall-throughs

In preparation to enabling -Wimplicit-fallthrough, mark switch cases where
we are expecting to fall through.

Link: http://lkml.kernel.org/r/20181013114847.GA3160@embeddedor.com
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Acked-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/signal.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/signal.h b/include/linux/signal.h
index 200ed96a05af..f428e86f4800 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -129,9 +129,11 @@ static inline void name(sigset_t *r, const sigset_t *a, const sigset_t *b) \
 		b3 = b->sig[3]; b2 = b->sig[2];				\
 		r->sig[3] = op(a3, b3);					\
 		r->sig[2] = op(a2, b2);					\
+		/* fall through */					\
 	case 2:								\
 		a1 = a->sig[1]; b1 = b->sig[1];				\
 		r->sig[1] = op(a1, b1);					\
+		/* fall through */					\
 	case 1:								\
 		a0 = a->sig[0]; b0 = b->sig[0];				\
 		r->sig[0] = op(a0, b0);					\
@@ -161,7 +163,9 @@ static inline void name(sigset_t *set)					\
 	switch (_NSIG_WORDS) {						\
 	case 4:	set->sig[3] = op(set->sig[3]);				\
 		set->sig[2] = op(set->sig[2]);				\
+		/* fall through */					\
 	case 2:	set->sig[1] = op(set->sig[1]);				\
+		/* fall through */					\
 	case 1:	set->sig[0] = op(set->sig[0]);				\
 		    break;						\
 	default:							\
@@ -182,6 +186,7 @@ static inline void sigemptyset(sigset_t *set)
 		memset(set, 0, sizeof(sigset_t));
 		break;
 	case 2: set->sig[1] = 0;
+		/* fall through */
 	case 1:	set->sig[0] = 0;
 		break;
 	}
@@ -194,6 +199,7 @@ static inline void sigfillset(sigset_t *set)
 		memset(set, -1, sizeof(sigset_t));
 		break;
 	case 2: set->sig[1] = -1;
+		/* fall through */
 	case 1:	set->sig[0] = -1;
 		break;
 	}
-- 
cgit v1.2.3


From 69a60bc75fe73511af89328ded1b33bc4a625a5c Mon Sep 17 00:00:00 2001
From: Alexander Pateenok <pateenoc@gmail.com>
Date: Tue, 30 Oct 2018 15:07:36 -0700
Subject: percpu: remove PER_CPU_DEF_ATTRIBUTES macro

The macro is not used:

  $ grep -r PER_CPU_DEF_ATTRIBUTES
  include/linux/percpu-defs.h:	__PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES __weak		\
  include/linux/percpu-defs.h:	__PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES		\
  include/asm-generic/percpu.h:#ifndef PER_CPU_DEF_ATTRIBUTES
  include/asm-generic/percpu.h:#define PER_CPU_DEF_ATTRIBUTES

It was added with b01e8dc34379 ("alpha: fix percpu build breakage") and
removed in 2009 with b01e8dc34379..6088464cf1ae.

Link: http://lkml.kernel.org/r/20180821164904.qqhcduimjznods66@K55DR.localdomain
Signed-off-by: Alexander Pateenok <pateenoc@gmail.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/asm-generic/percpu.h | 4 ----
 include/linux/percpu-defs.h  | 6 ++----
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 1817a8415a5e..c2de013b2cf4 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -62,10 +62,6 @@ extern void setup_per_cpu_areas(void);
 #define PER_CPU_ATTRIBUTES
 #endif
 
-#ifndef PER_CPU_DEF_ATTRIBUTES
-#define PER_CPU_DEF_ATTRIBUTES
-#endif
-
 #define raw_cpu_generic_read(pcp)					\
 ({									\
 	*raw_cpu_ptr(&(pcp));						\
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 2d2096ba1cfe..1ce8e264a269 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -91,8 +91,7 @@
 	extern __PCPU_DUMMY_ATTRS char __pcpu_unique_##name;		\
 	__PCPU_DUMMY_ATTRS char __pcpu_unique_##name;			\
 	extern __PCPU_ATTRS(sec) __typeof__(type) name;			\
-	__PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES __weak			\
-	__typeof__(type) name
+	__PCPU_ATTRS(sec) __weak __typeof__(type) name
 #else
 /*
  * Normal declaration and definition macros.
@@ -101,8 +100,7 @@
 	extern __PCPU_ATTRS(sec) __typeof__(type) name
 
 #define DEFINE_PER_CPU_SECTION(type, name, sec)				\
-	__PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES			\
-	__typeof__(type) name
+	__PCPU_ATTRS(sec) __typeof__(type) name
 #endif
 
 /*
-- 
cgit v1.2.3


From b4a991ec584b3ce333342c431c3cc4fef8a690d7 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Tue, 30 Oct 2018 15:07:40 -0700
Subject: mm: remove CONFIG_NO_BOOTMEM

All achitectures select NO_BOOTMEM which essentially becomes 'Y' for any
kernel configuration and therefore it can be removed.

[alexander.h.duyck@linux.intel.com: remove now defunct NO_BOOTMEM from depends list for deferred init]
  Link: http://lkml.kernel.org/r/20180925201814.3576.15105.stgit@localhost.localdomain
Link: http://lkml.kernel.org/r/1536927045-23536-3-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Serge Semin <fancer.lancer@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/Kconfig      |  1 -
 arch/arc/Kconfig        |  1 -
 arch/arm/Kconfig        |  1 -
 arch/arm64/Kconfig      |  1 -
 arch/csky/Kconfig       |  1 -
 arch/h8300/Kconfig      |  1 -
 arch/hexagon/Kconfig    |  1 -
 arch/ia64/Kconfig       |  1 -
 arch/m68k/Kconfig       |  1 -
 arch/microblaze/Kconfig |  1 -
 arch/mips/Kconfig       |  1 -
 arch/nds32/Kconfig      |  1 -
 arch/nios2/Kconfig      |  1 -
 arch/openrisc/Kconfig   |  1 -
 arch/parisc/Kconfig     |  1 -
 arch/powerpc/Kconfig    |  1 -
 arch/riscv/Kconfig      |  1 -
 arch/s390/Kconfig       |  1 -
 arch/sh/Kconfig         |  1 -
 arch/sparc/Kconfig      |  1 -
 arch/um/Kconfig         |  1 -
 arch/unicore32/Kconfig  |  1 -
 arch/x86/Kconfig        |  3 ---
 arch/xtensa/Kconfig     |  1 -
 include/linux/bootmem.h | 36 ++----------------------------------
 include/linux/mmzone.h  |  5 +----
 mm/Kconfig              |  4 ----
 mm/Makefile             |  7 +------
 mm/memblock.c           |  2 --
 29 files changed, 4 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 620b0a711ee4..04de6be101bc 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -32,7 +32,6 @@ config ALPHA
 	select OLD_SIGSUSPEND
 	select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
 	select HAVE_MEMBLOCK
-	select NO_BOOTMEM
 	help
 	  The Alpha is a 64-bit general-purpose processor designed and
 	  marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index e98c6b8e6186..56995f356f69 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -44,7 +44,6 @@ config ARC
 	select HANDLE_DOMAIN_IRQ
 	select IRQ_DOMAIN
 	select MODULES_USE_ELF_RELA
-	select NO_BOOTMEM
 	select OF
 	select OF_EARLY_FLATTREE
 	select OF_RESERVED_MEM
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b8c6062ca0c1..37d4c40f405a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -100,7 +100,6 @@ config ARM
 	select IRQ_FORCED_THREADING
 	select MODULES_USE_ELF_REL
 	select NEED_DMA_MAP_STATE
-	select NO_BOOTMEM
 	select OF_EARLY_FLATTREE if OF
 	select OF_RESERVED_MEM if OF
 	select OLD_SIGACTION
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 964f682a2b7b..a8f36c7041ff 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -161,7 +161,6 @@ config ARM64
 	select MULTI_IRQ_HANDLER
 	select NEED_DMA_MAP_STATE
 	select NEED_SG_DMA_LENGTH
-	select NO_BOOTMEM
 	select OF
 	select OF_EARLY_FLATTREE
 	select OF_RESERVED_MEM
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 0a0558567eaa..46966f576249 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -39,7 +39,6 @@ config CSKY
 	select HAVE_MEMBLOCK
 	select MAY_HAVE_SPARSE_IRQ
 	select MODULES_USE_ELF_RELA if MODULES
-	select NO_BOOTMEM
 	select OF
 	select OF_EARLY_FLATTREE
 	select OF_RESERVED_MEM
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 0b334b671e90..5e89d40be8cd 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -16,7 +16,6 @@ config H8300
 	select OF_IRQ
 	select OF_EARLY_FLATTREE
 	select HAVE_MEMBLOCK
-	select NO_BOOTMEM
 	select TIMER_OF
 	select H8300_TMR8
 	select HAVE_KERNEL_GZIP
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 7b25d7c8fa49..7823f15e17b2 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -23,7 +23,6 @@ config HEXAGON
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_MEMBLOCK
 	select ARCH_DISCARD_MEMBLOCK
-	select NO_BOOTMEM
 	select NEED_SG_DMA_LENGTH
 	select NO_IOPORT_MAP
 	select GENERIC_IOMAP
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 8b4a0c1748c0..2bf4ef792f2c 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -28,7 +28,6 @@ config IA64
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
-	select NO_BOOTMEM
 	select HAVE_VIRT_CPU_ACCOUNTING
 	select ARCH_HAS_DMA_MARK_CLEAN
 	select ARCH_HAS_SG_CHAIN
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index c7b2a8d60a41..26edf2542295 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -29,7 +29,6 @@ config M68K
 	select DMA_DIRECT_OPS if HAS_DMA
 	select HAVE_MEMBLOCK
 	select ARCH_DISCARD_MEMBLOCK
-	select NO_BOOTMEM
 
 config CPU_BIG_ENDIAN
 	def_bool y
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 164a4857737a..5c2777bb5555 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -28,7 +28,6 @@ config MICROBLAZE
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
-	select NO_BOOTMEM
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_OPROFILE
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 80778b40f8fa..366934157632 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -78,7 +78,6 @@ config MIPS
 	select RTC_LIB
 	select SYSCTL_EXCEPTION_TRACE
 	select VIRT_TO_BUS
-	select NO_BOOTMEM
 
 menu "Machine selection"
 
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 56992330026a..93c0b587ee23 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -36,7 +36,6 @@ config NDS32
 	select MODULES_USE_ELF_RELA
 	select OF
 	select OF_EARLY_FLATTREE
-	select NO_BOOTMEM
 	select NO_IOPORT_MAP
 	select RTC_LIB
 	select THREAD_INFO_IN_TASK
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 2df0c57f2833..b01febeb5ce3 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -25,7 +25,6 @@ config NIOS2
 	select CPU_NO_EFFICIENT_FFS
 	select HAVE_MEMBLOCK
 	select ARCH_DISCARD_MEMBLOCK
-	select NO_BOOTMEM
 
 config GENERIC_CSUM
 	def_bool y
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index a655ae280637..cbfaf15f9d70 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -32,7 +32,6 @@ config OPENRISC
 	select HAVE_DEBUG_STACKOVERFLOW
 	select OR1K_PIC
 	select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1
-	select NO_BOOTMEM
 	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_USE_QUEUED_RWLOCKS
 	select OMPIC if SMP
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index f1cd12afd943..7418ebeeb4a4 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -16,7 +16,6 @@ config PARISC
 	select RTC_DRV_GENERIC
 	select INIT_ALL_POSSIBLE
 	select HAVE_MEMBLOCK
-	select NO_BOOTMEM
 	select BUG
 	select BUILDTIME_EXTABLE_SORT
 	select HAVE_PERF_EVENTS
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e84943d24e5c..5798ffb1379b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -231,7 +231,6 @@ config PPC
 	select MODULES_USE_ELF_RELA
 	select NEED_DMA_MAP_STATE		if PPC64 || NOT_COHERENT_CACHE
 	select NEED_SG_DMA_LENGTH
-	select NO_BOOTMEM
 	select OF
 	select OF_EARLY_FLATTREE
 	select OF_RESERVED_MEM
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index fe451348ae57..f7c6b7cf124a 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -35,7 +35,6 @@ config RISCV
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_PERF_EVENTS
 	select IRQ_DOMAIN
-	select NO_BOOTMEM
 	select RISCV_ISA_A if SMP
 	select SPARSE_IRQ
 	select SYSCTL_EXCEPTION_TRACE
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8b25e1f45b27..c7af83a15b2d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -175,7 +175,6 @@ config S390
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_VIRT_CPU_ACCOUNTING
 	select MODULES_USE_ELF_RELA
-	select NO_BOOTMEM
 	select OLD_SIGACTION
 	select OLD_SIGSUSPEND3
 	select SPARSE_IRQ
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 475d786a65b0..7aaea9690e18 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -11,7 +11,6 @@ config SUPERH
 	select HAVE_IDE if HAS_IOPORT_MAP
 	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
-	select NO_BOOTMEM
 	select ARCH_DISCARD_MEMBLOCK
 	select HAVE_OPROFILE
 	select HAVE_GENERIC_DMA_COHERENT
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 7e2aa59fcc29..98c0996f3c2c 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -46,7 +46,6 @@ config SPARC
 	select NEED_DMA_MAP_STATE
 	select NEED_SG_DMA_LENGTH
 	select HAVE_MEMBLOCK
-	select NO_BOOTMEM
 
 config SPARC32
 	def_bool !64BIT
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 10c15b8853ae..ce3d56299b28 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -13,7 +13,6 @@ config UML
 	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_MEMBLOCK
-	select NO_BOOTMEM
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
 	select GENERIC_CLOCKEVENTS
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index 0c5111b206bd..3a3b40f79558 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -6,7 +6,6 @@ config UNICORE32
 	select ARCH_MIGHT_HAVE_PC_SERIO
 	select DMA_DIRECT_OPS
 	select HAVE_MEMBLOCK
-	select NO_BOOTMEM
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index ffebfc3f43c1..0354f52ef30e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -834,9 +834,6 @@ config JAILHOUSE_GUEST
 
 endif #HYPERVISOR_GUEST
 
-config NO_BOOTMEM
-	def_bool y
-
 source "arch/x86/Kconfig.cpu"
 
 config HPET_TIMER
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index ea5d8d03e53b..df43d2e76842 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -34,7 +34,6 @@ config XTENSA
 	select HAVE_STACKPROTECTOR
 	select IRQ_DOMAIN
 	select MODULES_USE_ELF_RELA
-	select NO_BOOTMEM
 	select PERF_USE_VMALLOC
 	select VIRT_TO_BUS
 	help
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 42515195d7d8..1f005b5c1555 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -26,34 +26,6 @@ extern unsigned long max_pfn;
  */
 extern unsigned long long max_possible_pfn;
 
-#ifndef CONFIG_NO_BOOTMEM
-/**
- * struct bootmem_data - per-node information used by the bootmem allocator
- * @node_min_pfn: the starting physical address of the node's memory
- * @node_low_pfn: the end physical address of the directly addressable memory
- * @node_bootmem_map: is a bitmap pointer - the bits represent all physical
- *		      memory pages (including holes) on the node.
- * @last_end_off: the offset within the page of the end of the last allocation;
- *                if 0, the page used is full
- * @hint_idx: the PFN of the page used with the last allocation;
- *            together with using this with the @last_end_offset field,
- *            a test can be made to see if allocations can be merged
- *            with the page used for the last allocation rather than
- *            using up a full new page.
- * @list: list entry in the linked list ordered by the memory addresses
- */
-typedef struct bootmem_data {
-	unsigned long node_min_pfn;
-	unsigned long node_low_pfn;
-	void *node_bootmem_map;
-	unsigned long last_end_off;
-	unsigned long hint_idx;
-	struct list_head list;
-} bootmem_data_t;
-
-extern bootmem_data_t bootmem_node_data[];
-#endif
-
 extern unsigned long bootmem_bootmap_pages(unsigned long);
 
 extern unsigned long init_bootmem_node(pg_data_t *pgdat,
@@ -125,12 +97,8 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 				      unsigned long align,
 				      unsigned long goal) __malloc;
 
-#ifdef CONFIG_NO_BOOTMEM
 /* We are using top down, so it is safe to use 0 here */
 #define BOOTMEM_LOW_LIMIT 0
-#else
-#define BOOTMEM_LOW_LIMIT __pa(MAX_DMA_ADDRESS)
-#endif
 
 #ifndef ARCH_LOW_ADDRESS_LIMIT
 #define ARCH_LOW_ADDRESS_LIMIT  0xffffffffUL
@@ -165,7 +133,7 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 	__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
 
 
-#if defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM)
+#if defined(CONFIG_HAVE_MEMBLOCK)
 
 /* FIXME: use MEMBLOCK_ALLOC_* variants here */
 #define BOOTMEM_ALLOC_ACCESSIBLE	0
@@ -373,7 +341,7 @@ static inline void __init memblock_free_late(
 {
 	free_bootmem_late(base, size);
 }
-#endif /* defined(CONFIG_HAVE_MEMBLOCK) && defined(CONFIG_NO_BOOTMEM) */
+#endif /* defined(CONFIG_HAVE_MEMBLOCK) */
 
 extern void *alloc_large_system_hash(const char *tablename,
 				     unsigned long bucketsize,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 9f0caccd5833..847705a6d0ec 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -633,9 +633,6 @@ typedef struct pglist_data {
 	struct page_ext *node_page_ext;
 #endif
 #endif
-#ifndef CONFIG_NO_BOOTMEM
-	struct bootmem_data *bdata;
-#endif
 #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
 	/*
 	 * Must be held any time you expect node_start_pfn, node_present_pages
@@ -869,7 +866,7 @@ static inline int is_highmem_idx(enum zone_type idx)
 }
 
 /**
- * is_highmem - helper function to quickly check if a struct zone is a 
+ * is_highmem - helper function to quickly check if a struct zone is a
  *              highmem zone or not.  This is an attempt to keep references
  *              to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum.
  * @zone - pointer to struct zone variable
diff --git a/mm/Kconfig b/mm/Kconfig
index 02301a89089e..f7cb608f9ab2 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -142,9 +142,6 @@ config HAVE_GENERIC_GUP
 config ARCH_DISCARD_MEMBLOCK
 	bool
 
-config NO_BOOTMEM
-	bool
-
 config MEMORY_ISOLATION
 	bool
 
@@ -634,7 +631,6 @@ config MAX_STACK_SIZE_MB
 config DEFERRED_STRUCT_PAGE_INIT
 	bool "Defer initialisation of struct pages to kthreads"
 	default n
-	depends on NO_BOOTMEM
 	depends on SPARSEMEM
 	depends on !NEED_PER_CPU_KM
 	depends on 64BIT
diff --git a/mm/Makefile b/mm/Makefile
index 6485d5745dd7..ea5333886593 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -42,12 +42,7 @@ obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
 			   debug.o $(mmu-y)
 
 obj-y += init-mm.o
-
-ifdef CONFIG_NO_BOOTMEM
-	obj-y		+= nobootmem.o
-else
-	obj-y		+= bootmem.o
-endif
+obj-y += nobootmem.o
 
 ifdef CONFIG_MMU
 	obj-$(CONFIG_ADVISE_SYSCALLS)	+= madvise.o
diff --git a/mm/memblock.c b/mm/memblock.c
index a85315083b5a..d6897fbe021f 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1318,7 +1318,6 @@ phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, i
 	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
 }
 
-#if defined(CONFIG_NO_BOOTMEM)
 /**
  * memblock_virt_alloc_internal - allocate boot memory block
  * @size: size of memory block to be allocated in bytes
@@ -1524,7 +1523,6 @@ void * __init memblock_virt_alloc_try_nid(
 	      __func__, (u64)size, (u64)align, nid, &min_addr, &max_addr);
 	return NULL;
 }
-#endif
 
 /**
  * __memblock_free_early - free boot memory block
-- 
cgit v1.2.3


From aca52c39838910605b1063a2243f553aa2a02d5c Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Tue, 30 Oct 2018 15:07:44 -0700
Subject: mm: remove CONFIG_HAVE_MEMBLOCK

All architecures use memblock for early memory management. There is no need
for the CONFIG_HAVE_MEMBLOCK configuration option.

[rppt@linux.vnet.ibm.com: of/fdt: fixup #ifdefs]
  Link: http://lkml.kernel.org/r/20180919103457.GA20545@rapoport-lnx
[rppt@linux.vnet.ibm.com: csky: fixups after bootmem removal]
  Link: http://lkml.kernel.org/r/20180926112744.GC4628@rapoport-lnx
[rppt@linux.vnet.ibm.com: remove stale #else and the code it protects]
  Link: http://lkml.kernel.org/r/1538067825-24835-1-git-send-email-rppt@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1536927045-23536-4-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Tested-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Serge Semin <fancer.lancer@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/Kconfig                  |   1 -
 arch/arc/Kconfig                    |   1 -
 arch/arm/Kconfig                    |   1 -
 arch/arm64/Kconfig                  |   1 -
 arch/c6x/Kconfig                    |   1 -
 arch/csky/Kconfig                   |   1 -
 arch/csky/kernel/setup.c            |   1 -
 arch/csky/mm/highmem.c              |   4 +-
 arch/csky/mm/init.c                 |   3 +-
 arch/h8300/Kconfig                  |   1 -
 arch/hexagon/Kconfig                |   1 -
 arch/ia64/Kconfig                   |   1 -
 arch/m68k/Kconfig                   |   1 -
 arch/microblaze/Kconfig             |   1 -
 arch/mips/Kconfig                   |   1 -
 arch/nds32/Kconfig                  |   1 -
 arch/nios2/Kconfig                  |   1 -
 arch/openrisc/Kconfig               |   1 -
 arch/parisc/Kconfig                 |   1 -
 arch/powerpc/Kconfig                |   1 -
 arch/riscv/Kconfig                  |   1 -
 arch/s390/Kconfig                   |   1 -
 arch/sh/Kconfig                     |   1 -
 arch/sparc/Kconfig                  |   1 -
 arch/um/Kconfig                     |   1 -
 arch/unicore32/Kconfig              |   1 -
 arch/x86/Kconfig                    |   1 -
 arch/xtensa/Kconfig                 |   1 -
 drivers/of/fdt.c                    |  21 -------
 drivers/of/of_reserved_mem.c        |  13 +----
 drivers/staging/android/ion/Kconfig |   2 +-
 fs/pstore/Kconfig                   |   1 -
 include/linux/bootmem.h             | 112 ------------------------------------
 include/linux/memblock.h            |   7 ---
 include/linux/mm.h                  |   2 +-
 lib/Kconfig.debug                   |   3 +-
 mm/Kconfig                          |   5 +-
 mm/Makefile                         |   2 +-
 mm/nobootmem.c                      |   4 --
 mm/page_alloc.c                     |   5 +-
 40 files changed, 11 insertions(+), 199 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 04de6be101bc..5b4f88363453 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -31,7 +31,6 @@ config ALPHA
 	select ODD_RT_SIGACTION
 	select OLD_SIGSUSPEND
 	select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
-	select HAVE_MEMBLOCK
 	help
 	  The Alpha is a 64-bit general-purpose processor designed and
 	  marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 56995f356f69..c9e2a1323536 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -37,7 +37,6 @@ config ARC
 	select HAVE_KERNEL_LZMA
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
-	select HAVE_MEMBLOCK
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_OPROFILE
 	select HAVE_PERF_EVENTS
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 37d4c40f405a..91be74d8df65 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -82,7 +82,6 @@ config ARM
 	select HAVE_KERNEL_XZ
 	select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M
 	select HAVE_KRETPROBES if (HAVE_KPROBES)
-	select HAVE_MEMBLOCK
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NMI
 	select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a8f36c7041ff..787d7850e064 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -139,7 +139,6 @@ config ARM64
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
 	select HAVE_IRQ_TIME_ACCOUNTING
-	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP if NUMA
 	select HAVE_NMI
 	select HAVE_PATA_PLATFORM
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index f65a084607fd..84420109113d 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -13,7 +13,6 @@ config C6X
 	select GENERIC_ATOMIC64
 	select GENERIC_IRQ_SHOW
 	select HAVE_ARCH_TRACEHOOK
-	select HAVE_MEMBLOCK
 	select SPARSE_IRQ
 	select IRQ_DOMAIN
 	select OF
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 46966f576249..cb64f8dacd08 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -36,7 +36,6 @@ config CSKY
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_CONTIGUOUS
-	select HAVE_MEMBLOCK
 	select MAY_HAVE_SPARSE_IRQ
 	select MODULES_USE_ELF_RELA if MODULES
 	select OF
diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c
index a5e3ab1d5360..dff8b89444ec 100644
--- a/arch/csky/kernel/setup.c
+++ b/arch/csky/kernel/setup.c
@@ -3,7 +3,6 @@
 
 #include <linux/console.h>
 #include <linux/memblock.h>
-#include <linux/bootmem.h>
 #include <linux/initrd.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
diff --git a/arch/csky/mm/highmem.c b/arch/csky/mm/highmem.c
index e168ac087ccb..53b1bfa4c462 100644
--- a/arch/csky/mm/highmem.c
+++ b/arch/csky/mm/highmem.c
@@ -4,7 +4,7 @@
 #include <linux/module.h>
 #include <linux/highmem.h>
 #include <linux/smp.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <asm/fixmap.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
@@ -140,7 +140,7 @@ static void __init fixrange_init(unsigned long start, unsigned long end,
 			pmd = (pmd_t *)pud;
 			for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) {
 				if (pmd_none(*pmd)) {
-					pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
+					pte = (pte_t *) memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
 					set_pmd(pmd, __pmd(__pa(pte)));
 					BUG_ON(pte != pte_offset_kernel(pmd, 0));
 				}
diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c
index ce2711e050ad..dc07c078f9b8 100644
--- a/arch/csky/mm/init.c
+++ b/arch/csky/mm/init.c
@@ -14,7 +14,6 @@
 #include <linux/ptrace.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
 #include <linux/highmem.h>
 #include <linux/memblock.h>
 #include <linux/swap.h>
@@ -47,7 +46,7 @@ void __init mem_init(void)
 #endif
 	high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
 
-	free_all_bootmem();
+	memblock_free_all();
 
 #ifdef CONFIG_HIGHMEM
 	for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 5e89d40be8cd..d19c6b16cd5d 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -15,7 +15,6 @@ config H8300
 	select OF
 	select OF_IRQ
 	select OF_EARLY_FLATTREE
-	select HAVE_MEMBLOCK
 	select TIMER_OF
 	select H8300_TMR8
 	select HAVE_KERNEL_GZIP
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index 7823f15e17b2..2b688af379e6 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -21,7 +21,6 @@ config HEXAGON
 	select GENERIC_IRQ_SHOW
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_TRACEHOOK
-	select HAVE_MEMBLOCK
 	select ARCH_DISCARD_MEMBLOCK
 	select NEED_SG_DMA_LENGTH
 	select NO_IOPORT_MAP
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 2bf4ef792f2c..36773def6920 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -26,7 +26,6 @@ config IA64
 	select HAVE_FUNCTION_TRACER
 	select TTY
 	select HAVE_ARCH_TRACEHOOK
-	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_VIRT_CPU_ACCOUNTING
 	select ARCH_HAS_DMA_MARK_CLEAN
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 26edf2542295..1bc9f1ba759a 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -27,7 +27,6 @@ config M68K
 	select OLD_SIGSUSPEND3
 	select OLD_SIGACTION
 	select DMA_DIRECT_OPS if HAS_DMA
-	select HAVE_MEMBLOCK
 	select ARCH_DISCARD_MEMBLOCK
 
 config CPU_BIG_ENDIAN
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 5c2777bb5555..effed2efd306 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -28,7 +28,6 @@ config MICROBLAZE
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
-	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_OPROFILE
 	select IRQ_DOMAIN
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 366934157632..8272ea4c7264 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -60,7 +60,6 @@ config MIPS
 	select HAVE_IRQ_TIME_ACCOUNTING
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
-	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NMI
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 93c0b587ee23..7a04adacb2f0 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -29,7 +29,6 @@ config NDS32
 	select HANDLE_DOMAIN_IRQ
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DEBUG_KMEMLEAK
-	select HAVE_MEMBLOCK
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select IRQ_DOMAIN
 	select LOCKDEP_SUPPORT
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index b01febeb5ce3..7e95506e957a 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -23,7 +23,6 @@ config NIOS2
 	select SPARSE_IRQ
 	select USB_ARCH_HAS_HCD if USB_SUPPORT
 	select CPU_NO_EFFICIENT_FFS
-	select HAVE_MEMBLOCK
 	select ARCH_DISCARD_MEMBLOCK
 
 config GENERIC_CSUM
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index cbfaf15f9d70..285f7d05c8ed 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -12,7 +12,6 @@ config OPENRISC
 	select OF_EARLY_FLATTREE
 	select IRQ_DOMAIN
 	select HANDLE_DOMAIN_IRQ
-	select HAVE_MEMBLOCK
 	select GPIOLIB
         select HAVE_ARCH_TRACEHOOK
 	select SPARSE_IRQ
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 7418ebeeb4a4..92a339ee28b3 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -15,7 +15,6 @@ config PARISC
 	select RTC_CLASS
 	select RTC_DRV_GENERIC
 	select INIT_ALL_POSSIBLE
-	select HAVE_MEMBLOCK
 	select BUG
 	select BUILDTIME_EXTABLE_SORT
 	select HAVE_PERF_EVENTS
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5798ffb1379b..2d51b2bd4aa1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -206,7 +206,6 @@ config PPC
 	select HAVE_KRETPROBES
 	select HAVE_LD_DEAD_CODE_DATA_ELIMINATION
 	select HAVE_LIVEPATCH			if HAVE_DYNAMIC_FTRACE_WITH_REGS
-	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NMI				if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index f7c6b7cf124a..d86842c21710 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -28,7 +28,6 @@ config RISCV
 	select GENERIC_STRNLEN_USER
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A
-	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_FUTEX_CMPXCHG if FUTEX
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c7af83a15b2d..5173366af8f3 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -163,7 +163,6 @@ config S390
 	select HAVE_LIVEPATCH
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
-	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_MEMBLOCK_PHYS_MAP
 	select HAVE_MOD_ARCH_SPECIFIC
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 7aaea9690e18..f82a4da7adf3 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -9,7 +9,6 @@ config SUPERH
 	select CLKDEV_LOOKUP
 	select DMA_DIRECT_OPS
 	select HAVE_IDE if HAS_IOPORT_MAP
-	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select ARCH_DISCARD_MEMBLOCK
 	select HAVE_OPROFILE
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 98c0996f3c2c..490b2c95c212 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -45,7 +45,6 @@ config SPARC
 	select LOCKDEP_SMALL if LOCKDEP
 	select NEED_DMA_MAP_STATE
 	select NEED_SG_DMA_LENGTH
-	select HAVE_MEMBLOCK
 
 config SPARC32
 	def_bool !64BIT
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index ce3d56299b28..6b9938919f0b 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -12,7 +12,6 @@ config UML
 	select HAVE_UID16
 	select HAVE_FUTEX_CMPXCHG if FUTEX
 	select HAVE_DEBUG_KMEMLEAK
-	select HAVE_MEMBLOCK
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
 	select GENERIC_CLOCKEVENTS
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index 3a3b40f79558..a4c05159dca5 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -5,7 +5,6 @@ config UNICORE32
 	select ARCH_MIGHT_HAVE_PC_PARPORT
 	select ARCH_MIGHT_HAVE_PC_SERIO
 	select DMA_DIRECT_OPS
-	select HAVE_MEMBLOCK
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0354f52ef30e..c51c989c19c0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -169,7 +169,6 @@ config X86
 	select HAVE_KRETPROBES
 	select HAVE_KVM
 	select HAVE_LIVEPATCH			if X86_64
-	select HAVE_MEMBLOCK
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_MIXED_BREAKPOINTS_REGS
 	select HAVE_MOD_ARCH_SPECIFIC
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index df43d2e76842..60c141af222b 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -28,7 +28,6 @@ config XTENSA
 	select HAVE_FUTEX_CMPXCHG if !MMU
 	select HAVE_HW_BREAKPOINT if PERF_EVENTS
 	select HAVE_IRQ_TIME_ACCOUNTING
-	select HAVE_MEMBLOCK
 	select HAVE_OPROFILE
 	select HAVE_PERF_EVENTS
 	select HAVE_STACKPROTECTOR
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 76c83c1ffeda..4f915cea6f75 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -1115,7 +1115,6 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
 	return 1;
 }
 
-#ifdef CONFIG_HAVE_MEMBLOCK
 #ifndef MIN_MEMBLOCK_ADDR
 #define MIN_MEMBLOCK_ADDR	__pa(PAGE_OFFSET)
 #endif
@@ -1178,26 +1177,6 @@ int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base,
 	return memblock_reserve(base, size);
 }
 
-#else
-void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
-{
-	WARN_ON(1);
-}
-
-int __init __weak early_init_dt_mark_hotplug_memory_arch(u64 base, u64 size)
-{
-	return -ENOSYS;
-}
-
-int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base,
-					phys_addr_t size, bool nomap)
-{
-	pr_err("Reserved memory not supported, ignoring range %pa - %pa%s\n",
-		  &base, &size, nomap ? " (nomap)" : "");
-	return -ENOSYS;
-}
-#endif
-
 static void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
 {
 	return memblock_virt_alloc(size, align);
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 895c83e0c7b6..d6255c276a41 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -20,13 +20,12 @@
 #include <linux/of_reserved_mem.h>
 #include <linux/sort.h>
 #include <linux/slab.h>
+#include <linux/memblock.h>
 
 #define MAX_RESERVED_REGIONS	32
 static struct reserved_mem reserved_mem[MAX_RESERVED_REGIONS];
 static int reserved_mem_count;
 
-#if defined(CONFIG_HAVE_MEMBLOCK)
-#include <linux/memblock.h>
 int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
 	phys_addr_t align, phys_addr_t start, phys_addr_t end, bool nomap,
 	phys_addr_t *res_base)
@@ -54,16 +53,6 @@ int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
 		return memblock_remove(base, size);
 	return 0;
 }
-#else
-int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
-	phys_addr_t align, phys_addr_t start, phys_addr_t end, bool nomap,
-	phys_addr_t *res_base)
-{
-	pr_err("Reserved memory not supported, ignoring region 0x%llx%s\n",
-		  size, nomap ? " (nomap)" : "");
-	return -ENOSYS;
-}
-#endif
 
 /**
  * res_mem_save_node() - save fdt node for second pass initialization
diff --git a/drivers/staging/android/ion/Kconfig b/drivers/staging/android/ion/Kconfig
index c16dd16afe6a..0fdda6f62953 100644
--- a/drivers/staging/android/ion/Kconfig
+++ b/drivers/staging/android/ion/Kconfig
@@ -1,6 +1,6 @@
 menuconfig ION
 	bool "Ion Memory Manager"
-	depends on HAVE_MEMBLOCK && HAS_DMA && MMU
+	depends on HAS_DMA && MMU
 	select GENERIC_ALLOCATOR
 	select DMA_SHARED_BUFFER
 	help
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index 503086f7f7c1..0d19d191ae70 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -141,7 +141,6 @@ config PSTORE_RAM
 	tristate "Log panic/oops to a RAM buffer"
 	depends on PSTORE
 	depends on HAS_IOMEM
-	depends on HAVE_MEMBLOCK
 	select REED_SOLOMON
 	select REED_SOLOMON_ENC8
 	select REED_SOLOMON_DEC8
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 1f005b5c1555..ee61ac355104 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -132,9 +132,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 #define alloc_bootmem_low_pages_node(pgdat, x) \
 	__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
 
-
-#if defined(CONFIG_HAVE_MEMBLOCK)
-
 /* FIXME: use MEMBLOCK_ALLOC_* variants here */
 #define BOOTMEM_ALLOC_ACCESSIBLE	0
 #define BOOTMEM_ALLOC_ANYWHERE		(~(phys_addr_t)0)
@@ -234,115 +231,6 @@ static inline void __init memblock_free_late(
 	__memblock_free_late(base, size);
 }
 
-#else
-
-#define BOOTMEM_ALLOC_ACCESSIBLE	0
-
-
-/* Fall back to all the existing bootmem APIs */
-static inline void * __init memblock_virt_alloc(
-					phys_addr_t size,  phys_addr_t align)
-{
-	if (!align)
-		align = SMP_CACHE_BYTES;
-	return __alloc_bootmem(size, align, BOOTMEM_LOW_LIMIT);
-}
-
-static inline void * __init memblock_virt_alloc_raw(
-					phys_addr_t size,  phys_addr_t align)
-{
-	if (!align)
-		align = SMP_CACHE_BYTES;
-	return __alloc_bootmem_nopanic(size, align, BOOTMEM_LOW_LIMIT);
-}
-
-static inline void * __init memblock_virt_alloc_nopanic(
-					phys_addr_t size, phys_addr_t align)
-{
-	if (!align)
-		align = SMP_CACHE_BYTES;
-	return __alloc_bootmem_nopanic(size, align, BOOTMEM_LOW_LIMIT);
-}
-
-static inline void * __init memblock_virt_alloc_low(
-					phys_addr_t size, phys_addr_t align)
-{
-	if (!align)
-		align = SMP_CACHE_BYTES;
-	return __alloc_bootmem_low(size, align, 0);
-}
-
-static inline void * __init memblock_virt_alloc_low_nopanic(
-					phys_addr_t size, phys_addr_t align)
-{
-	if (!align)
-		align = SMP_CACHE_BYTES;
-	return __alloc_bootmem_low_nopanic(size, align, 0);
-}
-
-static inline void * __init memblock_virt_alloc_from_nopanic(
-		phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
-{
-	return __alloc_bootmem_nopanic(size, align, min_addr);
-}
-
-static inline void * __init memblock_virt_alloc_node(
-						phys_addr_t size, int nid)
-{
-	return __alloc_bootmem_node(NODE_DATA(nid), size, SMP_CACHE_BYTES,
-				     BOOTMEM_LOW_LIMIT);
-}
-
-static inline void * __init memblock_virt_alloc_node_nopanic(
-						phys_addr_t size, int nid)
-{
-	return __alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
-					     SMP_CACHE_BYTES,
-					     BOOTMEM_LOW_LIMIT);
-}
-
-static inline void * __init memblock_virt_alloc_try_nid(phys_addr_t size,
-	phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid)
-{
-	return __alloc_bootmem_node_high(NODE_DATA(nid), size, align,
-					  min_addr);
-}
-
-static inline void * __init memblock_virt_alloc_try_nid_raw(
-			phys_addr_t size, phys_addr_t align,
-			phys_addr_t min_addr, phys_addr_t max_addr, int nid)
-{
-	return ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size, align,
-				min_addr, max_addr);
-}
-
-static inline void * __init memblock_virt_alloc_try_nid_nopanic(
-			phys_addr_t size, phys_addr_t align,
-			phys_addr_t min_addr, phys_addr_t max_addr, int nid)
-{
-	return ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size, align,
-				min_addr, max_addr);
-}
-
-static inline void __init memblock_free_early(
-					phys_addr_t base, phys_addr_t size)
-{
-	free_bootmem(base, size);
-}
-
-static inline void __init memblock_free_early_nid(
-				phys_addr_t base, phys_addr_t size, int nid)
-{
-	free_bootmem_node(NODE_DATA(nid), base, size);
-}
-
-static inline void __init memblock_free_late(
-					phys_addr_t base, phys_addr_t size)
-{
-	free_bootmem_late(base, size);
-}
-#endif /* defined(CONFIG_HAVE_MEMBLOCK) */
-
 extern void *alloc_large_system_hash(const char *tablename,
 				     unsigned long bucketsize,
 				     unsigned long numentries,
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 2acdd046df2d..224d27363cca 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -2,7 +2,6 @@
 #define _LINUX_MEMBLOCK_H
 #ifdef __KERNEL__
 
-#ifdef CONFIG_HAVE_MEMBLOCK
 /*
  * Logical memory blocks.
  *
@@ -440,12 +439,6 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end)
 {
 }
 #endif
-#else
-static inline phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align)
-{
-	return 0;
-}
-#endif /* CONFIG_HAVE_MEMBLOCK */
 
 #endif /* __KERNEL__ */
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1e52b8fd1685..fcf9cc9d535f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2163,7 +2163,7 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn,
 					struct mminit_pfnnid_cache *state);
 #endif
 
-#if defined(CONFIG_HAVE_MEMBLOCK) && !defined(CONFIG_FLAT_NODE_MEM_MAP)
+#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
 void zero_resv_unavail(void);
 #else
 static inline void zero_resv_unavail(void) {}
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e0ba05e6f6bd..1af29b8224fd 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1292,7 +1292,7 @@ config DEBUG_KOBJECT
 	depends on DEBUG_KERNEL
 	help
 	  If you say Y here, some extra kobject debugging messages will be sent
-	  to the syslog. 
+	  to the syslog.
 
 config DEBUG_KOBJECT_RELEASE
 	bool "kobject release debugging"
@@ -1980,7 +1980,6 @@ endif # RUNTIME_TESTING_MENU
 
 config MEMTEST
 	bool "Memtest"
-	depends on HAVE_MEMBLOCK
 	---help---
 	  This option adds a kernel parameter 'memtest', which allows memtest
 	  to be set.
diff --git a/mm/Kconfig b/mm/Kconfig
index f7cb608f9ab2..d85e39da47ae 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -127,9 +127,6 @@ config SPARSEMEM_VMEMMAP
 	 pfn_to_page and page_to_pfn operations.  This is the most
 	 efficient option when sufficient kernel resources are available.
 
-config HAVE_MEMBLOCK
-	bool
-
 config HAVE_MEMBLOCK_NODE_MAP
 	bool
 
@@ -478,7 +475,7 @@ config FRONTSWAP
 
 config CMA
 	bool "Contiguous Memory Allocator"
-	depends on HAVE_MEMBLOCK && MMU
+	depends on MMU
 	select MIGRATION
 	select MEMORY_ISOLATION
 	help
diff --git a/mm/Makefile b/mm/Makefile
index ea5333886593..ca3c844d7a56 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -43,11 +43,11 @@ obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
 
 obj-y += init-mm.o
 obj-y += nobootmem.o
+obj-y += memblock.o
 
 ifdef CONFIG_MMU
 	obj-$(CONFIG_ADVISE_SYSCALLS)	+= madvise.o
 endif
-obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o swap_slots.o
 obj-$(CONFIG_FRONTSWAP)	+= frontswap.o
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 439af3b765a7..d4d0cd474087 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -23,10 +23,6 @@
 
 #include "internal.h"
 
-#ifndef CONFIG_HAVE_MEMBLOCK
-#error CONFIG_HAVE_MEMBLOCK not defined
-#endif
-
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 struct pglist_data __refdata contig_page_data;
 EXPORT_SYMBOL(contig_page_data);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 863d46da6586..59d171f84445 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6508,8 +6508,7 @@ void __init free_area_init_node(int nid, unsigned long *zones_size,
 	free_area_init_core(pgdat);
 }
 
-#if defined(CONFIG_HAVE_MEMBLOCK) && !defined(CONFIG_FLAT_NODE_MEM_MAP)
-
+#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
 /*
  * Zero all valid struct pages in range [spfn, epfn), return number of struct
  * pages zeroed
@@ -6569,7 +6568,7 @@ void __init zero_resv_unavail(void)
 	if (pgcnt)
 		pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt);
 }
-#endif /* CONFIG_HAVE_MEMBLOCK && !CONFIG_FLAT_NODE_MEM_MAP */
+#endif /* !CONFIG_FLAT_NODE_MEM_MAP */
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 
-- 
cgit v1.2.3


From 355c45affca7114ab510e296a5b7012943aeea17 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Tue, 30 Oct 2018 15:07:50 -0700
Subject: mm: remove bootmem allocator implementation.

All architectures have been converted to use MEMBLOCK + NO_BOOTMEM. The
bootmem allocator implementation can be removed.

Link: http://lkml.kernel.org/r/1536927045-23536-5-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Serge Semin <fancer.lancer@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h |  16 -
 mm/bootmem.c            | 811 ------------------------------------------------
 2 files changed, 827 deletions(-)
 delete mode 100644 mm/bootmem.c

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index ee61ac355104..fce6278a1724 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -26,14 +26,6 @@ extern unsigned long max_pfn;
  */
 extern unsigned long long max_possible_pfn;
 
-extern unsigned long bootmem_bootmap_pages(unsigned long);
-
-extern unsigned long init_bootmem_node(pg_data_t *pgdat,
-				       unsigned long freepfn,
-				       unsigned long startpfn,
-				       unsigned long endpfn);
-extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
-
 extern unsigned long free_all_bootmem(void);
 extern void reset_node_managed_pages(pg_data_t *pgdat);
 extern void reset_all_zones_managed_pages(void);
@@ -55,14 +47,6 @@ extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
 #define BOOTMEM_DEFAULT		0
 #define BOOTMEM_EXCLUSIVE	(1<<0)
 
-extern int reserve_bootmem(unsigned long addr,
-			   unsigned long size,
-			   int flags);
-extern int reserve_bootmem_node(pg_data_t *pgdat,
-				unsigned long physaddr,
-				unsigned long size,
-				int flags);
-
 extern void *__alloc_bootmem(unsigned long size,
 			     unsigned long align,
 			     unsigned long goal);
diff --git a/mm/bootmem.c b/mm/bootmem.c
deleted file mode 100644
index 97db0e8e362b..000000000000
--- a/mm/bootmem.c
+++ /dev/null
@@ -1,811 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *  bootmem - A boot-time physical memory allocator and configurator
- *
- *  Copyright (C) 1999 Ingo Molnar
- *                1999 Kanoj Sarcar, SGI
- *                2008 Johannes Weiner
- *
- * Access to this subsystem has to be serialized externally (which is true
- * for the boot process anyway).
- */
-#include <linux/init.h>
-#include <linux/pfn.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <linux/kmemleak.h>
-#include <linux/range.h>
-#include <linux/bug.h>
-#include <linux/io.h>
-#include <linux/bootmem.h>
-
-#include "internal.h"
-
-/**
- * DOC: bootmem overview
- *
- * Bootmem is a boot-time physical memory allocator and configurator.
- *
- * It is used early in the boot process before the page allocator is
- * set up.
- *
- * Bootmem is based on the most basic of allocators, a First Fit
- * allocator which uses a bitmap to represent memory. If a bit is 1,
- * the page is allocated and 0 if unallocated. To satisfy allocations
- * of sizes smaller than a page, the allocator records the Page Frame
- * Number (PFN) of the last allocation and the offset the allocation
- * ended at. Subsequent small allocations are merged together and
- * stored on the same page.
- *
- * The information used by the bootmem allocator is represented by
- * :c:type:`struct bootmem_data`. An array to hold up to %MAX_NUMNODES
- * such structures is statically allocated and then it is discarded
- * when the system initialization completes. Each entry in this array
- * corresponds to a node with memory. For UMA systems only entry 0 is
- * used.
- *
- * The bootmem allocator is initialized during early architecture
- * specific setup. Each architecture is required to supply a
- * :c:func:`setup_arch` function which, among other tasks, is
- * responsible for acquiring the necessary parameters to initialise
- * the boot memory allocator. These parameters define limits of usable
- * physical memory:
- *
- * * @min_low_pfn - the lowest PFN that is available in the system
- * * @max_low_pfn - the highest PFN that may be addressed by low
- *   memory (%ZONE_NORMAL)
- * * @max_pfn - the last PFN available to the system.
- *
- * After those limits are determined, the :c:func:`init_bootmem` or
- * :c:func:`init_bootmem_node` function should be called to initialize
- * the bootmem allocator. The UMA case should use the `init_bootmem`
- * function. It will initialize ``contig_page_data`` structure that
- * represents the only memory node in the system. In the NUMA case the
- * `init_bootmem_node` function should be called to initialize the
- * bootmem allocator for each node.
- *
- * Once the allocator is set up, it is possible to use either single
- * node or NUMA variant of the allocation APIs.
- */
-
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-struct pglist_data __refdata contig_page_data = {
-	.bdata = &bootmem_node_data[0]
-};
-EXPORT_SYMBOL(contig_page_data);
-#endif
-
-unsigned long max_low_pfn;
-unsigned long min_low_pfn;
-unsigned long max_pfn;
-unsigned long long max_possible_pfn;
-
-bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
-
-static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);
-
-static int bootmem_debug;
-
-static int __init bootmem_debug_setup(char *buf)
-{
-	bootmem_debug = 1;
-	return 0;
-}
-early_param("bootmem_debug", bootmem_debug_setup);
-
-#define bdebug(fmt, args...) ({				\
-	if (unlikely(bootmem_debug))			\
-		pr_info("bootmem::%s " fmt,		\
-			__func__, ## args);		\
-})
-
-static unsigned long __init bootmap_bytes(unsigned long pages)
-{
-	unsigned long bytes = DIV_ROUND_UP(pages, BITS_PER_BYTE);
-
-	return ALIGN(bytes, sizeof(long));
-}
-
-/**
- * bootmem_bootmap_pages - calculate bitmap size in pages
- * @pages: number of pages the bitmap has to represent
- *
- * Return: the number of pages needed to hold the bitmap.
- */
-unsigned long __init bootmem_bootmap_pages(unsigned long pages)
-{
-	unsigned long bytes = bootmap_bytes(pages);
-
-	return PAGE_ALIGN(bytes) >> PAGE_SHIFT;
-}
-
-/*
- * link bdata in order
- */
-static void __init link_bootmem(bootmem_data_t *bdata)
-{
-	bootmem_data_t *ent;
-
-	list_for_each_entry(ent, &bdata_list, list) {
-		if (bdata->node_min_pfn < ent->node_min_pfn) {
-			list_add_tail(&bdata->list, &ent->list);
-			return;
-		}
-	}
-
-	list_add_tail(&bdata->list, &bdata_list);
-}
-
-/*
- * Called once to set up the allocator itself.
- */
-static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
-	unsigned long mapstart, unsigned long start, unsigned long end)
-{
-	unsigned long mapsize;
-
-	mminit_validate_memmodel_limits(&start, &end);
-	bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
-	bdata->node_min_pfn = start;
-	bdata->node_low_pfn = end;
-	link_bootmem(bdata);
-
-	/*
-	 * Initially all pages are reserved - setup_arch() has to
-	 * register free RAM areas explicitly.
-	 */
-	mapsize = bootmap_bytes(end - start);
-	memset(bdata->node_bootmem_map, 0xff, mapsize);
-
-	bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
-		bdata - bootmem_node_data, start, mapstart, end, mapsize);
-
-	return mapsize;
-}
-
-/**
- * init_bootmem_node - register a node as boot memory
- * @pgdat: node to register
- * @freepfn: pfn where the bitmap for this node is to be placed
- * @startpfn: first pfn on the node
- * @endpfn: first pfn after the node
- *
- * Return: the number of bytes needed to hold the bitmap for this node.
- */
-unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
-				unsigned long startpfn, unsigned long endpfn)
-{
-	return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);
-}
-
-/**
- * init_bootmem - register boot memory
- * @start: pfn where the bitmap is to be placed
- * @pages: number of available physical pages
- *
- * Return: the number of bytes needed to hold the bitmap.
- */
-unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
-{
-	max_low_pfn = pages;
-	min_low_pfn = start;
-	return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
-}
-
-void __init free_bootmem_late(unsigned long physaddr, unsigned long size)
-{
-	unsigned long cursor, end;
-
-	kmemleak_free_part_phys(physaddr, size);
-
-	cursor = PFN_UP(physaddr);
-	end = PFN_DOWN(physaddr + size);
-
-	for (; cursor < end; cursor++) {
-		__free_pages_bootmem(pfn_to_page(cursor), cursor, 0);
-		totalram_pages++;
-	}
-}
-
-static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
-{
-	struct page *page;
-	unsigned long *map, start, end, pages, cur, count = 0;
-
-	if (!bdata->node_bootmem_map)
-		return 0;
-
-	map = bdata->node_bootmem_map;
-	start = bdata->node_min_pfn;
-	end = bdata->node_low_pfn;
-
-	bdebug("nid=%td start=%lx end=%lx\n",
-		bdata - bootmem_node_data, start, end);
-
-	while (start < end) {
-		unsigned long idx, vec;
-		unsigned shift;
-
-		idx = start - bdata->node_min_pfn;
-		shift = idx & (BITS_PER_LONG - 1);
-		/*
-		 * vec holds at most BITS_PER_LONG map bits,
-		 * bit 0 corresponds to start.
-		 */
-		vec = ~map[idx / BITS_PER_LONG];
-
-		if (shift) {
-			vec >>= shift;
-			if (end - start >= BITS_PER_LONG)
-				vec |= ~map[idx / BITS_PER_LONG + 1] <<
-					(BITS_PER_LONG - shift);
-		}
-		/*
-		 * If we have a properly aligned and fully unreserved
-		 * BITS_PER_LONG block of pages in front of us, free
-		 * it in one go.
-		 */
-		if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL) {
-			int order = ilog2(BITS_PER_LONG);
-
-			__free_pages_bootmem(pfn_to_page(start), start, order);
-			count += BITS_PER_LONG;
-			start += BITS_PER_LONG;
-		} else {
-			cur = start;
-
-			start = ALIGN(start + 1, BITS_PER_LONG);
-			while (vec && cur != start) {
-				if (vec & 1) {
-					page = pfn_to_page(cur);
-					__free_pages_bootmem(page, cur, 0);
-					count++;
-				}
-				vec >>= 1;
-				++cur;
-			}
-		}
-	}
-
-	cur = bdata->node_min_pfn;
-	page = virt_to_page(bdata->node_bootmem_map);
-	pages = bdata->node_low_pfn - bdata->node_min_pfn;
-	pages = bootmem_bootmap_pages(pages);
-	count += pages;
-	while (pages--)
-		__free_pages_bootmem(page++, cur++, 0);
-	bdata->node_bootmem_map = NULL;
-
-	bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);
-
-	return count;
-}
-
-static int reset_managed_pages_done __initdata;
-
-void reset_node_managed_pages(pg_data_t *pgdat)
-{
-	struct zone *z;
-
-	for (z = pgdat->node_zones; z < pgdat->node_zones + MAX_NR_ZONES; z++)
-		z->managed_pages = 0;
-}
-
-void __init reset_all_zones_managed_pages(void)
-{
-	struct pglist_data *pgdat;
-
-	if (reset_managed_pages_done)
-		return;
-
-	for_each_online_pgdat(pgdat)
-		reset_node_managed_pages(pgdat);
-
-	reset_managed_pages_done = 1;
-}
-
-unsigned long __init free_all_bootmem(void)
-{
-	unsigned long total_pages = 0;
-	bootmem_data_t *bdata;
-
-	reset_all_zones_managed_pages();
-
-	list_for_each_entry(bdata, &bdata_list, list)
-		total_pages += free_all_bootmem_core(bdata);
-
-	totalram_pages += total_pages;
-
-	return total_pages;
-}
-
-static void __init __free(bootmem_data_t *bdata,
-			unsigned long sidx, unsigned long eidx)
-{
-	unsigned long idx;
-
-	bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data,
-		sidx + bdata->node_min_pfn,
-		eidx + bdata->node_min_pfn);
-
-	if (WARN_ON(bdata->node_bootmem_map == NULL))
-		return;
-
-	if (bdata->hint_idx > sidx)
-		bdata->hint_idx = sidx;
-
-	for (idx = sidx; idx < eidx; idx++)
-		if (!test_and_clear_bit(idx, bdata->node_bootmem_map))
-			BUG();
-}
-
-static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
-			unsigned long eidx, int flags)
-{
-	unsigned long idx;
-	int exclusive = flags & BOOTMEM_EXCLUSIVE;
-
-	bdebug("nid=%td start=%lx end=%lx flags=%x\n",
-		bdata - bootmem_node_data,
-		sidx + bdata->node_min_pfn,
-		eidx + bdata->node_min_pfn,
-		flags);
-
-	if (WARN_ON(bdata->node_bootmem_map == NULL))
-		return 0;
-
-	for (idx = sidx; idx < eidx; idx++)
-		if (test_and_set_bit(idx, bdata->node_bootmem_map)) {
-			if (exclusive) {
-				__free(bdata, sidx, idx);
-				return -EBUSY;
-			}
-			bdebug("silent double reserve of PFN %lx\n",
-				idx + bdata->node_min_pfn);
-		}
-	return 0;
-}
-
-static int __init mark_bootmem_node(bootmem_data_t *bdata,
-				unsigned long start, unsigned long end,
-				int reserve, int flags)
-{
-	unsigned long sidx, eidx;
-
-	bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n",
-		bdata - bootmem_node_data, start, end, reserve, flags);
-
-	BUG_ON(start < bdata->node_min_pfn);
-	BUG_ON(end > bdata->node_low_pfn);
-
-	sidx = start - bdata->node_min_pfn;
-	eidx = end - bdata->node_min_pfn;
-
-	if (reserve)
-		return __reserve(bdata, sidx, eidx, flags);
-	else
-		__free(bdata, sidx, eidx);
-	return 0;
-}
-
-static int __init mark_bootmem(unsigned long start, unsigned long end,
-				int reserve, int flags)
-{
-	unsigned long pos;
-	bootmem_data_t *bdata;
-
-	pos = start;
-	list_for_each_entry(bdata, &bdata_list, list) {
-		int err;
-		unsigned long max;
-
-		if (pos < bdata->node_min_pfn ||
-		    pos >= bdata->node_low_pfn) {
-			BUG_ON(pos != start);
-			continue;
-		}
-
-		max = min(bdata->node_low_pfn, end);
-
-		err = mark_bootmem_node(bdata, pos, max, reserve, flags);
-		if (reserve && err) {
-			mark_bootmem(start, pos, 0, 0);
-			return err;
-		}
-
-		if (max == end)
-			return 0;
-		pos = bdata->node_low_pfn;
-	}
-	BUG();
-}
-
-void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
-			      unsigned long size)
-{
-	unsigned long start, end;
-
-	kmemleak_free_part_phys(physaddr, size);
-
-	start = PFN_UP(physaddr);
-	end = PFN_DOWN(physaddr + size);
-
-	mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
-}
-
-void __init free_bootmem(unsigned long physaddr, unsigned long size)
-{
-	unsigned long start, end;
-
-	kmemleak_free_part_phys(physaddr, size);
-
-	start = PFN_UP(physaddr);
-	end = PFN_DOWN(physaddr + size);
-
-	mark_bootmem(start, end, 0, 0);
-}
-
-/**
- * reserve_bootmem_node - mark a page range as reserved
- * @pgdat: node the range resides on
- * @physaddr: starting address of the range
- * @size: size of the range in bytes
- * @flags: reservation flags (see linux/bootmem.h)
- *
- * Partial pages will be reserved.
- *
- * The range must reside completely on the specified node.
- *
- * Return: 0 on success, -errno on failure.
- */
-int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
-				 unsigned long size, int flags)
-{
-	unsigned long start, end;
-
-	start = PFN_DOWN(physaddr);
-	end = PFN_UP(physaddr + size);
-
-	return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
-}
-
-/**
- * reserve_bootmem - mark a page range as reserved
- * @addr: starting address of the range
- * @size: size of the range in bytes
- * @flags: reservation flags (see linux/bootmem.h)
- *
- * Partial pages will be reserved.
- *
- * The range must be contiguous but may span node boundaries.
- *
- * Return: 0 on success, -errno on failure.
- */
-int __init reserve_bootmem(unsigned long addr, unsigned long size,
-			    int flags)
-{
-	unsigned long start, end;
-
-	start = PFN_DOWN(addr);
-	end = PFN_UP(addr + size);
-
-	return mark_bootmem(start, end, 1, flags);
-}
-
-static unsigned long __init align_idx(struct bootmem_data *bdata,
-				      unsigned long idx, unsigned long step)
-{
-	unsigned long base = bdata->node_min_pfn;
-
-	/*
-	 * Align the index with respect to the node start so that the
-	 * combination of both satisfies the requested alignment.
-	 */
-
-	return ALIGN(base + idx, step) - base;
-}
-
-static unsigned long __init align_off(struct bootmem_data *bdata,
-				      unsigned long off, unsigned long align)
-{
-	unsigned long base = PFN_PHYS(bdata->node_min_pfn);
-
-	/* Same as align_idx for byte offsets */
-
-	return ALIGN(base + off, align) - base;
-}
-
-static void * __init alloc_bootmem_bdata(struct bootmem_data *bdata,
-					unsigned long size, unsigned long align,
-					unsigned long goal, unsigned long limit)
-{
-	unsigned long fallback = 0;
-	unsigned long min, max, start, sidx, midx, step;
-
-	bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n",
-		bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT,
-		align, goal, limit);
-
-	BUG_ON(!size);
-	BUG_ON(align & (align - 1));
-	BUG_ON(limit && goal + size > limit);
-
-	if (!bdata->node_bootmem_map)
-		return NULL;
-
-	min = bdata->node_min_pfn;
-	max = bdata->node_low_pfn;
-
-	goal >>= PAGE_SHIFT;
-	limit >>= PAGE_SHIFT;
-
-	if (limit && max > limit)
-		max = limit;
-	if (max <= min)
-		return NULL;
-
-	step = max(align >> PAGE_SHIFT, 1UL);
-
-	if (goal && min < goal && goal < max)
-		start = ALIGN(goal, step);
-	else
-		start = ALIGN(min, step);
-
-	sidx = start - bdata->node_min_pfn;
-	midx = max - bdata->node_min_pfn;
-
-	if (bdata->hint_idx > sidx) {
-		/*
-		 * Handle the valid case of sidx being zero and still
-		 * catch the fallback below.
-		 */
-		fallback = sidx + 1;
-		sidx = align_idx(bdata, bdata->hint_idx, step);
-	}
-
-	while (1) {
-		int merge;
-		void *region;
-		unsigned long eidx, i, start_off, end_off;
-find_block:
-		sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);
-		sidx = align_idx(bdata, sidx, step);
-		eidx = sidx + PFN_UP(size);
-
-		if (sidx >= midx || eidx > midx)
-			break;
-
-		for (i = sidx; i < eidx; i++)
-			if (test_bit(i, bdata->node_bootmem_map)) {
-				sidx = align_idx(bdata, i, step);
-				if (sidx == i)
-					sidx += step;
-				goto find_block;
-			}
-
-		if (bdata->last_end_off & (PAGE_SIZE - 1) &&
-				PFN_DOWN(bdata->last_end_off) + 1 == sidx)
-			start_off = align_off(bdata, bdata->last_end_off, align);
-		else
-			start_off = PFN_PHYS(sidx);
-
-		merge = PFN_DOWN(start_off) < sidx;
-		end_off = start_off + size;
-
-		bdata->last_end_off = end_off;
-		bdata->hint_idx = PFN_UP(end_off);
-
-		/*
-		 * Reserve the area now:
-		 */
-		if (__reserve(bdata, PFN_DOWN(start_off) + merge,
-				PFN_UP(end_off), BOOTMEM_EXCLUSIVE))
-			BUG();
-
-		region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
-				start_off);
-		memset(region, 0, size);
-		/*
-		 * The min_count is set to 0 so that bootmem allocated blocks
-		 * are never reported as leaks.
-		 */
-		kmemleak_alloc(region, size, 0, 0);
-		return region;
-	}
-
-	if (fallback) {
-		sidx = align_idx(bdata, fallback - 1, step);
-		fallback = 0;
-		goto find_block;
-	}
-
-	return NULL;
-}
-
-static void * __init alloc_bootmem_core(unsigned long size,
-					unsigned long align,
-					unsigned long goal,
-					unsigned long limit)
-{
-	bootmem_data_t *bdata;
-	void *region;
-
-	if (WARN_ON_ONCE(slab_is_available()))
-		return kzalloc(size, GFP_NOWAIT);
-
-	list_for_each_entry(bdata, &bdata_list, list) {
-		if (goal && bdata->node_low_pfn <= PFN_DOWN(goal))
-			continue;
-		if (limit && bdata->node_min_pfn >= PFN_DOWN(limit))
-			break;
-
-		region = alloc_bootmem_bdata(bdata, size, align, goal, limit);
-		if (region)
-			return region;
-	}
-
-	return NULL;
-}
-
-static void * __init ___alloc_bootmem_nopanic(unsigned long size,
-					      unsigned long align,
-					      unsigned long goal,
-					      unsigned long limit)
-{
-	void *ptr;
-
-restart:
-	ptr = alloc_bootmem_core(size, align, goal, limit);
-	if (ptr)
-		return ptr;
-	if (goal) {
-		goal = 0;
-		goto restart;
-	}
-
-	return NULL;
-}
-
-void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
-					unsigned long goal)
-{
-	unsigned long limit = 0;
-
-	return ___alloc_bootmem_nopanic(size, align, goal, limit);
-}
-
-static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
-					unsigned long goal, unsigned long limit)
-{
-	void *mem = ___alloc_bootmem_nopanic(size, align, goal, limit);
-
-	if (mem)
-		return mem;
-	/*
-	 * Whoops, we cannot satisfy the allocation request.
-	 */
-	pr_alert("bootmem alloc of %lu bytes failed!\n", size);
-	panic("Out of memory");
-	return NULL;
-}
-
-void * __init __alloc_bootmem(unsigned long size, unsigned long align,
-			      unsigned long goal)
-{
-	unsigned long limit = 0;
-
-	return ___alloc_bootmem(size, align, goal, limit);
-}
-
-void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
-				unsigned long size, unsigned long align,
-				unsigned long goal, unsigned long limit)
-{
-	void *ptr;
-
-	if (WARN_ON_ONCE(slab_is_available()))
-		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
-again:
-
-	/* do not panic in alloc_bootmem_bdata() */
-	if (limit && goal + size > limit)
-		limit = 0;
-
-	ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit);
-	if (ptr)
-		return ptr;
-
-	ptr = alloc_bootmem_core(size, align, goal, limit);
-	if (ptr)
-		return ptr;
-
-	if (goal) {
-		goal = 0;
-		goto again;
-	}
-
-	return NULL;
-}
-
-void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
-				   unsigned long align, unsigned long goal)
-{
-	return ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0);
-}
-
-void * __init ___alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
-				    unsigned long align, unsigned long goal,
-				    unsigned long limit)
-{
-	void *ptr;
-
-	ptr = ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0);
-	if (ptr)
-		return ptr;
-
-	pr_alert("bootmem alloc of %lu bytes failed!\n", size);
-	panic("Out of memory");
-	return NULL;
-}
-
-void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
-				   unsigned long align, unsigned long goal)
-{
-	if (WARN_ON_ONCE(slab_is_available()))
-		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
-
-	return  ___alloc_bootmem_node(pgdat, size, align, goal, 0);
-}
-
-void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
-				   unsigned long align, unsigned long goal)
-{
-#ifdef MAX_DMA32_PFN
-	unsigned long end_pfn;
-
-	if (WARN_ON_ONCE(slab_is_available()))
-		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
-
-	/* update goal according ...MAX_DMA32_PFN */
-	end_pfn = pgdat_end_pfn(pgdat);
-
-	if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
-	    (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
-		void *ptr;
-		unsigned long new_goal;
-
-		new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
-		ptr = alloc_bootmem_bdata(pgdat->bdata, size, align,
-						 new_goal, 0);
-		if (ptr)
-			return ptr;
-	}
-#endif
-
-	return __alloc_bootmem_node(pgdat, size, align, goal);
-
-}
-
-void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
-				  unsigned long goal)
-{
-	return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
-}
-
-void * __init __alloc_bootmem_low_nopanic(unsigned long size,
-					  unsigned long align,
-					  unsigned long goal)
-{
-	return ___alloc_bootmem_nopanic(size, align, goal,
-					ARCH_LOW_ADDRESS_LIMIT);
-}
-
-void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
-				       unsigned long align, unsigned long goal)
-{
-	if (WARN_ON_ONCE(slab_is_available()))
-		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
-
-	return ___alloc_bootmem_node(pgdat, size, align,
-				     goal, ARCH_LOW_ADDRESS_LIMIT);
-}
-- 
cgit v1.2.3


From b146ada221c178a384fee2a8e2e5b2e8a04476b1 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Tue, 30 Oct 2018 15:07:54 -0700
Subject: mm: nobootmem: remove dead code

Several bootmem functions and macros are not used. Remove them.

Link: http://lkml.kernel.org/r/1536927045-23536-6-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Serge Semin <fancer.lancer@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h | 26 --------------------------
 mm/nobootmem.c          | 35 -----------------------------------
 2 files changed, 61 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index fce6278a1724..b74bafd110b9 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -36,17 +36,6 @@ extern void free_bootmem_node(pg_data_t *pgdat,
 extern void free_bootmem(unsigned long physaddr, unsigned long size);
 extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
 
-/*
- * Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
- * the architecture-specific code should honor this).
- *
- * If flags is BOOTMEM_DEFAULT, then the return value is always 0 (success).
- * If flags contains BOOTMEM_EXCLUSIVE, then -EBUSY is returned if the memory
- * already was reserved.
- */
-#define BOOTMEM_DEFAULT		0
-#define BOOTMEM_EXCLUSIVE	(1<<0)
-
 extern void *__alloc_bootmem(unsigned long size,
 			     unsigned long align,
 			     unsigned long goal);
@@ -73,13 +62,6 @@ void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
 extern void *__alloc_bootmem_low(unsigned long size,
 				 unsigned long align,
 				 unsigned long goal) __malloc;
-void *__alloc_bootmem_low_nopanic(unsigned long size,
-				 unsigned long align,
-				 unsigned long goal) __malloc;
-extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
-				      unsigned long size,
-				      unsigned long align,
-				      unsigned long goal) __malloc;
 
 /* We are using top down, so it is safe to use 0 here */
 #define BOOTMEM_LOW_LIMIT 0
@@ -92,8 +74,6 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 	__alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_align(x, align) \
 	__alloc_bootmem(x, align, BOOTMEM_LOW_LIMIT)
-#define alloc_bootmem_nopanic(x) \
-	__alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_pages(x) \
 	__alloc_bootmem(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_pages_nopanic(x) \
@@ -104,17 +84,11 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 	__alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
 #define alloc_bootmem_pages_node(pgdat, x) \
 	__alloc_bootmem_node(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
-#define alloc_bootmem_pages_node_nopanic(pgdat, x) \
-	__alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
 
 #define alloc_bootmem_low(x) \
 	__alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
-#define alloc_bootmem_low_pages_nopanic(x) \
-	__alloc_bootmem_low_nopanic(x, PAGE_SIZE, 0)
 #define alloc_bootmem_low_pages(x) \
 	__alloc_bootmem_low(x, PAGE_SIZE, 0)
-#define alloc_bootmem_low_pages_node(pgdat, x) \
-	__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
 
 /* FIXME: use MEMBLOCK_ALLOC_* variants here */
 #define BOOTMEM_ALLOC_ACCESSIBLE	0
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index d4d0cd474087..44ce7de1be8f 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -404,38 +404,3 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
 {
 	return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
 }
-
-void * __init __alloc_bootmem_low_nopanic(unsigned long size,
-					  unsigned long align,
-					  unsigned long goal)
-{
-	return ___alloc_bootmem_nopanic(size, align, goal,
-					ARCH_LOW_ADDRESS_LIMIT);
-}
-
-/**
- * __alloc_bootmem_low_node - allocate low boot memory from a specific node
- * @pgdat: node to allocate from
- * @size: size of the request in bytes
- * @align: alignment of the region
- * @goal: preferred starting address of the region
- *
- * The goal is dropped if it can not be satisfied and the allocation will
- * fall back to memory below @goal.
- *
- * Allocation may fall back to any node in the system if the specified node
- * can not hold the requested memory.
- *
- * The function panics if the request can not be satisfied.
- *
- * Return: address of the allocated region.
- */
-void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
-				       unsigned long align, unsigned long goal)
-{
-	if (WARN_ON_ONCE(slab_is_available()))
-		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
-
-	return ___alloc_bootmem_node(pgdat, size, align, goal,
-				     ARCH_LOW_ADDRESS_LIMIT);
-}
-- 
cgit v1.2.3


From 9a8dd708d547268c899f1cb443c49bd4d8c84eb3 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Tue, 30 Oct 2018 15:07:59 -0700
Subject: memblock: rename memblock_alloc{_nid,_try_nid} to
 memblock_phys_alloc*

Make it explicit that the caller gets a physical address rather than a
virtual one.

This will also allow using meblock_alloc prefix for memblock allocations
returning virtual address, which is done in the following patches.

The conversion is done using the following semantic patch:

@@
expression e1, e2, e3;
@@
(
- memblock_alloc(e1, e2)
+ memblock_phys_alloc(e1, e2)
|
- memblock_alloc_nid(e1, e2, e3)
+ memblock_phys_alloc_nid(e1, e2, e3)
|
- memblock_alloc_try_nid(e1, e2, e3)
+ memblock_phys_alloc_try_nid(e1, e2, e3)
)

Link: http://lkml.kernel.org/r/1536927045-23536-7-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Serge Semin <fancer.lancer@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/mmu.c                     |  2 +-
 arch/arm64/mm/mmu.c                   |  2 +-
 arch/arm64/mm/numa.c                  |  2 +-
 arch/c6x/mm/dma-coherent.c            |  4 ++--
 arch/nds32/mm/init.c                  |  8 ++++----
 arch/openrisc/mm/init.c               |  2 +-
 arch/openrisc/mm/ioremap.c            |  2 +-
 arch/powerpc/kernel/dt_cpu_ftrs.c     |  4 +---
 arch/powerpc/kernel/paca.c            |  2 +-
 arch/powerpc/kernel/prom.c            |  2 +-
 arch/powerpc/kernel/setup-common.c    |  3 +--
 arch/powerpc/kernel/setup_32.c        | 10 +++++-----
 arch/powerpc/mm/numa.c                |  2 +-
 arch/powerpc/mm/pgtable_32.c          |  2 +-
 arch/powerpc/mm/ppc_mmu_32.c          |  2 +-
 arch/powerpc/platforms/pasemi/iommu.c |  2 +-
 arch/powerpc/platforms/powernv/opal.c |  2 +-
 arch/powerpc/sysdev/dart_iommu.c      |  2 +-
 arch/s390/kernel/crash_dump.c         |  2 +-
 arch/s390/kernel/setup.c              |  3 ++-
 arch/s390/mm/vmem.c                   |  4 ++--
 arch/s390/numa/numa.c                 |  2 +-
 arch/sparc/kernel/mdesc.c             |  2 +-
 arch/sparc/kernel/prom_64.c           |  2 +-
 arch/sparc/mm/init_64.c               | 11 ++++++-----
 arch/unicore32/mm/mmu.c               |  2 +-
 arch/x86/mm/numa.c                    |  2 +-
 drivers/firmware/efi/memmap.c         |  2 +-
 include/linux/memblock.h              |  6 +++---
 mm/memblock.c                         |  8 ++++----
 30 files changed, 50 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index e46a6a446cdd..f5cc1ccfea3d 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -721,7 +721,7 @@ EXPORT_SYMBOL(phys_mem_access_prot);
 
 static void __init *early_alloc_aligned(unsigned long sz, unsigned long align)
 {
-	void *ptr = __va(memblock_alloc(sz, align));
+	void *ptr = __va(memblock_phys_alloc(sz, align));
 	memset(ptr, 0, sz);
 	return ptr;
 }
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 9498c15b847b..394b8d554def 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -101,7 +101,7 @@ static phys_addr_t __init early_pgtable_alloc(void)
 	phys_addr_t phys;
 	void *ptr;
 
-	phys = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+	phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
 
 	/*
 	 * The FIX_{PGD,PUD,PMD} slots may be in active use, but the FIX_PTE
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index d7b66fc5e1c5..c7fb34efd23e 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -237,7 +237,7 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
 	if (start_pfn >= end_pfn)
 		pr_info("Initmem setup node %d [<memory-less node>]\n", nid);
 
-	nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
 	nd = __va(nd_pa);
 
 	/* report and initialize */
diff --git a/arch/c6x/mm/dma-coherent.c b/arch/c6x/mm/dma-coherent.c
index d0a8e0c4b27e..01305c787201 100644
--- a/arch/c6x/mm/dma-coherent.c
+++ b/arch/c6x/mm/dma-coherent.c
@@ -135,8 +135,8 @@ void __init coherent_mem_init(phys_addr_t start, u32 size)
 	if (dma_size & (PAGE_SIZE - 1))
 		++dma_pages;
 
-	bitmap_phys = memblock_alloc(BITS_TO_LONGS(dma_pages) * sizeof(long),
-				     sizeof(long));
+	bitmap_phys = memblock_phys_alloc(BITS_TO_LONGS(dma_pages) * sizeof(long),
+					  sizeof(long));
 
 	dma_bitmap = phys_to_virt(bitmap_phys);
 	memset(dma_bitmap, 0, dma_pages * PAGE_SIZE);
diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c
index c713d2ad55dc..5af81b866aa5 100644
--- a/arch/nds32/mm/init.c
+++ b/arch/nds32/mm/init.c
@@ -81,7 +81,7 @@ static void __init map_ram(void)
 		}
 
 		/* Alloc one page for holding PTE's... */
-		pte = (pte_t *) __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
+		pte = (pte_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE));
 		memset(pte, 0, PAGE_SIZE);
 		set_pmd(pme, __pmd(__pa(pte) + _PAGE_KERNEL_TABLE));
 
@@ -114,7 +114,7 @@ static void __init fixedrange_init(void)
 	pgd = swapper_pg_dir + pgd_index(vaddr);
 	pud = pud_offset(pgd, vaddr);
 	pmd = pmd_offset(pud, vaddr);
-	fixmap_pmd_p = (pmd_t *) __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
+	fixmap_pmd_p = (pmd_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE));
 	memset(fixmap_pmd_p, 0, PAGE_SIZE);
 	set_pmd(pmd, __pmd(__pa(fixmap_pmd_p) + _PAGE_KERNEL_TABLE));
 
@@ -127,7 +127,7 @@ static void __init fixedrange_init(void)
 	pgd = swapper_pg_dir + pgd_index(vaddr);
 	pud = pud_offset(pgd, vaddr);
 	pmd = pmd_offset(pud, vaddr);
-	pte = (pte_t *) __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
+	pte = (pte_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE));
 	memset(pte, 0, PAGE_SIZE);
 	set_pmd(pmd, __pmd(__pa(pte) + _PAGE_KERNEL_TABLE));
 	pkmap_page_table = pte;
@@ -153,7 +153,7 @@ void __init paging_init(void)
 	fixedrange_init();
 
 	/* allocate space for empty_zero_page */
-	zero_page = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
+	zero_page = __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE));
 	memset(zero_page, 0, PAGE_SIZE);
 	zone_sizes_init();
 
diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index 6972d5d6f23f..b7670de26c11 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -106,7 +106,7 @@ static void __init map_ram(void)
 			}
 
 			/* Alloc one page for holding PTE's... */
-			pte = (pte_t *) __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
+			pte = (pte_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE));
 			set_pmd(pme, __pmd(_KERNPG_TABLE + __pa(pte)));
 
 			/* Fill the newly allocated page with PTE'S */
diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c
index 2175e4bfd9fc..c9697529b3f0 100644
--- a/arch/openrisc/mm/ioremap.c
+++ b/arch/openrisc/mm/ioremap.c
@@ -126,7 +126,7 @@ pte_t __ref *pte_alloc_one_kernel(struct mm_struct *mm,
 	if (likely(mem_init_done)) {
 		pte = (pte_t *) __get_free_page(GFP_KERNEL);
 	} else {
-		pte = (pte_t *) __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
+		pte = (pte_t *) __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE));
 	}
 
 	if (pte)
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index f432054234a4..8be3721d9302 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -1008,9 +1008,7 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
 	/* Count and allocate space for cpu features */
 	of_scan_flat_dt_subnodes(node, count_cpufeatures_subnodes,
 						&nr_dt_cpu_features);
-	dt_cpu_features = __va(
-		memblock_alloc(sizeof(struct dt_cpu_feature)*
-				nr_dt_cpu_features, PAGE_SIZE));
+	dt_cpu_features = __va(memblock_phys_alloc(sizeof(struct dt_cpu_feature) * nr_dt_cpu_features, PAGE_SIZE));
 
 	cpufeatures_setup_start(isa);
 
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 0ee3e6d50f28..f331a0054b3a 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -198,7 +198,7 @@ void __init allocate_paca_ptrs(void)
 	paca_nr_cpu_ids = nr_cpu_ids;
 
 	paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
-	paca_ptrs = __va(memblock_alloc(paca_ptrs_size, 0));
+	paca_ptrs = __va(memblock_phys_alloc(paca_ptrs_size, 0));
 	memset(paca_ptrs, 0x88, paca_ptrs_size);
 }
 
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index c4d7078e5295..fe758cedb93f 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -126,7 +126,7 @@ static void __init move_device_tree(void)
 	if ((memory_limit && (start + size) > PHYSICAL_START + memory_limit) ||
 			overlaps_crashkernel(start, size) ||
 			overlaps_initrd(start, size)) {
-		p = __va(memblock_alloc(size, PAGE_SIZE));
+		p = __va(memblock_phys_alloc(size, PAGE_SIZE));
 		memcpy(p, initial_boot_params, size);
 		initial_boot_params = p;
 		DBG("Moved device tree to 0x%p\n", p);
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 9ca9db707bcb..2b56d1f30387 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -460,8 +460,7 @@ void __init smp_setup_cpu_maps(void)
 
 	DBG("smp_setup_cpu_maps()\n");
 
-	cpu_to_phys_id = __va(memblock_alloc(nr_cpu_ids * sizeof(u32),
-							__alignof__(u32)));
+	cpu_to_phys_id = __va(memblock_phys_alloc(nr_cpu_ids * sizeof(u32), __alignof__(u32)));
 	memset(cpu_to_phys_id, 0, nr_cpu_ids * sizeof(u32));
 
 	for_each_node_by_type(dn, "cpu") {
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 8c507be12c3c..81909600013a 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -206,9 +206,9 @@ void __init irqstack_early_init(void)
 	 * as the memblock is limited to lowmem by default */
 	for_each_possible_cpu(i) {
 		softirq_ctx[i] = (struct thread_info *)
-			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+			__va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
 		hardirq_ctx[i] = (struct thread_info *)
-			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+			__va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
 	}
 }
 
@@ -227,12 +227,12 @@ void __init exc_lvl_early_init(void)
 #endif
 
 		critirq_ctx[hw_cpu] = (struct thread_info *)
-			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+			__va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
 #ifdef CONFIG_BOOKE
 		dbgirq_ctx[hw_cpu] = (struct thread_info *)
-			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+			__va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
 		mcheckirq_ctx[hw_cpu] = (struct thread_info *)
-			__va(memblock_alloc(THREAD_SIZE, THREAD_SIZE));
+			__va(memblock_phys_alloc(THREAD_SIZE, THREAD_SIZE));
 #endif
 	}
 }
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 693ae1c1acba..f04f15f9d232 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -788,7 +788,7 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
 	void *nd;
 	int tnid;
 
-	nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
+	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
 	nd = __va(nd_pa);
 
 	/* report and initialize */
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 5877f5aa8f5d..bda3c6f1bd32 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -50,7 +50,7 @@ __ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 	if (slab_is_available()) {
 		pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_ZERO);
 	} else {
-		pte = __va(memblock_alloc(PAGE_SIZE, PAGE_SIZE));
+		pte = __va(memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE));
 		if (pte)
 			clear_page(pte);
 	}
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c
index 38a793bfca37..f6f575bae3bc 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/ppc_mmu_32.c
@@ -224,7 +224,7 @@ void __init MMU_init_hw(void)
 	 * Find some memory for the hash table.
 	 */
 	if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
-	Hash = __va(memblock_alloc(Hash_size, Hash_size));
+	Hash = __va(memblock_phys_alloc(Hash_size, Hash_size));
 	memset(Hash, 0, Hash_size);
 	_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
 
diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c
index f06c83f321e6..f2971522fb4a 100644
--- a/arch/powerpc/platforms/pasemi/iommu.c
+++ b/arch/powerpc/platforms/pasemi/iommu.c
@@ -213,7 +213,7 @@ static int __init iob_init(struct device_node *dn)
 	pr_info("IOBMAP L2 allocated at: %p\n", iob_l2_base);
 
 	/* Allocate a spare page to map all invalid IOTLB pages. */
-	tmp = memblock_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE);
+	tmp = memblock_phys_alloc(IOBMAP_PAGE_SIZE, IOBMAP_PAGE_SIZE);
 	if (!tmp)
 		panic("IOBMAP: Cannot allocate spare page!");
 	/* Empty l1 is marked invalid */
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index a4641515956f..beed86f4224b 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -171,7 +171,7 @@ int __init early_init_dt_scan_recoverable_ranges(unsigned long node,
 	/*
 	 * Allocate a buffer to hold the MC recoverable ranges.
 	 */
-	mc_recoverable_range =__va(memblock_alloc(size, __alignof__(u64)));
+	mc_recoverable_range =__va(memblock_phys_alloc(size, __alignof__(u64)));
 	memset(mc_recoverable_range, 0, size);
 
 	for (i = 0; i < mc_recoverable_range_len; i++) {
diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c
index 5ca3e22d0512..a5b40d1460f1 100644
--- a/arch/powerpc/sysdev/dart_iommu.c
+++ b/arch/powerpc/sysdev/dart_iommu.c
@@ -261,7 +261,7 @@ static void allocate_dart(void)
 	 * that to work around what looks like a problem with the HT bridge
 	 * prefetching into invalid pages and corrupting data
 	 */
-	tmp = memblock_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);
+	tmp = memblock_phys_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);
 	dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &
 					 DARTMAP_RPNMASK);
 
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 376f6b6dfb3c..d17566a8c76f 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -61,7 +61,7 @@ struct save_area * __init save_area_alloc(bool is_boot_cpu)
 {
 	struct save_area *sa;
 
-	sa = (void *) memblock_alloc(sizeof(*sa), 8);
+	sa = (void *) memblock_phys_alloc(sizeof(*sa), 8);
 	if (is_boot_cpu)
 		list_add(&sa->list, &dump_save_areas);
 	else
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index a2e952b66248..204ccfa54bf3 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -967,7 +967,8 @@ static void __init setup_randomness(void)
 {
 	struct sysinfo_3_2_2 *vmms;
 
-	vmms = (struct sysinfo_3_2_2 *) memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+	vmms = (struct sysinfo_3_2_2 *) memblock_phys_alloc(PAGE_SIZE,
+							    PAGE_SIZE);
 	if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
 		add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
 	memblock_free((unsigned long) vmms, PAGE_SIZE);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index db55561c5981..04638b0b9ef1 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -36,7 +36,7 @@ static void __ref *vmem_alloc_pages(unsigned int order)
 
 	if (slab_is_available())
 		return (void *)__get_free_pages(GFP_KERNEL, order);
-	return (void *) memblock_alloc(size, size);
+	return (void *) memblock_phys_alloc(size, size);
 }
 
 void *vmem_crst_alloc(unsigned long val)
@@ -57,7 +57,7 @@ pte_t __ref *vmem_pte_alloc(void)
 	if (slab_is_available())
 		pte = (pte_t *) page_table_alloc(&init_mm);
 	else
-		pte = (pte_t *) memblock_alloc(size, size);
+		pte = (pte_t *) memblock_phys_alloc(size, size);
 	if (!pte)
 		return NULL;
 	memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
index 5bd374491f94..297f5d8b0890 100644
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/numa/numa.c
@@ -64,7 +64,7 @@ static __init pg_data_t *alloc_node_data(void)
 {
 	pg_data_t *res;
 
-	res = (pg_data_t *) memblock_alloc(sizeof(pg_data_t), 8);
+	res = (pg_data_t *) memblock_phys_alloc(sizeof(pg_data_t), 8);
 	memset(res, 0, sizeof(pg_data_t));
 	return res;
 }
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 39a2503fa3e1..59131e72ee78 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -170,7 +170,7 @@ static struct mdesc_handle * __init mdesc_memblock_alloc(unsigned int mdesc_size
 		       mdesc_size);
 	alloc_size = PAGE_ALIGN(handle_size);
 
-	paddr = memblock_alloc(alloc_size, PAGE_SIZE);
+	paddr = memblock_phys_alloc(alloc_size, PAGE_SIZE);
 
 	hp = NULL;
 	if (paddr) {
diff --git a/arch/sparc/kernel/prom_64.c b/arch/sparc/kernel/prom_64.c
index baeaeed64993..c37955d127fe 100644
--- a/arch/sparc/kernel/prom_64.c
+++ b/arch/sparc/kernel/prom_64.c
@@ -34,7 +34,7 @@
 
 void * __init prom_early_alloc(unsigned long size)
 {
-	unsigned long paddr = memblock_alloc(size, SMP_CACHE_BYTES);
+	unsigned long paddr = memblock_phys_alloc(size, SMP_CACHE_BYTES);
 	void *ret;
 
 	if (!paddr) {
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 39822f611c01..b338f0440f6b 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1092,7 +1092,8 @@ static void __init allocate_node_data(int nid)
 #ifdef CONFIG_NEED_MULTIPLE_NODES
 	unsigned long paddr;
 
-	paddr = memblock_alloc_try_nid(sizeof(struct pglist_data), SMP_CACHE_BYTES, nid);
+	paddr = memblock_phys_alloc_try_nid(sizeof(struct pglist_data),
+					    SMP_CACHE_BYTES, nid);
 	if (!paddr) {
 		prom_printf("Cannot allocate pglist_data for nid[%d]\n", nid);
 		prom_halt();
@@ -1266,8 +1267,8 @@ static int __init grab_mlgroups(struct mdesc_handle *md)
 	if (!count)
 		return -ENOENT;
 
-	paddr = memblock_alloc(count * sizeof(struct mdesc_mlgroup),
-			  SMP_CACHE_BYTES);
+	paddr = memblock_phys_alloc(count * sizeof(struct mdesc_mlgroup),
+				    SMP_CACHE_BYTES);
 	if (!paddr)
 		return -ENOMEM;
 
@@ -1307,8 +1308,8 @@ static int __init grab_mblocks(struct mdesc_handle *md)
 	if (!count)
 		return -ENOENT;
 
-	paddr = memblock_alloc(count * sizeof(struct mdesc_mblock),
-			  SMP_CACHE_BYTES);
+	paddr = memblock_phys_alloc(count * sizeof(struct mdesc_mblock),
+				    SMP_CACHE_BYTES);
 	if (!paddr)
 		return -ENOMEM;
 
diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c
index 0c94b7b4514d..18b355a20f0b 100644
--- a/arch/unicore32/mm/mmu.c
+++ b/arch/unicore32/mm/mmu.c
@@ -144,7 +144,7 @@ static void __init build_mem_type_table(void)
 
 static void __init *early_alloc(unsigned long sz)
 {
-	void *ptr = __va(memblock_alloc(sz, sz));
+	void *ptr = __va(memblock_phys_alloc(sz, sz));
 	memset(ptr, 0, sz);
 	return ptr;
 }
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index fa150855647c..16e37d712ffd 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -196,7 +196,7 @@ static void __init alloc_node_data(int nid)
 	 * Allocate node data.  Try node-local memory and then any node.
 	 * Never allocate in DMA zone.
 	 */
-	nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
+	nd_pa = memblock_phys_alloc_nid(nd_size, SMP_CACHE_BYTES, nid);
 	if (!nd_pa) {
 		nd_pa = __memblock_alloc_base(nd_size, SMP_CACHE_BYTES,
 					      MEMBLOCK_ALLOC_ACCESSIBLE);
diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c
index 5fc70520e04c..ef618bceb79a 100644
--- a/drivers/firmware/efi/memmap.c
+++ b/drivers/firmware/efi/memmap.c
@@ -15,7 +15,7 @@
 
 static phys_addr_t __init __efi_memmap_alloc_early(unsigned long size)
 {
-	return memblock_alloc(size, 0);
+	return memblock_phys_alloc(size, 0);
 }
 
 static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 224d27363cca..9d46a7204975 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -300,10 +300,10 @@ static inline int memblock_get_region_node(const struct memblock_region *r)
 }
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
-phys_addr_t memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid);
-phys_addr_t memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid);
+phys_addr_t memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid);
+phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid);
 
-phys_addr_t memblock_alloc(phys_addr_t size, phys_addr_t align);
+phys_addr_t memblock_phys_alloc(phys_addr_t size, phys_addr_t align);
 
 /*
  * Set the allocation direction to bottom-up or top-down.
diff --git a/mm/memblock.c b/mm/memblock.c
index d6897fbe021f..20358374e8a8 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1269,7 +1269,7 @@ phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
 	return memblock_alloc_range_nid(size, align, 0, max_addr, nid, flags);
 }
 
-phys_addr_t __init memblock_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
+phys_addr_t __init memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid)
 {
 	enum memblock_flags flags = choose_memblock_flags();
 	phys_addr_t ret;
@@ -1304,14 +1304,14 @@ phys_addr_t __init memblock_alloc_base(phys_addr_t size, phys_addr_t align, phys
 	return alloc;
 }
 
-phys_addr_t __init memblock_alloc(phys_addr_t size, phys_addr_t align)
+phys_addr_t __init memblock_phys_alloc(phys_addr_t size, phys_addr_t align)
 {
 	return memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE);
 }
 
-phys_addr_t __init memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
+phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid)
 {
-	phys_addr_t res = memblock_alloc_nid(size, align, nid);
+	phys_addr_t res = memblock_phys_alloc_nid(size, align, nid);
 
 	if (res)
 		return res;
-- 
cgit v1.2.3


From eb31d559f1e8390195372cd51cfb198da8bc84b9 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Tue, 30 Oct 2018 15:08:04 -0700
Subject: memblock: remove _virt from APIs returning virtual address

The conversion is done using

sed -i 's@memblock_virt_alloc@memblock_alloc@g' \
	$(git grep -l memblock_virt_alloc)

Link: http://lkml.kernel.org/r/1536927045-23536-8-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Serge Semin <fancer.lancer@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/kernel/setup.c                   |  4 ++--
 arch/arm/mach-omap2/omap_hwmod.c          |  6 ++---
 arch/arm64/mm/kasan_init.c                |  2 +-
 arch/arm64/mm/numa.c                      |  2 +-
 arch/mips/kernel/setup.c                  |  2 +-
 arch/powerpc/kernel/pci_32.c              |  2 +-
 arch/powerpc/lib/alloc.c                  |  2 +-
 arch/powerpc/mm/mmu_context_nohash.c      |  6 ++---
 arch/powerpc/platforms/powermac/nvram.c   |  2 +-
 arch/powerpc/platforms/powernv/pci-ioda.c |  6 ++---
 arch/powerpc/platforms/ps3/setup.c        |  2 +-
 arch/powerpc/sysdev/msi_bitmap.c          |  2 +-
 arch/s390/kernel/setup.c                  |  8 +++----
 arch/s390/kernel/smp.c                    |  2 +-
 arch/s390/kernel/topology.c               |  4 ++--
 arch/s390/numa/mode_emu.c                 |  2 +-
 arch/s390/numa/toptree.c                  |  2 +-
 arch/x86/mm/kasan_init_64.c               |  4 ++--
 arch/xtensa/mm/kasan_init.c               |  2 +-
 drivers/clk/ti/clk.c                      |  2 +-
 drivers/firmware/memmap.c                 |  2 +-
 drivers/of/fdt.c                          |  2 +-
 drivers/of/unittest.c                     |  2 +-
 include/linux/bootmem.h                   | 38 +++++++++++++++----------------
 init/main.c                               |  6 ++---
 kernel/dma/swiotlb.c                      |  6 ++---
 kernel/power/snapshot.c                   |  2 +-
 kernel/printk/printk.c                    |  4 ++--
 lib/cpumask.c                             |  2 +-
 mm/hugetlb.c                              |  2 +-
 mm/kasan/kasan_init.c                     |  2 +-
 mm/memblock.c                             | 26 ++++++++++-----------
 mm/page_alloc.c                           |  8 +++----
 mm/page_ext.c                             |  2 +-
 mm/percpu.c                               | 28 +++++++++++------------
 mm/sparse-vmemmap.c                       |  2 +-
 mm/sparse.c                               | 12 +++++-----
 37 files changed, 105 insertions(+), 105 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 4c249cb261f3..39e6090d23ac 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -857,7 +857,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
 		 */
 		boot_alias_start = phys_to_idmap(start);
 		if (arm_has_idmap_alias() && boot_alias_start != IDMAP_INVALID_ADDR) {
-			res = memblock_virt_alloc(sizeof(*res), 0);
+			res = memblock_alloc(sizeof(*res), 0);
 			res->name = "System RAM (boot alias)";
 			res->start = boot_alias_start;
 			res->end = phys_to_idmap(end);
@@ -865,7 +865,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
 			request_resource(&iomem_resource, res);
 		}
 
-		res = memblock_virt_alloc(sizeof(*res), 0);
+		res = memblock_alloc(sizeof(*res), 0);
 		res->name  = "System RAM";
 		res->start = start;
 		res->end = end;
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 56a1fe90d394..1f9b34a7eccd 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -726,7 +726,7 @@ static int __init _setup_clkctrl_provider(struct device_node *np)
 	u64 size;
 	int i;
 
-	provider = memblock_virt_alloc(sizeof(*provider), 0);
+	provider = memblock_alloc(sizeof(*provider), 0);
 	if (!provider)
 		return -ENOMEM;
 
@@ -736,12 +736,12 @@ static int __init _setup_clkctrl_provider(struct device_node *np)
 		of_property_count_elems_of_size(np, "reg", sizeof(u32)) / 2;
 
 	provider->addr =
-		memblock_virt_alloc(sizeof(void *) * provider->num_addrs, 0);
+		memblock_alloc(sizeof(void *) * provider->num_addrs, 0);
 	if (!provider->addr)
 		return -ENOMEM;
 
 	provider->size =
-		memblock_virt_alloc(sizeof(u32) * provider->num_addrs, 0);
+		memblock_alloc(sizeof(u32) * provider->num_addrs, 0);
 	if (!provider->size)
 		return -ENOMEM;
 
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index fccb1a6f8c6f..6a65a2912d36 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -38,7 +38,7 @@ static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
 
 static phys_addr_t __init kasan_alloc_zeroed_page(int node)
 {
-	void *p = memblock_virt_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
+	void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
 					      __pa(MAX_DMA_ADDRESS),
 					      MEMBLOCK_ALLOC_ACCESSIBLE, node);
 	return __pa(p);
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index c7fb34efd23e..0bff116c07a8 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -168,7 +168,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size,
 {
 	int nid = early_cpu_to_node(cpu);
 
-	return  memblock_virt_alloc_try_nid(size, align,
+	return  memblock_alloc_try_nid(size, align,
 			__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 }
 
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 01a5ff4c41ff..0c997645e8f0 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -859,7 +859,7 @@ static void __init arch_mem_init(char **cmdline_p)
 	 * Prevent memblock from allocating high memory.
 	 * This cannot be done before max_low_pfn is detected, so up
 	 * to this point is possible to only reserve physical memory
-	 * with memblock_reserve; memblock_virt_alloc* can be used
+	 * with memblock_reserve; memblock_alloc* can be used
 	 * only after this point
 	 */
 	memblock_set_current_limit(PFN_PHYS(max_low_pfn));
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 4da8ed576229..d39ec3a4550a 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -203,7 +203,7 @@ pci_create_OF_bus_map(void)
 	struct property* of_prop;
 	struct device_node *dn;
 
-	of_prop = memblock_virt_alloc(sizeof(struct property) + 256, 0);
+	of_prop = memblock_alloc(sizeof(struct property) + 256, 0);
 	dn = of_find_node_by_path("/");
 	if (dn) {
 		memset(of_prop, -1, sizeof(struct property) + 256);
diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c
index 06796dec01ea..bf87d6e13369 100644
--- a/arch/powerpc/lib/alloc.c
+++ b/arch/powerpc/lib/alloc.c
@@ -14,7 +14,7 @@ void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
 	if (slab_is_available())
 		p = kzalloc(size, mask);
 	else {
-		p = memblock_virt_alloc(size, 0);
+		p = memblock_alloc(size, 0);
 	}
 	return p;
 }
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 4d80239ef83c..954f1986af4d 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -461,10 +461,10 @@ void __init mmu_context_init(void)
 	/*
 	 * Allocate the maps used by context management
 	 */
-	context_map = memblock_virt_alloc(CTX_MAP_SIZE, 0);
-	context_mm = memblock_virt_alloc(sizeof(void *) * (LAST_CONTEXT + 1), 0);
+	context_map = memblock_alloc(CTX_MAP_SIZE, 0);
+	context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1), 0);
 #ifdef CONFIG_SMP
-	stale_map[boot_cpuid] = memblock_virt_alloc(CTX_MAP_SIZE, 0);
+	stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, 0);
 
 	cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
 				  "powerpc/mmu/ctx:prepare",
diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c
index 60b03a1703d1..f45b369177a4 100644
--- a/arch/powerpc/platforms/powermac/nvram.c
+++ b/arch/powerpc/platforms/powermac/nvram.c
@@ -513,7 +513,7 @@ static int __init core99_nvram_setup(struct device_node *dp, unsigned long addr)
 		printk(KERN_ERR "nvram: no address\n");
 		return -EINVAL;
 	}
-	nvram_image = memblock_virt_alloc(NVRAM_SIZE, 0);
+	nvram_image = memblock_alloc(NVRAM_SIZE, 0);
 	nvram_data = ioremap(addr, NVRAM_SIZE*2);
 	nvram_naddrs = 1; /* Make sure we get the correct case */
 
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index cde710297a4e..23a67b545b70 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -3770,7 +3770,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	phb_id = be64_to_cpup(prop64);
 	pr_debug("  PHB-ID  : 0x%016llx\n", phb_id);
 
-	phb = memblock_virt_alloc(sizeof(*phb), 0);
+	phb = memblock_alloc(sizeof(*phb), 0);
 
 	/* Allocate PCI controller */
 	phb->hose = hose = pcibios_alloc_controller(np);
@@ -3816,7 +3816,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	else
 		phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE;
 
-	phb->diag_data = memblock_virt_alloc(phb->diag_data_size, 0);
+	phb->diag_data = memblock_alloc(phb->diag_data_size, 0);
 
 	/* Parse 32-bit and IO ranges (if any) */
 	pci_process_bridge_OF_ranges(hose, np, !hose->global_number);
@@ -3875,7 +3875,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	}
 	pemap_off = size;
 	size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe);
-	aux = memblock_virt_alloc(size, 0);
+	aux = memblock_alloc(size, 0);
 	phb->ioda.pe_alloc = aux;
 	phb->ioda.m64_segmap = aux + m64map_off;
 	phb->ioda.m32_segmap = aux + m32map_off;
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 77a37520068d..12519857a33c 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -126,7 +126,7 @@ static void __init prealloc(struct ps3_prealloc *p)
 	if (!p->size)
 		return;
 
-	p->address = memblock_virt_alloc(p->size, p->align);
+	p->address = memblock_alloc(p->size, p->align);
 
 	printk(KERN_INFO "%s: %lu bytes at %p\n", p->name, p->size,
 	       p->address);
diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c
index e64a411d1a00..349a9ff6ca5b 100644
--- a/arch/powerpc/sysdev/msi_bitmap.c
+++ b/arch/powerpc/sysdev/msi_bitmap.c
@@ -128,7 +128,7 @@ int __ref msi_bitmap_alloc(struct msi_bitmap *bmp, unsigned int irq_count,
 	if (bmp->bitmap_from_slab)
 		bmp->bitmap = kzalloc(size, GFP_KERNEL);
 	else {
-		bmp->bitmap = memblock_virt_alloc(size, 0);
+		bmp->bitmap = memblock_alloc(size, 0);
 		/* the bitmap won't be freed from memblock allocator */
 		kmemleak_not_leak(bmp->bitmap);
 	}
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 204ccfa54bf3..781c1053a773 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -378,7 +378,7 @@ static void __init setup_lowcore(void)
 	 * Setup lowcore for boot cpu
 	 */
 	BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
-	lc = memblock_virt_alloc_low(sizeof(*lc), sizeof(*lc));
+	lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc));
 	lc->restart_psw.mask = PSW_KERNEL_BITS;
 	lc->restart_psw.addr = (unsigned long) restart_int_handler;
 	lc->external_new_psw.mask = PSW_KERNEL_BITS |
@@ -422,7 +422,7 @@ static void __init setup_lowcore(void)
 	 * Allocate the global restart stack which is the same for
 	 * all CPUs in cast *one* of them does a PSW restart.
 	 */
-	restart_stack = memblock_virt_alloc(THREAD_SIZE, THREAD_SIZE);
+	restart_stack = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
 	restart_stack += STACK_INIT_OFFSET;
 
 	/*
@@ -488,7 +488,7 @@ static void __init setup_resources(void)
 	bss_resource.end = (unsigned long) __bss_stop - 1;
 
 	for_each_memblock(memory, reg) {
-		res = memblock_virt_alloc(sizeof(*res), 8);
+		res = memblock_alloc(sizeof(*res), 8);
 		res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
 
 		res->name = "System RAM";
@@ -502,7 +502,7 @@ static void __init setup_resources(void)
 			    std_res->start > res->end)
 				continue;
 			if (std_res->end > res->end) {
-				sub_res = memblock_virt_alloc(sizeof(*sub_res), 8);
+				sub_res = memblock_alloc(sizeof(*sub_res), 8);
 				*sub_res = *std_res;
 				sub_res->end = res->end;
 				std_res->start = res->end + 1;
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 1b3188f57b58..44f9a7d6450b 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -761,7 +761,7 @@ void __init smp_detect_cpus(void)
 	u16 address;
 
 	/* Get CPU information */
-	info = memblock_virt_alloc(sizeof(*info), 8);
+	info = memblock_alloc(sizeof(*info), 8);
 	smp_get_core_info(info, 1);
 	/* Find boot CPU type */
 	if (sclp.has_core_type) {
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index e8184a15578a..799a91882a76 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -519,7 +519,7 @@ static void __init alloc_masks(struct sysinfo_15_1_x *info,
 		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
 	nr_masks = max(nr_masks, 1);
 	for (i = 0; i < nr_masks; i++) {
-		mask->next = memblock_virt_alloc(sizeof(*mask->next), 8);
+		mask->next = memblock_alloc(sizeof(*mask->next), 8);
 		mask = mask->next;
 	}
 }
@@ -537,7 +537,7 @@ void __init topology_init_early(void)
 	}
 	if (!MACHINE_HAS_TOPOLOGY)
 		goto out;
-	tl_info = memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE);
+	tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 	info = tl_info;
 	store_topology(info);
 	pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c
index 83b222c57609..5a381fc8e958 100644
--- a/arch/s390/numa/mode_emu.c
+++ b/arch/s390/numa/mode_emu.c
@@ -313,7 +313,7 @@ static void __ref create_core_to_node_map(void)
 {
 	int i;
 
-	emu_cores = memblock_virt_alloc(sizeof(*emu_cores), 8);
+	emu_cores = memblock_alloc(sizeof(*emu_cores), 8);
 	for (i = 0; i < ARRAY_SIZE(emu_cores->to_node_id); i++)
 		emu_cores->to_node_id[i] = NODE_ID_FREE;
 }
diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c
index 21d1e8a1546d..7f61cc3fd4d1 100644
--- a/arch/s390/numa/toptree.c
+++ b/arch/s390/numa/toptree.c
@@ -34,7 +34,7 @@ struct toptree __ref *toptree_alloc(int level, int id)
 	if (slab_is_available())
 		res = kzalloc(sizeof(*res), GFP_KERNEL);
 	else
-		res = memblock_virt_alloc(sizeof(*res), 8);
+		res = memblock_alloc(sizeof(*res), 8);
 	if (!res)
 		return res;
 
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index e3e77527f8df..77b857cb036f 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -28,10 +28,10 @@ static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
 static __init void *early_alloc(size_t size, int nid, bool panic)
 {
 	if (panic)
-		return memblock_virt_alloc_try_nid(size, size,
+		return memblock_alloc_try_nid(size, size,
 			__pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
 	else
-		return memblock_virt_alloc_try_nid_nopanic(size, size,
+		return memblock_alloc_try_nid_nopanic(size, size,
 			__pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
 }
 
diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c
index 6b532b6bd785..1a30a258ccd0 100644
--- a/arch/xtensa/mm/kasan_init.c
+++ b/arch/xtensa/mm/kasan_init.c
@@ -43,7 +43,7 @@ static void __init populate(void *start, void *end)
 	unsigned long vaddr = (unsigned long)start;
 	pgd_t *pgd = pgd_offset_k(vaddr);
 	pmd_t *pmd = pmd_offset(pgd, vaddr);
-	pte_t *pte = memblock_virt_alloc(n_pages * sizeof(pte_t), PAGE_SIZE);
+	pte_t *pte = memblock_alloc(n_pages * sizeof(pte_t), PAGE_SIZE);
 
 	pr_debug("%s: %p - %p\n", __func__, start, end);
 
diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c
index 7d22e1af2247..5c54d3734daf 100644
--- a/drivers/clk/ti/clk.c
+++ b/drivers/clk/ti/clk.c
@@ -342,7 +342,7 @@ void __init omap2_clk_legacy_provider_init(int index, void __iomem *mem)
 {
 	struct clk_iomap *io;
 
-	io = memblock_virt_alloc(sizeof(*io), 0);
+	io = memblock_alloc(sizeof(*io), 0);
 
 	io->mem = mem;
 
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index 5de3ed29282c..03cead6d5f97 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -333,7 +333,7 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type)
 {
 	struct firmware_map_entry *entry;
 
-	entry = memblock_virt_alloc(sizeof(struct firmware_map_entry), 0);
+	entry = memblock_alloc(sizeof(struct firmware_map_entry), 0);
 	if (WARN_ON(!entry))
 		return -ENOMEM;
 
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 4f915cea6f75..ffe62a7ae19b 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -1179,7 +1179,7 @@ int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base,
 
 static void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align)
 {
-	return memblock_virt_alloc(size, align);
+	return memblock_alloc(size, align);
 }
 
 bool __init early_init_dt_verify(void *params)
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index a3a6866765f2..01e23b85e798 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -2192,7 +2192,7 @@ static struct device_node *overlay_base_root;
 
 static void * __init dt_alloc_memory(u64 size, u64 align)
 {
-	return memblock_virt_alloc(size, align);
+	return memblock_alloc(size, align);
 }
 
 /*
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index b74bafd110b9..7d91f0f5ee44 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -95,78 +95,78 @@ extern void *__alloc_bootmem_low(unsigned long size,
 #define BOOTMEM_ALLOC_ANYWHERE		(~(phys_addr_t)0)
 
 /* FIXME: Move to memblock.h at a point where we remove nobootmem.c */
-void *memblock_virt_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align,
+void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align,
 				      phys_addr_t min_addr,
 				      phys_addr_t max_addr, int nid);
-void *memblock_virt_alloc_try_nid_nopanic(phys_addr_t size,
+void *memblock_alloc_try_nid_nopanic(phys_addr_t size,
 		phys_addr_t align, phys_addr_t min_addr,
 		phys_addr_t max_addr, int nid);
-void *memblock_virt_alloc_try_nid(phys_addr_t size, phys_addr_t align,
+void *memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align,
 		phys_addr_t min_addr, phys_addr_t max_addr, int nid);
 void __memblock_free_early(phys_addr_t base, phys_addr_t size);
 void __memblock_free_late(phys_addr_t base, phys_addr_t size);
 
-static inline void * __init memblock_virt_alloc(
+static inline void * __init memblock_alloc(
 					phys_addr_t size,  phys_addr_t align)
 {
-	return memblock_virt_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT,
+	return memblock_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT,
 					    BOOTMEM_ALLOC_ACCESSIBLE,
 					    NUMA_NO_NODE);
 }
 
-static inline void * __init memblock_virt_alloc_raw(
+static inline void * __init memblock_alloc_raw(
 					phys_addr_t size,  phys_addr_t align)
 {
-	return memblock_virt_alloc_try_nid_raw(size, align, BOOTMEM_LOW_LIMIT,
+	return memblock_alloc_try_nid_raw(size, align, BOOTMEM_LOW_LIMIT,
 					    BOOTMEM_ALLOC_ACCESSIBLE,
 					    NUMA_NO_NODE);
 }
 
-static inline void * __init memblock_virt_alloc_nopanic(
+static inline void * __init memblock_alloc_nopanic(
 					phys_addr_t size, phys_addr_t align)
 {
-	return memblock_virt_alloc_try_nid_nopanic(size, align,
+	return memblock_alloc_try_nid_nopanic(size, align,
 						    BOOTMEM_LOW_LIMIT,
 						    BOOTMEM_ALLOC_ACCESSIBLE,
 						    NUMA_NO_NODE);
 }
 
-static inline void * __init memblock_virt_alloc_low(
+static inline void * __init memblock_alloc_low(
 					phys_addr_t size, phys_addr_t align)
 {
-	return memblock_virt_alloc_try_nid(size, align,
+	return memblock_alloc_try_nid(size, align,
 						   BOOTMEM_LOW_LIMIT,
 						   ARCH_LOW_ADDRESS_LIMIT,
 						   NUMA_NO_NODE);
 }
-static inline void * __init memblock_virt_alloc_low_nopanic(
+static inline void * __init memblock_alloc_low_nopanic(
 					phys_addr_t size, phys_addr_t align)
 {
-	return memblock_virt_alloc_try_nid_nopanic(size, align,
+	return memblock_alloc_try_nid_nopanic(size, align,
 						   BOOTMEM_LOW_LIMIT,
 						   ARCH_LOW_ADDRESS_LIMIT,
 						   NUMA_NO_NODE);
 }
 
-static inline void * __init memblock_virt_alloc_from_nopanic(
+static inline void * __init memblock_alloc_from_nopanic(
 		phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
 {
-	return memblock_virt_alloc_try_nid_nopanic(size, align, min_addr,
+	return memblock_alloc_try_nid_nopanic(size, align, min_addr,
 						    BOOTMEM_ALLOC_ACCESSIBLE,
 						    NUMA_NO_NODE);
 }
 
-static inline void * __init memblock_virt_alloc_node(
+static inline void * __init memblock_alloc_node(
 						phys_addr_t size, int nid)
 {
-	return memblock_virt_alloc_try_nid(size, 0, BOOTMEM_LOW_LIMIT,
+	return memblock_alloc_try_nid(size, 0, BOOTMEM_LOW_LIMIT,
 					    BOOTMEM_ALLOC_ACCESSIBLE, nid);
 }
 
-static inline void * __init memblock_virt_alloc_node_nopanic(
+static inline void * __init memblock_alloc_node_nopanic(
 						phys_addr_t size, int nid)
 {
-	return memblock_virt_alloc_try_nid_nopanic(size, 0, BOOTMEM_LOW_LIMIT,
+	return memblock_alloc_try_nid_nopanic(size, 0, BOOTMEM_LOW_LIMIT,
 						    BOOTMEM_ALLOC_ACCESSIBLE,
 						    nid);
 }
diff --git a/init/main.c b/init/main.c
index 1c3f90264280..86b59cf3bec7 100644
--- a/init/main.c
+++ b/init/main.c
@@ -375,10 +375,10 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { }
 static void __init setup_command_line(char *command_line)
 {
 	saved_command_line =
-		memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
+		memblock_alloc(strlen(boot_command_line) + 1, 0);
 	initcall_command_line =
-		memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
-	static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0);
+		memblock_alloc(strlen(boot_command_line) + 1, 0);
+	static_command_line = memblock_alloc(strlen(command_line) + 1, 0);
 	strcpy(saved_command_line, boot_command_line);
 	strcpy(static_command_line, command_line);
 }
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index ebecaf255ea2..801da67e957b 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -204,10 +204,10 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
 	 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
 	 * between io_tlb_start and io_tlb_end.
 	 */
-	io_tlb_list = memblock_virt_alloc(
+	io_tlb_list = memblock_alloc(
 				PAGE_ALIGN(io_tlb_nslabs * sizeof(int)),
 				PAGE_SIZE);
-	io_tlb_orig_addr = memblock_virt_alloc(
+	io_tlb_orig_addr = memblock_alloc(
 				PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)),
 				PAGE_SIZE);
 	for (i = 0; i < io_tlb_nslabs; i++) {
@@ -242,7 +242,7 @@ swiotlb_init(int verbose)
 	bytes = io_tlb_nslabs << IO_TLB_SHIFT;
 
 	/* Get IO TLB memory from the low pages */
-	vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
+	vstart = memblock_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
 	if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
 		return;
 
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 3d37c279c090..34116a6097be 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -963,7 +963,7 @@ void __init __register_nosave_region(unsigned long start_pfn,
 		BUG_ON(!region);
 	} else {
 		/* This allocation cannot fail */
-		region = memblock_virt_alloc(sizeof(struct nosave_region), 0);
+		region = memblock_alloc(sizeof(struct nosave_region), 0);
 	}
 	region->start_pfn = start_pfn;
 	region->end_pfn = end_pfn;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index b77150ad1965..429e4a3833ca 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1111,9 +1111,9 @@ void __init setup_log_buf(int early)
 
 	if (early) {
 		new_log_buf =
-			memblock_virt_alloc(new_log_buf_len, LOG_ALIGN);
+			memblock_alloc(new_log_buf_len, LOG_ALIGN);
 	} else {
-		new_log_buf = memblock_virt_alloc_nopanic(new_log_buf_len,
+		new_log_buf = memblock_alloc_nopanic(new_log_buf_len,
 							  LOG_ALIGN);
 	}
 
diff --git a/lib/cpumask.c b/lib/cpumask.c
index beca6244671a..1405cb22e6bc 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -163,7 +163,7 @@ EXPORT_SYMBOL(zalloc_cpumask_var);
  */
 void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask)
 {
-	*mask = memblock_virt_alloc(cpumask_size(), 0);
+	*mask = memblock_alloc(cpumask_size(), 0);
 }
 
 /**
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7b5c0ad9a6bd..51e9f17dbd5c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2100,7 +2100,7 @@ int __alloc_bootmem_huge_page(struct hstate *h)
 	for_each_node_mask_to_alloc(h, nr_nodes, node, &node_states[N_MEMORY]) {
 		void *addr;
 
-		addr = memblock_virt_alloc_try_nid_raw(
+		addr = memblock_alloc_try_nid_raw(
 				huge_page_size(h), huge_page_size(h),
 				0, BOOTMEM_ALLOC_ACCESSIBLE, node);
 		if (addr) {
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 7a2a2f13f86f..24d734bdff6b 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -83,7 +83,7 @@ static inline bool kasan_zero_page_entry(pte_t pte)
 
 static __init void *early_alloc(size_t size, int node)
 {
-	return memblock_virt_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS),
+	return memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS),
 					BOOTMEM_ALLOC_ACCESSIBLE, node);
 }
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 20358374e8a8..58340de3ebc6 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1319,7 +1319,7 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali
 }
 
 /**
- * memblock_virt_alloc_internal - allocate boot memory block
+ * memblock_alloc_internal - allocate boot memory block
  * @size: size of memory block to be allocated in bytes
  * @align: alignment of the region and block's size
  * @min_addr: the lower bound of the memory region to allocate (phys address)
@@ -1345,7 +1345,7 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali
  * Return:
  * Virtual address of allocated memory block on success, NULL on failure.
  */
-static void * __init memblock_virt_alloc_internal(
+static void * __init memblock_alloc_internal(
 				phys_addr_t size, phys_addr_t align,
 				phys_addr_t min_addr, phys_addr_t max_addr,
 				int nid)
@@ -1412,7 +1412,7 @@ done:
 }
 
 /**
- * memblock_virt_alloc_try_nid_raw - allocate boot memory block without zeroing
+ * memblock_alloc_try_nid_raw - allocate boot memory block without zeroing
  * memory and without panicking
  * @size: size of memory block to be allocated in bytes
  * @align: alignment of the region and block's size
@@ -1430,7 +1430,7 @@ done:
  * Return:
  * Virtual address of allocated memory block on success, NULL on failure.
  */
-void * __init memblock_virt_alloc_try_nid_raw(
+void * __init memblock_alloc_try_nid_raw(
 			phys_addr_t size, phys_addr_t align,
 			phys_addr_t min_addr, phys_addr_t max_addr,
 			int nid)
@@ -1441,7 +1441,7 @@ void * __init memblock_virt_alloc_try_nid_raw(
 		     __func__, (u64)size, (u64)align, nid, &min_addr,
 		     &max_addr, (void *)_RET_IP_);
 
-	ptr = memblock_virt_alloc_internal(size, align,
+	ptr = memblock_alloc_internal(size, align,
 					   min_addr, max_addr, nid);
 	if (ptr && size > 0)
 		page_init_poison(ptr, size);
@@ -1450,7 +1450,7 @@ void * __init memblock_virt_alloc_try_nid_raw(
 }
 
 /**
- * memblock_virt_alloc_try_nid_nopanic - allocate boot memory block
+ * memblock_alloc_try_nid_nopanic - allocate boot memory block
  * @size: size of memory block to be allocated in bytes
  * @align: alignment of the region and block's size
  * @min_addr: the lower bound of the memory region from where the allocation
@@ -1466,7 +1466,7 @@ void * __init memblock_virt_alloc_try_nid_raw(
  * Return:
  * Virtual address of allocated memory block on success, NULL on failure.
  */
-void * __init memblock_virt_alloc_try_nid_nopanic(
+void * __init memblock_alloc_try_nid_nopanic(
 				phys_addr_t size, phys_addr_t align,
 				phys_addr_t min_addr, phys_addr_t max_addr,
 				int nid)
@@ -1477,7 +1477,7 @@ void * __init memblock_virt_alloc_try_nid_nopanic(
 		     __func__, (u64)size, (u64)align, nid, &min_addr,
 		     &max_addr, (void *)_RET_IP_);
 
-	ptr = memblock_virt_alloc_internal(size, align,
+	ptr = memblock_alloc_internal(size, align,
 					   min_addr, max_addr, nid);
 	if (ptr)
 		memset(ptr, 0, size);
@@ -1485,7 +1485,7 @@ void * __init memblock_virt_alloc_try_nid_nopanic(
 }
 
 /**
- * memblock_virt_alloc_try_nid - allocate boot memory block with panicking
+ * memblock_alloc_try_nid - allocate boot memory block with panicking
  * @size: size of memory block to be allocated in bytes
  * @align: alignment of the region and block's size
  * @min_addr: the lower bound of the memory region from where the allocation
@@ -1495,14 +1495,14 @@ void * __init memblock_virt_alloc_try_nid_nopanic(
  *	      allocate only from memory limited by memblock.current_limit value
  * @nid: nid of the free area to find, %NUMA_NO_NODE for any node
  *
- * Public panicking version of memblock_virt_alloc_try_nid_nopanic()
+ * Public panicking version of memblock_alloc_try_nid_nopanic()
  * which provides debug information (including caller info), if enabled,
  * and panics if the request can not be satisfied.
  *
  * Return:
  * Virtual address of allocated memory block on success, NULL on failure.
  */
-void * __init memblock_virt_alloc_try_nid(
+void * __init memblock_alloc_try_nid(
 			phys_addr_t size, phys_addr_t align,
 			phys_addr_t min_addr, phys_addr_t max_addr,
 			int nid)
@@ -1512,7 +1512,7 @@ void * __init memblock_virt_alloc_try_nid(
 	memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pF\n",
 		     __func__, (u64)size, (u64)align, nid, &min_addr,
 		     &max_addr, (void *)_RET_IP_);
-	ptr = memblock_virt_alloc_internal(size, align,
+	ptr = memblock_alloc_internal(size, align,
 					   min_addr, max_addr, nid);
 	if (ptr) {
 		memset(ptr, 0, size);
@@ -1529,7 +1529,7 @@ void * __init memblock_virt_alloc_try_nid(
  * @base: phys starting address of the  boot memory block
  * @size: size of the boot memory block in bytes
  *
- * Free boot memory block previously allocated by memblock_virt_alloc_xx() API.
+ * Free boot memory block previously allocated by memblock_alloc_xx() API.
  * The freeing memory will not be released to the buddy allocator.
  */
 void __init __memblock_free_early(phys_addr_t base, phys_addr_t size)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 59d171f84445..8ca6954fdcdc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -6209,7 +6209,7 @@ static void __ref setup_usemap(struct pglist_data *pgdat,
 	zone->pageblock_flags = NULL;
 	if (usemapsize)
 		zone->pageblock_flags =
-			memblock_virt_alloc_node_nopanic(usemapsize,
+			memblock_alloc_node_nopanic(usemapsize,
 							 pgdat->node_id);
 }
 #else
@@ -6439,7 +6439,7 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
 		end = pgdat_end_pfn(pgdat);
 		end = ALIGN(end, MAX_ORDER_NR_PAGES);
 		size =  (end - start) * sizeof(struct page);
-		map = memblock_virt_alloc_node_nopanic(size, pgdat->node_id);
+		map = memblock_alloc_node_nopanic(size, pgdat->node_id);
 		pgdat->node_mem_map = map + offset;
 	}
 	pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n",
@@ -7711,9 +7711,9 @@ void *__init alloc_large_system_hash(const char *tablename,
 		size = bucketsize << log2qty;
 		if (flags & HASH_EARLY) {
 			if (flags & HASH_ZERO)
-				table = memblock_virt_alloc_nopanic(size, 0);
+				table = memblock_alloc_nopanic(size, 0);
 			else
-				table = memblock_virt_alloc_raw(size, 0);
+				table = memblock_alloc_raw(size, 0);
 		} else if (hashdist) {
 			table = __vmalloc(size, gfp_flags, PAGE_KERNEL);
 		} else {
diff --git a/mm/page_ext.c b/mm/page_ext.c
index a9826da84ccb..e77c0f031dd0 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -161,7 +161,7 @@ static int __init alloc_node_page_ext(int nid)
 
 	table_size = get_entry_size() * nr_pages;
 
-	base = memblock_virt_alloc_try_nid_nopanic(
+	base = memblock_alloc_try_nid_nopanic(
 			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
 			BOOTMEM_ALLOC_ACCESSIBLE, nid);
 	if (!base)
diff --git a/mm/percpu.c b/mm/percpu.c
index 4b90682623e9..3050c1d37d37 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1101,7 +1101,7 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
 	region_size = ALIGN(start_offset + map_size, lcm_align);
 
 	/* allocate chunk */
-	chunk = memblock_virt_alloc(sizeof(struct pcpu_chunk) +
+	chunk = memblock_alloc(sizeof(struct pcpu_chunk) +
 				    BITS_TO_LONGS(region_size >> PAGE_SHIFT),
 				    0);
 
@@ -1114,11 +1114,11 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
 	chunk->nr_pages = region_size >> PAGE_SHIFT;
 	region_bits = pcpu_chunk_map_bits(chunk);
 
-	chunk->alloc_map = memblock_virt_alloc(BITS_TO_LONGS(region_bits) *
+	chunk->alloc_map = memblock_alloc(BITS_TO_LONGS(region_bits) *
 					       sizeof(chunk->alloc_map[0]), 0);
-	chunk->bound_map = memblock_virt_alloc(BITS_TO_LONGS(region_bits + 1) *
+	chunk->bound_map = memblock_alloc(BITS_TO_LONGS(region_bits + 1) *
 					       sizeof(chunk->bound_map[0]), 0);
-	chunk->md_blocks = memblock_virt_alloc(pcpu_chunk_nr_blocks(chunk) *
+	chunk->md_blocks = memblock_alloc(pcpu_chunk_nr_blocks(chunk) *
 					       sizeof(chunk->md_blocks[0]), 0);
 	pcpu_init_md_blocks(chunk);
 
@@ -1888,7 +1888,7 @@ struct pcpu_alloc_info * __init pcpu_alloc_alloc_info(int nr_groups,
 			  __alignof__(ai->groups[0].cpu_map[0]));
 	ai_size = base_size + nr_units * sizeof(ai->groups[0].cpu_map[0]);
 
-	ptr = memblock_virt_alloc_nopanic(PFN_ALIGN(ai_size), PAGE_SIZE);
+	ptr = memblock_alloc_nopanic(PFN_ALIGN(ai_size), PAGE_SIZE);
 	if (!ptr)
 		return NULL;
 	ai = ptr;
@@ -2075,12 +2075,12 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
 
 	/* process group information and build config tables accordingly */
-	group_offsets = memblock_virt_alloc(ai->nr_groups *
+	group_offsets = memblock_alloc(ai->nr_groups *
 					     sizeof(group_offsets[0]), 0);
-	group_sizes = memblock_virt_alloc(ai->nr_groups *
+	group_sizes = memblock_alloc(ai->nr_groups *
 					   sizeof(group_sizes[0]), 0);
-	unit_map = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_map[0]), 0);
-	unit_off = memblock_virt_alloc(nr_cpu_ids * sizeof(unit_off[0]), 0);
+	unit_map = memblock_alloc(nr_cpu_ids * sizeof(unit_map[0]), 0);
+	unit_off = memblock_alloc(nr_cpu_ids * sizeof(unit_off[0]), 0);
 
 	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
 		unit_map[cpu] = UINT_MAX;
@@ -2144,7 +2144,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	 * empty chunks.
 	 */
 	pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2;
-	pcpu_slot = memblock_virt_alloc(
+	pcpu_slot = memblock_alloc(
 			pcpu_nr_slots * sizeof(pcpu_slot[0]), 0);
 	for (i = 0; i < pcpu_nr_slots; i++)
 		INIT_LIST_HEAD(&pcpu_slot[i]);
@@ -2458,7 +2458,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 	size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
 	areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *));
 
-	areas = memblock_virt_alloc_nopanic(areas_size, 0);
+	areas = memblock_alloc_nopanic(areas_size, 0);
 	if (!areas) {
 		rc = -ENOMEM;
 		goto out_free;
@@ -2599,7 +2599,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 	/* unaligned allocations can't be freed, round up to page size */
 	pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() *
 			       sizeof(pages[0]));
-	pages = memblock_virt_alloc(pages_size, 0);
+	pages = memblock_alloc(pages_size, 0);
 
 	/* allocate pages */
 	j = 0;
@@ -2688,7 +2688,7 @@ EXPORT_SYMBOL(__per_cpu_offset);
 static void * __init pcpu_dfl_fc_alloc(unsigned int cpu, size_t size,
 				       size_t align)
 {
-	return  memblock_virt_alloc_from_nopanic(
+	return  memblock_alloc_from_nopanic(
 			size, align, __pa(MAX_DMA_ADDRESS));
 }
 
@@ -2737,7 +2737,7 @@ void __init setup_per_cpu_areas(void)
 	void *fc;
 
 	ai = pcpu_alloc_alloc_info(1, 1);
-	fc = memblock_virt_alloc_from_nopanic(unit_size,
+	fc = memblock_alloc_from_nopanic(unit_size,
 					      PAGE_SIZE,
 					      __pa(MAX_DMA_ADDRESS));
 	if (!ai || !fc)
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 8301293331a2..91c2c3d25827 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -42,7 +42,7 @@ static void * __ref __earlyonly_bootmem_alloc(int node,
 				unsigned long align,
 				unsigned long goal)
 {
-	return memblock_virt_alloc_try_nid_raw(size, align, goal,
+	return memblock_alloc_try_nid_raw(size, align, goal,
 					       BOOTMEM_ALLOC_ACCESSIBLE, node);
 }
 
diff --git a/mm/sparse.c b/mm/sparse.c
index 67ad061f7fb8..cb900dda7fd2 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -68,7 +68,7 @@ static noinline struct mem_section __ref *sparse_index_alloc(int nid)
 	if (slab_is_available())
 		section = kzalloc_node(array_size, GFP_KERNEL, nid);
 	else
-		section = memblock_virt_alloc_node(array_size, nid);
+		section = memblock_alloc_node(array_size, nid);
 
 	return section;
 }
@@ -216,7 +216,7 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
 
 		size = sizeof(struct mem_section*) * NR_SECTION_ROOTS;
 		align = 1 << (INTERNODE_CACHE_SHIFT);
-		mem_section = memblock_virt_alloc(size, align);
+		mem_section = memblock_alloc(size, align);
 	}
 #endif
 
@@ -306,7 +306,7 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 	limit = goal + (1UL << PA_SECTION_SHIFT);
 	nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
 again:
-	p = memblock_virt_alloc_try_nid_nopanic(size,
+	p = memblock_alloc_try_nid_nopanic(size,
 						SMP_CACHE_BYTES, goal, limit,
 						nid);
 	if (!p && limit) {
@@ -362,7 +362,7 @@ static unsigned long * __init
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 					 unsigned long size)
 {
-	return memblock_virt_alloc_node_nopanic(size, pgdat->node_id);
+	return memblock_alloc_node_nopanic(size, pgdat->node_id);
 }
 
 static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
@@ -391,7 +391,7 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid,
 	if (map)
 		return map;
 
-	map = memblock_virt_alloc_try_nid(size,
+	map = memblock_alloc_try_nid(size,
 					  PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
 					  BOOTMEM_ALLOC_ACCESSIBLE, nid);
 	return map;
@@ -405,7 +405,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid)
 {
 	WARN_ON(sparsemap_buf);	/* forgot to call sparse_buffer_fini()? */
 	sparsemap_buf =
-		memblock_virt_alloc_try_nid_raw(size, PAGE_SIZE,
+		memblock_alloc_try_nid_raw(size, PAGE_SIZE,
 						__pa(MAX_DMA_ADDRESS),
 						BOOTMEM_ALLOC_ACCESSIBLE, nid);
 	sparsemap_buf_end = sparsemap_buf + size;
-- 
cgit v1.2.3


From 3913c8f9f96bb75a062ad16ea10a1cdad48bb716 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Tue, 30 Oct 2018 15:08:36 -0700
Subject: memblock: add align parameter to memblock_alloc_node()

With the align parameter memblock_alloc_node() can be used as drop in
replacement for alloc_bootmem_pages_node() and __alloc_bootmem_node(),
which is done in the following patches.

Link: http://lkml.kernel.org/r/1536927045-23536-15-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Serge Semin <fancer.lancer@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h | 4 ++--
 mm/sparse.c             | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 7d91f0f5ee44..3896af205303 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -157,9 +157,9 @@ static inline void * __init memblock_alloc_from_nopanic(
 }
 
 static inline void * __init memblock_alloc_node(
-						phys_addr_t size, int nid)
+		phys_addr_t size, phys_addr_t align, int nid)
 {
-	return memblock_alloc_try_nid(size, 0, BOOTMEM_LOW_LIMIT,
+	return memblock_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT,
 					    BOOTMEM_ALLOC_ACCESSIBLE, nid);
 }
 
diff --git a/mm/sparse.c b/mm/sparse.c
index cb900dda7fd2..d1296610562b 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -68,7 +68,7 @@ static noinline struct mem_section __ref *sparse_index_alloc(int nid)
 	if (slab_is_available())
 		section = kzalloc_node(array_size, GFP_KERNEL, nid);
 	else
-		section = memblock_alloc_node(array_size, nid);
+		section = memblock_alloc_node(array_size, 0, nid);
 
 	return section;
 }
-- 
cgit v1.2.3


From 4fc4a09e4cc1126c4e8a86c293425cffa2a2eb3c Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Tue, 30 Oct 2018 15:09:03 -0700
Subject: memblock: replace __alloc_bootmem with memblock_alloc_from

The functions are equivalent, just the later does not require nobootmem
translation layer.

The conversion is done using the following semantic patch:

@@
expression size, align, goal;
@@
- __alloc_bootmem(size, align, goal)
+ memblock_alloc_from(size, align, goal)

Link: http://lkml.kernel.org/r/1536927045-23536-21-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Serge Semin <fancer.lancer@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/core_cia.c  |  2 +-
 arch/alpha/kernel/pci_iommu.c |  4 ++--
 arch/alpha/kernel/setup.c     |  2 +-
 arch/ia64/kernel/mca.c        |  4 ++--
 arch/ia64/mm/contig.c         |  5 +++--
 arch/mips/kernel/traps.c      |  2 +-
 arch/sparc/kernel/prom_32.c   |  2 +-
 arch/sparc/kernel/smp_64.c    | 10 +++++-----
 arch/sparc/mm/init_32.c       |  2 +-
 arch/sparc/mm/init_64.c       |  9 ++++++---
 arch/sparc/mm/srmmu.c         | 10 +++++-----
 include/linux/bootmem.h       |  8 ++++++++
 12 files changed, 36 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/core_cia.c b/arch/alpha/kernel/core_cia.c
index 4b38386f6e62..026ee955fd10 100644
--- a/arch/alpha/kernel/core_cia.c
+++ b/arch/alpha/kernel/core_cia.c
@@ -331,7 +331,7 @@ cia_prepare_tbia_workaround(int window)
 	long i;
 
 	/* Use minimal 1K map. */
-	ppte = __alloc_bootmem(CIA_BROKEN_TBIA_SIZE, 32768, 0);
+	ppte = memblock_alloc_from(CIA_BROKEN_TBIA_SIZE, 32768, 0);
 	pte = (virt_to_phys(ppte) >> (PAGE_SHIFT - 1)) | 1;
 
 	for (i = 0; i < CIA_BROKEN_TBIA_SIZE / sizeof(unsigned long); ++i)
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index b52d76fd534e..0c05493e6495 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -87,13 +87,13 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base,
 		printk("%s: couldn't allocate arena ptes from node %d\n"
 		       "    falling back to system-wide allocation\n",
 		       __func__, nid);
-		arena->ptes = __alloc_bootmem(mem_size, align, 0);
+		arena->ptes = memblock_alloc_from(mem_size, align, 0);
 	}
 
 #else /* CONFIG_DISCONTIGMEM */
 
 	arena = alloc_bootmem(sizeof(*arena));
-	arena->ptes = __alloc_bootmem(mem_size, align, 0);
+	arena->ptes = memblock_alloc_from(mem_size, align, 0);
 
 #endif /* CONFIG_DISCONTIGMEM */
 
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 4f0d94471bc9..64c06a0adf3d 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -294,7 +294,7 @@ move_initrd(unsigned long mem_limit)
 	unsigned long size;
 
 	size = initrd_end - initrd_start;
-	start = __alloc_bootmem(PAGE_ALIGN(size), PAGE_SIZE, 0);
+	start = memblock_alloc_from(PAGE_ALIGN(size), PAGE_SIZE, 0);
 	if (!start || __pa(start) + size > mem_limit) {
 		initrd_start = initrd_end = 0;
 		return NULL;
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 6115464d5f03..5586926dd85d 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1835,8 +1835,8 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
 /* Caller prevents this from being called after init */
 static void * __ref mca_bootmem(void)
 {
-	return __alloc_bootmem(sizeof(struct ia64_mca_cpu),
-	                    KERNEL_STACK_SIZE, 0);
+	return memblock_alloc_from(sizeof(struct ia64_mca_cpu),
+				   KERNEL_STACK_SIZE, 0);
 }
 
 /* Do per-CPU MCA-related initialization.  */
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index e2e40bbd391c..9e5c23a6b8b4 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -85,8 +85,9 @@ skip:
 static inline void
 alloc_per_cpu_data(void)
 {
-	cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * num_possible_cpus(),
-				   PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
+	cpu_data = memblock_alloc_from(PERCPU_PAGE_SIZE * num_possible_cpus(),
+				       PERCPU_PAGE_SIZE,
+				       __pa(MAX_DMA_ADDRESS));
 }
 
 /**
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 5feef28deac8..623dc18f7f2f 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2263,7 +2263,7 @@ void __init trap_init(void)
 
 		memblock_set_bottom_up(true);
 		ebase = (unsigned long)
-			__alloc_bootmem(size, 1 << fls(size), 0);
+			memblock_alloc_from(size, 1 << fls(size), 0);
 		memblock_set_bottom_up(false);
 
 		/*
diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
index b51cbb9e87dc..4389944735c6 100644
--- a/arch/sparc/kernel/prom_32.c
+++ b/arch/sparc/kernel/prom_32.c
@@ -32,7 +32,7 @@ void * __init prom_early_alloc(unsigned long size)
 {
 	void *ret;
 
-	ret = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL);
+	ret = memblock_alloc_from(size, SMP_CACHE_BYTES, 0UL);
 	if (ret != NULL)
 		memset(ret, 0, size);
 
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 83ff88df6bdf..337febdf94b8 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1588,7 +1588,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
 	void *ptr;
 
 	if (!node_online(node) || !NODE_DATA(node)) {
-		ptr = __alloc_bootmem(size, align, goal);
+		ptr = memblock_alloc_from(size, align, goal);
 		pr_info("cpu %d has no node %d or node-local memory\n",
 			cpu, node);
 		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
@@ -1601,7 +1601,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
 	}
 	return ptr;
 #else
-	return __alloc_bootmem(size, align, goal);
+	return memblock_alloc_from(size, align, goal);
 #endif
 }
 
@@ -1627,7 +1627,7 @@ static void __init pcpu_populate_pte(unsigned long addr)
 	if (pgd_none(*pgd)) {
 		pud_t *new;
 
-		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+		new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
 		pgd_populate(&init_mm, pgd, new);
 	}
 
@@ -1635,7 +1635,7 @@ static void __init pcpu_populate_pte(unsigned long addr)
 	if (pud_none(*pud)) {
 		pmd_t *new;
 
-		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+		new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
 		pud_populate(&init_mm, pud, new);
 	}
 
@@ -1643,7 +1643,7 @@ static void __init pcpu_populate_pte(unsigned long addr)
 	if (!pmd_present(*pmd)) {
 		pte_t *new;
 
-		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+		new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
 		pmd_populate_kernel(&init_mm, pmd, new);
 	}
 }
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index 92634d4e440c..885dd3881874 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -265,7 +265,7 @@ void __init mem_init(void)
 	i = last_valid_pfn >> ((20 - PAGE_SHIFT) + 5);
 	i += 1;
 	sparc_valid_addr_bitmap = (unsigned long *)
-		__alloc_bootmem(i << 2, SMP_CACHE_BYTES, 0UL);
+		memblock_alloc_from(i << 2, SMP_CACHE_BYTES, 0UL);
 
 	if (sparc_valid_addr_bitmap == NULL) {
 		prom_printf("mem_init: Cannot alloc valid_addr_bitmap.\n");
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index b338f0440f6b..6f965e6d01cc 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1811,7 +1811,8 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
 		if (pgd_none(*pgd)) {
 			pud_t *new;
 
-			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+			new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE,
+						  PAGE_SIZE);
 			alloc_bytes += PAGE_SIZE;
 			pgd_populate(&init_mm, pgd, new);
 		}
@@ -1823,7 +1824,8 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
 				vstart = kernel_map_hugepud(vstart, vend, pud);
 				continue;
 			}
-			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+			new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE,
+						  PAGE_SIZE);
 			alloc_bytes += PAGE_SIZE;
 			pud_populate(&init_mm, pud, new);
 		}
@@ -1836,7 +1838,8 @@ static unsigned long __ref kernel_map_range(unsigned long pstart,
 				vstart = kernel_map_hugepmd(vstart, vend, pmd);
 				continue;
 			}
-			new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+			new = memblock_alloc_from(PAGE_SIZE, PAGE_SIZE,
+						  PAGE_SIZE);
 			alloc_bytes += PAGE_SIZE;
 			pmd_populate_kernel(&init_mm, pmd, new);
 		}
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index be9cb0065179..b48fea5ad9ef 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -303,13 +303,13 @@ static void __init srmmu_nocache_init(void)
 
 	bitmap_bits = srmmu_nocache_size >> SRMMU_NOCACHE_BITMAP_SHIFT;
 
-	srmmu_nocache_pool = __alloc_bootmem(srmmu_nocache_size,
-		SRMMU_NOCACHE_ALIGN_MAX, 0UL);
+	srmmu_nocache_pool = memblock_alloc_from(srmmu_nocache_size,
+						 SRMMU_NOCACHE_ALIGN_MAX, 0UL);
 	memset(srmmu_nocache_pool, 0, srmmu_nocache_size);
 
 	srmmu_nocache_bitmap =
-		__alloc_bootmem(BITS_TO_LONGS(bitmap_bits) * sizeof(long),
-				SMP_CACHE_BYTES, 0UL);
+		memblock_alloc_from(BITS_TO_LONGS(bitmap_bits) * sizeof(long),
+				    SMP_CACHE_BYTES, 0UL);
 	bit_map_init(&srmmu_nocache_map, srmmu_nocache_bitmap, bitmap_bits);
 
 	srmmu_swapper_pg_dir = __srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE);
@@ -467,7 +467,7 @@ static void __init sparc_context_init(int numctx)
 	unsigned long size;
 
 	size = numctx * sizeof(struct ctx_list);
-	ctx_list_pool = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL);
+	ctx_list_pool = memblock_alloc_from(size, SMP_CACHE_BYTES, 0UL);
 
 	for (ctx = 0; ctx < numctx; ctx++) {
 		struct ctx_list *clist;
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 3896af205303..c97c105cfc67 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -122,6 +122,14 @@ static inline void * __init memblock_alloc_raw(
 					    NUMA_NO_NODE);
 }
 
+static inline void * __init memblock_alloc_from(
+		phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
+{
+	return memblock_alloc_try_nid(size, align, min_addr,
+				      BOOTMEM_ALLOC_ACCESSIBLE,
+				      NUMA_NO_NODE);
+}
+
 static inline void * __init memblock_alloc_nopanic(
 					phys_addr_t size, phys_addr_t align)
 {
-- 
cgit v1.2.3


From 6c7835f8d0d1839ca93bd3cf6faa15706f03d604 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Tue, 30 Oct 2018 15:09:15 -0700
Subject: mm: nobootmem: remove bootmem allocation APIs

The bootmem compatibility APIs are not used and can be removed.

Link: http://lkml.kernel.org/r/1536927045-23536-23-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Serge Semin <fancer.lancer@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h |  47 ----------
 mm/nobootmem.c          | 224 ------------------------------------------------
 2 files changed, 271 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index c97c105cfc67..73f1272fce20 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -36,33 +36,6 @@ extern void free_bootmem_node(pg_data_t *pgdat,
 extern void free_bootmem(unsigned long physaddr, unsigned long size);
 extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
 
-extern void *__alloc_bootmem(unsigned long size,
-			     unsigned long align,
-			     unsigned long goal);
-extern void *__alloc_bootmem_nopanic(unsigned long size,
-				     unsigned long align,
-				     unsigned long goal) __malloc;
-extern void *__alloc_bootmem_node(pg_data_t *pgdat,
-				  unsigned long size,
-				  unsigned long align,
-				  unsigned long goal) __malloc;
-void *__alloc_bootmem_node_high(pg_data_t *pgdat,
-				  unsigned long size,
-				  unsigned long align,
-				  unsigned long goal) __malloc;
-extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
-				  unsigned long size,
-				  unsigned long align,
-				  unsigned long goal) __malloc;
-void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
-				  unsigned long size,
-				  unsigned long align,
-				  unsigned long goal,
-				  unsigned long limit) __malloc;
-extern void *__alloc_bootmem_low(unsigned long size,
-				 unsigned long align,
-				 unsigned long goal) __malloc;
-
 /* We are using top down, so it is safe to use 0 here */
 #define BOOTMEM_LOW_LIMIT 0
 
@@ -70,26 +43,6 @@ extern void *__alloc_bootmem_low(unsigned long size,
 #define ARCH_LOW_ADDRESS_LIMIT  0xffffffffUL
 #endif
 
-#define alloc_bootmem(x) \
-	__alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
-#define alloc_bootmem_align(x, align) \
-	__alloc_bootmem(x, align, BOOTMEM_LOW_LIMIT)
-#define alloc_bootmem_pages(x) \
-	__alloc_bootmem(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
-#define alloc_bootmem_pages_nopanic(x) \
-	__alloc_bootmem_nopanic(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
-#define alloc_bootmem_node(pgdat, x) \
-	__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
-#define alloc_bootmem_node_nopanic(pgdat, x) \
-	__alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
-#define alloc_bootmem_pages_node(pgdat, x) \
-	__alloc_bootmem_node(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
-
-#define alloc_bootmem_low(x) \
-	__alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
-#define alloc_bootmem_low_pages(x) \
-	__alloc_bootmem_low(x, PAGE_SIZE, 0)
-
 /* FIXME: use MEMBLOCK_ALLOC_* variants here */
 #define BOOTMEM_ALLOC_ACCESSIBLE	0
 #define BOOTMEM_ALLOC_ANYWHERE		(~(phys_addr_t)0)
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 44ce7de1be8f..bc38e5673d31 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -33,41 +33,6 @@ unsigned long min_low_pfn;
 unsigned long max_pfn;
 unsigned long long max_possible_pfn;
 
-static void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
-					u64 goal, u64 limit)
-{
-	void *ptr;
-	u64 addr;
-	enum memblock_flags flags = choose_memblock_flags();
-
-	if (limit > memblock.current_limit)
-		limit = memblock.current_limit;
-
-again:
-	addr = memblock_find_in_range_node(size, align, goal, limit, nid,
-					   flags);
-	if (!addr && (flags & MEMBLOCK_MIRROR)) {
-		flags &= ~MEMBLOCK_MIRROR;
-		pr_warn("Could not allocate %pap bytes of mirrored memory\n",
-			&size);
-		goto again;
-	}
-	if (!addr)
-		return NULL;
-
-	if (memblock_reserve(addr, size))
-		return NULL;
-
-	ptr = phys_to_virt(addr);
-	memset(ptr, 0, size);
-	/*
-	 * The min_count is set to 0 so that bootmem allocated blocks
-	 * are never reported as leaks.
-	 */
-	kmemleak_alloc(ptr, size, 0, 0);
-	return ptr;
-}
-
 /**
  * free_bootmem_late - free bootmem pages directly to page allocator
  * @addr: starting address of the range
@@ -215,192 +180,3 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
 {
 	memblock_free(addr, size);
 }
-
-static void * __init ___alloc_bootmem_nopanic(unsigned long size,
-					unsigned long align,
-					unsigned long goal,
-					unsigned long limit)
-{
-	void *ptr;
-
-	if (WARN_ON_ONCE(slab_is_available()))
-		return kzalloc(size, GFP_NOWAIT);
-
-restart:
-
-	ptr = __alloc_memory_core_early(NUMA_NO_NODE, size, align, goal, limit);
-
-	if (ptr)
-		return ptr;
-
-	if (goal != 0) {
-		goal = 0;
-		goto restart;
-	}
-
-	return NULL;
-}
-
-/**
- * __alloc_bootmem_nopanic - allocate boot memory without panicking
- * @size: size of the request in bytes
- * @align: alignment of the region
- * @goal: preferred starting address of the region
- *
- * The goal is dropped if it can not be satisfied and the allocation will
- * fall back to memory below @goal.
- *
- * Allocation may happen on any node in the system.
- *
- * Return: address of the allocated region or %NULL on failure.
- */
-void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
-					unsigned long goal)
-{
-	unsigned long limit = -1UL;
-
-	return ___alloc_bootmem_nopanic(size, align, goal, limit);
-}
-
-static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
-					unsigned long goal, unsigned long limit)
-{
-	void *mem = ___alloc_bootmem_nopanic(size, align, goal, limit);
-
-	if (mem)
-		return mem;
-	/*
-	 * Whoops, we cannot satisfy the allocation request.
-	 */
-	pr_alert("bootmem alloc of %lu bytes failed!\n", size);
-	panic("Out of memory");
-	return NULL;
-}
-
-/**
- * __alloc_bootmem - allocate boot memory
- * @size: size of the request in bytes
- * @align: alignment of the region
- * @goal: preferred starting address of the region
- *
- * The goal is dropped if it can not be satisfied and the allocation will
- * fall back to memory below @goal.
- *
- * Allocation may happen on any node in the system.
- *
- * The function panics if the request can not be satisfied.
- *
- * Return: address of the allocated region.
- */
-void * __init __alloc_bootmem(unsigned long size, unsigned long align,
-			      unsigned long goal)
-{
-	unsigned long limit = -1UL;
-
-	return ___alloc_bootmem(size, align, goal, limit);
-}
-
-void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
-						   unsigned long size,
-						   unsigned long align,
-						   unsigned long goal,
-						   unsigned long limit)
-{
-	void *ptr;
-
-again:
-	ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
-					goal, limit);
-	if (ptr)
-		return ptr;
-
-	ptr = __alloc_memory_core_early(NUMA_NO_NODE, size, align,
-					goal, limit);
-	if (ptr)
-		return ptr;
-
-	if (goal) {
-		goal = 0;
-		goto again;
-	}
-
-	return NULL;
-}
-
-void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
-				   unsigned long align, unsigned long goal)
-{
-	if (WARN_ON_ONCE(slab_is_available()))
-		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
-
-	return ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0);
-}
-
-static void * __init ___alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
-				    unsigned long align, unsigned long goal,
-				    unsigned long limit)
-{
-	void *ptr;
-
-	ptr = ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, limit);
-	if (ptr)
-		return ptr;
-
-	pr_alert("bootmem alloc of %lu bytes failed!\n", size);
-	panic("Out of memory");
-	return NULL;
-}
-
-/**
- * __alloc_bootmem_node - allocate boot memory from a specific node
- * @pgdat: node to allocate from
- * @size: size of the request in bytes
- * @align: alignment of the region
- * @goal: preferred starting address of the region
- *
- * The goal is dropped if it can not be satisfied and the allocation will
- * fall back to memory below @goal.
- *
- * Allocation may fall back to any node in the system if the specified node
- * can not hold the requested memory.
- *
- * The function panics if the request can not be satisfied.
- *
- * Return: address of the allocated region.
- */
-void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
-				   unsigned long align, unsigned long goal)
-{
-	if (WARN_ON_ONCE(slab_is_available()))
-		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
-
-	return ___alloc_bootmem_node(pgdat, size, align, goal, 0);
-}
-
-void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
-				   unsigned long align, unsigned long goal)
-{
-	return __alloc_bootmem_node(pgdat, size, align, goal);
-}
-
-
-/**
- * __alloc_bootmem_low - allocate low boot memory
- * @size: size of the request in bytes
- * @align: alignment of the region
- * @goal: preferred starting address of the region
- *
- * The goal is dropped if it can not be satisfied and the allocation will
- * fall back to memory below @goal.
- *
- * Allocation may happen on any node in the system.
- *
- * The function panics if the request can not be satisfied.
- *
- * Return: address of the allocated region.
- */
-void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
-				  unsigned long goal)
-{
-	return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
-}
-- 
cgit v1.2.3


From 2013288f723887837d2f1cebef5fcf663b2319de Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Tue, 30 Oct 2018 15:09:21 -0700
Subject: memblock: replace free_bootmem{_node} with memblock_free

The free_bootmem and free_bootmem_node are merely wrappers for
memblock_free. Replace their usage with a call to memblock_free using the
following semantic patch:

@@
expression e1, e2, e3;
@@
(
- free_bootmem(e1, e2)
+ memblock_free(e1, e2)
|
- free_bootmem_node(e1, e2, e3)
+ memblock_free(e2, e3)
)

Link: http://lkml.kernel.org/r/1536927045-23536-24-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Serge Semin <fancer.lancer@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/core_irongate.c |  3 +--
 arch/arm64/mm/init.c              |  2 +-
 arch/mips/kernel/setup.c          |  2 +-
 arch/powerpc/kernel/setup_64.c    |  2 +-
 arch/sparc/kernel/smp_64.c        |  2 +-
 arch/um/kernel/mem.c              |  3 ++-
 arch/unicore32/mm/init.c          |  2 +-
 arch/x86/kernel/setup_percpu.c    |  3 ++-
 arch/x86/kernel/tce_64.c          |  3 ++-
 arch/x86/xen/p2m.c                |  3 ++-
 drivers/macintosh/smu.c           |  2 +-
 drivers/usb/early/xhci-dbc.c      | 11 ++++++-----
 drivers/xen/swiotlb-xen.c         |  4 +++-
 include/linux/bootmem.h           |  4 ----
 mm/nobootmem.c                    | 30 ------------------------------
 15 files changed, 24 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c
index f70986683fc6..35572be9deb5 100644
--- a/arch/alpha/kernel/core_irongate.c
+++ b/arch/alpha/kernel/core_irongate.c
@@ -234,8 +234,7 @@ albacore_init_arch(void)
 			unsigned long size;
 
 			size = initrd_end - initrd_start;
-			free_bootmem_node(NODE_DATA(0), __pa(initrd_start),
-					  PAGE_ALIGN(size));
+			memblock_free(__pa(initrd_start), PAGE_ALIGN(size));
 			if (!move_initrd(pci_mem))
 				printk("irongate_init_arch: initrd too big "
 				       "(%ldK)\ndisabling initrd\n",
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 3cf87341859f..2ddb1c5e988d 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -536,7 +536,7 @@ static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn)
 	 * memmap array.
 	 */
 	if (pg < pgend)
-		free_bootmem(pg, pgend - pg);
+		memblock_free(pg, pgend - pg);
 }
 
 /*
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index c1f95359d298..31522d3bc8bf 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -561,7 +561,7 @@ static void __init bootmem_init(void)
 		extern void show_kernel_relocation(const char *level);
 
 		offset = __pa_symbol(_text) - __pa_symbol(VMLINUX_LOAD_ADDRESS);
-		free_bootmem(__pa_symbol(VMLINUX_LOAD_ADDRESS), offset);
+		memblock_free(__pa_symbol(VMLINUX_LOAD_ADDRESS), offset);
 
 #if defined(CONFIG_DEBUG_KERNEL) && defined(CONFIG_DEBUG_INFO)
 		/*
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 26d7c49a157b..f90ab3ea9af3 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -771,7 +771,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
 
 static void __init pcpu_fc_free(void *ptr, size_t size)
 {
-	free_bootmem(__pa(ptr), size);
+	memblock_free(__pa(ptr), size);
 }
 
 static int pcpu_cpu_distance(unsigned int from, unsigned int to)
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 337febdf94b8..a087a6a25f06 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1607,7 +1607,7 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
 
 static void __init pcpu_free_bootmem(void *ptr, size_t size)
 {
-	free_bootmem(__pa(ptr), size);
+	memblock_free(__pa(ptr), size);
 }
 
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 185f6bb79269..3555c139389c 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -6,6 +6,7 @@
 #include <linux/stddef.h>
 #include <linux/module.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
@@ -46,7 +47,7 @@ void __init mem_init(void)
 	 */
 	brk_end = (unsigned long) UML_ROUND_UP(sbrk(0));
 	map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0);
-	free_bootmem(__pa(brk_end), uml_reserved - brk_end);
+	memblock_free(__pa(brk_end), uml_reserved - brk_end);
 	uml_reserved = brk_end;
 
 	/* this will put all low memory onto the freelists */
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index 8f8699e62bd5..4ba51991c7de 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -238,7 +238,7 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn)
 	 * free the section of the memmap array.
 	 */
 	if (pg < pgend)
-		free_bootmem(pg, pgend - pg);
+		memblock_free(pg, pgend - pg);
 }
 
 /*
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 041663abc028..a006f1ba4c39 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -5,6 +5,7 @@
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/percpu.h>
 #include <linux/kexec.h>
 #include <linux/crash_dump.h>
@@ -135,7 +136,7 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
 
 static void __init pcpu_fc_free(void *ptr, size_t size)
 {
-	free_bootmem(__pa(ptr), size);
+	memblock_free(__pa(ptr), size);
 }
 
 static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c
index 54c9b5a696b1..75730ce01f8d 100644
--- a/arch/x86/kernel/tce_64.c
+++ b/arch/x86/kernel/tce_64.c
@@ -31,6 +31,7 @@
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <asm/tce.h>
 #include <asm/calgary.h>
 #include <asm/proto.h>
@@ -186,5 +187,5 @@ void __init free_tce_table(void *tbl)
 	size = table_size_to_number_of_entries(specified_table_size);
 	size *= TCE_ENTRY_SIZE;
 
-	free_bootmem(__pa(tbl), size);
+	memblock_free(__pa(tbl), size);
 }
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 5de761b4cec8..b3e11afed25b 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -68,6 +68,7 @@
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
@@ -190,7 +191,7 @@ static void * __ref alloc_p2m_page(void)
 static void __ref free_p2m_page(void *p)
 {
 	if (unlikely(!slab_is_available())) {
-		free_bootmem((unsigned long)p, PAGE_SIZE);
+		memblock_free((unsigned long)p, PAGE_SIZE);
 		return;
 	}
 
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 332fcca30944..0069f9084f9f 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -569,7 +569,7 @@ fail_msg_node:
 fail_db_node:
 	of_node_put(smu->db_node);
 fail_bootmem:
-	free_bootmem(__pa(smu), sizeof(struct smu_device));
+	memblock_free(__pa(smu), sizeof(struct smu_device));
 	smu = NULL;
 fail_np:
 	of_node_put(np);
diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c
index 21494c8973b6..ddc5fa88f268 100644
--- a/drivers/usb/early/xhci-dbc.c
+++ b/drivers/usb/early/xhci-dbc.c
@@ -13,6 +13,7 @@
 #include <linux/pci_regs.h>
 #include <linux/pci_ids.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/io.h>
 #include <asm/pci-direct.h>
 #include <asm/fixmap.h>
@@ -191,7 +192,7 @@ static void __init xdbc_free_ring(struct xdbc_ring *ring)
 	if (!seg)
 		return;
 
-	free_bootmem(seg->dma, PAGE_SIZE);
+	memblock_free(seg->dma, PAGE_SIZE);
 	ring->segment = NULL;
 }
 
@@ -675,10 +676,10 @@ int __init early_xdbc_setup_hardware(void)
 		xdbc_free_ring(&xdbc.in_ring);
 
 		if (xdbc.table_dma)
-			free_bootmem(xdbc.table_dma, PAGE_SIZE);
+			memblock_free(xdbc.table_dma, PAGE_SIZE);
 
 		if (xdbc.out_dma)
-			free_bootmem(xdbc.out_dma, PAGE_SIZE);
+			memblock_free(xdbc.out_dma, PAGE_SIZE);
 
 		xdbc.table_base = NULL;
 		xdbc.out_buf = NULL;
@@ -997,8 +998,8 @@ free_and_quit:
 	xdbc_free_ring(&xdbc.evt_ring);
 	xdbc_free_ring(&xdbc.out_ring);
 	xdbc_free_ring(&xdbc.in_ring);
-	free_bootmem(xdbc.table_dma, PAGE_SIZE);
-	free_bootmem(xdbc.out_dma, PAGE_SIZE);
+	memblock_free(xdbc.table_dma, PAGE_SIZE);
+	memblock_free(xdbc.out_dma, PAGE_SIZE);
 	writel(0, &xdbc.xdbc_reg->control);
 	early_iounmap(xdbc.xhci_base, xdbc.xhci_length);
 
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 91a6208ec1a5..c5f26a87d238 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -36,6 +36,7 @@
 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
 
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/dma-direct.h>
 #include <linux/export.h>
 #include <xen/swiotlb-xen.h>
@@ -248,7 +249,8 @@ retry:
 			       xen_io_tlb_nslabs);
 	if (rc) {
 		if (early)
-			free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes));
+			memblock_free(__pa(xen_io_tlb_start),
+				      PAGE_ALIGN(bytes));
 		else {
 			free_pages((unsigned long)xen_io_tlb_start, order);
 			xen_io_tlb_start = NULL;
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 73f1272fce20..706cf8ef6678 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -30,10 +30,6 @@ extern unsigned long free_all_bootmem(void);
 extern void reset_node_managed_pages(pg_data_t *pgdat);
 extern void reset_all_zones_managed_pages(void);
 
-extern void free_bootmem_node(pg_data_t *pgdat,
-			      unsigned long addr,
-			      unsigned long size);
-extern void free_bootmem(unsigned long physaddr, unsigned long size);
 extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
 
 /* We are using top down, so it is safe to use 0 here */
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index bc38e5673d31..85e1822ce918 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -150,33 +150,3 @@ unsigned long __init free_all_bootmem(void)
 
 	return pages;
 }
-
-/**
- * free_bootmem_node - mark a page range as usable
- * @pgdat: node the range resides on
- * @physaddr: starting physical address of the range
- * @size: size of the range in bytes
- *
- * Partial pages will be considered reserved and left as they are.
- *
- * The range must reside completely on the specified node.
- */
-void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
-			      unsigned long size)
-{
-	memblock_free(physaddr, size);
-}
-
-/**
- * free_bootmem - mark a page range as usable
- * @addr: starting physical address of the range
- * @size: size of the range in bytes
- *
- * Partial pages will be considered reserved and left as they are.
- *
- * The range must be contiguous but may span node boundaries.
- */
-void __init free_bootmem(unsigned long addr, unsigned long size)
-{
-	memblock_free(addr, size);
-}
-- 
cgit v1.2.3


From 53ab85ebfd27cdf16c8ddc72781c072a63bef3cb Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Tue, 30 Oct 2018 15:09:25 -0700
Subject: memblock: replace free_bootmem_late with memblock_free_late

The free_bootmem_late and memblock_free_late do exactly the same thing:
they iterate over a range and give pages to the page allocator.

Replace calls to free_bootmem_late with calls to memblock_free_late and
remove the bootmem variant.

Link: http://lkml.kernel.org/r/1536927045-23536-25-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Serge Semin <fancer.lancer@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/sparc/kernel/mdesc.c               |  3 ++-
 arch/x86/platform/efi/quirks.c          |  6 +++---
 drivers/firmware/efi/apple-properties.c |  2 +-
 include/linux/bootmem.h                 |  2 --
 mm/nobootmem.c                          | 24 ------------------------
 5 files changed, 6 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index 59131e72ee78..a41526bd91e2 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -12,6 +12,7 @@
 #include <linux/mm.h>
 #include <linux/miscdevice.h>
 #include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/export.h>
 #include <linux/refcount.h>
 
@@ -190,7 +191,7 @@ static void __init mdesc_memblock_free(struct mdesc_handle *hp)
 
 	alloc_size = PAGE_ALIGN(hp->handle_size);
 	start = __pa(hp);
-	free_bootmem_late(start, alloc_size);
+	memblock_free_late(start, alloc_size);
 }
 
 static struct mdesc_mem_ops memblock_mdesc_ops = {
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 669babcaf245..4b70d0f5a803 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -333,7 +333,7 @@ void __init efi_reserve_boot_services(void)
 
 		/*
 		 * Because the following memblock_reserve() is paired
-		 * with free_bootmem_late() for this region in
+		 * with memblock_free_late() for this region in
 		 * efi_free_boot_services(), we must be extremely
 		 * careful not to reserve, and subsequently free,
 		 * critical regions of memory (like the kernel image) or
@@ -364,7 +364,7 @@ void __init efi_reserve_boot_services(void)
 		 * doesn't make sense as far as the firmware is
 		 * concerned, but it does provide us with a way to tag
 		 * those regions that must not be paired with
-		 * free_bootmem_late().
+		 * memblock_free_late().
 		 */
 		md->attribute |= EFI_MEMORY_RUNTIME;
 	}
@@ -414,7 +414,7 @@ void __init efi_free_boot_services(void)
 			size -= rm_size;
 		}
 
-		free_bootmem_late(start, size);
+		memblock_free_late(start, size);
 	}
 
 	if (!num_entries)
diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c
index 60a95719ecb8..2b675f788b61 100644
--- a/drivers/firmware/efi/apple-properties.c
+++ b/drivers/firmware/efi/apple-properties.c
@@ -235,7 +235,7 @@ static int __init map_properties(void)
 		 */
 		data->len = 0;
 		memunmap(data);
-		free_bootmem_late(pa_data + sizeof(*data), data_len);
+		memblock_free_late(pa_data + sizeof(*data), data_len);
 
 		return ret;
 	}
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 706cf8ef6678..bcc7e2fcb6a6 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -30,8 +30,6 @@ extern unsigned long free_all_bootmem(void);
 extern void reset_node_managed_pages(pg_data_t *pgdat);
 extern void reset_all_zones_managed_pages(void);
 
-extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
-
 /* We are using top down, so it is safe to use 0 here */
 #define BOOTMEM_LOW_LIMIT 0
 
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 85e1822ce918..ee0f7fc37fd1 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -33,30 +33,6 @@ unsigned long min_low_pfn;
 unsigned long max_pfn;
 unsigned long long max_possible_pfn;
 
-/**
- * free_bootmem_late - free bootmem pages directly to page allocator
- * @addr: starting address of the range
- * @size: size of the range in bytes
- *
- * This is only useful when the bootmem allocator has already been torn
- * down, but we are still initializing the system.  Pages are given directly
- * to the page allocator, no bootmem metadata is updated because it is gone.
- */
-void __init free_bootmem_late(unsigned long addr, unsigned long size)
-{
-	unsigned long cursor, end;
-
-	kmemleak_free_part_phys(addr, size);
-
-	cursor = PFN_UP(addr);
-	end = PFN_DOWN(addr + size);
-
-	for (; cursor < end; cursor++) {
-		__free_pages_bootmem(pfn_to_page(cursor), cursor, 0);
-		totalram_pages++;
-	}
-}
-
 static void __init __free_pages_memory(unsigned long start, unsigned long end)
 {
 	int order;
-- 
cgit v1.2.3


From c6ffc5ca8fb311a89cb6de5c31b6511308ddac8d Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Tue, 30 Oct 2018 15:09:30 -0700
Subject: memblock: rename free_all_bootmem to memblock_free_all

The conversion is done using

sed -i 's@free_all_bootmem@memblock_free_all@' \
    $(git grep -l free_all_bootmem)

Link: http://lkml.kernel.org/r/1536927045-23536-26-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Serge Semin <fancer.lancer@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/mm/init.c                   | 2 +-
 arch/arc/mm/init.c                     | 2 +-
 arch/arm/mm/init.c                     | 2 +-
 arch/arm64/mm/init.c                   | 2 +-
 arch/c6x/mm/init.c                     | 2 +-
 arch/h8300/mm/init.c                   | 2 +-
 arch/hexagon/mm/init.c                 | 2 +-
 arch/ia64/mm/init.c                    | 2 +-
 arch/m68k/mm/init.c                    | 2 +-
 arch/microblaze/mm/init.c              | 2 +-
 arch/mips/loongson64/loongson-3/numa.c | 2 +-
 arch/mips/mm/init.c                    | 2 +-
 arch/mips/sgi-ip27/ip27-memory.c       | 2 +-
 arch/nds32/mm/init.c                   | 2 +-
 arch/nios2/mm/init.c                   | 2 +-
 arch/openrisc/mm/init.c                | 2 +-
 arch/parisc/mm/init.c                  | 2 +-
 arch/powerpc/mm/mem.c                  | 2 +-
 arch/riscv/mm/init.c                   | 2 +-
 arch/s390/mm/init.c                    | 2 +-
 arch/sh/mm/init.c                      | 2 +-
 arch/sparc/mm/init_32.c                | 2 +-
 arch/sparc/mm/init_64.c                | 4 ++--
 arch/um/kernel/mem.c                   | 2 +-
 arch/unicore32/mm/init.c               | 2 +-
 arch/x86/mm/highmem_32.c               | 2 +-
 arch/x86/mm/init_32.c                  | 4 ++--
 arch/x86/mm/init_64.c                  | 4 ++--
 arch/x86/xen/mmu_pv.c                  | 2 +-
 arch/xtensa/mm/init.c                  | 2 +-
 include/linux/bootmem.h                | 2 +-
 mm/memblock.c                          | 2 +-
 mm/nobootmem.c                         | 4 ++--
 mm/page_alloc.c                        | 2 +-
 mm/page_poison.c                       | 2 +-
 35 files changed, 39 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index 9d74520298ab..853d15344934 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -282,7 +282,7 @@ mem_init(void)
 {
 	set_max_mapnr(max_low_pfn);
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
-	free_all_bootmem();
+	memblock_free_all();
 	mem_init_print_info(NULL);
 }
 
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index ba145065c579..0f29c6548779 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -218,7 +218,7 @@ void __init mem_init(void)
 		free_highmem_page(pfn_to_page(tmp));
 #endif
 
-	free_all_bootmem();
+	memblock_free_all();
 	mem_init_print_info(NULL);
 }
 
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 0cc8e04295a4..d421a10c93a8 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -508,7 +508,7 @@ void __init mem_init(void)
 
 	/* this will put all unused low memory onto the freelists */
 	free_unused_memmap();
-	free_all_bootmem();
+	memblock_free_all();
 
 #ifdef CONFIG_SA1111
 	/* now that our DMA memory is actually so designated, we can free it */
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 2ddb1c5e988d..d8d73073835f 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -599,7 +599,7 @@ void __init mem_init(void)
 	free_unused_memmap();
 #endif
 	/* this will put all unused low memory onto the freelists */
-	free_all_bootmem();
+	memblock_free_all();
 
 	kexec_reserve_crashkres_pages();
 
diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c
index dc369ad8b0ba..3383df8b3508 100644
--- a/arch/c6x/mm/init.c
+++ b/arch/c6x/mm/init.c
@@ -62,7 +62,7 @@ void __init mem_init(void)
 	high_memory = (void *)(memory_end & PAGE_MASK);
 
 	/* this will put all memory onto the freelists */
-	free_all_bootmem();
+	memblock_free_all();
 
 	mem_init_print_info(NULL);
 }
diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c
index 5d31ac9d7a8d..f2bf4487aabd 100644
--- a/arch/h8300/mm/init.c
+++ b/arch/h8300/mm/init.c
@@ -96,7 +96,7 @@ void __init mem_init(void)
 	max_mapnr = MAP_NR(high_memory);
 
 	/* this will put all low memory onto the freelists */
-	free_all_bootmem();
+	memblock_free_all();
 
 	mem_init_print_info(NULL);
 }
diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c
index d789b9cc0189..88643faf3981 100644
--- a/arch/hexagon/mm/init.c
+++ b/arch/hexagon/mm/init.c
@@ -68,7 +68,7 @@ unsigned long long kmap_generation;
 void __init mem_init(void)
 {
 	/*  No idea where this is actually declared.  Seems to evade LXR.  */
-	free_all_bootmem();
+	memblock_free_all();
 	mem_init_print_info(NULL);
 
 	/*
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 2169ca52bdf4..43ea4a47163d 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -627,7 +627,7 @@ mem_init (void)
 
 	set_max_mapnr(max_low_pfn);
 	high_memory = __va(max_low_pfn * PAGE_SIZE);
-	free_all_bootmem();
+	memblock_free_all();
 	mem_init_print_info(NULL);
 
 	/*
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index 977363eda125..ae49ae4d3049 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -140,7 +140,7 @@ static inline void init_pointer_tables(void)
 void __init mem_init(void)
 {
 	/* this will put all memory onto the freelists */
-	free_all_bootmem();
+	memblock_free_all();
 	init_pointer_tables();
 	mem_init_print_info(NULL);
 }
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 8c7f074ec20f..9989740d397a 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -204,7 +204,7 @@ void __init mem_init(void)
 	high_memory = (void *)__va(memory_start + lowmem_size - 1);
 
 	/* this will put all memory onto the freelists */
-	free_all_bootmem();
+	memblock_free_all();
 #ifdef CONFIG_HIGHMEM
 	highmem_setup();
 #endif
diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c
index c1e6ec52c614..703ad4536fe0 100644
--- a/arch/mips/loongson64/loongson-3/numa.c
+++ b/arch/mips/loongson64/loongson-3/numa.c
@@ -272,7 +272,7 @@ void __init paging_init(void)
 void __init mem_init(void)
 {
 	high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
-	free_all_bootmem();
+	memblock_free_all();
 	setup_zero_pages();	/* This comes from node 0 */
 	mem_init_print_info(NULL);
 }
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 842a49ef9909..0893b6136498 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -463,7 +463,7 @@ void __init mem_init(void)
 	high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
 
 	maar_init();
-	free_all_bootmem();
+	memblock_free_all();
 	setup_zero_pages();	/* Setup zeroed pages.  */
 	mem_init_free_highmem();
 	mem_init_print_info(NULL);
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index 6f7bef052b7f..cb1f1a6a166d 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -475,7 +475,7 @@ void __init paging_init(void)
 void __init mem_init(void)
 {
 	high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
-	free_all_bootmem();
+	memblock_free_all();
 	setup_zero_pages();	/* This comes from node 0 */
 	mem_init_print_info(NULL);
 }
diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c
index 5af81b866aa5..66d3e9cf498d 100644
--- a/arch/nds32/mm/init.c
+++ b/arch/nds32/mm/init.c
@@ -192,7 +192,7 @@ void __init mem_init(void)
 	free_highmem();
 
 	/* this will put all low memory onto the freelists */
-	free_all_bootmem();
+	memblock_free_all();
 	mem_init_print_info(NULL);
 
 	pr_info("virtual kernel memory layout:\n"
diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c
index c92fe4234009..12923501d94f 100644
--- a/arch/nios2/mm/init.c
+++ b/arch/nios2/mm/init.c
@@ -73,7 +73,7 @@ void __init mem_init(void)
 	high_memory = __va(end_mem);
 
 	/* this will put all memory onto the freelists */
-	free_all_bootmem();
+	memblock_free_all();
 	mem_init_print_info(NULL);
 }
 
diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index b7670de26c11..91a6a9ab7598 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -213,7 +213,7 @@ void __init mem_init(void)
 	memset((void *)empty_zero_page, 0, PAGE_SIZE);
 
 	/* this will put all low memory onto the freelists */
-	free_all_bootmem();
+	memblock_free_all();
 
 	mem_init_print_info(NULL);
 
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index f88a52b8531c..7e7a3126c5e9 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -621,7 +621,7 @@ void __init mem_init(void)
 
 	high_memory = __va((max_pfn << PAGE_SHIFT));
 	set_max_mapnr(page_to_pfn(virt_to_page(high_memory - 1)) + 1);
-	free_all_bootmem();
+	memblock_free_all();
 
 #ifdef CONFIG_PA11
 	if (boot_cpu_data.cpu_type == pcxl2 || boot_cpu_data.cpu_type == pcxl) {
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index dd949d6649a2..b3fe79064a69 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -349,7 +349,7 @@ void __init mem_init(void)
 
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
 	set_max_mapnr(max_pfn);
-	free_all_bootmem();
+	memblock_free_all();
 
 #ifdef CONFIG_HIGHMEM
 	{
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 58a522f9bcc3..d58c111099b3 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -55,7 +55,7 @@ void __init mem_init(void)
 #endif /* CONFIG_FLATMEM */
 
 	high_memory = (void *)(__va(PFN_PHYS(max_low_pfn)));
-	free_all_bootmem();
+	memblock_free_all();
 
 	mem_init_print_info(NULL);
 }
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 92d7a153e72a..873f6ee1c46d 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -139,7 +139,7 @@ void __init mem_init(void)
 	cmma_init();
 
 	/* this will put all low memory onto the freelists */
-	free_all_bootmem();
+	memblock_free_all();
 	setup_zero_pages();	/* Setup zeroed pages. */
 
 	cmma_init_nodat();
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index c884b760e52f..21447f866415 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -350,7 +350,7 @@ void __init mem_init(void)
 		high_memory = max_t(void *, high_memory,
 				    __va(pgdat_end_pfn(pgdat) << PAGE_SHIFT));
 
-	free_all_bootmem();
+	memblock_free_all();
 
 	/* Set this up early, so we can take care of the zero page */
 	cpu_cache_init();
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index 885dd3881874..880714565c40 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -277,7 +277,7 @@ void __init mem_init(void)
 
 	max_mapnr = last_valid_pfn - pfn_base;
 	high_memory = __va(max_low_pfn << PAGE_SHIFT);
-	free_all_bootmem();
+	memblock_free_all();
 
 	for (i = 0; sp_banks[i].num_bytes != 0; i++) {
 		unsigned long start_pfn = sp_banks[i].base_addr >> PAGE_SHIFT;
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 6f965e6d01cc..a8c3453195e6 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2545,12 +2545,12 @@ void __init mem_init(void)
 {
 	high_memory = __va(last_valid_pfn << PAGE_SHIFT);
 
-	free_all_bootmem();
+	memblock_free_all();
 
 	/*
 	 * Must be done after boot memory is put on freelist, because here we
 	 * might set fields in deferred struct pages that have not yet been
-	 * initialized, and free_all_bootmem() initializes all the reserved
+	 * initialized, and memblock_free_all() initializes all the reserved
 	 * deferred pages for us.
 	 */
 	register_page_bootmem_info();
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 3555c139389c..2c672a8f4571 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -51,7 +51,7 @@ void __init mem_init(void)
 	uml_reserved = brk_end;
 
 	/* this will put all low memory onto the freelists */
-	free_all_bootmem();
+	memblock_free_all();
 	max_low_pfn = totalram_pages;
 	max_pfn = totalram_pages;
 	mem_init_print_info(NULL);
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index 4ba51991c7de..44fd0e8fbe87 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -286,7 +286,7 @@ void __init mem_init(void)
 	free_unused_memmap(&meminfo);
 
 	/* this will put all unused low memory onto the freelists */
-	free_all_bootmem();
+	memblock_free_all();
 
 	mem_init_print_info(NULL);
 	printk(KERN_NOTICE "Virtual kernel memory layout:\n"
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 6d18b70ed5a9..62915a5e0fa2 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -111,7 +111,7 @@ void __init set_highmem_pages_init(void)
 
 	/*
 	 * Explicitly reset zone->managed_pages because set_highmem_pages_init()
-	 * is invoked before free_all_bootmem()
+	 * is invoked before memblock_free_all()
 	 */
 	reset_all_zones_managed_pages();
 	for_each_zone(zone) {
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 142c7d9f89cc..3bbe5f58a67d 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -771,7 +771,7 @@ void __init mem_init(void)
 #endif
 	/*
 	 * With CONFIG_DEBUG_PAGEALLOC initialization of highmem pages has to
-	 * be done before free_all_bootmem(). Memblock use free low memory for
+	 * be done before memblock_free_all(). Memblock use free low memory for
 	 * temporary data (see find_range_array()) and for this purpose can use
 	 * pages that was already passed to the buddy allocator, hence marked as
 	 * not accessible in the page tables when compiled with
@@ -781,7 +781,7 @@ void __init mem_init(void)
 	set_highmem_pages_init();
 
 	/* this will put all low memory onto the freelists */
-	free_all_bootmem();
+	memblock_free_all();
 
 	after_bootmem = 1;
 	x86_init.hyper.init_after_bootmem();
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index f39b51244fe2..bfb0bedc21d3 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1188,14 +1188,14 @@ void __init mem_init(void)
 	/* clear_bss() already clear the empty_zero_page */
 
 	/* this will put all memory onto the freelists */
-	free_all_bootmem();
+	memblock_free_all();
 	after_bootmem = 1;
 	x86_init.hyper.init_after_bootmem();
 
 	/*
 	 * Must be done after boot memory is put on freelist, because here we
 	 * might set fields in deferred struct pages that have not yet been
-	 * initialized, and free_all_bootmem() initializes all the reserved
+	 * initialized, and memblock_free_all() initializes all the reserved
 	 * deferred pages for us.
 	 */
 	register_page_bootmem_info();
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 70ea598a37d2..0d7b3ae4960b 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -864,7 +864,7 @@ static int __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
  * The init_mm pagetable is really pinned as soon as its created, but
  * that's before we have page structures to store the bits.  So do all
  * the book-keeping now once struct pages for allocated pages are
- * initialized. This happens only after free_all_bootmem() is called.
+ * initialized. This happens only after memblock_free_all() is called.
  */
 static void __init xen_after_bootmem(void)
 {
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index 34aead7dcb48..f7fbe6334939 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -152,7 +152,7 @@ void __init mem_init(void)
 	max_mapnr = max_pfn - ARCH_PFN_OFFSET;
 	high_memory = (void *)__va(max_low_pfn << PAGE_SHIFT);
 
-	free_all_bootmem();
+	memblock_free_all();
 
 	mem_init_print_info(NULL);
 	pr_info("virtual kernel memory layout:\n"
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index bcc7e2fcb6a6..b58873a567b2 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -26,7 +26,7 @@ extern unsigned long max_pfn;
  */
 extern unsigned long long max_possible_pfn;
 
-extern unsigned long free_all_bootmem(void);
+extern unsigned long memblock_free_all(void);
 extern void reset_node_managed_pages(pg_data_t *pgdat);
 extern void reset_all_zones_managed_pages(void);
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 58340de3ebc6..e2f397174734 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1360,7 +1360,7 @@ static void * __init memblock_alloc_internal(
 	/*
 	 * Detect any accidental use of these APIs after slab is ready, as at
 	 * this moment memblock may be deinitialized already and its
-	 * internal data may be destroyed (after execution of free_all_bootmem)
+	 * internal data may be destroyed (after execution of memblock_free_all)
 	 */
 	if (WARN_ON_ONCE(slab_is_available()))
 		return kzalloc_node(size, GFP_NOWAIT, nid);
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index ee0f7fc37fd1..bb64b09ca4d2 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -111,11 +111,11 @@ void __init reset_all_zones_managed_pages(void)
 }
 
 /**
- * free_all_bootmem - release free pages to the buddy allocator
+ * memblock_free_all - release free pages to the buddy allocator
  *
  * Return: the number of pages actually released.
  */
-unsigned long __init free_all_bootmem(void)
+unsigned long __init memblock_free_all(void)
 {
 	unsigned long pages;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8ca6954fdcdc..6e9b8387a706 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5476,7 +5476,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
 
 /*
  * Initially all pages are reserved - free ones are freed
- * up by free_all_bootmem() once the early boot process is
+ * up by memblock_free_all() once the early boot process is
  * done. Non-atomic initialization, single-pass.
  */
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
diff --git a/mm/page_poison.c b/mm/page_poison.c
index aa2b3d34e8ea..f7e2a676365a 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -21,7 +21,7 @@ bool page_poisoning_enabled(void)
 {
 	/*
 	 * Assumes that debug_pagealloc_enabled is set before
-	 * free_all_bootmem.
+	 * memblock_free_all.
 	 * Page poisoning is debug page alloc for some arches. If
 	 * either of those options are enabled, enable poisoning.
 	 */
-- 
cgit v1.2.3


From 57c8a661d95dff48dd9c2f2496139082bbaf241a Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Tue, 30 Oct 2018 15:09:49 -0700
Subject: mm: remove include/linux/bootmem.h

Move remaining definitions and declarations from include/linux/bootmem.h
into include/linux/memblock.h and remove the redundant header.

The includes were replaced with the semantic patch below and then
semi-automated removal of duplicated '#include <linux/memblock.h>

@@
@@
- #include <linux/bootmem.h>
+ #include <linux/memblock.h>

[sfr@canb.auug.org.au: dma-direct: fix up for the removal of linux/bootmem.h]
  Link: http://lkml.kernel.org/r/20181002185342.133d1680@canb.auug.org.au
[sfr@canb.auug.org.au: powerpc: fix up for removal of linux/bootmem.h]
  Link: http://lkml.kernel.org/r/20181005161406.73ef8727@canb.auug.org.au
[sfr@canb.auug.org.au: x86/kaslr, ACPI/NUMA: fix for linux/bootmem.h removal]
  Link: http://lkml.kernel.org/r/20181008190341.5e396491@canb.auug.org.au
Link: http://lkml.kernel.org/r/1536927045-23536-30-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greentime Hu <green.hu@gmail.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "James E.J. Bottomley" <jejb@parisc-linux.org>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ley Foon Tan <lftan@altera.com>
Cc: Mark Salter <msalter@redhat.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@sifive.com>
Cc: Paul Burton <paul.burton@mips.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Serge Semin <fancer.lancer@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Vineet Gupta <vgupta@synopsys.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/core_cia.c                |   2 +-
 arch/alpha/kernel/core_irongate.c           |   1 -
 arch/alpha/kernel/core_marvel.c             |   2 +-
 arch/alpha/kernel/core_titan.c              |   2 +-
 arch/alpha/kernel/core_tsunami.c            |   2 +-
 arch/alpha/kernel/pci-noop.c                |   2 +-
 arch/alpha/kernel/pci.c                     |   2 +-
 arch/alpha/kernel/pci_iommu.c               |   2 +-
 arch/alpha/kernel/setup.c                   |   1 -
 arch/alpha/kernel/sys_nautilus.c            |   2 +-
 arch/alpha/mm/init.c                        |   2 +-
 arch/alpha/mm/numa.c                        |   1 -
 arch/arc/kernel/unwind.c                    |   2 +-
 arch/arc/mm/highmem.c                       |   2 +-
 arch/arc/mm/init.c                          |   1 -
 arch/arm/kernel/devtree.c                   |   1 -
 arch/arm/kernel/setup.c                     |   1 -
 arch/arm/mach-omap2/omap_hwmod.c            |   2 +-
 arch/arm/mm/dma-mapping.c                   |   1 -
 arch/arm/mm/init.c                          |   1 -
 arch/arm/xen/mm.c                           |   1 -
 arch/arm/xen/p2m.c                          |   2 +-
 arch/arm64/kernel/acpi.c                    |   1 -
 arch/arm64/kernel/acpi_numa.c               |   1 -
 arch/arm64/kernel/setup.c                   |   1 -
 arch/arm64/mm/dma-mapping.c                 |   2 +-
 arch/arm64/mm/init.c                        |   1 -
 arch/arm64/mm/kasan_init.c                  |   1 -
 arch/arm64/mm/numa.c                        |   1 -
 arch/c6x/kernel/setup.c                     |   1 -
 arch/c6x/mm/init.c                          |   2 +-
 arch/h8300/kernel/setup.c                   |   1 -
 arch/h8300/mm/init.c                        |   2 +-
 arch/hexagon/kernel/dma.c                   |   2 +-
 arch/hexagon/kernel/setup.c                 |   2 +-
 arch/hexagon/mm/init.c                      |   1 -
 arch/ia64/kernel/crash.c                    |   2 +-
 arch/ia64/kernel/efi.c                      |   2 +-
 arch/ia64/kernel/ia64_ksyms.c               |   2 +-
 arch/ia64/kernel/iosapic.c                  |   2 +-
 arch/ia64/kernel/mca.c                      |   2 +-
 arch/ia64/kernel/mca_drv.c                  |   2 +-
 arch/ia64/kernel/setup.c                    |   1 -
 arch/ia64/kernel/smpboot.c                  |   2 +-
 arch/ia64/kernel/topology.c                 |   2 +-
 arch/ia64/kernel/unwind.c                   |   2 +-
 arch/ia64/mm/contig.c                       |   1 -
 arch/ia64/mm/discontig.c                    |   1 -
 arch/ia64/mm/init.c                         |   1 -
 arch/ia64/mm/numa.c                         |   2 +-
 arch/ia64/mm/tlb.c                          |   2 +-
 arch/ia64/pci/pci.c                         |   2 +-
 arch/ia64/sn/kernel/bte.c                   |   2 +-
 arch/ia64/sn/kernel/io_common.c             |   2 +-
 arch/ia64/sn/kernel/setup.c                 |   2 +-
 arch/m68k/atari/stram.c                     |   2 +-
 arch/m68k/coldfire/m54xx.c                  |   2 +-
 arch/m68k/kernel/setup_mm.c                 |   1 -
 arch/m68k/kernel/setup_no.c                 |   1 -
 arch/m68k/kernel/uboot.c                    |   2 +-
 arch/m68k/mm/init.c                         |   2 +-
 arch/m68k/mm/mcfmmu.c                       |   1 -
 arch/m68k/mm/motorola.c                     |   1 -
 arch/m68k/mm/sun3mmu.c                      |   2 +-
 arch/m68k/sun3/config.c                     |   2 +-
 arch/m68k/sun3/dvma.c                       |   2 +-
 arch/m68k/sun3/mmu_emu.c                    |   2 +-
 arch/m68k/sun3/sun3dvma.c                   |   2 +-
 arch/m68k/sun3x/dvma.c                      |   2 +-
 arch/microblaze/mm/consistent.c             |   2 +-
 arch/microblaze/mm/init.c                   |   3 +-
 arch/microblaze/pci/pci-common.c            |   2 +-
 arch/mips/ar7/memory.c                      |   2 +-
 arch/mips/ath79/setup.c                     |   2 +-
 arch/mips/bcm63xx/prom.c                    |   2 +-
 arch/mips/bcm63xx/setup.c                   |   2 +-
 arch/mips/bmips/setup.c                     |   2 +-
 arch/mips/cavium-octeon/dma-octeon.c        |   2 +-
 arch/mips/dec/prom/memory.c                 |   2 +-
 arch/mips/emma/common/prom.c                |   2 +-
 arch/mips/fw/arc/memory.c                   |   2 +-
 arch/mips/jazz/jazzdma.c                    |   2 +-
 arch/mips/kernel/crash.c                    |   2 +-
 arch/mips/kernel/crash_dump.c               |   2 +-
 arch/mips/kernel/prom.c                     |   2 +-
 arch/mips/kernel/setup.c                    |   1 -
 arch/mips/kernel/traps.c                    |   1 -
 arch/mips/kernel/vpe.c                      |   2 +-
 arch/mips/kvm/commpage.c                    |   2 +-
 arch/mips/kvm/dyntrans.c                    |   2 +-
 arch/mips/kvm/emulate.c                     |   2 +-
 arch/mips/kvm/interrupt.c                   |   2 +-
 arch/mips/kvm/mips.c                        |   2 +-
 arch/mips/lantiq/prom.c                     |   2 +-
 arch/mips/lasat/prom.c                      |   2 +-
 arch/mips/loongson64/common/init.c          |   2 +-
 arch/mips/loongson64/loongson-3/numa.c      |   1 -
 arch/mips/mm/init.c                         |   2 +-
 arch/mips/mm/pgtable-32.c                   |   2 +-
 arch/mips/mti-malta/malta-memory.c          |   2 +-
 arch/mips/netlogic/xlp/dt.c                 |   2 +-
 arch/mips/pci/pci-legacy.c                  |   2 +-
 arch/mips/pci/pci.c                         |   2 +-
 arch/mips/ralink/of.c                       |   2 +-
 arch/mips/rb532/prom.c                      |   2 +-
 arch/mips/sgi-ip27/ip27-memory.c            |   1 -
 arch/mips/sibyte/common/cfe.c               |   2 +-
 arch/mips/sibyte/swarm/setup.c              |   2 +-
 arch/mips/txx9/rbtx4938/prom.c              |   2 +-
 arch/nds32/kernel/setup.c                   |   3 +-
 arch/nds32/mm/highmem.c                     |   2 +-
 arch/nds32/mm/init.c                        |   3 +-
 arch/nios2/kernel/prom.c                    |   2 +-
 arch/nios2/kernel/setup.c                   |   1 -
 arch/nios2/mm/init.c                        |   2 +-
 arch/openrisc/kernel/setup.c                |   3 +-
 arch/openrisc/mm/init.c                     |   3 +-
 arch/parisc/mm/init.c                       |   1 -
 arch/powerpc/kernel/pci_32.c                |   2 +-
 arch/powerpc/kernel/setup-common.c          |   1 -
 arch/powerpc/kernel/setup_64.c              |   3 +-
 arch/powerpc/lib/alloc.c                    |   2 +-
 arch/powerpc/mm/hugetlbpage.c               |   1 -
 arch/powerpc/mm/mem.c                       |   3 +-
 arch/powerpc/mm/mmu_context_nohash.c        |   2 +-
 arch/powerpc/mm/numa.c                      |   3 +-
 arch/powerpc/platforms/powermac/nvram.c     |   2 +-
 arch/powerpc/platforms/powernv/pci-ioda.c   |   3 +-
 arch/powerpc/platforms/ps3/setup.c          |   2 +-
 arch/powerpc/sysdev/msi_bitmap.c            |   2 +-
 arch/riscv/mm/init.c                        |   3 +-
 arch/s390/kernel/crash_dump.c               |   3 +-
 arch/s390/kernel/setup.c                    |   1 -
 arch/s390/kernel/smp.c                      |   3 +-
 arch/s390/kernel/topology.c                 |   2 +-
 arch/s390/kernel/vdso.c                     |   2 +-
 arch/s390/mm/extmem.c                       |   2 +-
 arch/s390/mm/init.c                         |   3 +-
 arch/s390/mm/vmem.c                         |   3 +-
 arch/s390/numa/mode_emu.c                   |   1 -
 arch/s390/numa/numa.c                       |   1 -
 arch/s390/numa/toptree.c                    |   2 +-
 arch/sh/mm/init.c                           |   3 +-
 arch/sh/mm/ioremap_fixed.c                  |   2 +-
 arch/sparc/kernel/mdesc.c                   |   2 -
 arch/sparc/kernel/prom_32.c                 |   2 +-
 arch/sparc/kernel/setup_64.c                |   2 +-
 arch/sparc/kernel/smp_64.c                  |   2 +-
 arch/sparc/mm/init_32.c                     |   1 -
 arch/sparc/mm/init_64.c                     |   3 +-
 arch/sparc/mm/srmmu.c                       |   2 +-
 arch/um/drivers/net_kern.c                  |   2 +-
 arch/um/drivers/vector_kern.c               |   2 +-
 arch/um/kernel/initrd.c                     |   2 +-
 arch/um/kernel/mem.c                        |   1 -
 arch/um/kernel/physmem.c                    |   1 -
 arch/unicore32/kernel/hibernate.c           |   2 +-
 arch/unicore32/kernel/setup.c               |   3 +-
 arch/unicore32/mm/init.c                    |   3 +-
 arch/unicore32/mm/mmu.c                     |   1 -
 arch/x86/kernel/acpi/boot.c                 |   2 +-
 arch/x86/kernel/acpi/sleep.c                |   1 -
 arch/x86/kernel/apic/apic.c                 |   2 +-
 arch/x86/kernel/apic/io_apic.c              |   2 +-
 arch/x86/kernel/cpu/common.c                |   2 +-
 arch/x86/kernel/e820.c                      |   3 +-
 arch/x86/kernel/mpparse.c                   |   1 -
 arch/x86/kernel/pci-dma.c                   |   2 +-
 arch/x86/kernel/pci-swiotlb.c               |   2 +-
 arch/x86/kernel/pvclock.c                   |   2 +-
 arch/x86/kernel/setup.c                     |   1 -
 arch/x86/kernel/setup_percpu.c              |   1 -
 arch/x86/kernel/smpboot.c                   |   2 +-
 arch/x86/kernel/tce_64.c                    |   1 -
 arch/x86/mm/amdtopology.c                   |   1 -
 arch/x86/mm/fault.c                         |   2 +-
 arch/x86/mm/highmem_32.c                    |   2 +-
 arch/x86/mm/init.c                          |   1 -
 arch/x86/mm/init_32.c                       |   1 -
 arch/x86/mm/init_64.c                       |   1 -
 arch/x86/mm/ioremap.c                       |   2 +-
 arch/x86/mm/kasan_init_64.c                 |   3 +-
 arch/x86/mm/kaslr.c                         |   1 +
 arch/x86/mm/numa.c                          |   1 -
 arch/x86/mm/numa_32.c                       |   1 -
 arch/x86/mm/numa_64.c                       |   2 +-
 arch/x86/mm/numa_emulation.c                |   1 -
 arch/x86/mm/pageattr-test.c                 |   2 +-
 arch/x86/mm/pageattr.c                      |   2 +-
 arch/x86/mm/pat.c                           |   2 +-
 arch/x86/mm/physaddr.c                      |   2 +-
 arch/x86/pci/i386.c                         |   2 +-
 arch/x86/platform/efi/efi.c                 |   3 +-
 arch/x86/platform/efi/efi_64.c              |   2 +-
 arch/x86/platform/efi/quirks.c              |   1 -
 arch/x86/platform/olpc/olpc_dt.c            |   2 +-
 arch/x86/power/hibernate_32.c               |   2 +-
 arch/x86/xen/enlighten.c                    |   2 +-
 arch/x86/xen/enlighten_pv.c                 |   3 +-
 arch/x86/xen/p2m.c                          |   1 -
 arch/xtensa/kernel/pci.c                    |   2 +-
 arch/xtensa/mm/cache.c                      |   2 +-
 arch/xtensa/mm/init.c                       |   2 +-
 arch/xtensa/mm/kasan_init.c                 |   3 +-
 arch/xtensa/mm/mmu.c                        |   2 +-
 arch/xtensa/platforms/iss/network.c         |   2 +-
 arch/xtensa/platforms/iss/setup.c           |   2 +-
 block/blk-settings.c                        |   2 +-
 block/bounce.c                              |   2 +-
 drivers/acpi/numa.c                         |   1 -
 drivers/acpi/tables.c                       |   3 +-
 drivers/base/platform.c                     |   2 +-
 drivers/clk/ti/clk.c                        |   2 +-
 drivers/firmware/dmi_scan.c                 |   2 +-
 drivers/firmware/efi/apple-properties.c     |   2 +-
 drivers/firmware/iscsi_ibft_find.c          |   2 +-
 drivers/firmware/memmap.c                   |   2 +-
 drivers/iommu/mtk_iommu.c                   |   2 +-
 drivers/iommu/mtk_iommu_v1.c                |   2 +-
 drivers/macintosh/smu.c                     |   3 +-
 drivers/mtd/ar7part.c                       |   2 +-
 drivers/net/arcnet/arc-rimi.c               |   2 +-
 drivers/net/arcnet/com20020-isa.c           |   2 +-
 drivers/net/arcnet/com90io.c                |   2 +-
 drivers/of/fdt.c                            |   1 -
 drivers/of/unittest.c                       |   2 +-
 drivers/s390/char/fs3270.c                  |   2 +-
 drivers/s390/char/tty3270.c                 |   2 +-
 drivers/s390/cio/cmf.c                      |   2 +-
 drivers/s390/virtio/virtio_ccw.c            |   2 +-
 drivers/sfi/sfi_core.c                      |   2 +-
 drivers/tty/serial/cpm_uart/cpm_uart_core.c |   2 +-
 drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c |   2 +-
 drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c |   2 +-
 drivers/usb/early/xhci-dbc.c                |   1 -
 drivers/xen/balloon.c                       |   2 +-
 drivers/xen/events/events_base.c            |   2 +-
 drivers/xen/grant-table.c                   |   2 +-
 drivers/xen/swiotlb-xen.c                   |   1 -
 drivers/xen/xen-selfballoon.c               |   2 +-
 fs/dcache.c                                 |   2 +-
 fs/inode.c                                  |   2 +-
 fs/namespace.c                              |   2 +-
 fs/proc/kcore.c                             |   2 +-
 fs/proc/page.c                              |   2 +-
 fs/proc/vmcore.c                            |   2 +-
 include/linux/bootmem.h                     | 173 ----------------------------
 include/linux/memblock.h                    | 151 +++++++++++++++++++++++-
 init/main.c                                 |   2 +-
 kernel/dma/direct.c                         |   2 +-
 kernel/dma/swiotlb.c                        |   2 +-
 kernel/futex.c                              |   2 +-
 kernel/locking/qspinlock_paravirt.h         |   2 +-
 kernel/pid.c                                |   2 +-
 kernel/power/snapshot.c                     |   2 +-
 kernel/printk/printk.c                      |   1 -
 kernel/profile.c                            |   2 +-
 lib/cpumask.c                               |   2 +-
 mm/hugetlb.c                                |   1 -
 mm/kasan/kasan_init.c                       |   3 +-
 mm/kmemleak.c                               |   2 +-
 mm/memblock.c                               |   1 -
 mm/memory_hotplug.c                         |   1 -
 mm/page_alloc.c                             |   1 -
 mm/page_ext.c                               |   2 +-
 mm/page_idle.c                              |   2 +-
 mm/page_owner.c                             |   2 +-
 mm/percpu.c                                 |   2 +-
 mm/sparse-vmemmap.c                         |   1 -
 mm/sparse.c                                 |   1 -
 net/ipv4/inet_hashtables.c                  |   2 +-
 net/ipv4/tcp.c                              |   2 +-
 net/ipv4/udp.c                              |   2 +-
 net/sctp/protocol.c                         |   2 +-
 net/xfrm/xfrm_hash.c                        |   2 +-
 275 files changed, 353 insertions(+), 476 deletions(-)
 delete mode 100644 include/linux/bootmem.h

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/core_cia.c b/arch/alpha/kernel/core_cia.c
index 026ee955fd10..867e8730b0c5 100644
--- a/arch/alpha/kernel/core_cia.c
+++ b/arch/alpha/kernel/core_cia.c
@@ -21,7 +21,7 @@
 #include <linux/pci.h>
 #include <linux/sched.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <asm/ptrace.h>
 #include <asm/mce.h>
diff --git a/arch/alpha/kernel/core_irongate.c b/arch/alpha/kernel/core_irongate.c
index 35572be9deb5..a9fd133a7fb2 100644
--- a/arch/alpha/kernel/core_irongate.c
+++ b/arch/alpha/kernel/core_irongate.c
@@ -20,7 +20,6 @@
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/initrd.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 
 #include <asm/ptrace.h>
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index 1f00c9433b10..8a568c4d8e81 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -18,7 +18,7 @@
 #include <linux/mc146818rtc.h>
 #include <linux/rtc.h>
 #include <linux/module.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <asm/ptrace.h>
 #include <asm/smp.h>
diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c
index 132b06bdf903..97551597581b 100644
--- a/arch/alpha/kernel/core_titan.c
+++ b/arch/alpha/kernel/core_titan.c
@@ -16,7 +16,7 @@
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/vmalloc.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <asm/ptrace.h>
 #include <asm/smp.h>
diff --git a/arch/alpha/kernel/core_tsunami.c b/arch/alpha/kernel/core_tsunami.c
index e7c956ea46b6..f334b8928d72 100644
--- a/arch/alpha/kernel/core_tsunami.c
+++ b/arch/alpha/kernel/core_tsunami.c
@@ -17,7 +17,7 @@
 #include <linux/pci.h>
 #include <linux/sched.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <asm/ptrace.h>
 #include <asm/smp.h>
diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c
index 59cbfc2bf2c5..a9378ee0c2f1 100644
--- a/arch/alpha/kernel/pci-noop.c
+++ b/arch/alpha/kernel/pci-noop.c
@@ -7,7 +7,7 @@
 
 #include <linux/pci.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/gfp.h>
 #include <linux/capability.h>
 #include <linux/mm.h>
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 4cc3eb92f55b..13937e72d875 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -18,7 +18,7 @@
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/kernel.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/module.h>
 #include <linux/cache.h>
 #include <linux/slab.h>
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index 5d178c7ba5b2..82cf950bda2a 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -7,7 +7,7 @@
 #include <linux/mm.h>
 #include <linux/pci.h>
 #include <linux/gfp.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/export.h>
 #include <linux/scatterlist.h>
 #include <linux/log2.h>
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index 64c06a0adf3d..a37fd990bd55 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -29,7 +29,6 @@
 #include <linux/string.h>
 #include <linux/ioport.h>
 #include <linux/platform_device.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/pci.h>
 #include <linux/seq_file.h>
diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c
index ff4f54b86c7f..cd9a112d67ff 100644
--- a/arch/alpha/kernel/sys_nautilus.c
+++ b/arch/alpha/kernel/sys_nautilus.c
@@ -32,7 +32,7 @@
 #include <linux/pci.h>
 #include <linux/init.h>
 #include <linux/reboot.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/bitops.h>
 
 #include <asm/ptrace.h>
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index 853d15344934..a42fc5c4db89 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -19,7 +19,7 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/init.h>
-#include <linux/bootmem.h> /* max_low_pfn */
+#include <linux/memblock.h> /* max_low_pfn */
 #include <linux/vmalloc.h>
 #include <linux/gfp.h>
 
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index 26cd925d19b1..74846553e3f1 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -10,7 +10,6 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/swap.h>
 #include <linux/initrd.h>
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index 2a01dd1005f4..d34f69eb1a95 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -15,7 +15,7 @@
 
 #include <linux/sched.h>
 #include <linux/module.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/sort.h>
 #include <linux/slab.h>
 #include <linux/stop_machine.h>
diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c
index f582dc8944c9..48e700151810 100644
--- a/arch/arc/mm/highmem.c
+++ b/arch/arc/mm/highmem.c
@@ -7,7 +7,7 @@
  *
  */
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/export.h>
 #include <linux/highmem.h>
 #include <asm/processor.h>
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 0f29c6548779..f8fe5668b30f 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -8,7 +8,6 @@
 
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #ifdef CONFIG_BLK_DEV_INITRD
 #include <linux/initrd.h>
diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
index 13bcd3b867cb..e3057c1b55b9 100644
--- a/arch/arm/kernel/devtree.c
+++ b/arch/arm/kernel/devtree.c
@@ -12,7 +12,6 @@
 #include <linux/export.h>
 #include <linux/errno.h>
 #include <linux/types.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 39e6090d23ac..840a4adc69fc 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -16,7 +16,6 @@
 #include <linux/utsname.h>
 #include <linux/initrd.h>
 #include <linux/console.h>
-#include <linux/bootmem.h>
 #include <linux/seq_file.h>
 #include <linux/screen_info.h>
 #include <linux/of_platform.h>
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 1f9b34a7eccd..cd5732ab0cdf 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -141,7 +141,7 @@
 #include <linux/cpu.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <linux/platform_data/ti-sysc.h>
 
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 66566472c153..661fe48ab78d 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -9,7 +9,6 @@
  *
  *  DMA uncached mapping support.
  */
-#include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/genalloc.h>
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index d421a10c93a8..32e4845af2b6 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -11,7 +11,6 @@
 #include <linux/errno.h>
 #include <linux/swap.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
 #include <linux/mman.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/task.h>
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index 785d2a562a23..cb44aa290e73 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -1,6 +1,5 @@
 #include <linux/cpu.h>
 #include <linux/dma-mapping.h>
-#include <linux/bootmem.h>
 #include <linux/gfp.h>
 #include <linux/highmem.h>
 #include <linux/export.h>
diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c
index 0641ba54ab62..e70a49fc8dcd 100644
--- a/arch/arm/xen/p2m.c
+++ b/arch/arm/xen/p2m.c
@@ -1,4 +1,4 @@
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/gfp.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c
index ed46dc188b22..44e3c351e1ea 100644
--- a/arch/arm64/kernel/acpi.c
+++ b/arch/arm64/kernel/acpi.c
@@ -16,7 +16,6 @@
 #define pr_fmt(fmt) "ACPI: " fmt
 
 #include <linux/acpi.h>
-#include <linux/bootmem.h>
 #include <linux/cpumask.h>
 #include <linux/efi.h>
 #include <linux/efi-bgrt.h>
diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
index 4f4f1815e047..eac1d0cc595c 100644
--- a/arch/arm64/kernel/acpi_numa.c
+++ b/arch/arm64/kernel/acpi_numa.c
@@ -18,7 +18,6 @@
 
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
-#include <linux/bootmem.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/memblock.h>
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 3428427f6c93..7ce7306f1d75 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -26,7 +26,6 @@
 #include <linux/initrd.h>
 #include <linux/console.h>
 #include <linux/cache.h>
-#include <linux/bootmem.h>
 #include <linux/screen_info.h>
 #include <linux/init.h>
 #include <linux/kexec.h>
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index d190612b8f33..3a703e5d4e32 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -19,7 +19,7 @@
 
 #include <linux/gfp.h>
 #include <linux/acpi.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/cache.h>
 #include <linux/export.h>
 #include <linux/slab.h>
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index d8d73073835f..9d9582cac6c4 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -22,7 +22,6 @@
 #include <linux/errno.h>
 #include <linux/swap.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
 #include <linux/cache.h>
 #include <linux/mman.h>
 #include <linux/nodemask.h>
diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index 6a65a2912d36..63527e585aac 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -11,7 +11,6 @@
  */
 
 #define pr_fmt(fmt) "kasan: " fmt
-#include <linux/bootmem.h>
 #include <linux/kasan.h>
 #include <linux/kernel.h>
 #include <linux/sched/task.h>
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 0bff116c07a8..27a31efd9e8e 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -20,7 +20,6 @@
 #define pr_fmt(fmt) "NUMA: " fmt
 
 #include <linux/acpi.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/module.h>
 #include <linux/of.h>
diff --git a/arch/c6x/kernel/setup.c b/arch/c6x/kernel/setup.c
index 05d96a9541b5..2e1c0ea22eb0 100644
--- a/arch/c6x/kernel/setup.c
+++ b/arch/c6x/kernel/setup.c
@@ -11,7 +11,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/memblock.h>
 #include <linux/seq_file.h>
-#include <linux/bootmem.h>
 #include <linux/clkdev.h>
 #include <linux/initrd.h>
 #include <linux/kernel.h>
diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c
index 3383df8b3508..af5ada0520be 100644
--- a/arch/c6x/mm/init.c
+++ b/arch/c6x/mm/init.c
@@ -11,7 +11,7 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/module.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #ifdef CONFIG_BLK_DEV_RAM
 #include <linux/blkdev.h>
 #endif
diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index 34e2df5c0d6d..b32bfa1fe99e 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -18,7 +18,6 @@
 #include <linux/console.h>
 #include <linux/errno.h>
 #include <linux/string.h>
-#include <linux/bootmem.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/of.h>
diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c
index f2bf4487aabd..6519252ac4db 100644
--- a/arch/h8300/mm/init.c
+++ b/arch/h8300/mm/init.c
@@ -30,7 +30,7 @@
 #include <linux/init.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/gfp.h>
 
 #include <asm/setup.h>
diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c
index 706699374444..38eaa7b703e7 100644
--- a/arch/hexagon/kernel/dma.c
+++ b/arch/hexagon/kernel/dma.c
@@ -19,7 +19,7 @@
  */
 
 #include <linux/dma-noncoherent.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/genalloc.h>
 #include <linux/module.h>
 #include <asm/page.h>
diff --git a/arch/hexagon/kernel/setup.c b/arch/hexagon/kernel/setup.c
index dc8c7e75b5d1..b3c3e04d4e57 100644
--- a/arch/hexagon/kernel/setup.c
+++ b/arch/hexagon/kernel/setup.c
@@ -20,7 +20,7 @@
 
 #include <linux/init.h>
 #include <linux/delay.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/mmzone.h>
 #include <linux/mm.h>
 #include <linux/seq_file.h>
diff --git a/arch/hexagon/mm/init.c b/arch/hexagon/mm/init.c
index 88643faf3981..1719ede9e9bd 100644
--- a/arch/hexagon/mm/init.c
+++ b/arch/hexagon/mm/init.c
@@ -20,7 +20,6 @@
 
 #include <linux/init.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <asm/atomic.h>
 #include <linux/highmem.h>
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
index 39f4433a6f0e..bec762a9b418 100644
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@@ -12,7 +12,7 @@
 #include <linux/smp.h>
 #include <linux/delay.h>
 #include <linux/crash_dump.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/kexec.h>
 #include <linux/elfcore.h>
 #include <linux/sysctl.h>
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
index f77d80edddfe..8f106638913c 100644
--- a/arch/ia64/kernel/efi.c
+++ b/arch/ia64/kernel/efi.c
@@ -23,7 +23,7 @@
  *	Skip non-WB memory and ignore empty memory ranges.
  */
 #include <linux/module.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/crash_dump.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 6b51c88e3578..b49fe6f618ed 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -6,7 +6,7 @@
 #ifdef CONFIG_VIRTUAL_MEM_MAP
 #include <linux/compiler.h>
 #include <linux/export.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 EXPORT_SYMBOL(min_low_pfn);	/* defined by bootmem.c, but not exported by generic code */
 EXPORT_SYMBOL(max_low_pfn);	/* defined by bootmem.c, but not exported by generic code */
 #endif
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 550243a94b5d..fe6e4946672e 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -90,7 +90,7 @@
 #include <linux/slab.h>
 #include <linux/smp.h>
 #include <linux/string.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <asm/delay.h>
 #include <asm/hw_irq.h>
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 71209766c47f..9a6603f8e409 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -77,7 +77,7 @@
 #include <linux/sched/task.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/acpi.h>
 #include <linux/timer.h>
 #include <linux/module.h>
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index dfe40cbdf3b3..45f956ad715a 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -14,7 +14,7 @@
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/kallsyms.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/acpi.h>
 #include <linux/timer.h>
 #include <linux/module.h>
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 0e6c2d9fb498..583a3746d70b 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -27,7 +27,6 @@
 #include <linux/init.h>
 
 #include <linux/acpi.h>
-#include <linux/bootmem.h>
 #include <linux/console.h>
 #include <linux/delay.h>
 #include <linux/cpu.h>
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index 74fe317477e6..51ec944b036c 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -24,7 +24,7 @@
 
 #include <linux/module.h>
 #include <linux/acpi.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/cpu.h>
 #include <linux/delay.h>
 #include <linux/init.h>
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 9b820f7a6a98..e311ee13e61d 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -19,7 +19,7 @@
 #include <linux/node.h>
 #include <linux/slab.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/nodemask.h>
 #include <linux/notifier.h>
 #include <linux/export.h>
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
index e04efa088902..7601fe0622d2 100644
--- a/arch/ia64/kernel/unwind.c
+++ b/arch/ia64/kernel/unwind.c
@@ -28,7 +28,7 @@
  *	  acquired, then the read-write lock must be acquired first.
  */
 #include <linux/module.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/elf.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 9e5c23a6b8b4..6e447234205c 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -14,7 +14,6 @@
  * Routines used by ia64 machines with contiguous (or virtually contiguous)
  * memory.
  */
-#include <linux/bootmem.h>
 #include <linux/efi.h>
 #include <linux/memblock.h>
 #include <linux/mm.h>
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index 70609f823960..8a965784340c 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -19,7 +19,6 @@
 #include <linux/mm.h>
 #include <linux/nmi.h>
 #include <linux/swap.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/acpi.h>
 #include <linux/efi.h>
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 43ea4a47163d..d5e12ff1d73c 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -8,7 +8,6 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 
-#include <linux/bootmem.h>
 #include <linux/efi.h>
 #include <linux/elf.h>
 #include <linux/memblock.h>
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index aa19b7ac8222..3861d6e32d5f 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -15,7 +15,7 @@
 #include <linux/mm.h>
 #include <linux/node.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/module.h>
 #include <asm/mmzone.h>
 #include <asm/numa.h>
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 5554863b4c9b..ab545daff7c3 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -21,7 +21,7 @@
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/slab.h>
 
 #include <asm/delay.h>
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 5d71800df431..196a0dd7ff97 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -20,7 +20,7 @@
 #include <linux/ioport.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/export.h>
 
 #include <asm/machvec.h>
diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c
index 9146192b86f5..9900e6d4add6 100644
--- a/arch/ia64/sn/kernel/bte.c
+++ b/arch/ia64/sn/kernel/bte.c
@@ -16,7 +16,7 @@
 #include <asm/nodedata.h>
 #include <asm/delay.h>
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/string.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c
index 8b05d5581615..98f55220c67d 100644
--- a/arch/ia64/sn/kernel/io_common.c
+++ b/arch/ia64/sn/kernel/io_common.c
@@ -6,7 +6,7 @@
  * Copyright (C) 2006 Silicon Graphics, Inc. All rights reserved.
  */
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <asm/sn/types.h>
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index ab2564f95199..71ad6b0ccab4 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -20,7 +20,7 @@
 #include <linux/mm.h>
 #include <linux/serial.h>
 #include <linux/irq.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/mmzone.h>
 #include <linux/interrupt.h>
 #include <linux/acpi.h>
diff --git a/arch/m68k/atari/stram.c b/arch/m68k/atari/stram.c
index 1089d67df315..6ffc204eb07d 100644
--- a/arch/m68k/atari/stram.c
+++ b/arch/m68k/atari/stram.c
@@ -17,7 +17,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/pagemap.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/mount.h>
 #include <linux/blkdev.h>
 #include <linux/module.h>
diff --git a/arch/m68k/coldfire/m54xx.c b/arch/m68k/coldfire/m54xx.c
index adad03ca6e11..360c723c0ae6 100644
--- a/arch/m68k/coldfire/m54xx.c
+++ b/arch/m68k/coldfire/m54xx.c
@@ -16,7 +16,7 @@
 #include <linux/io.h>
 #include <linux/mm.h>
 #include <linux/clk.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <asm/pgalloc.h>
 #include <asm/machdep.h>
 #include <asm/coldfire.h>
diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c
index 5d3596c180f9..a1a3eaeaf58c 100644
--- a/arch/m68k/kernel/setup_mm.c
+++ b/arch/m68k/kernel/setup_mm.c
@@ -20,7 +20,6 @@
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c
index cfd5475bfc31..3c5def10d486 100644
--- a/arch/m68k/kernel/setup_no.c
+++ b/arch/m68k/kernel/setup_no.c
@@ -27,7 +27,6 @@
 #include <linux/console.h>
 #include <linux/errno.h>
 #include <linux/string.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
diff --git a/arch/m68k/kernel/uboot.c b/arch/m68k/kernel/uboot.c
index 107082877064..1b4c562753da 100644
--- a/arch/m68k/kernel/uboot.c
+++ b/arch/m68k/kernel/uboot.c
@@ -16,7 +16,7 @@
 #include <linux/console.h>
 #include <linux/errno.h>
 #include <linux/string.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/initrd.h>
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index ae49ae4d3049..933c33e76a48 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -17,7 +17,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/gfp.h>
 
 #include <asm/setup.h>
diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c
index 38a1d92dd555..0de4999a3810 100644
--- a/arch/m68k/mm/mcfmmu.c
+++ b/arch/m68k/mm/mcfmmu.c
@@ -13,7 +13,6 @@
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/string.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 
 #include <asm/setup.h>
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 2113eec8dbf9..7497cf30bf1c 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -18,7 +18,6 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/gfp.h>
 
diff --git a/arch/m68k/mm/sun3mmu.c b/arch/m68k/mm/sun3mmu.c
index 19c05ab9824d..f736db48a2e1 100644
--- a/arch/m68k/mm/sun3mmu.c
+++ b/arch/m68k/mm/sun3mmu.c
@@ -16,7 +16,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <asm/setup.h>
 #include <linux/uaccess.h>
diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c
index 79a2bb857906..542c4404861c 100644
--- a/arch/m68k/sun3/config.c
+++ b/arch/m68k/sun3/config.c
@@ -15,7 +15,7 @@
 #include <linux/tty.h>
 #include <linux/console.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/platform_device.h>
 
 #include <asm/oplib.h>
diff --git a/arch/m68k/sun3/dvma.c b/arch/m68k/sun3/dvma.c
index 5f92c72b05c3..a2c1c9304895 100644
--- a/arch/m68k/sun3/dvma.c
+++ b/arch/m68k/sun3/dvma.c
@@ -11,7 +11,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/list.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
diff --git a/arch/m68k/sun3/mmu_emu.c b/arch/m68k/sun3/mmu_emu.c
index d30da12a1702..582a1284059a 100644
--- a/arch/m68k/sun3/mmu_emu.c
+++ b/arch/m68k/sun3/mmu_emu.c
@@ -13,7 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/ptrace.h>
 #include <linux/delay.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/bitops.h>
 #include <linux/module.h>
 #include <linux/sched/mm.h>
diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c
index 72d94585b52e..8be8b750c629 100644
--- a/arch/m68k/sun3/sun3dvma.c
+++ b/arch/m68k/sun3/sun3dvma.c
@@ -7,7 +7,7 @@
  * Contains common routines for sun3/sun3x DVMA management.
  */
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
diff --git a/arch/m68k/sun3x/dvma.c b/arch/m68k/sun3x/dvma.c
index b2acbc862f60..89e630e66555 100644
--- a/arch/m68k/sun3x/dvma.c
+++ b/arch/m68k/sun3x/dvma.c
@@ -15,7 +15,7 @@
 #include <linux/init.h>
 #include <linux/bitops.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/vmalloc.h>
 
 #include <asm/sun3x.h>
diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c
index d801cc5f5b95..45e0a1aa9357 100644
--- a/arch/microblaze/mm/consistent.c
+++ b/arch/microblaze/mm/consistent.c
@@ -28,7 +28,7 @@
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/delay.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/highmem.h>
 #include <linux/pci.h>
 #include <linux/interrupt.h>
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 9989740d397a..8c14988f52f2 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -7,10 +7,9 @@
  * for more details.
  */
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
-#include <linux/memblock.h>
 #include <linux/mm.h> /* mem_init */
 #include <linux/initrd.h>
 #include <linux/pagemap.h>
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index 2ffd171af8b6..6b89a66ec1a5 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -20,7 +20,7 @@
 #include <linux/pci.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/shmem_fs.h>
 #include <linux/list.h>
diff --git a/arch/mips/ar7/memory.c b/arch/mips/ar7/memory.c
index 0332f0514d05..80390a9ec264 100644
--- a/arch/mips/ar7/memory.c
+++ b/arch/mips/ar7/memory.c
@@ -16,7 +16,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/pfn.h>
diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index 4c7a93f4039a..9728abcb18fa 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -14,7 +14,7 @@
 
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/err.h>
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
diff --git a/arch/mips/bcm63xx/prom.c b/arch/mips/bcm63xx/prom.c
index 7019e2967009..77a836e661c9 100644
--- a/arch/mips/bcm63xx/prom.c
+++ b/arch/mips/bcm63xx/prom.c
@@ -7,7 +7,7 @@
  */
 
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/smp.h>
 #include <asm/bootinfo.h>
 #include <asm/bmips.h>
diff --git a/arch/mips/bcm63xx/setup.c b/arch/mips/bcm63xx/setup.c
index 2be9caaa2085..e28ee9a7cc7e 100644
--- a/arch/mips/bcm63xx/setup.c
+++ b/arch/mips/bcm63xx/setup.c
@@ -9,7 +9,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/delay.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/ioport.h>
 #include <linux/pm.h>
 #include <asm/bootinfo.h>
diff --git a/arch/mips/bmips/setup.c b/arch/mips/bmips/setup.c
index 6329c5f780d6..1738a06396f9 100644
--- a/arch/mips/bmips/setup.c
+++ b/arch/mips/bmips/setup.c
@@ -9,7 +9,7 @@
 
 #include <linux/init.h>
 #include <linux/bitops.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/clk-provider.h>
 #include <linux/ioport.h>
 #include <linux/kernel.h>
diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c
index c44c1a654471..e8eb60ed99f2 100644
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@@ -11,7 +11,7 @@
  * Copyright (C) 2010 Cavium Networks, Inc.
  */
 #include <linux/dma-direct.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/swiotlb.h>
 #include <linux/types.h>
 #include <linux/init.h>
diff --git a/arch/mips/dec/prom/memory.c b/arch/mips/dec/prom/memory.c
index a2acc6454cf3..5073d2ed78bb 100644
--- a/arch/mips/dec/prom/memory.c
+++ b/arch/mips/dec/prom/memory.c
@@ -8,7 +8,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/types.h>
 
 #include <asm/addrspace.h>
diff --git a/arch/mips/emma/common/prom.c b/arch/mips/emma/common/prom.c
index cae42259d6da..675337b8a4a0 100644
--- a/arch/mips/emma/common/prom.c
+++ b/arch/mips/emma/common/prom.c
@@ -22,7 +22,7 @@
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <asm/addrspace.h>
 #include <asm/bootinfo.h>
diff --git a/arch/mips/fw/arc/memory.c b/arch/mips/fw/arc/memory.c
index dd9496f26e6a..429b7f8d2aeb 100644
--- a/arch/mips/fw/arc/memory.c
+++ b/arch/mips/fw/arc/memory.c
@@ -17,7 +17,7 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/swap.h>
 
 #include <asm/sgialib.h>
diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c
index 0a0aaf39fd16..4c41ed0a637e 100644
--- a/arch/mips/jazz/jazzdma.c
+++ b/arch/mips/jazz/jazzdma.c
@@ -13,7 +13,7 @@
 #include <linux/export.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/spinlock.h>
 #include <linux/gfp.h>
 #include <linux/dma-direct.h>
diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c
index 2c7288041a99..81845ba04835 100644
--- a/arch/mips/kernel/crash.c
+++ b/arch/mips/kernel/crash.c
@@ -3,7 +3,7 @@
 #include <linux/smp.h>
 #include <linux/reboot.h>
 #include <linux/kexec.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/crash_dump.h>
 #include <linux/delay.h>
 #include <linux/irq.h>
diff --git a/arch/mips/kernel/crash_dump.c b/arch/mips/kernel/crash_dump.c
index a8657d29c62e..01b2bd95ba1f 100644
--- a/arch/mips/kernel/crash_dump.c
+++ b/arch/mips/kernel/crash_dump.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/highmem.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/crash_dump.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c
index 89950b7bf536..93b8e0b4332f 100644
--- a/arch/mips/kernel/prom.c
+++ b/arch/mips/kernel/prom.c
@@ -12,7 +12,7 @@
 #include <linux/export.h>
 #include <linux/errno.h>
 #include <linux/types.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/debugfs.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 31522d3bc8bf..41c1683761bb 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -15,7 +15,6 @@
 #include <linux/export.h>
 #include <linux/screen_info.h>
 #include <linux/memblock.h>
-#include <linux/bootmem.h>
 #include <linux/initrd.h>
 #include <linux/root_dev.h>
 #include <linux/highmem.h>
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 623dc18f7f2f..0f852e1b5891 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -28,7 +28,6 @@
 #include <linux/smp.h>
 #include <linux/spinlock.h>
 #include <linux/kallsyms.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/interrupt.h>
 #include <linux/ptrace.h>
diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c
index 0bef238d2c0c..6176b9acba95 100644
--- a/arch/mips/kernel/vpe.c
+++ b/arch/mips/kernel/vpe.c
@@ -26,7 +26,7 @@
 #include <linux/moduleloader.h>
 #include <linux/interrupt.h>
 #include <linux/poll.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <asm/mipsregs.h>
 #include <asm/mipsmtregs.h>
 #include <asm/cacheflush.h>
diff --git a/arch/mips/kvm/commpage.c b/arch/mips/kvm/commpage.c
index f43629979a0e..5812e6145801 100644
--- a/arch/mips/kvm/commpage.c
+++ b/arch/mips/kvm/commpage.c
@@ -14,7 +14,7 @@
 #include <linux/err.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
diff --git a/arch/mips/kvm/dyntrans.c b/arch/mips/kvm/dyntrans.c
index f8e772564d74..d77b61b3d6ee 100644
--- a/arch/mips/kvm/dyntrans.c
+++ b/arch/mips/kvm/dyntrans.c
@@ -16,7 +16,7 @@
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <asm/cacheflush.h>
 
 #include "commpage.h"
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index 4144bfaef137..ec9ed23bca7f 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -15,7 +15,7 @@
 #include <linux/kvm_host.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/random.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
diff --git a/arch/mips/kvm/interrupt.c b/arch/mips/kvm/interrupt.c
index aa0a1a00faf6..7257e8b6f5a9 100644
--- a/arch/mips/kvm/interrupt.c
+++ b/arch/mips/kvm/interrupt.c
@@ -13,7 +13,7 @@
 #include <linux/err.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
 
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index f7ea8e21656b..1fcc4d149054 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -18,7 +18,7 @@
 #include <linux/vmalloc.h>
 #include <linux/sched/signal.h>
 #include <linux/fs.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <asm/fpu.h>
 #include <asm/page.h>
diff --git a/arch/mips/lantiq/prom.c b/arch/mips/lantiq/prom.c
index d984bd5c2ec5..14d4c5e2b42f 100644
--- a/arch/mips/lantiq/prom.c
+++ b/arch/mips/lantiq/prom.c
@@ -8,7 +8,7 @@
 
 #include <linux/export.h>
 #include <linux/clk.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/of_fdt.h>
 
 #include <asm/bootinfo.h>
diff --git a/arch/mips/lasat/prom.c b/arch/mips/lasat/prom.c
index 37b8fc5b9ac9..5ce1407de2d5 100644
--- a/arch/mips/lasat/prom.c
+++ b/arch/mips/lasat/prom.c
@@ -8,7 +8,7 @@
 #include <linux/ctype.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/ioport.h>
 #include <asm/bootinfo.h>
 #include <asm/lasat/lasat.h>
diff --git a/arch/mips/loongson64/common/init.c b/arch/mips/loongson64/common/init.c
index 6ef17120722f..c073fbcb9805 100644
--- a/arch/mips/loongson64/common/init.c
+++ b/arch/mips/loongson64/common/init.c
@@ -8,7 +8,7 @@
  * option) any later version.
  */
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <asm/bootinfo.h>
 #include <asm/traps.h>
 #include <asm/smp-ops.h>
diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c
index 703ad4536fe0..622761878cd1 100644
--- a/arch/mips/loongson64/loongson-3/numa.c
+++ b/arch/mips/loongson64/loongson-3/numa.c
@@ -18,7 +18,6 @@
 #include <linux/nodemask.h>
 #include <linux/swap.h>
 #include <linux/memblock.h>
-#include <linux/bootmem.h>
 #include <linux/pfn.h>
 #include <linux/highmem.h>
 #include <asm/page.h>
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 0893b6136498..b521d8e2d359 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -22,7 +22,7 @@
 #include <linux/ptrace.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/highmem.h>
 #include <linux/swap.h>
 #include <linux/proc_fs.h>
diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c
index b19a3c506b1e..e2a33adc0f29 100644
--- a/arch/mips/mm/pgtable-32.c
+++ b/arch/mips/mm/pgtable-32.c
@@ -7,7 +7,7 @@
  */
 #include <linux/init.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/highmem.h>
 #include <asm/fixmap.h>
 #include <asm/pgtable.h>
diff --git a/arch/mips/mti-malta/malta-memory.c b/arch/mips/mti-malta/malta-memory.c
index a47556723b85..868921adef1d 100644
--- a/arch/mips/mti-malta/malta-memory.c
+++ b/arch/mips/mti-malta/malta-memory.c
@@ -12,7 +12,7 @@
  *          Steven J. Hill <sjhill@mips.com>
  */
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/string.h>
 
 #include <asm/bootinfo.h>
diff --git a/arch/mips/netlogic/xlp/dt.c b/arch/mips/netlogic/xlp/dt.c
index b5ba83f4c646..c856f2a3ea42 100644
--- a/arch/mips/netlogic/xlp/dt.c
+++ b/arch/mips/netlogic/xlp/dt.c
@@ -33,7 +33,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <linux/of_fdt.h>
 #include <linux/of_platform.h>
diff --git a/arch/mips/pci/pci-legacy.c b/arch/mips/pci/pci-legacy.c
index 3c3b1e6abb53..687513880fbf 100644
--- a/arch/mips/pci/pci-legacy.c
+++ b/arch/mips/pci/pci-legacy.c
@@ -11,7 +11,7 @@
 #include <linux/bug.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/types.h>
diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index c2e94cf5ecda..e68b44b27c0d 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -11,7 +11,7 @@
 #include <linux/bug.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/types.h>
diff --git a/arch/mips/ralink/of.c b/arch/mips/ralink/of.c
index 1ada8492733b..d544e7b07f7a 100644
--- a/arch/mips/ralink/of.c
+++ b/arch/mips/ralink/of.c
@@ -14,7 +14,7 @@
 #include <linux/sizes.h>
 #include <linux/of_fdt.h>
 #include <linux/kernel.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/of_platform.h>
 #include <linux/of_address.h>
 
diff --git a/arch/mips/rb532/prom.c b/arch/mips/rb532/prom.c
index 6484e4a4597b..361a690facbf 100644
--- a/arch/mips/rb532/prom.c
+++ b/arch/mips/rb532/prom.c
@@ -29,7 +29,7 @@
 #include <linux/export.h>
 #include <linux/string.h>
 #include <linux/console.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/ioport.h>
 #include <linux/blkdev.h>
 
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index cb1f1a6a166d..d8b8444d6795 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -18,7 +18,6 @@
 #include <linux/export.h>
 #include <linux/nodemask.h>
 #include <linux/swap.h>
-#include <linux/bootmem.h>
 #include <linux/pfn.h>
 #include <linux/highmem.h>
 #include <asm/page.h>
diff --git a/arch/mips/sibyte/common/cfe.c b/arch/mips/sibyte/common/cfe.c
index 092fb2a6ec4a..12a780f251e1 100644
--- a/arch/mips/sibyte/common/cfe.c
+++ b/arch/mips/sibyte/common/cfe.c
@@ -21,7 +21,7 @@
 #include <linux/linkage.h>
 #include <linux/mm.h>
 #include <linux/blkdev.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/pm.h>
 #include <linux/smp.h>
 
diff --git a/arch/mips/sibyte/swarm/setup.c b/arch/mips/sibyte/swarm/setup.c
index 152ca71cc2d7..3b034b7178d6 100644
--- a/arch/mips/sibyte/swarm/setup.c
+++ b/arch/mips/sibyte/swarm/setup.c
@@ -23,7 +23,7 @@
 
 #include <linux/spinlock.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/blkdev.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
diff --git a/arch/mips/txx9/rbtx4938/prom.c b/arch/mips/txx9/rbtx4938/prom.c
index bcb469247e8c..2b36a2ee744c 100644
--- a/arch/mips/txx9/rbtx4938/prom.c
+++ b/arch/mips/txx9/rbtx4938/prom.c
@@ -11,7 +11,7 @@
  */
 
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <asm/bootinfo.h>
 #include <asm/txx9/generic.h>
 #include <asm/txx9/rbtx4938.h>
diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c
index 63a1a5ef5219..eacc79024879 100644
--- a/arch/nds32/kernel/setup.c
+++ b/arch/nds32/kernel/setup.c
@@ -2,9 +2,8 @@
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #include <linux/cpu.h>
-#include <linux/bootmem.h>
-#include <linux/seq_file.h>
 #include <linux/memblock.h>
+#include <linux/seq_file.h>
 #include <linux/console.h>
 #include <linux/screen_info.h>
 #include <linux/delay.h>
diff --git a/arch/nds32/mm/highmem.c b/arch/nds32/mm/highmem.c
index e17cb8a69315..022779af6148 100644
--- a/arch/nds32/mm/highmem.c
+++ b/arch/nds32/mm/highmem.c
@@ -6,7 +6,7 @@
 #include <linux/sched.h>
 #include <linux/smp.h>
 #include <linux/interrupt.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <asm/fixmap.h>
 #include <asm/tlbflush.h>
 
diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c
index 66d3e9cf498d..131104bd2538 100644
--- a/arch/nds32/mm/init.c
+++ b/arch/nds32/mm/init.c
@@ -7,12 +7,11 @@
 #include <linux/errno.h>
 #include <linux/swap.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/mman.h>
 #include <linux/nodemask.h>
 #include <linux/initrd.h>
 #include <linux/highmem.h>
-#include <linux/memblock.h>
 
 #include <asm/sections.h>
 #include <asm/setup.h>
diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c
index a6d4f7530247..232a36b511aa 100644
--- a/arch/nios2/kernel/prom.c
+++ b/arch/nios2/kernel/prom.c
@@ -25,7 +25,7 @@
 
 #include <linux/init.h>
 #include <linux/types.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
 #include <linux/io.h>
diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c
index 2d0011ddd4d5..6bbd4ae2beb0 100644
--- a/arch/nios2/kernel/setup.c
+++ b/arch/nios2/kernel/setup.c
@@ -16,7 +16,6 @@
 #include <linux/sched.h>
 #include <linux/sched/task.h>
 #include <linux/console.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/initrd.h>
 #include <linux/of_fdt.h>
diff --git a/arch/nios2/mm/init.c b/arch/nios2/mm/init.c
index 12923501d94f..16cea5776b87 100644
--- a/arch/nios2/mm/init.c
+++ b/arch/nios2/mm/init.c
@@ -23,7 +23,7 @@
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/slab.h>
 #include <linux/binfmts.h>
 
diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index e17fcd83120f..c605bdad1746 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -30,13 +30,12 @@
 #include <linux/delay.h>
 #include <linux/console.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/seq_file.h>
 #include <linux/serial.h>
 #include <linux/initrd.h>
 #include <linux/of_fdt.h>
 #include <linux/of.h>
-#include <linux/memblock.h>
 #include <linux/device.h>
 
 #include <asm/sections.h>
diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index 91a6a9ab7598..d157310eb377 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -26,12 +26,11 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/smp.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/init.h>
 #include <linux/delay.h>
 #include <linux/blkdev.h>	/* for initrd_* */
 #include <linux/pagemap.h>
-#include <linux/memblock.h>
 
 #include <asm/segment.h>
 #include <asm/pgalloc.h>
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 7e7a3126c5e9..2d7cffcaa476 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -14,7 +14,6 @@
 
 #include <linux/module.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/gfp.h>
 #include <linux/delay.h>
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index d39ec3a4550a..274bd1442dd9 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -10,7 +10,7 @@
 #include <linux/capability.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/syscalls.h>
 #include <linux/irq.h>
 #include <linux/list.h>
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 2b56d1f30387..93ee3703b42f 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -33,7 +33,6 @@
 #include <linux/serial_8250.h>
 #include <linux/percpu.h>
 #include <linux/memblock.h>
-#include <linux/bootmem.h>
 #include <linux/of_platform.h>
 #include <linux/hugetlb.h>
 #include <asm/debugfs.h>
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 9216c3a7fcfc..2a51e4cc8246 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -29,10 +29,9 @@
 #include <linux/unistd.h>
 #include <linux/serial.h>
 #include <linux/serial_8250.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/pci.h>
 #include <linux/lockdep.h>
-#include <linux/memblock.h>
 #include <linux/memory.h>
 #include <linux/nmi.h>
 
diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c
index bf87d6e13369..5b61704447c1 100644
--- a/arch/powerpc/lib/alloc.c
+++ b/arch/powerpc/lib/alloc.c
@@ -2,7 +2,7 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/slab.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/string.h>
 #include <asm/setup.h>
 
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a7226ed9cae6..8cf035e68378 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -15,7 +15,6 @@
 #include <linux/export.h>
 #include <linux/of_fdt.h>
 #include <linux/memblock.h>
-#include <linux/bootmem.h>
 #include <linux/moduleparam.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index b3fe79064a69..0a64fffabee1 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -27,12 +27,11 @@
 #include <linux/mm.h>
 #include <linux/stddef.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/highmem.h>
 #include <linux/initrd.h>
 #include <linux/pagemap.h>
 #include <linux/suspend.h>
-#include <linux/memblock.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 954f1986af4d..67b9d7b669a1 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -44,7 +44,7 @@
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/slab.h>
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index f04f15f9d232..3a048e98a132 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -11,7 +11,7 @@
 #define pr_fmt(fmt) "numa: " fmt
 
 #include <linux/threads.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/init.h>
 #include <linux/mm.h>
 #include <linux/mmzone.h>
@@ -19,7 +19,6 @@
 #include <linux/nodemask.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
-#include <linux/memblock.h>
 #include <linux/of.h>
 #include <linux/pfn.h>
 #include <linux/cpuset.h>
diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c
index f45b369177a4..f3391be7c762 100644
--- a/arch/powerpc/platforms/powermac/nvram.c
+++ b/arch/powerpc/platforms/powermac/nvram.c
@@ -18,7 +18,7 @@
 #include <linux/errno.h>
 #include <linux/adb.h>
 #include <linux/pmu.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/completion.h>
 #include <linux/spinlock.h>
 #include <asm/sections.h>
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 23a67b545b70..aba81cbf0b36 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -17,11 +17,10 @@
 #include <linux/delay.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/msi.h>
-#include <linux/memblock.h>
 #include <linux/iommu.h>
 #include <linux/rculist.h>
 #include <linux/sizes.h>
diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c
index 12519857a33c..658bfab3350b 100644
--- a/arch/powerpc/platforms/ps3/setup.c
+++ b/arch/powerpc/platforms/ps3/setup.c
@@ -24,7 +24,7 @@
 #include <linux/root_dev.h>
 #include <linux/console.h>
 #include <linux/export.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <asm/machdep.h>
 #include <asm/firmware.h>
diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c
index 349a9ff6ca5b..2444feda831f 100644
--- a/arch/powerpc/sysdev/msi_bitmap.c
+++ b/arch/powerpc/sysdev/msi_bitmap.c
@@ -12,7 +12,7 @@
 #include <linux/kernel.h>
 #include <linux/kmemleak.h>
 #include <linux/bitmap.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <asm/msi_bitmap.h>
 #include <asm/setup.h>
 
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index d58c111099b3..1d9bfaff60bc 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -13,9 +13,8 @@
 
 #include <linux/init.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
-#include <linux/initrd.h>
 #include <linux/memblock.h>
+#include <linux/initrd.h>
 #include <linux/swap.h>
 #include <linux/sizes.h>
 
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index d17566a8c76f..97eae3871868 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -13,10 +13,9 @@
 #include <linux/mm.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/elf.h>
 #include <asm/asm-offsets.h>
-#include <linux/memblock.h>
 #include <asm/os_info.h>
 #include <asm/elf.h>
 #include <asm/ipl.h>
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 781c1053a773..72dd23ef771b 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -34,7 +34,6 @@
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/initrd.h>
-#include <linux/bootmem.h>
 #include <linux/root_dev.h>
 #include <linux/console.h>
 #include <linux/kernel_stat.h>
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 44f9a7d6450b..f82b3d3c36e2 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -20,7 +20,7 @@
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
 #include <linux/workqueue.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/mm.h>
@@ -35,7 +35,6 @@
 #include <linux/sched/hotplug.h>
 #include <linux/sched/task_stack.h>
 #include <linux/crash_dump.h>
-#include <linux/memblock.h>
 #include <linux/kprobes.h>
 #include <asm/asm-offsets.h>
 #include <asm/diag.h>
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 799a91882a76..8992b04c0ade 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -8,7 +8,7 @@
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
 #include <linux/workqueue.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/uaccess.h>
 #include <linux/sysctl.h>
 #include <linux/cpuset.h>
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index ec31b48a42a5..ebe748a9f472 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -18,7 +18,7 @@
 #include <linux/user.h>
 #include <linux/elf.h>
 #include <linux/security.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/compat.h>
 #include <asm/asm-offsets.h>
 #include <asm/pgtable.h>
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 84111a43ea29..eba2def3414d 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -16,7 +16,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/export.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/ctype.h>
 #include <linux/ioport.h>
 #include <asm/diag.h>
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 873f6ee1c46d..76d0708438e9 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -21,7 +21,7 @@
 #include <linux/smp.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/memory.h>
 #include <linux/pfn.h>
 #include <linux/poison.h>
@@ -29,7 +29,6 @@
 #include <linux/export.h>
 #include <linux/cma.h>
 #include <linux/gfp.h>
-#include <linux/memblock.h>
 #include <asm/processor.h>
 #include <linux/uaccess.h>
 #include <asm/pgtable.h>
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 04638b0b9ef1..0472e27febdf 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -4,14 +4,13 @@
  *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
  */
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/pfn.h>
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
-#include <linux/memblock.h>
 #include <asm/cacheflush.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
diff --git a/arch/s390/numa/mode_emu.c b/arch/s390/numa/mode_emu.c
index 5a381fc8e958..bfba273c32c0 100644
--- a/arch/s390/numa/mode_emu.c
+++ b/arch/s390/numa/mode_emu.c
@@ -22,7 +22,6 @@
 #include <linux/kernel.h>
 #include <linux/cpumask.h>
 #include <linux/memblock.h>
-#include <linux/bootmem.h>
 #include <linux/node.h>
 #include <linux/memory.h>
 #include <linux/slab.h>
diff --git a/arch/s390/numa/numa.c b/arch/s390/numa/numa.c
index 297f5d8b0890..ae0d9e889534 100644
--- a/arch/s390/numa/numa.c
+++ b/arch/s390/numa/numa.c
@@ -13,7 +13,6 @@
 #include <linux/kernel.h>
 #include <linux/mmzone.h>
 #include <linux/cpumask.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/slab.h>
 #include <linux/node.h>
diff --git a/arch/s390/numa/toptree.c b/arch/s390/numa/toptree.c
index 7f61cc3fd4d1..71a608cd4f61 100644
--- a/arch/s390/numa/toptree.c
+++ b/arch/s390/numa/toptree.c
@@ -8,7 +8,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/cpumask.h>
 #include <linux/list.h>
 #include <linux/list_sort.h>
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 21447f866415..c8c13c777162 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -11,12 +11,11 @@
 #include <linux/swap.h>
 #include <linux/init.h>
 #include <linux/gfp.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/proc_fs.h>
 #include <linux/pagemap.h>
 #include <linux/percpu.h>
 #include <linux/io.h>
-#include <linux/memblock.h>
 #include <linux/dma-mapping.h>
 #include <linux/export.h>
 #include <asm/mmu_context.h>
diff --git a/arch/sh/mm/ioremap_fixed.c b/arch/sh/mm/ioremap_fixed.c
index 927a1294c465..07e744d75fa0 100644
--- a/arch/sh/mm/ioremap_fixed.c
+++ b/arch/sh/mm/ioremap_fixed.c
@@ -14,7 +14,7 @@
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/io.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/proc_fs.h>
 #include <asm/fixmap.h>
 #include <asm/page.h>
diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c
index a41526bd91e2..9a26b442f820 100644
--- a/arch/sparc/kernel/mdesc.c
+++ b/arch/sparc/kernel/mdesc.c
@@ -5,13 +5,11 @@
  */
 #include <linux/kernel.h>
 #include <linux/types.h>
-#include <linux/memblock.h>
 #include <linux/log2.h>
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/miscdevice.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/export.h>
 #include <linux/refcount.h>
diff --git a/arch/sparc/kernel/prom_32.c b/arch/sparc/kernel/prom_32.c
index 4389944735c6..d41e2a749c5d 100644
--- a/arch/sparc/kernel/prom_32.c
+++ b/arch/sparc/kernel/prom_32.c
@@ -19,7 +19,7 @@
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <asm/prom.h>
 #include <asm/oplib.h>
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index de7c87b153fe..cd2825cb8420 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -32,7 +32,7 @@
 #include <linux/initrd.h>
 #include <linux/module.h>
 #include <linux/start_kernel.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <asm/io.h>
 #include <asm/processor.h>
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 6cc80d0f4b9f..4792e08ad36b 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -22,7 +22,7 @@
 #include <linux/cache.h>
 #include <linux/jiffies.h>
 #include <linux/profile.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/vmalloc.h>
 #include <linux/ftrace.h>
 #include <linux/cpu.h>
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index 880714565c40..d900952bfc5f 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -22,7 +22,6 @@
 #include <linux/initrd.h>
 #include <linux/init.h>
 #include <linux/highmem.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/pagemap.h>
 #include <linux/poison.h>
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index a8c3453195e6..3c8aac21f426 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -11,7 +11,7 @@
 #include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/initrd.h>
@@ -25,7 +25,6 @@
 #include <linux/sort.h>
 #include <linux/ioport.h>
 #include <linux/percpu.h>
-#include <linux/memblock.h>
 #include <linux/mmzone.h>
 #include <linux/gfp.h>
 
diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index b48fea5ad9ef..a6142c5abf61 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c
@@ -11,7 +11,7 @@
 
 #include <linux/seq_file.h>
 #include <linux/spinlock.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/pagemap.h>
 #include <linux/vmalloc.h>
 #include <linux/kdebug.h>
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index ef19a391214f..673816880cce 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -6,7 +6,7 @@
  * Licensed under the GPL.
  */
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
 #include <linux/inetdevice.h>
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 20442d20bd09..2b4dded11a7a 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/version.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
 #include <linux/inetdevice.h>
diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c
index 844056cf313e..3678f5b05e42 100644
--- a/arch/um/kernel/initrd.c
+++ b/arch/um/kernel/initrd.c
@@ -4,7 +4,7 @@
  */
 
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/initrd.h>
 #include <asm/types.h>
 #include <init.h>
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 2c672a8f4571..1067469ba2ea 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -5,7 +5,6 @@
 
 #include <linux/stddef.h>
 #include <linux/module.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index 296a91a04598..5bf56af4d5b9 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -4,7 +4,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/pfn.h>
diff --git a/arch/unicore32/kernel/hibernate.c b/arch/unicore32/kernel/hibernate.c
index 9969ec374abb..29b71c68eb7c 100644
--- a/arch/unicore32/kernel/hibernate.c
+++ b/arch/unicore32/kernel/hibernate.c
@@ -13,7 +13,7 @@
 
 #include <linux/gfp.h>
 #include <linux/suspend.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c
index 9f163f976315..b2c38b32ea57 100644
--- a/arch/unicore32/kernel/setup.c
+++ b/arch/unicore32/kernel/setup.c
@@ -17,7 +17,7 @@
 #include <linux/utsname.h>
 #include <linux/initrd.h>
 #include <linux/console.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/seq_file.h>
 #include <linux/screen_info.h>
 #include <linux/init.h>
@@ -27,7 +27,6 @@
 #include <linux/smp.h>
 #include <linux/fs.h>
 #include <linux/proc_fs.h>
-#include <linux/memblock.h>
 #include <linux/elf.h>
 #include <linux/io.h>
 
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index 44fd0e8fbe87..cf4eb9481fd6 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -11,13 +11,12 @@
 #include <linux/errno.h>
 #include <linux/swap.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/mman.h>
 #include <linux/nodemask.h>
 #include <linux/initrd.h>
 #include <linux/highmem.h>
 #include <linux/gfp.h>
-#include <linux/memblock.h>
 #include <linux/sort.h>
 #include <linux/dma-mapping.h>
 #include <linux/export.h>
diff --git a/arch/unicore32/mm/mmu.c b/arch/unicore32/mm/mmu.c
index 18b355a20f0b..040a8c279761 100644
--- a/arch/unicore32/mm/mmu.c
+++ b/arch/unicore32/mm/mmu.c
@@ -17,7 +17,6 @@
 #include <linux/nodemask.h>
 #include <linux/memblock.h>
 #include <linux/fs.h>
-#include <linux/bootmem.h>
 #include <linux/io.h>
 
 #include <asm/cputype.h>
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index f5ea6415b778..7f5d212551d4 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -32,7 +32,7 @@
 #include <linux/dmi.h>
 #include <linux/irq.h>
 #include <linux/slab.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/ioport.h>
 #include <linux/pci.h>
 #include <linux/efi-bgrt.h>
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index f1915b744052..ca13851f0570 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -7,7 +7,6 @@
  */
 
 #include <linux/acpi.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/dmi.h>
 #include <linux/cpumask.h>
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ab731ab09f06..32b2b7a41ef5 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -20,7 +20,7 @@
 #include <linux/acpi_pmtmr.h>
 #include <linux/clockchips.h>
 #include <linux/interrupt.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/ftrace.h>
 #include <linux/ioport.h>
 #include <linux/export.h>
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 8c7450900e0e..5fbc57e4b0b9 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -47,7 +47,7 @@
 #include <linux/kthread.h>
 #include <linux/jiffies.h>	/* time_after() */
 #include <linux/slab.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <asm/irqdomain.h>
 #include <asm/io.h>
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 660d0b22e962..cbbd57ae06ee 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1,7 +1,7 @@
 /* cpu_feature_enabled() cannot be used this early */
 #define USE_EARLY_PGTABLE_L5
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/linkage.h>
 #include <linux/bitops.h>
 #include <linux/kernel.h>
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index a0ec4c37265a..68ff62bffbab 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -9,11 +9,10 @@
  * allocation code routines via a platform independent interface (memblock, etc.).
  */
 #include <linux/crash_dump.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/suspend.h>
 #include <linux/acpi.h>
 #include <linux/firmware-map.h>
-#include <linux/memblock.h>
 #include <linux/sort.h>
 
 #include <asm/e820/api.h>
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index f1c5eb99d445..3482460d984d 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -11,7 +11,6 @@
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/delay.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/kernel_stat.h>
 #include <linux/mc146818rtc.h>
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 7ba73fe0d917..f4562fcec681 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -3,7 +3,7 @@
 #include <linux/dma-debug.h>
 #include <linux/dmar.h>
 #include <linux/export.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/gfp.h>
 #include <linux/pci.h>
 
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 71c0b01d93b1..bd08b9e1c9e2 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -5,7 +5,7 @@
 #include <linux/cache.h>
 #include <linux/init.h>
 #include <linux/swiotlb.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/dma-direct.h>
 #include <linux/mem_encrypt.h>
 
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 637982efecd8..9b158b4716d2 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -20,7 +20,7 @@
 #include <linux/notifier.h>
 #include <linux/sched.h>
 #include <linux/gfp.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/nmi.h>
 
 #include <asm/fixmap.h>
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 7005f89bf3b2..b74e7bfed6ab 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -30,7 +30,6 @@
 #include <linux/sfi.h>
 #include <linux/apm_bios.h>
 #include <linux/initrd.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/seq_file.h>
 #include <linux/console.h>
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 483412fb8a24..e8796fcd7e5a 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -4,7 +4,6 @@
 #include <linux/kernel.h>
 #include <linux/export.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/percpu.h>
 #include <linux/kexec.h>
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5369d7fac797..a9134d1910b9 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -49,7 +49,7 @@
 #include <linux/sched/hotplug.h>
 #include <linux/sched/task_stack.h>
 #include <linux/percpu.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/err.h>
 #include <linux/nmi.h>
 #include <linux/tboot.h>
diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c
index 75730ce01f8d..285aaa62d153 100644
--- a/arch/x86/kernel/tce_64.c
+++ b/arch/x86/kernel/tce_64.c
@@ -30,7 +30,6 @@
 #include <linux/string.h>
 #include <linux/pci.h>
 #include <linux/dma-mapping.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <asm/tce.h>
 #include <asm/calgary.h>
diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c
index 048c761d97b0..058b2f36b3a6 100644
--- a/arch/x86/mm/amdtopology.c
+++ b/arch/x86/mm/amdtopology.c
@@ -12,7 +12,6 @@
 #include <linux/string.h>
 #include <linux/nodemask.h>
 #include <linux/memblock.h>
-#include <linux/bootmem.h>
 
 #include <asm/io.h>
 #include <linux/pci_ids.h>
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b24eb4eb9984..71d4b9d4d43f 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -8,7 +8,7 @@
 #include <linux/sched/task_stack.h>	/* task_stack_*(), ...		*/
 #include <linux/kdebug.h>		/* oops_begin/end, ...		*/
 #include <linux/extable.h>		/* search_exception_tables	*/
-#include <linux/bootmem.h>		/* max_low_pfn			*/
+#include <linux/memblock.h>		/* max_low_pfn			*/
 #include <linux/kprobes.h>		/* NOKPROBE_SYMBOL, ...		*/
 #include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
 #include <linux/perf_event.h>		/* perf_sw_event		*/
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 62915a5e0fa2..0d4bdcb84da5 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -1,7 +1,7 @@
 #include <linux/highmem.h>
 #include <linux/export.h>
 #include <linux/swap.h> /* for totalram_pages */
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 void *kmap(struct page *page)
 {
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index faca978ebf9d..ef99f3892e1f 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -3,7 +3,6 @@
 #include <linux/ioport.h>
 #include <linux/swap.h>
 #include <linux/memblock.h>
-#include <linux/bootmem.h>	/* for max_low_pfn */
 #include <linux/swapfile.h>
 #include <linux/swapops.h>
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 3bbe5f58a67d..49ecf5ecf6d3 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -23,7 +23,6 @@
 #include <linux/pci.h>
 #include <linux/pfn.h>
 #include <linux/poison.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/proc_fs.h>
 #include <linux/memory_hotplug.h>
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bfb0bedc21d3..5fab264948c2 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -20,7 +20,6 @@
 #include <linux/init.h>
 #include <linux/initrd.h>
 #include <linux/pagemap.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/proc_fs.h>
 #include <linux/pci.h>
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 24e0920a9b25..5378d10f1d31 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -6,7 +6,7 @@
  * (C) Copyright 1995 1996 Linus Torvalds
  */
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/ioport.h>
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 8f87499124b8..04a9cf6b034f 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -5,10 +5,9 @@
 /* cpu_feature_enabled() cannot be used this early */
 #define USE_EARLY_PGTABLE_L5
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/kasan.h>
 #include <linux/kdebug.h>
-#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/sched/task.h>
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index 61db77b0eda9..3f452ffed7e9 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -23,6 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/random.h>
+#include <linux/memblock.h>
 
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 16e37d712ffd..1308f5408bf7 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -4,7 +4,6 @@
 #include <linux/mm.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/mmzone.h>
 #include <linux/ctype.h>
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index e8a4a09e20f1..f2bd3d61e16b 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -22,7 +22,6 @@
  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/init.h>
 
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 066f3511d5f1..59d80160fa5a 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -3,7 +3,7 @@
  * Generic VM initialization for x86-64 NUMA setups.
  * Copyright 2002,2003 Andi Kleen, SuSE Labs.
  */
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include "numa_internal.h"
 
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index b54d52a2d00a..a80fdd7fb40f 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -6,7 +6,6 @@
 #include <linux/errno.h>
 #include <linux/topology.h>
 #include <linux/memblock.h>
-#include <linux/bootmem.h>
 #include <asm/dma.h>
 
 #include "numa_internal.h"
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index a25588ad75ef..08f8f76a4852 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -5,7 +5,7 @@
  * Clears the a test pte bit on random pages in the direct mapping,
  * then reverts and compares page tables forwards and afterwards.
  */
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/kthread.h>
 #include <linux/random.h>
 #include <linux/kernel.h>
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 62bb30b4bd2a..f799076e3d57 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -3,7 +3,7 @@
  * Thanks to Ben LaHaise for precious feedback.
  */
 #include <linux/highmem.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 3d0c83ef6aab..08013524fba1 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -8,7 +8,7 @@
  */
 
 #include <linux/seq_file.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/debugfs.h>
 #include <linux/ioport.h>
 #include <linux/kernel.h>
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c
index 7f9acb68324c..bdc98150d4db 100644
--- a/arch/x86/mm/physaddr.c
+++ b/arch/x86/mm/physaddr.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/mmdebug.h>
 #include <linux/export.h>
 #include <linux/mm.h>
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index ed4ac215305d..8cd66152cdb0 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -32,7 +32,7 @@
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/errno.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <asm/pat.h>
 #include <asm/e820/api.h>
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 9061babfbc83..7ae939e353cd 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -36,9 +36,8 @@
 #include <linux/efi.h>
 #include <linux/efi-bgrt.h>
 #include <linux/export.h>
-#include <linux/bootmem.h>
-#include <linux/slab.h>
 #include <linux/memblock.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/uaccess.h>
 #include <linux/time.h>
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index e8da7f492970..cf0347f61b21 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -23,7 +23,7 @@
 #include <linux/mm.h>
 #include <linux/types.h>
 #include <linux/spinlock.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/ioport.h>
 #include <linux/mc146818rtc.h>
 #include <linux/efi.h>
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 4b70d0f5a803..95e77a667ba5 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -8,7 +8,6 @@
 #include <linux/efi.h>
 #include <linux/slab.h>
 #include <linux/memblock.h>
-#include <linux/bootmem.h>
 #include <linux/acpi.h>
 #include <linux/dmi.h>
 
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
index 140cd76ee897..115c8e4173bb 100644
--- a/arch/x86/platform/olpc/olpc_dt.c
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -17,7 +17,7 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/of_pdt.h>
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index 15695e30f982..be15bdcb20df 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -8,7 +8,7 @@
 
 #include <linux/gfp.h>
 #include <linux/suspend.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 67b2f31a1265..e996e8e744cb 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #endif
 #include <linux/cpu.h>
 #include <linux/kexec.h>
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index ec7a4209f310..2f6787fc7106 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -23,7 +23,7 @@
 #include <linux/start_kernel.h>
 #include <linux/sched.h>
 #include <linux/kprobes.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/export.h>
 #include <linux/mm.h>
 #include <linux/page-flags.h>
@@ -31,7 +31,6 @@
 #include <linux/console.h>
 #include <linux/pci.h>
 #include <linux/gfp.h>
-#include <linux/memblock.h>
 #include <linux/edd.h>
 #include <linux/frame.h>
 
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index b3e11afed25b..b06731705529 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -67,7 +67,6 @@
 #include <linux/hash.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c
index 21f13e9aabe1..5ca440a74316 100644
--- a/arch/xtensa/kernel/pci.c
+++ b/arch/xtensa/kernel/pci.c
@@ -24,7 +24,7 @@
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <asm/pci-bridge.h>
 #include <asm/platform.h>
diff --git a/arch/xtensa/mm/cache.c b/arch/xtensa/mm/cache.c
index 9220dcde7520..b27359e2a464 100644
--- a/arch/xtensa/mm/cache.c
+++ b/arch/xtensa/mm/cache.c
@@ -21,7 +21,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/ptrace.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
 
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index f7fbe6334939..9750a48f491b 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -18,7 +18,7 @@
 
 #include <linux/kernel.h>
 #include <linux/errno.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/gfp.h>
 #include <linux/highmem.h>
 #include <linux/swap.h>
diff --git a/arch/xtensa/mm/kasan_init.c b/arch/xtensa/mm/kasan_init.c
index 1a30a258ccd0..6b95ca43aec0 100644
--- a/arch/xtensa/mm/kasan_init.c
+++ b/arch/xtensa/mm/kasan_init.c
@@ -8,11 +8,10 @@
  * Copyright (C) 2017 Cadence Design Systems Inc.
  */
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/init_task.h>
 #include <linux/kasan.h>
 #include <linux/kernel.h>
-#include <linux/memblock.h>
 #include <asm/initialize_mmu.h>
 #include <asm/tlbflush.h>
 #include <asm/traps.h>
diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c
index f33a1ff2662a..a4dcfd39bc5c 100644
--- a/arch/xtensa/mm/mmu.c
+++ b/arch/xtensa/mm/mmu.c
@@ -4,7 +4,7 @@
  *
  * Extracted from init.c
  */
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/percpu.h>
 #include <linux/init.h>
 #include <linux/string.h>
diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c
index 206b9d4591e8..190846dddc67 100644
--- a/arch/xtensa/platforms/iss/network.c
+++ b/arch/xtensa/platforms/iss/network.c
@@ -30,7 +30,7 @@
 #include <linux/etherdevice.h>
 #include <linux/interrupt.h>
 #include <linux/ioctl.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/ethtool.h>
 #include <linux/rtnetlink.h>
 #include <linux/platform_device.h>
diff --git a/arch/xtensa/platforms/iss/setup.c b/arch/xtensa/platforms/iss/setup.c
index 58709e89a8ed..c14cc673976c 100644
--- a/arch/xtensa/platforms/iss/setup.c
+++ b/arch/xtensa/platforms/iss/setup.c
@@ -16,7 +16,7 @@
  * option) any later version.
  *
  */
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/stddef.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
diff --git a/block/blk-settings.c b/block/blk-settings.c
index ffd459969689..696c04c1ab6c 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -6,7 +6,7 @@
 #include <linux/init.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
-#include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
+#include <linux/memblock.h>	/* for max_pfn/max_low_pfn */
 #include <linux/gcd.h>
 #include <linux/lcm.h>
 #include <linux/jiffies.h>
diff --git a/block/bounce.c b/block/bounce.c
index ec0d99995f5f..cf49fe02f65c 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -18,7 +18,7 @@
 #include <linux/init.h>
 #include <linux/hash.h>
 #include <linux/highmem.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/printk.h>
 #include <asm/tlbflush.h>
 
diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c
index 85167603b9c9..274699463b4f 100644
--- a/drivers/acpi/numa.c
+++ b/drivers/acpi/numa.c
@@ -27,7 +27,6 @@
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/acpi.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/numa.h>
 #include <linux/nodemask.h>
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c
index a3d012b08fc5..61203eebf3a1 100644
--- a/drivers/acpi/tables.c
+++ b/drivers/acpi/tables.c
@@ -31,9 +31,8 @@
 #include <linux/irq.h>
 #include <linux/errno.h>
 #include <linux/acpi.h>
-#include <linux/bootmem.h>
-#include <linux/earlycpio.h>
 #include <linux/memblock.h>
+#include <linux/earlycpio.h>
 #include <linux/initrd.h>
 #include "internal.h"
 
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 23cf4427f425..41b91af95afb 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -16,7 +16,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/dma-mapping.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c
index 5c54d3734daf..5b2867a33b98 100644
--- a/drivers/clk/ti/clk.c
+++ b/drivers/clk/ti/clk.c
@@ -23,7 +23,7 @@
 #include <linux/of_address.h>
 #include <linux/list.h>
 #include <linux/regmap.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/device.h>
 
 #include "clock.h"
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index f2483548cde9..099d83e4e910 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -5,7 +5,7 @@
 #include <linux/ctype.h>
 #include <linux/dmi.h>
 #include <linux/efi.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/random.h>
 #include <asm/dmi.h>
 #include <asm/unaligned.h>
diff --git a/drivers/firmware/efi/apple-properties.c b/drivers/firmware/efi/apple-properties.c
index 2b675f788b61..ac1654f74dc7 100644
--- a/drivers/firmware/efi/apple-properties.c
+++ b/drivers/firmware/efi/apple-properties.c
@@ -20,7 +20,7 @@
 
 #define pr_fmt(fmt) "apple-properties: " fmt
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/efi.h>
 #include <linux/io.h>
 #include <linux/platform_data/x86/apple.h>
diff --git a/drivers/firmware/iscsi_ibft_find.c b/drivers/firmware/iscsi_ibft_find.c
index 2224f1dc074b..72d9ea18270b 100644
--- a/drivers/firmware/iscsi_ibft_find.c
+++ b/drivers/firmware/iscsi_ibft_find.c
@@ -18,7 +18,7 @@
  * GNU General Public License for more details.
  */
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/blkdev.h>
 #include <linux/ctype.h>
 #include <linux/device.h>
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index 03cead6d5f97..2a23453f005a 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -19,7 +19,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/types.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
 
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index f9f69f7111a9..44bd5b9166bb 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -11,7 +11,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  */
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/bug.h>
 #include <linux/clk.h>
 #include <linux/component.h>
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 676c029494e4..0e780848f59b 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -13,7 +13,7 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU General Public License for more details.
  */
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/bug.h>
 #include <linux/clk.h>
 #include <linux/component.h>
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 0069f9084f9f..880a81c82b7a 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -23,7 +23,7 @@
 #include <linux/kernel.h>
 #include <linux/device.h>
 #include <linux/dmapool.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/jiffies.h>
@@ -38,7 +38,6 @@
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/slab.h>
-#include <linux/memblock.h>
 #include <linux/sched/signal.h>
 
 #include <asm/byteorder.h>
diff --git a/drivers/mtd/ar7part.c b/drivers/mtd/ar7part.c
index fc15ec58230a..0d33cf0842ad 100644
--- a/drivers/mtd/ar7part.c
+++ b/drivers/mtd/ar7part.c
@@ -25,7 +25,7 @@
 
 #include <linux/mtd/mtd.h>
 #include <linux/mtd/partitions.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/module.h>
 
 #include <uapi/linux/magic.h>
diff --git a/drivers/net/arcnet/arc-rimi.c b/drivers/net/arcnet/arc-rimi.c
index a07e24970be4..11c5bad95226 100644
--- a/drivers/net/arcnet/arc-rimi.c
+++ b/drivers/net/arcnet/arc-rimi.c
@@ -33,7 +33,7 @@
 #include <linux/ioport.h>
 #include <linux/delay.h>
 #include <linux/netdevice.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
diff --git a/drivers/net/arcnet/com20020-isa.c b/drivers/net/arcnet/com20020-isa.c
index 38fa60ddaf2e..28510e33924f 100644
--- a/drivers/net/arcnet/com20020-isa.c
+++ b/drivers/net/arcnet/com20020-isa.c
@@ -38,7 +38,7 @@
 #include <linux/netdevice.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/io.h>
 
 #include "arcdevice.h"
diff --git a/drivers/net/arcnet/com90io.c b/drivers/net/arcnet/com90io.c
index 4e56aaf2b984..2c546013a980 100644
--- a/drivers/net/arcnet/com90io.c
+++ b/drivers/net/arcnet/com90io.c
@@ -34,7 +34,7 @@
 #include <linux/ioport.h>
 #include <linux/delay.h>
 #include <linux/netdevice.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index ffe62a7ae19b..bb532aae0d92 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -11,7 +11,6 @@
 #include <linux/crc32.h>
 #include <linux/kernel.h>
 #include <linux/initrd.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/mutex.h>
 #include <linux/of.h>
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index 01e23b85e798..49ae2aa744d6 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -5,7 +5,7 @@
 
 #define pr_fmt(fmt) "### dt-test ### " fmt
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/clk.h>
 #include <linux/err.h>
 #include <linux/errno.h>
diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c
index 16a4e8528bbc..8f3a2eeb28dc 100644
--- a/drivers/s390/char/fs3270.c
+++ b/drivers/s390/char/fs3270.c
@@ -8,7 +8,7 @@
  *     Copyright IBM Corp. 2003, 2009
  */
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/console.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c
index 5b8af2782282..2b0c36c2c568 100644
--- a/drivers/s390/char/tty3270.c
+++ b/drivers/s390/char/tty3270.c
@@ -19,7 +19,7 @@
 #include <linux/workqueue.h>
 
 #include <linux/slab.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/compat.h>
 
 #include <asm/ccwdev.h>
diff --git a/drivers/s390/cio/cmf.c b/drivers/s390/cio/cmf.c
index 8af4948dae80..72dd2471ec1e 100644
--- a/drivers/s390/cio/cmf.c
+++ b/drivers/s390/cio/cmf.c
@@ -13,7 +13,7 @@
 #define KMSG_COMPONENT "cio"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/device.h>
 #include <linux/init.h>
 #include <linux/list.h>
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 8f5c1d7f751a..97b6f197f007 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -9,7 +9,7 @@
 
 #include <linux/kernel_stat.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/err.h>
 #include <linux/virtio.h>
 #include <linux/virtio_config.h>
diff --git a/drivers/sfi/sfi_core.c b/drivers/sfi/sfi_core.c
index 153b3f3cc795..a5136901dd8a 100644
--- a/drivers/sfi/sfi_core.c
+++ b/drivers/sfi/sfi_core.c
@@ -59,7 +59,7 @@
 #define KMSG_COMPONENT "SFI"
 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/errno.h>
diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
index 79ad30d34949..b929c7ae3a27 100644
--- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
@@ -24,7 +24,7 @@
 #include <linux/console.h>
 #include <linux/sysrq.h>
 #include <linux/device.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/dma-mapping.h>
 #include <linux/fs_uart_pd.h>
 #include <linux/of_address.h>
diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c b/drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c
index 4eba17f3d293..56fc527015cb 100644
--- a/drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c
+++ b/drivers/tty/serial/cpm_uart/cpm_uart_cpm1.c
@@ -19,7 +19,7 @@
 #include <linux/console.h>
 #include <linux/sysrq.h>
 #include <linux/device.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/dma-mapping.h>
 
 #include <asm/io.h>
diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c b/drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c
index e3bff068dc3c..6a1cd03bfe39 100644
--- a/drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c
+++ b/drivers/tty/serial/cpm_uart/cpm_uart_cpm2.c
@@ -19,7 +19,7 @@
 #include <linux/console.h>
 #include <linux/sysrq.h>
 #include <linux/device.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/dma-mapping.h>
 
 #include <asm/io.h>
diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c
index ddc5fa88f268..d2652dccc699 100644
--- a/drivers/usb/early/xhci-dbc.c
+++ b/drivers/usb/early/xhci-dbc.c
@@ -12,7 +12,6 @@
 #include <linux/console.h>
 #include <linux/pci_regs.h>
 #include <linux/pci_ids.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/io.h>
 #include <asm/pci-direct.h>
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index e12bb256036f..a3f5cbfcd4a1 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -44,7 +44,7 @@
 #include <linux/cred.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
 #include <linux/mutex.h>
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index e6c1934734b7..93194f3e7540 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -28,7 +28,7 @@
 #include <linux/irq.h>
 #include <linux/moduleparam.h>
 #include <linux/string.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/slab.h>
 #include <linux/irqnr.h>
 #include <linux/pci.h>
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 84575baceebc..f15f89df1f36 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -33,7 +33,7 @@
 
 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index c5f26a87d238..2a7f545bd0b5 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -35,7 +35,6 @@
 
 #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
 
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/dma-direct.h>
 #include <linux/export.h>
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 55988b8418ee..5165aa82bf7d 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -68,7 +68,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/kernel.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/swap.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
diff --git a/fs/dcache.c b/fs/dcache.c
index c2e443fb76ae..2593153471cf 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -26,7 +26,7 @@
 #include <linux/export.h>
 #include <linux/security.h>
 #include <linux/seqlock.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/bit_spinlock.h>
 #include <linux/rculist_bl.h>
 #include <linux/list_lru.h>
diff --git a/fs/inode.c b/fs/inode.c
index 9b808986d440..9e198f00b64c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -10,7 +10,7 @@
 #include <linux/swap.h>
 #include <linux/security.h>
 #include <linux/cdev.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/fsnotify.h>
 #include <linux/mount.h>
 #include <linux/posix_acl.h>
diff --git a/fs/namespace.c b/fs/namespace.c
index d86830c86ce8..98d27da43304 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -23,7 +23,7 @@
 #include <linux/uaccess.h>
 #include <linux/proc_ns.h>
 #include <linux/magic.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/task_work.h>
 #include <linux/sched/task.h>
 
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index d297fe4472a9..bbcc185062bb 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -22,7 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/printk.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 792c78a49174..6c517b11acf8 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/compiler.h>
 #include <linux/fs.h>
 #include <linux/init.h>
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 0377a104495b..3fe90443c1bb 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -16,7 +16,7 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/printk.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/init.h>
 #include <linux/crash_dump.h>
 #include <linux/list.h>
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
deleted file mode 100644
index b58873a567b2..000000000000
--- a/include/linux/bootmem.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
- */
-#ifndef _LINUX_BOOTMEM_H
-#define _LINUX_BOOTMEM_H
-
-#include <linux/mmzone.h>
-#include <linux/mm_types.h>
-#include <asm/dma.h>
-#include <asm/processor.h>
-
-/*
- *  simple boot-time physical memory area allocator.
- */
-
-extern unsigned long max_low_pfn;
-extern unsigned long min_low_pfn;
-
-/*
- * highest page
- */
-extern unsigned long max_pfn;
-/*
- * highest possible page
- */
-extern unsigned long long max_possible_pfn;
-
-extern unsigned long memblock_free_all(void);
-extern void reset_node_managed_pages(pg_data_t *pgdat);
-extern void reset_all_zones_managed_pages(void);
-
-/* We are using top down, so it is safe to use 0 here */
-#define BOOTMEM_LOW_LIMIT 0
-
-#ifndef ARCH_LOW_ADDRESS_LIMIT
-#define ARCH_LOW_ADDRESS_LIMIT  0xffffffffUL
-#endif
-
-/* FIXME: use MEMBLOCK_ALLOC_* variants here */
-#define BOOTMEM_ALLOC_ACCESSIBLE	0
-#define BOOTMEM_ALLOC_ANYWHERE		(~(phys_addr_t)0)
-
-/* FIXME: Move to memblock.h at a point where we remove nobootmem.c */
-void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align,
-				      phys_addr_t min_addr,
-				      phys_addr_t max_addr, int nid);
-void *memblock_alloc_try_nid_nopanic(phys_addr_t size,
-		phys_addr_t align, phys_addr_t min_addr,
-		phys_addr_t max_addr, int nid);
-void *memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align,
-		phys_addr_t min_addr, phys_addr_t max_addr, int nid);
-void __memblock_free_early(phys_addr_t base, phys_addr_t size);
-void __memblock_free_late(phys_addr_t base, phys_addr_t size);
-
-static inline void * __init memblock_alloc(
-					phys_addr_t size,  phys_addr_t align)
-{
-	return memblock_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT,
-					    BOOTMEM_ALLOC_ACCESSIBLE,
-					    NUMA_NO_NODE);
-}
-
-static inline void * __init memblock_alloc_raw(
-					phys_addr_t size,  phys_addr_t align)
-{
-	return memblock_alloc_try_nid_raw(size, align, BOOTMEM_LOW_LIMIT,
-					    BOOTMEM_ALLOC_ACCESSIBLE,
-					    NUMA_NO_NODE);
-}
-
-static inline void * __init memblock_alloc_from(
-		phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
-{
-	return memblock_alloc_try_nid(size, align, min_addr,
-				      BOOTMEM_ALLOC_ACCESSIBLE,
-				      NUMA_NO_NODE);
-}
-
-static inline void * __init memblock_alloc_nopanic(
-					phys_addr_t size, phys_addr_t align)
-{
-	return memblock_alloc_try_nid_nopanic(size, align,
-						    BOOTMEM_LOW_LIMIT,
-						    BOOTMEM_ALLOC_ACCESSIBLE,
-						    NUMA_NO_NODE);
-}
-
-static inline void * __init memblock_alloc_low(
-					phys_addr_t size, phys_addr_t align)
-{
-	return memblock_alloc_try_nid(size, align,
-						   BOOTMEM_LOW_LIMIT,
-						   ARCH_LOW_ADDRESS_LIMIT,
-						   NUMA_NO_NODE);
-}
-static inline void * __init memblock_alloc_low_nopanic(
-					phys_addr_t size, phys_addr_t align)
-{
-	return memblock_alloc_try_nid_nopanic(size, align,
-						   BOOTMEM_LOW_LIMIT,
-						   ARCH_LOW_ADDRESS_LIMIT,
-						   NUMA_NO_NODE);
-}
-
-static inline void * __init memblock_alloc_from_nopanic(
-		phys_addr_t size, phys_addr_t align, phys_addr_t min_addr)
-{
-	return memblock_alloc_try_nid_nopanic(size, align, min_addr,
-						    BOOTMEM_ALLOC_ACCESSIBLE,
-						    NUMA_NO_NODE);
-}
-
-static inline void * __init memblock_alloc_node(
-		phys_addr_t size, phys_addr_t align, int nid)
-{
-	return memblock_alloc_try_nid(size, align, BOOTMEM_LOW_LIMIT,
-					    BOOTMEM_ALLOC_ACCESSIBLE, nid);
-}
-
-static inline void * __init memblock_alloc_node_nopanic(
-						phys_addr_t size, int nid)
-{
-	return memblock_alloc_try_nid_nopanic(size, 0, BOOTMEM_LOW_LIMIT,
-						    BOOTMEM_ALLOC_ACCESSIBLE,
-						    nid);
-}
-
-static inline void __init memblock_free_early(
-					phys_addr_t base, phys_addr_t size)
-{
-	__memblock_free_early(base, size);
-}
-
-static inline void __init memblock_free_early_nid(
-				phys_addr_t base, phys_addr_t size, int nid)
-{
-	__memblock_free_early(base, size);
-}
-
-static inline void __init memblock_free_late(
-					phys_addr_t base, phys_addr_t size)
-{
-	__memblock_free_late(base, size);
-}
-
-extern void *alloc_large_system_hash(const char *tablename,
-				     unsigned long bucketsize,
-				     unsigned long numentries,
-				     int scale,
-				     int flags,
-				     unsigned int *_hash_shift,
-				     unsigned int *_hash_mask,
-				     unsigned long low_limit,
-				     unsigned long high_limit);
-
-#define HASH_EARLY	0x00000001	/* Allocating during early boot? */
-#define HASH_SMALL	0x00000002	/* sub-page allocation allowed, min
-					 * shift passed via *_hash_shift */
-#define HASH_ZERO	0x00000004	/* Zero allocated hash table */
-
-/* Only NUMA needs hash distribution. 64bit NUMA architectures have
- * sufficient vmalloc space.
- */
-#ifdef CONFIG_NUMA
-#define HASHDIST_DEFAULT IS_ENABLED(CONFIG_64BIT)
-extern int hashdist;		/* Distribute hashes across NUMA nodes? */
-#else
-#define hashdist (0)
-#endif
-
-
-#endif /* _LINUX_BOOTMEM_H */
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 9d46a7204975..1b4d85879cbe 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -15,6 +15,19 @@
 
 #include <linux/init.h>
 #include <linux/mm.h>
+#include <asm/dma.h>
+
+extern unsigned long max_low_pfn;
+extern unsigned long min_low_pfn;
+
+/*
+ * highest page
+ */
+extern unsigned long max_pfn;
+/*
+ * highest possible page
+ */
+extern unsigned long long max_possible_pfn;
 
 #define INIT_MEMBLOCK_REGIONS	128
 #define INIT_PHYSMEM_REGIONS	4
@@ -119,6 +132,10 @@ int memblock_mark_nomap(phys_addr_t base, phys_addr_t size);
 int memblock_clear_nomap(phys_addr_t base, phys_addr_t size);
 enum memblock_flags choose_memblock_flags(void);
 
+unsigned long memblock_free_all(void);
+void reset_node_managed_pages(pg_data_t *pgdat);
+void reset_all_zones_managed_pages(void);
+
 /* Low level functions */
 int memblock_add_range(struct memblock_type *type,
 		       phys_addr_t base, phys_addr_t size,
@@ -300,11 +317,116 @@ static inline int memblock_get_region_node(const struct memblock_region *r)
 }
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
+/* Flags for memblock allocation APIs */
+#define MEMBLOCK_ALLOC_ANYWHERE	(~(phys_addr_t)0)
+#define MEMBLOCK_ALLOC_ACCESSIBLE	0
+
+/* We are using top down, so it is safe to use 0 here */
+#define MEMBLOCK_LOW_LIMIT 0
+
+#ifndef ARCH_LOW_ADDRESS_LIMIT
+#define ARCH_LOW_ADDRESS_LIMIT  0xffffffffUL
+#endif
+
 phys_addr_t memblock_phys_alloc_nid(phys_addr_t size, phys_addr_t align, int nid);
 phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid);
 
 phys_addr_t memblock_phys_alloc(phys_addr_t size, phys_addr_t align);
 
+void *memblock_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align,
+				 phys_addr_t min_addr, phys_addr_t max_addr,
+				 int nid);
+void *memblock_alloc_try_nid_nopanic(phys_addr_t size, phys_addr_t align,
+				     phys_addr_t min_addr, phys_addr_t max_addr,
+				     int nid);
+void *memblock_alloc_try_nid(phys_addr_t size, phys_addr_t align,
+			     phys_addr_t min_addr, phys_addr_t max_addr,
+			     int nid);
+
+static inline void * __init memblock_alloc(phys_addr_t size,  phys_addr_t align)
+{
+	return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT,
+				      MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_alloc_raw(phys_addr_t size,
+					       phys_addr_t align)
+{
+	return memblock_alloc_try_nid_raw(size, align, MEMBLOCK_LOW_LIMIT,
+					  MEMBLOCK_ALLOC_ACCESSIBLE,
+					  NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_alloc_from(phys_addr_t size,
+						phys_addr_t align,
+						phys_addr_t min_addr)
+{
+	return memblock_alloc_try_nid(size, align, min_addr,
+				      MEMBLOCK_ALLOC_ACCESSIBLE, NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_alloc_nopanic(phys_addr_t size,
+						   phys_addr_t align)
+{
+	return memblock_alloc_try_nid_nopanic(size, align, MEMBLOCK_LOW_LIMIT,
+					      MEMBLOCK_ALLOC_ACCESSIBLE,
+					      NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_alloc_low(phys_addr_t size,
+					       phys_addr_t align)
+{
+	return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT,
+				      ARCH_LOW_ADDRESS_LIMIT, NUMA_NO_NODE);
+}
+static inline void * __init memblock_alloc_low_nopanic(phys_addr_t size,
+						       phys_addr_t align)
+{
+	return memblock_alloc_try_nid_nopanic(size, align, MEMBLOCK_LOW_LIMIT,
+					      ARCH_LOW_ADDRESS_LIMIT,
+					      NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_alloc_from_nopanic(phys_addr_t size,
+							phys_addr_t align,
+							phys_addr_t min_addr)
+{
+	return memblock_alloc_try_nid_nopanic(size, align, min_addr,
+					      MEMBLOCK_ALLOC_ACCESSIBLE,
+					      NUMA_NO_NODE);
+}
+
+static inline void * __init memblock_alloc_node(phys_addr_t size,
+						phys_addr_t align, int nid)
+{
+	return memblock_alloc_try_nid(size, align, MEMBLOCK_LOW_LIMIT,
+				      MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+}
+
+static inline void * __init memblock_alloc_node_nopanic(phys_addr_t size,
+							int nid)
+{
+	return memblock_alloc_try_nid_nopanic(size, 0, MEMBLOCK_LOW_LIMIT,
+					      MEMBLOCK_ALLOC_ACCESSIBLE, nid);
+}
+
+static inline void __init memblock_free_early(phys_addr_t base,
+					      phys_addr_t size)
+{
+	__memblock_free_early(base, size);
+}
+
+static inline void __init memblock_free_early_nid(phys_addr_t base,
+						  phys_addr_t size, int nid)
+{
+	__memblock_free_early(base, size);
+}
+
+static inline void __init memblock_free_late(phys_addr_t base, phys_addr_t size)
+{
+	__memblock_free_late(base, size);
+}
+
 /*
  * Set the allocation direction to bottom-up or top-down.
  */
@@ -323,10 +445,6 @@ static inline bool memblock_bottom_up(void)
 	return memblock.bottom_up;
 }
 
-/* Flags for memblock_alloc_base() amd __memblock_alloc_base() */
-#define MEMBLOCK_ALLOC_ANYWHERE	(~(phys_addr_t)0)
-#define MEMBLOCK_ALLOC_ACCESSIBLE	0
-
 phys_addr_t __init memblock_alloc_range(phys_addr_t size, phys_addr_t align,
 					phys_addr_t start, phys_addr_t end,
 					enum memblock_flags flags);
@@ -432,6 +550,31 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
 	     i < memblock_type->cnt;					\
 	     i++, rgn = &memblock_type->regions[i])
 
+extern void *alloc_large_system_hash(const char *tablename,
+				     unsigned long bucketsize,
+				     unsigned long numentries,
+				     int scale,
+				     int flags,
+				     unsigned int *_hash_shift,
+				     unsigned int *_hash_mask,
+				     unsigned long low_limit,
+				     unsigned long high_limit);
+
+#define HASH_EARLY	0x00000001	/* Allocating during early boot? */
+#define HASH_SMALL	0x00000002	/* sub-page allocation allowed, min
+					 * shift passed via *_hash_shift */
+#define HASH_ZERO	0x00000004	/* Zero allocated hash table */
+
+/* Only NUMA needs hash distribution. 64bit NUMA architectures have
+ * sufficient vmalloc space.
+ */
+#ifdef CONFIG_NUMA
+#define HASHDIST_DEFAULT IS_ENABLED(CONFIG_64BIT)
+extern int hashdist;		/* Distribute hashes across NUMA nodes? */
+#else
+#define hashdist (0)
+#endif
+
 #ifdef CONFIG_MEMTEST
 extern void early_memtest(phys_addr_t start, phys_addr_t end);
 #else
diff --git a/init/main.c b/init/main.c
index 8cef69c61389..51b8e7b8ae5b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -25,7 +25,7 @@
 #include <linux/ioport.h>
 #include <linux/init.h>
 #include <linux/initrd.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/acpi.h>
 #include <linux/console.h>
 #include <linux/nmi.h>
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index f14c376937e5..22a12ab5a5e9 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -4,7 +4,7 @@
  *
  * DMA operations that map physical memory directly without using an IOMMU.
  */
-#include <linux/bootmem.h> /* for max_pfn */
+#include <linux/memblock.h> /* for max_pfn */
 #include <linux/export.h>
 #include <linux/mm.h>
 #include <linux/dma-direct.h>
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 801da67e957b..5731daa09a32 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -40,7 +40,7 @@
 #include <asm/dma.h>
 
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/iommu-helper.h>
 
 #define CREATE_TRACE_POINTS
diff --git a/kernel/futex.c b/kernel/futex.c
index 3e2de8fc1891..f423f9b6577e 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -65,7 +65,7 @@
 #include <linux/sched/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/freezer.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/fault-inject.h>
 
 #include <asm/futex.h>
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 0130e488ebfe..8f36c27c1794 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -4,7 +4,7 @@
 #endif
 
 #include <linux/hash.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/debug_locks.h>
 
 /*
diff --git a/kernel/pid.c b/kernel/pid.c
index cdf63e53a014..b2f6c506035d 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -31,7 +31,7 @@
 #include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/rculist.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/hash.h>
 #include <linux/pid_namespace.h>
 #include <linux/init_task.h>
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 34116a6097be..3c9e365438ad 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -23,7 +23,7 @@
 #include <linux/pm.h>
 #include <linux/device.h>
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/nmi.h>
 #include <linux/syscalls.h>
 #include <linux/console.h>
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 429e4a3833ca..1b2a029360b7 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -31,7 +31,6 @@
 #include <linux/delay.h>
 #include <linux/smp.h>
 #include <linux/security.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/syscalls.h>
 #include <linux/crash_core.h>
diff --git a/kernel/profile.c b/kernel/profile.c
index 9aa2a4445b0d..9c08a2c7cb1d 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -16,7 +16,7 @@
 
 #include <linux/export.h>
 #include <linux/profile.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/notifier.h>
 #include <linux/mm.h>
 #include <linux/cpumask.h>
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 1405cb22e6bc..75b5e7672c4c 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -4,7 +4,7 @@
 #include <linux/bitops.h>
 #include <linux/cpumask.h>
 #include <linux/export.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 /**
  * cpumask_next - get the next cpu in a cpumask
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e35d99844612..c007fb5fb8d5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -15,7 +15,6 @@
 #include <linux/compiler.h>
 #include <linux/cpuset.h>
 #include <linux/mutex.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/sysfs.h>
 #include <linux/slab.h>
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 785a9707786b..c7550eb65922 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -10,11 +10,10 @@
  *
  */
 
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/init.h>
 #include <linux/kasan.h>
 #include <linux/kernel.h>
-#include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/pfn.h>
 #include <linux/slab.h>
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 4f7e4b5a2f08..877de4fa0720 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -92,7 +92,7 @@
 #include <linux/stacktrace.h>
 #include <linux/cache.h>
 #include <linux/percpu.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/pfn.h>
 #include <linux/mmzone.h>
 #include <linux/slab.h>
diff --git a/mm/memblock.c b/mm/memblock.c
index 2ed73245b5da..c655342569f8 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -20,7 +20,6 @@
 #include <linux/kmemleak.h>
 #include <linux/seq_file.h>
 #include <linux/memblock.h>
-#include <linux/bootmem.h>
 
 #include <asm/sections.h>
 #include <linux/io.h>
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 7e6509a53d79..41e326472ef9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -33,7 +33,6 @@
 #include <linux/stop_machine.h>
 #include <linux/hugetlb.h>
 #include <linux/memblock.h>
-#include <linux/bootmem.h>
 #include <linux/compaction.h>
 
 #include <asm/tlbflush.h>
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5f3291601945..ef289fadec0e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -20,7 +20,6 @@
 #include <linux/interrupt.h>
 #include <linux/pagemap.h>
 #include <linux/jiffies.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/compiler.h>
 #include <linux/kernel.h>
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 5323c2ade686..ae44f7adbe07 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/mm.h>
 #include <linux/mmzone.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/page_ext.h>
 #include <linux/memory.h>
 #include <linux/vmalloc.h>
diff --git a/mm/page_idle.c b/mm/page_idle.c
index 6302bc62c27d..b9e4b42b33ab 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/init.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/fs.h>
 #include <linux/sysfs.h>
 #include <linux/kobject.h>
diff --git a/mm/page_owner.c b/mm/page_owner.c
index d80adfe702d3..87bc0dfdb52b 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -3,7 +3,7 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/stacktrace.h>
 #include <linux/page_owner.h>
 #include <linux/jump_label.h>
diff --git a/mm/percpu.c b/mm/percpu.c
index 3050c1d37d37..61cdbb3b3736 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -65,7 +65,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/bitmap.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/err.h>
 #include <linux/lcm.h>
 #include <linux/list.h>
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 7408cabed61a..7fec05796796 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -20,7 +20,6 @@
  */
 #include <linux/mm.h>
 #include <linux/mmzone.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/memremap.h>
 #include <linux/highmem.h>
diff --git a/mm/sparse.c b/mm/sparse.c
index b139fbc61d10..ab2ac45e0440 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -5,7 +5,6 @@
 #include <linux/mm.h>
 #include <linux/slab.h>
 #include <linux/mmzone.h>
-#include <linux/bootmem.h>
 #include <linux/memblock.h>
 #include <linux/compiler.h>
 #include <linux/highmem.h>
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index f5c9ef2586de..411dd7a90046 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -19,7 +19,7 @@
 #include <linux/slab.h>
 #include <linux/wait.h>
 #include <linux/vmalloc.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 
 #include <net/addrconf.h>
 #include <net/inet_connection_sock.h>
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1834818ed07b..9e6bc4d6daa7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -262,7 +262,7 @@
 #include <linux/net.h>
 #include <linux/socket.h>
 #include <linux/random.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/highmem.h>
 #include <linux/swap.h>
 #include <linux/cache.h>
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ca3ed931f2a9..1976fddb9e00 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -81,7 +81,7 @@
 
 #include <linux/uaccess.h>
 #include <asm/ioctls.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/highmem.h>
 #include <linux/swap.h>
 #include <linux/types.h>
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e948db29ab53..9b277bd36d1a 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -46,7 +46,7 @@
 #include <linux/netdevice.h>
 #include <linux/inetdevice.h>
 #include <linux/seq_file.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/highmem.h>
 #include <linux/swap.h>
 #include <linux/slab.h>
diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c
index 2ad33ce1ea17..eca8d84d99bf 100644
--- a/net/xfrm/xfrm_hash.c
+++ b/net/xfrm/xfrm_hash.c
@@ -6,7 +6,7 @@
 
 #include <linux/kernel.h>
 #include <linux/mm.h>
-#include <linux/bootmem.h>
+#include <linux/memblock.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
 #include <linux/xfrm.h>
-- 
cgit v1.2.3


From 7e1c4e27928e5f87b9b1eaf06dc31773b2f1e7f1 Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Tue, 30 Oct 2018 15:09:57 -0700
Subject: memblock: stop using implicit alignment to SMP_CACHE_BYTES

When a memblock allocation APIs are called with align = 0, the alignment
is implicitly set to SMP_CACHE_BYTES.

Implicit alignment is done deep in the memblock allocator and it can
come as a surprise.  Not that such an alignment would be wrong even
when used incorrectly but it is better to be explicit for the sake of
clarity and the prinicple of the least surprise.

Replace all such uses of memblock APIs with the 'align' parameter
explicitly set to SMP_CACHE_BYTES and stop implicit alignment assignment
in the memblock internal allocation functions.

For the case when memblock APIs are used via helper functions, e.g.  like
iommu_arena_new_node() in Alpha, the helper functions were detected with
Coccinelle's help and then manually examined and updated where
appropriate.

The direct memblock APIs users were updated using the semantic patch below:

@@
expression size, min_addr, max_addr, nid;
@@
(
|
- memblock_alloc_try_nid_raw(size, 0, min_addr, max_addr, nid)
+ memblock_alloc_try_nid_raw(size, SMP_CACHE_BYTES, min_addr, max_addr,
nid)
|
- memblock_alloc_try_nid_nopanic(size, 0, min_addr, max_addr, nid)
+ memblock_alloc_try_nid_nopanic(size, SMP_CACHE_BYTES, min_addr, max_addr,
nid)
|
- memblock_alloc_try_nid(size, 0, min_addr, max_addr, nid)
+ memblock_alloc_try_nid(size, SMP_CACHE_BYTES, min_addr, max_addr, nid)
|
- memblock_alloc(size, 0)
+ memblock_alloc(size, SMP_CACHE_BYTES)
|
- memblock_alloc_raw(size, 0)
+ memblock_alloc_raw(size, SMP_CACHE_BYTES)
|
- memblock_alloc_from(size, 0, min_addr)
+ memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr)
|
- memblock_alloc_nopanic(size, 0)
+ memblock_alloc_nopanic(size, SMP_CACHE_BYTES)
|
- memblock_alloc_low(size, 0)
+ memblock_alloc_low(size, SMP_CACHE_BYTES)
|
- memblock_alloc_low_nopanic(size, 0)
+ memblock_alloc_low_nopanic(size, SMP_CACHE_BYTES)
|
- memblock_alloc_from_nopanic(size, 0, min_addr)
+ memblock_alloc_from_nopanic(size, SMP_CACHE_BYTES, min_addr)
|
- memblock_alloc_node(size, 0, nid)
+ memblock_alloc_node(size, SMP_CACHE_BYTES, nid)
)

[mhocko@suse.com: changelog update]
[akpm@linux-foundation.org: coding-style fixes]
[rppt@linux.ibm.com: fix missed uses of implicit alignment]
  Link: http://lkml.kernel.org/r/20181016133656.GA10925@rapoport-lnx
Link: http://lkml.kernel.org/r/1538687224-17535-1-git-send-email-rppt@linux.vnet.ibm.com
Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Suggested-by: Michal Hocko <mhocko@suse.com>
Acked-by: Paul Burton <paul.burton@mips.com>	[MIPS]
Acked-by: Michael Ellerman <mpe@ellerman.id.au>	[powerpc]
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Guan Xuetao <gxt@pku.edu.cn>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/alpha/kernel/core_apecs.c            |  3 ++-
 arch/alpha/kernel/core_lca.c              |  3 ++-
 arch/alpha/kernel/core_marvel.c           |  4 ++--
 arch/alpha/kernel/core_mcpcia.c           |  6 +++--
 arch/alpha/kernel/core_t2.c               |  2 +-
 arch/alpha/kernel/core_titan.c            |  6 +++--
 arch/alpha/kernel/core_tsunami.c          |  6 +++--
 arch/alpha/kernel/core_wildfire.c         |  6 +++--
 arch/alpha/kernel/pci-noop.c              |  4 ++--
 arch/alpha/kernel/pci.c                   |  4 ++--
 arch/alpha/kernel/pci_iommu.c             |  4 ++--
 arch/arm/kernel/setup.c                   |  4 ++--
 arch/arm/mach-omap2/omap_hwmod.c          |  8 ++++---
 arch/arm64/kernel/setup.c                 |  2 +-
 arch/ia64/kernel/mca.c                    |  4 ++--
 arch/ia64/mm/tlb.c                        |  6 +++--
 arch/ia64/sn/kernel/io_common.c           |  4 +++-
 arch/ia64/sn/kernel/setup.c               |  5 ++--
 arch/m68k/sun3/sun3dvma.c                 |  2 +-
 arch/microblaze/mm/init.c                 |  2 +-
 arch/mips/kernel/setup.c                  |  2 +-
 arch/powerpc/kernel/paca.c                |  2 +-
 arch/powerpc/kernel/pci_32.c              |  3 ++-
 arch/powerpc/lib/alloc.c                  |  2 +-
 arch/powerpc/mm/mmu_context_nohash.c      |  7 +++---
 arch/powerpc/platforms/powermac/nvram.c   |  2 +-
 arch/powerpc/platforms/powernv/pci-ioda.c |  6 ++---
 arch/powerpc/sysdev/msi_bitmap.c          |  2 +-
 arch/um/drivers/net_kern.c                |  2 +-
 arch/um/drivers/vector_kern.c             |  2 +-
 arch/um/kernel/initrd.c                   |  2 +-
 arch/unicore32/kernel/setup.c             |  2 +-
 arch/x86/kernel/acpi/boot.c               |  2 +-
 arch/x86/kernel/apic/io_apic.c            |  2 +-
 arch/x86/kernel/e820.c                    |  3 ++-
 arch/x86/platform/olpc/olpc_dt.c          |  2 +-
 arch/xtensa/platforms/iss/network.c       |  2 +-
 drivers/clk/ti/clk.c                      |  2 +-
 drivers/firmware/efi/memmap.c             |  2 +-
 drivers/firmware/memmap.c                 |  3 ++-
 drivers/macintosh/smu.c                   |  2 +-
 drivers/of/of_reserved_mem.c              |  1 +
 include/linux/memblock.h                  |  3 ++-
 init/main.c                               | 13 +++++++----
 kernel/power/snapshot.c                   |  3 ++-
 lib/cpumask.c                             |  2 +-
 mm/memblock.c                             |  8 -------
 mm/page_alloc.c                           |  6 +++--
 mm/percpu.c                               | 38 ++++++++++++++++---------------
 mm/sparse.c                               |  3 ++-
 50 files changed, 120 insertions(+), 96 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/core_apecs.c b/arch/alpha/kernel/core_apecs.c
index 1bf3eef34c22..6df765ff2b10 100644
--- a/arch/alpha/kernel/core_apecs.c
+++ b/arch/alpha/kernel/core_apecs.c
@@ -346,7 +346,8 @@ apecs_init_arch(void)
 	 * Window 1 is direct access 1GB at 1GB
 	 * Window 2 is scatter-gather 8MB at 8MB (for isa)
 	 */
-	hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0);
+	hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000,
+				       SMP_CACHE_BYTES);
 	hose->sg_pci = NULL;
 	__direct_map_base = 0x40000000;
 	__direct_map_size = 0x40000000;
diff --git a/arch/alpha/kernel/core_lca.c b/arch/alpha/kernel/core_lca.c
index 81c0c43635b0..57e0750419f2 100644
--- a/arch/alpha/kernel/core_lca.c
+++ b/arch/alpha/kernel/core_lca.c
@@ -275,7 +275,8 @@ lca_init_arch(void)
 	 * Note that we do not try to save any of the DMA window CSRs
 	 * before setting them, since we cannot read those CSRs on LCA.
 	 */
-	hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0);
+	hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000,
+				       SMP_CACHE_BYTES);
 	hose->sg_pci = NULL;
 	__direct_map_base = 0x40000000;
 	__direct_map_size = 0x40000000;
diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c
index 8a568c4d8e81..c1d0c18c71ca 100644
--- a/arch/alpha/kernel/core_marvel.c
+++ b/arch/alpha/kernel/core_marvel.c
@@ -82,7 +82,7 @@ mk_resource_name(int pe, int port, char *str)
 	char *name;
 	
 	sprintf(tmp, "PCI %s PE %d PORT %d", str, pe, port);
-	name = memblock_alloc(strlen(tmp) + 1, 0);
+	name = memblock_alloc(strlen(tmp) + 1, SMP_CACHE_BYTES);
 	strcpy(name, tmp);
 
 	return name;
@@ -117,7 +117,7 @@ alloc_io7(unsigned int pe)
 		return NULL;
 	}
 
-	io7 = memblock_alloc(sizeof(*io7), 0);
+	io7 = memblock_alloc(sizeof(*io7), SMP_CACHE_BYTES);
 	io7->pe = pe;
 	raw_spin_lock_init(&io7->irq_lock);
 
diff --git a/arch/alpha/kernel/core_mcpcia.c b/arch/alpha/kernel/core_mcpcia.c
index b1549db54260..74b1d018124c 100644
--- a/arch/alpha/kernel/core_mcpcia.c
+++ b/arch/alpha/kernel/core_mcpcia.c
@@ -364,9 +364,11 @@ mcpcia_startup_hose(struct pci_controller *hose)
 	 * Window 1 is scatter-gather (up to) 1GB at 1GB (for pci)
 	 * Window 2 is direct access 2GB at 2GB
 	 */
-	hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0);
+	hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000,
+				       SMP_CACHE_BYTES);
 	hose->sg_pci = iommu_arena_new(hose, 0x40000000,
-				       size_for_memory(0x40000000), 0);
+				       size_for_memory(0x40000000),
+				       SMP_CACHE_BYTES);
 
 	__direct_map_base = 0x80000000;
 	__direct_map_size = 0x80000000;
diff --git a/arch/alpha/kernel/core_t2.c b/arch/alpha/kernel/core_t2.c
index 2c00b61ca379..98d5b6ff8a76 100644
--- a/arch/alpha/kernel/core_t2.c
+++ b/arch/alpha/kernel/core_t2.c
@@ -351,7 +351,7 @@ t2_sg_map_window2(struct pci_controller *hose,
 
 	/* Note we can only do 1 SG window, as the other is for direct, so
 	   do an ISA SG area, especially for the floppy. */
-	hose->sg_isa = iommu_arena_new(hose, base, length, 0);
+	hose->sg_isa = iommu_arena_new(hose, base, length, SMP_CACHE_BYTES);
 	hose->sg_pci = NULL;
 
 	temp = (base & 0xfff00000UL) | ((base + length - 1) >> 20);
diff --git a/arch/alpha/kernel/core_titan.c b/arch/alpha/kernel/core_titan.c
index 97551597581b..2a2820fb1be6 100644
--- a/arch/alpha/kernel/core_titan.c
+++ b/arch/alpha/kernel/core_titan.c
@@ -316,10 +316,12 @@ titan_init_one_pachip_port(titan_pachip_port *port, int index)
 	 * Window 1 is direct access 1GB at 2GB
 	 * Window 2 is scatter-gather 1GB at 3GB
 	 */
-	hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0);
+	hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000,
+				       SMP_CACHE_BYTES);
 	hose->sg_isa->align_entry = 8; /* 64KB for ISA */
 
-	hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x40000000, 0);
+	hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x40000000,
+				       SMP_CACHE_BYTES);
 	hose->sg_pci->align_entry = 4; /* Titan caches 4 PTEs at a time */
 
 	port->wsba[0].csr = hose->sg_isa->dma_base | 3;
diff --git a/arch/alpha/kernel/core_tsunami.c b/arch/alpha/kernel/core_tsunami.c
index f334b8928d72..fc1ab73f23de 100644
--- a/arch/alpha/kernel/core_tsunami.c
+++ b/arch/alpha/kernel/core_tsunami.c
@@ -319,12 +319,14 @@ tsunami_init_one_pchip(tsunami_pchip *pchip, int index)
 	 * NOTE: we need the align_entry settings for Acer devices on ES40,
 	 * specifically floppy and IDE when memory is larger than 2GB.
 	 */
-	hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0);
+	hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000,
+				       SMP_CACHE_BYTES);
 	/* Initially set for 4 PTEs, but will be overridden to 64K for ISA. */
         hose->sg_isa->align_entry = 4;
 
 	hose->sg_pci = iommu_arena_new(hose, 0x40000000,
-				       size_for_memory(0x40000000), 0);
+				       size_for_memory(0x40000000),
+				       SMP_CACHE_BYTES);
         hose->sg_pci->align_entry = 4; /* Tsunami caches 4 PTEs at a time */
 
 	__direct_map_base = 0x80000000;
diff --git a/arch/alpha/kernel/core_wildfire.c b/arch/alpha/kernel/core_wildfire.c
index cad36fc6ed7d..353c03d15442 100644
--- a/arch/alpha/kernel/core_wildfire.c
+++ b/arch/alpha/kernel/core_wildfire.c
@@ -111,8 +111,10 @@ wildfire_init_hose(int qbbno, int hoseno)
          * ??? We ought to scale window 3 memory.
          *
          */
-        hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000, 0);
-        hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x08000000, 0);
+	hose->sg_isa = iommu_arena_new(hose, 0x00800000, 0x00800000,
+				       SMP_CACHE_BYTES);
+	hose->sg_pci = iommu_arena_new(hose, 0xc0000000, 0x08000000,
+				       SMP_CACHE_BYTES);
 
 	pci = WILDFIRE_pci(qbbno, hoseno);
 
diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c
index a9378ee0c2f1..091cff3c68fd 100644
--- a/arch/alpha/kernel/pci-noop.c
+++ b/arch/alpha/kernel/pci-noop.c
@@ -33,7 +33,7 @@ alloc_pci_controller(void)
 {
 	struct pci_controller *hose;
 
-	hose = memblock_alloc(sizeof(*hose), 0);
+	hose = memblock_alloc(sizeof(*hose), SMP_CACHE_BYTES);
 
 	*hose_tail = hose;
 	hose_tail = &hose->next;
@@ -44,7 +44,7 @@ alloc_pci_controller(void)
 struct resource * __init
 alloc_resource(void)
 {
-	return memblock_alloc(sizeof(struct resource), 0);
+	return memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
 }
 
 SYSCALL_DEFINE3(pciconfig_iobase, long, which, unsigned long, bus,
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 13937e72d875..97098127df83 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -392,7 +392,7 @@ alloc_pci_controller(void)
 {
 	struct pci_controller *hose;
 
-	hose = memblock_alloc(sizeof(*hose), 0);
+	hose = memblock_alloc(sizeof(*hose), SMP_CACHE_BYTES);
 
 	*hose_tail = hose;
 	hose_tail = &hose->next;
@@ -403,7 +403,7 @@ alloc_pci_controller(void)
 struct resource * __init
 alloc_resource(void)
 {
-	return memblock_alloc(sizeof(struct resource), 0);
+	return memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
 }
 
 
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index 82cf950bda2a..46e08e0d9181 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -79,7 +79,7 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base,
 		printk("%s: couldn't allocate arena from node %d\n"
 		       "    falling back to system-wide allocation\n",
 		       __func__, nid);
-		arena = memblock_alloc(sizeof(*arena), 0);
+		arena = memblock_alloc(sizeof(*arena), SMP_CACHE_BYTES);
 	}
 
 	arena->ptes = memblock_alloc_node(sizeof(*arena), align, nid);
@@ -92,7 +92,7 @@ iommu_arena_new_node(int nid, struct pci_controller *hose, dma_addr_t base,
 
 #else /* CONFIG_DISCONTIGMEM */
 
-	arena = memblock_alloc(sizeof(*arena), 0);
+	arena = memblock_alloc(sizeof(*arena), SMP_CACHE_BYTES);
 	arena->ptes = memblock_alloc_from(mem_size, align, 0);
 
 #endif /* CONFIG_DISCONTIGMEM */
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 840a4adc69fc..ac7e08886863 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -856,7 +856,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
 		 */
 		boot_alias_start = phys_to_idmap(start);
 		if (arm_has_idmap_alias() && boot_alias_start != IDMAP_INVALID_ADDR) {
-			res = memblock_alloc(sizeof(*res), 0);
+			res = memblock_alloc(sizeof(*res), SMP_CACHE_BYTES);
 			res->name = "System RAM (boot alias)";
 			res->start = boot_alias_start;
 			res->end = phys_to_idmap(end);
@@ -864,7 +864,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
 			request_resource(&iomem_resource, res);
 		}
 
-		res = memblock_alloc(sizeof(*res), 0);
+		res = memblock_alloc(sizeof(*res), SMP_CACHE_BYTES);
 		res->name  = "System RAM";
 		res->start = start;
 		res->end = end;
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index cd5732ab0cdf..083dcd9942ce 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -726,7 +726,7 @@ static int __init _setup_clkctrl_provider(struct device_node *np)
 	u64 size;
 	int i;
 
-	provider = memblock_alloc(sizeof(*provider), 0);
+	provider = memblock_alloc(sizeof(*provider), SMP_CACHE_BYTES);
 	if (!provider)
 		return -ENOMEM;
 
@@ -736,12 +736,14 @@ static int __init _setup_clkctrl_provider(struct device_node *np)
 		of_property_count_elems_of_size(np, "reg", sizeof(u32)) / 2;
 
 	provider->addr =
-		memblock_alloc(sizeof(void *) * provider->num_addrs, 0);
+		memblock_alloc(sizeof(void *) * provider->num_addrs,
+			       SMP_CACHE_BYTES);
 	if (!provider->addr)
 		return -ENOMEM;
 
 	provider->size =
-		memblock_alloc(sizeof(u32) * provider->num_addrs, 0);
+		memblock_alloc(sizeof(u32) * provider->num_addrs,
+			       SMP_CACHE_BYTES);
 	if (!provider->size)
 		return -ENOMEM;
 
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 7ce7306f1d75..953e316521fc 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -218,7 +218,7 @@ static void __init request_standard_resources(void)
 	num_standard_resources = memblock.memory.cnt;
 	standard_resources = memblock_alloc_low(num_standard_resources *
 					        sizeof(*standard_resources),
-					        0);
+					        SMP_CACHE_BYTES);
 
 	for_each_memblock(memory, region) {
 		res = &standard_resources[i++];
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 9a6603f8e409..91bd1e129379 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -361,9 +361,9 @@ static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES];
 
 #define IA64_LOG_ALLOCATE(it, size) \
 	{ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \
-		(ia64_err_rec_t *)memblock_alloc(size, 0); \
+		(ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES); \
 	ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \
-		(ia64_err_rec_t *)memblock_alloc(size, 0);}
+		(ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES);}
 #define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock)
 #define IA64_LOG_LOCK(it)      spin_lock_irqsave(&ia64_state_log[it].isl_lock, s)
 #define IA64_LOG_UNLOCK(it)    spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s)
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index ab545daff7c3..9340bcb4f29c 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -59,8 +59,10 @@ struct ia64_tr_entry *ia64_idtrs[NR_CPUS];
 void __init
 mmu_context_init (void)
 {
-	ia64_ctx.bitmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3, 0);
-	ia64_ctx.flushmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3, 0);
+	ia64_ctx.bitmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3,
+					 SMP_CACHE_BYTES);
+	ia64_ctx.flushmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3,
+					   SMP_CACHE_BYTES);
 }
 
 /*
diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c
index 98f55220c67d..8df13d0d96fa 100644
--- a/arch/ia64/sn/kernel/io_common.c
+++ b/arch/ia64/sn/kernel/io_common.c
@@ -391,7 +391,9 @@ void __init hubdev_init_node(nodepda_t * npda, cnodeid_t node)
 	if (node >= num_online_nodes())	/* Headless/memless IO nodes */
 		node = 0;
 
-	hubdev_info = (struct hubdev_info *)memblock_alloc_node(size, 0, node);
+	hubdev_info = (struct hubdev_info *)memblock_alloc_node(size,
+								SMP_CACHE_BYTES,
+								node);
 
 	npda->pdinfo = (void *)hubdev_info;
 }
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 71ad6b0ccab4..a6d40a2c5bff 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -511,7 +511,8 @@ static void __init sn_init_pdas(char **cmdline_p)
 	 */
 	for_each_online_node(cnode) {
 		nodepdaindr[cnode] =
-		    memblock_alloc_node(sizeof(nodepda_t), 0, cnode);
+		    memblock_alloc_node(sizeof(nodepda_t), SMP_CACHE_BYTES,
+					cnode);
 		memset(nodepdaindr[cnode]->phys_cpuid, -1,
 		    sizeof(nodepdaindr[cnode]->phys_cpuid));
 		spin_lock_init(&nodepdaindr[cnode]->ptc_lock);
@@ -522,7 +523,7 @@ static void __init sn_init_pdas(char **cmdline_p)
 	 */
 	for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++)
 		nodepdaindr[cnode] =
-		    memblock_alloc_node(sizeof(nodepda_t), 0, 0);
+		    memblock_alloc_node(sizeof(nodepda_t), SMP_CACHE_BYTES, 0);
 
 	/*
 	 * Now copy the array of nodepda pointers to each nodepda.
diff --git a/arch/m68k/sun3/sun3dvma.c b/arch/m68k/sun3/sun3dvma.c
index 8be8b750c629..4d64711d3d47 100644
--- a/arch/m68k/sun3/sun3dvma.c
+++ b/arch/m68k/sun3/sun3dvma.c
@@ -268,7 +268,7 @@ void __init dvma_init(void)
 	list_add(&(hole->list), &hole_list);
 
 	iommu_use = memblock_alloc(IOMMU_TOTAL_ENTRIES * sizeof(unsigned long),
-				   0);
+				   SMP_CACHE_BYTES);
 
 	dvma_unmap_iommu(DVMA_START, DVMA_SIZE);
 
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 8c14988f52f2..b17fd8aafd64 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -376,7 +376,7 @@ void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
 	if (mem_init_done)
 		p = kzalloc(size, mask);
 	else {
-		p = memblock_alloc(size, 0);
+		p = memblock_alloc(size, SMP_CACHE_BYTES);
 		if (p)
 			memset(p, 0, size);
 	}
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 41c1683761bb..ea09ed6a80a9 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -916,7 +916,7 @@ static void __init resource_init(void)
 		if (end >= HIGHMEM_START)
 			end = HIGHMEM_START - 1;
 
-		res = memblock_alloc(sizeof(struct resource), 0);
+		res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
 
 		res->start = start;
 		res->end = end;
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index f331a0054b3a..913bfca09c4f 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -198,7 +198,7 @@ void __init allocate_paca_ptrs(void)
 	paca_nr_cpu_ids = nr_cpu_ids;
 
 	paca_ptrs_size = sizeof(struct paca_struct *) * nr_cpu_ids;
-	paca_ptrs = __va(memblock_phys_alloc(paca_ptrs_size, 0));
+	paca_ptrs = __va(memblock_phys_alloc(paca_ptrs_size, SMP_CACHE_BYTES));
 	memset(paca_ptrs, 0x88, paca_ptrs_size);
 }
 
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 274bd1442dd9..d3f04f2d8249 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -203,7 +203,8 @@ pci_create_OF_bus_map(void)
 	struct property* of_prop;
 	struct device_node *dn;
 
-	of_prop = memblock_alloc(sizeof(struct property) + 256, 0);
+	of_prop = memblock_alloc(sizeof(struct property) + 256,
+				 SMP_CACHE_BYTES);
 	dn = of_find_node_by_path("/");
 	if (dn) {
 		memset(of_prop, -1, sizeof(struct property) + 256);
diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c
index 5b61704447c1..dedf88a76f58 100644
--- a/arch/powerpc/lib/alloc.c
+++ b/arch/powerpc/lib/alloc.c
@@ -14,7 +14,7 @@ void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
 	if (slab_is_available())
 		p = kzalloc(size, mask);
 	else {
-		p = memblock_alloc(size, 0);
+		p = memblock_alloc(size, SMP_CACHE_BYTES);
 	}
 	return p;
 }
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c
index 67b9d7b669a1..2faca46ad720 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/mmu_context_nohash.c
@@ -461,10 +461,11 @@ void __init mmu_context_init(void)
 	/*
 	 * Allocate the maps used by context management
 	 */
-	context_map = memblock_alloc(CTX_MAP_SIZE, 0);
-	context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1), 0);
+	context_map = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
+	context_mm = memblock_alloc(sizeof(void *) * (LAST_CONTEXT + 1),
+				    SMP_CACHE_BYTES);
 #ifdef CONFIG_SMP
-	stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, 0);
+	stale_map[boot_cpuid] = memblock_alloc(CTX_MAP_SIZE, SMP_CACHE_BYTES);
 
 	cpuhp_setup_state_nocalls(CPUHP_POWERPC_MMU_CTX_PREPARE,
 				  "powerpc/mmu/ctx:prepare",
diff --git a/arch/powerpc/platforms/powermac/nvram.c b/arch/powerpc/platforms/powermac/nvram.c
index f3391be7c762..ae54d7fe68f3 100644
--- a/arch/powerpc/platforms/powermac/nvram.c
+++ b/arch/powerpc/platforms/powermac/nvram.c
@@ -513,7 +513,7 @@ static int __init core99_nvram_setup(struct device_node *dp, unsigned long addr)
 		printk(KERN_ERR "nvram: no address\n");
 		return -EINVAL;
 	}
-	nvram_image = memblock_alloc(NVRAM_SIZE, 0);
+	nvram_image = memblock_alloc(NVRAM_SIZE, SMP_CACHE_BYTES);
 	nvram_data = ioremap(addr, NVRAM_SIZE*2);
 	nvram_naddrs = 1; /* Make sure we get the correct case */
 
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index aba81cbf0b36..dd807446801e 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -3769,7 +3769,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	phb_id = be64_to_cpup(prop64);
 	pr_debug("  PHB-ID  : 0x%016llx\n", phb_id);
 
-	phb = memblock_alloc(sizeof(*phb), 0);
+	phb = memblock_alloc(sizeof(*phb), SMP_CACHE_BYTES);
 
 	/* Allocate PCI controller */
 	phb->hose = hose = pcibios_alloc_controller(np);
@@ -3815,7 +3815,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	else
 		phb->diag_data_size = PNV_PCI_DIAG_BUF_SIZE;
 
-	phb->diag_data = memblock_alloc(phb->diag_data_size, 0);
+	phb->diag_data = memblock_alloc(phb->diag_data_size, SMP_CACHE_BYTES);
 
 	/* Parse 32-bit and IO ranges (if any) */
 	pci_process_bridge_OF_ranges(hose, np, !hose->global_number);
@@ -3874,7 +3874,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	}
 	pemap_off = size;
 	size += phb->ioda.total_pe_num * sizeof(struct pnv_ioda_pe);
-	aux = memblock_alloc(size, 0);
+	aux = memblock_alloc(size, SMP_CACHE_BYTES);
 	phb->ioda.pe_alloc = aux;
 	phb->ioda.m64_segmap = aux + m64map_off;
 	phb->ioda.m32_segmap = aux + m32map_off;
diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c
index 2444feda831f..d45450f6666a 100644
--- a/arch/powerpc/sysdev/msi_bitmap.c
+++ b/arch/powerpc/sysdev/msi_bitmap.c
@@ -128,7 +128,7 @@ int __ref msi_bitmap_alloc(struct msi_bitmap *bmp, unsigned int irq_count,
 	if (bmp->bitmap_from_slab)
 		bmp->bitmap = kzalloc(size, GFP_KERNEL);
 	else {
-		bmp->bitmap = memblock_alloc(size, 0);
+		bmp->bitmap = memblock_alloc(size, SMP_CACHE_BYTES);
 		/* the bitmap won't be freed from memblock allocator */
 		kmemleak_not_leak(bmp->bitmap);
 	}
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 673816880cce..624cb47cc9cd 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -650,7 +650,7 @@ static int __init eth_setup(char *str)
 		return 1;
 	}
 
-	new = memblock_alloc(sizeof(*new), 0);
+	new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES);
 
 	INIT_LIST_HEAD(&new->list);
 	new->index = n;
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 2b4dded11a7a..10d8d20eb9ec 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -1580,7 +1580,7 @@ static int __init vector_setup(char *str)
 				 str, error);
 		return 1;
 	}
-	new = memblock_alloc(sizeof(*new), 0);
+	new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES);
 	INIT_LIST_HEAD(&new->list);
 	new->unit = n;
 	new->arguments = str;
diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c
index 3678f5b05e42..ce169ea87e61 100644
--- a/arch/um/kernel/initrd.c
+++ b/arch/um/kernel/initrd.c
@@ -36,7 +36,7 @@ int __init read_initrd(void)
 		return 0;
 	}
 
-	area = memblock_alloc(size, 0);
+	area = memblock_alloc(size, SMP_CACHE_BYTES);
 
 	if (load_initrd(initrd, area, size) == -1)
 		return 0;
diff --git a/arch/unicore32/kernel/setup.c b/arch/unicore32/kernel/setup.c
index b2c38b32ea57..4b0cb68c355a 100644
--- a/arch/unicore32/kernel/setup.c
+++ b/arch/unicore32/kernel/setup.c
@@ -206,7 +206,7 @@ request_standard_resources(struct meminfo *mi)
 		if (mi->bank[i].size == 0)
 			continue;
 
-		res = memblock_alloc_low(sizeof(*res), 0);
+		res = memblock_alloc_low(sizeof(*res), SMP_CACHE_BYTES);
 		res->name  = "System RAM";
 		res->start = mi->bank[i].start;
 		res->end   = mi->bank[i].start + mi->bank[i].size - 1;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 7f5d212551d4..92c76bf97ad8 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -934,7 +934,7 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
 	 */
 #define HPET_RESOURCE_NAME_SIZE 9
 	hpet_res = memblock_alloc(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE,
-				  0);
+				  SMP_CACHE_BYTES);
 
 	hpet_res->name = (void *)&hpet_res[1];
 	hpet_res->flags = IORESOURCE_MEM;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5fbc57e4b0b9..2953bbf05c08 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2578,7 +2578,7 @@ static struct resource * __init ioapic_setup_resources(void)
 	n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
 	n *= nr_ioapics;
 
-	mem = memblock_alloc(n, 0);
+	mem = memblock_alloc(n, SMP_CACHE_BYTES);
 	res = (void *)mem;
 
 	mem += sizeof(struct resource) * nr_ioapics;
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 68ff62bffbab..50895c2f937d 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1093,7 +1093,8 @@ void __init e820__reserve_resources(void)
 	struct resource *res;
 	u64 end;
 
-	res = memblock_alloc(sizeof(*res) * e820_table->nr_entries, 0);
+	res = memblock_alloc(sizeof(*res) * e820_table->nr_entries,
+			     SMP_CACHE_BYTES);
 	e820_res = res;
 
 	for (i = 0; i < e820_table->nr_entries; i++) {
diff --git a/arch/x86/platform/olpc/olpc_dt.c b/arch/x86/platform/olpc/olpc_dt.c
index 115c8e4173bb..24d2175a9480 100644
--- a/arch/x86/platform/olpc/olpc_dt.c
+++ b/arch/x86/platform/olpc/olpc_dt.c
@@ -141,7 +141,7 @@ void * __init prom_early_alloc(unsigned long size)
 		 * fast enough on the platforms we care about while minimizing
 		 * wasted bootmem) and hand off chunks of it to callers.
 		 */
-		res = memblock_alloc(chunk_size, 0);
+		res = memblock_alloc(chunk_size, SMP_CACHE_BYTES);
 		BUG_ON(!res);
 		prom_early_allocated += chunk_size;
 		memset(res, 0, chunk_size);
diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c
index 190846dddc67..d052712373b6 100644
--- a/arch/xtensa/platforms/iss/network.c
+++ b/arch/xtensa/platforms/iss/network.c
@@ -646,7 +646,7 @@ static int __init iss_net_setup(char *str)
 		return 1;
 	}
 
-	new = memblock_alloc(sizeof(*new), 0);
+	new = memblock_alloc(sizeof(*new), SMP_CACHE_BYTES);
 	if (new == NULL) {
 		pr_err("Alloc_bootmem failed\n");
 		return 1;
diff --git a/drivers/clk/ti/clk.c b/drivers/clk/ti/clk.c
index 5b2867a33b98..e205af814582 100644
--- a/drivers/clk/ti/clk.c
+++ b/drivers/clk/ti/clk.c
@@ -342,7 +342,7 @@ void __init omap2_clk_legacy_provider_init(int index, void __iomem *mem)
 {
 	struct clk_iomap *io;
 
-	io = memblock_alloc(sizeof(*io), 0);
+	io = memblock_alloc(sizeof(*io), SMP_CACHE_BYTES);
 
 	io->mem = mem;
 
diff --git a/drivers/firmware/efi/memmap.c b/drivers/firmware/efi/memmap.c
index ef618bceb79a..fa2904fb841f 100644
--- a/drivers/firmware/efi/memmap.c
+++ b/drivers/firmware/efi/memmap.c
@@ -15,7 +15,7 @@
 
 static phys_addr_t __init __efi_memmap_alloc_early(unsigned long size)
 {
-	return memblock_phys_alloc(size, 0);
+	return memblock_phys_alloc(size, SMP_CACHE_BYTES);
 }
 
 static phys_addr_t __init __efi_memmap_alloc_late(unsigned long size)
diff --git a/drivers/firmware/memmap.c b/drivers/firmware/memmap.c
index 2a23453f005a..d168c87c7d30 100644
--- a/drivers/firmware/memmap.c
+++ b/drivers/firmware/memmap.c
@@ -333,7 +333,8 @@ int __init firmware_map_add_early(u64 start, u64 end, const char *type)
 {
 	struct firmware_map_entry *entry;
 
-	entry = memblock_alloc(sizeof(struct firmware_map_entry), 0);
+	entry = memblock_alloc(sizeof(struct firmware_map_entry),
+			       SMP_CACHE_BYTES);
 	if (WARN_ON(!entry))
 		return -ENOMEM;
 
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 880a81c82b7a..0a0b8e1f4236 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -492,7 +492,7 @@ int __init smu_init (void)
 		goto fail_np;
 	}
 
-	smu = memblock_alloc(sizeof(struct smu_device), 0);
+	smu = memblock_alloc(sizeof(struct smu_device), SMP_CACHE_BYTES);
 
 	spin_lock_init(&smu->lock);
 	INIT_LIST_HEAD(&smu->cmd_list);
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index d6255c276a41..1977ee0adcb1 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -36,6 +36,7 @@ int __init __weak early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
 	 * panic()s on allocation failure.
 	 */
 	end = !end ? MEMBLOCK_ALLOC_ANYWHERE : end;
+	align = !align ? SMP_CACHE_BYTES : align;
 	base = __memblock_alloc_base(size, align, end);
 	if (!base)
 		return -ENOMEM;
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 1b4d85879cbe..aee299a6aa76 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -406,7 +406,8 @@ static inline void * __init memblock_alloc_node(phys_addr_t size,
 static inline void * __init memblock_alloc_node_nopanic(phys_addr_t size,
 							int nid)
 {
-	return memblock_alloc_try_nid_nopanic(size, 0, MEMBLOCK_LOW_LIMIT,
+	return memblock_alloc_try_nid_nopanic(size, SMP_CACHE_BYTES,
+					      MEMBLOCK_LOW_LIMIT,
 					      MEMBLOCK_ALLOC_ACCESSIBLE, nid);
 }
 
diff --git a/init/main.c b/init/main.c
index 51b8e7b8ae5b..ee147103ba1b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -375,10 +375,11 @@ static inline void smp_prepare_cpus(unsigned int maxcpus) { }
 static void __init setup_command_line(char *command_line)
 {
 	saved_command_line =
-		memblock_alloc(strlen(boot_command_line) + 1, 0);
+		memblock_alloc(strlen(boot_command_line) + 1, SMP_CACHE_BYTES);
 	initcall_command_line =
-		memblock_alloc(strlen(boot_command_line) + 1, 0);
-	static_command_line = memblock_alloc(strlen(command_line) + 1, 0);
+		memblock_alloc(strlen(boot_command_line) + 1, SMP_CACHE_BYTES);
+	static_command_line = memblock_alloc(strlen(command_line) + 1,
+					     SMP_CACHE_BYTES);
 	strcpy(saved_command_line, boot_command_line);
 	strcpy(static_command_line, command_line);
 }
@@ -773,8 +774,10 @@ static int __init initcall_blacklist(char *str)
 		str_entry = strsep(&str, ",");
 		if (str_entry) {
 			pr_debug("blacklisting initcall %s\n", str_entry);
-			entry = memblock_alloc(sizeof(*entry), 0);
-			entry->buf = memblock_alloc(strlen(str_entry) + 1, 0);
+			entry = memblock_alloc(sizeof(*entry),
+					       SMP_CACHE_BYTES);
+			entry->buf = memblock_alloc(strlen(str_entry) + 1,
+						    SMP_CACHE_BYTES);
 			strcpy(entry->buf, str_entry);
 			list_add(&entry->next, &blacklisted_initcalls);
 		}
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 3c9e365438ad..b0308a2c6000 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -963,7 +963,8 @@ void __init __register_nosave_region(unsigned long start_pfn,
 		BUG_ON(!region);
 	} else {
 		/* This allocation cannot fail */
-		region = memblock_alloc(sizeof(struct nosave_region), 0);
+		region = memblock_alloc(sizeof(struct nosave_region),
+					SMP_CACHE_BYTES);
 	}
 	region->start_pfn = start_pfn;
 	region->end_pfn = end_pfn;
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 75b5e7672c4c..8d666ab84b5c 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -163,7 +163,7 @@ EXPORT_SYMBOL(zalloc_cpumask_var);
  */
 void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask)
 {
-	*mask = memblock_alloc(cpumask_size(), 0);
+	*mask = memblock_alloc(cpumask_size(), SMP_CACHE_BYTES);
 }
 
 /**
diff --git a/mm/memblock.c b/mm/memblock.c
index c655342569f8..839531133816 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1247,9 +1247,6 @@ static phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
 {
 	phys_addr_t found;
 
-	if (!align)
-		align = SMP_CACHE_BYTES;
-
 	found = memblock_find_in_range_node(size, align, start, end, nid,
 					    flags);
 	if (found && !memblock_reserve(found, size)) {
@@ -1343,8 +1340,6 @@ phys_addr_t __init memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t ali
  * The allocation is performed from memory region limited by
  * memblock.current_limit if @max_addr == %MEMBLOCK_ALLOC_ACCESSIBLE.
  *
- * The memory block is aligned on %SMP_CACHE_BYTES if @align == 0.
- *
  * The phys address of allocated boot memory block is converted to virtual and
  * allocated memory is reset to 0.
  *
@@ -1374,9 +1369,6 @@ static void * __init memblock_alloc_internal(
 	if (WARN_ON_ONCE(slab_is_available()))
 		return kzalloc_node(size, GFP_NOWAIT, nid);
 
-	if (!align)
-		align = SMP_CACHE_BYTES;
-
 	if (max_addr > memblock.current_limit)
 		max_addr = memblock.current_limit;
 again:
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ef289fadec0e..a919ba5cb3c8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7710,9 +7710,11 @@ void *__init alloc_large_system_hash(const char *tablename,
 		size = bucketsize << log2qty;
 		if (flags & HASH_EARLY) {
 			if (flags & HASH_ZERO)
-				table = memblock_alloc_nopanic(size, 0);
+				table = memblock_alloc_nopanic(size,
+							       SMP_CACHE_BYTES);
 			else
-				table = memblock_alloc_raw(size, 0);
+				table = memblock_alloc_raw(size,
+							   SMP_CACHE_BYTES);
 		} else if (hashdist) {
 			table = __vmalloc(size, gfp_flags, PAGE_KERNEL);
 		} else {
diff --git a/mm/percpu.c b/mm/percpu.c
index 61cdbb3b3736..a6b74c6fe0be 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1102,8 +1102,8 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
 
 	/* allocate chunk */
 	chunk = memblock_alloc(sizeof(struct pcpu_chunk) +
-				    BITS_TO_LONGS(region_size >> PAGE_SHIFT),
-				    0);
+			       BITS_TO_LONGS(region_size >> PAGE_SHIFT),
+			       SMP_CACHE_BYTES);
 
 	INIT_LIST_HEAD(&chunk->list);
 
@@ -1114,12 +1114,12 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
 	chunk->nr_pages = region_size >> PAGE_SHIFT;
 	region_bits = pcpu_chunk_map_bits(chunk);
 
-	chunk->alloc_map = memblock_alloc(BITS_TO_LONGS(region_bits) *
-					       sizeof(chunk->alloc_map[0]), 0);
-	chunk->bound_map = memblock_alloc(BITS_TO_LONGS(region_bits + 1) *
-					       sizeof(chunk->bound_map[0]), 0);
-	chunk->md_blocks = memblock_alloc(pcpu_chunk_nr_blocks(chunk) *
-					       sizeof(chunk->md_blocks[0]), 0);
+	chunk->alloc_map = memblock_alloc(BITS_TO_LONGS(region_bits) * sizeof(chunk->alloc_map[0]),
+					  SMP_CACHE_BYTES);
+	chunk->bound_map = memblock_alloc(BITS_TO_LONGS(region_bits + 1) * sizeof(chunk->bound_map[0]),
+					  SMP_CACHE_BYTES);
+	chunk->md_blocks = memblock_alloc(pcpu_chunk_nr_blocks(chunk) * sizeof(chunk->md_blocks[0]),
+					  SMP_CACHE_BYTES);
 	pcpu_init_md_blocks(chunk);
 
 	/* manage populated page bitmap */
@@ -2075,12 +2075,14 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
 
 	/* process group information and build config tables accordingly */
-	group_offsets = memblock_alloc(ai->nr_groups *
-					     sizeof(group_offsets[0]), 0);
-	group_sizes = memblock_alloc(ai->nr_groups *
-					   sizeof(group_sizes[0]), 0);
-	unit_map = memblock_alloc(nr_cpu_ids * sizeof(unit_map[0]), 0);
-	unit_off = memblock_alloc(nr_cpu_ids * sizeof(unit_off[0]), 0);
+	group_offsets = memblock_alloc(ai->nr_groups * sizeof(group_offsets[0]),
+				       SMP_CACHE_BYTES);
+	group_sizes = memblock_alloc(ai->nr_groups * sizeof(group_sizes[0]),
+				     SMP_CACHE_BYTES);
+	unit_map = memblock_alloc(nr_cpu_ids * sizeof(unit_map[0]),
+				  SMP_CACHE_BYTES);
+	unit_off = memblock_alloc(nr_cpu_ids * sizeof(unit_off[0]),
+				  SMP_CACHE_BYTES);
 
 	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
 		unit_map[cpu] = UINT_MAX;
@@ -2144,8 +2146,8 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 	 * empty chunks.
 	 */
 	pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2;
-	pcpu_slot = memblock_alloc(
-			pcpu_nr_slots * sizeof(pcpu_slot[0]), 0);
+	pcpu_slot = memblock_alloc(pcpu_nr_slots * sizeof(pcpu_slot[0]),
+				   SMP_CACHE_BYTES);
 	for (i = 0; i < pcpu_nr_slots; i++)
 		INIT_LIST_HEAD(&pcpu_slot[i]);
 
@@ -2458,7 +2460,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 	size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
 	areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *));
 
-	areas = memblock_alloc_nopanic(areas_size, 0);
+	areas = memblock_alloc_nopanic(areas_size, SMP_CACHE_BYTES);
 	if (!areas) {
 		rc = -ENOMEM;
 		goto out_free;
@@ -2599,7 +2601,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size,
 	/* unaligned allocations can't be freed, round up to page size */
 	pages_size = PFN_ALIGN(unit_pages * num_possible_cpus() *
 			       sizeof(pages[0]));
-	pages = memblock_alloc(pages_size, 0);
+	pages = memblock_alloc(pages_size, SMP_CACHE_BYTES);
 
 	/* allocate pages */
 	j = 0;
diff --git a/mm/sparse.c b/mm/sparse.c
index ab2ac45e0440..33307fc05c4d 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -68,7 +68,8 @@ static noinline struct mem_section __ref *sparse_index_alloc(int nid)
 	if (slab_is_available())
 		section = kzalloc_node(array_size, GFP_KERNEL, nid);
 	else
-		section = memblock_alloc_node(array_size, 0, nid);
+		section = memblock_alloc_node(array_size, SMP_CACHE_BYTES,
+					      nid);
 
 	return section;
 }
-- 
cgit v1.2.3


From d15e59260f62bd5e0f625cf5f5240f6ffac78ab6 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 30 Oct 2018 15:10:18 -0700
Subject: mm/memory_hotplug: make remove_memory() take the device_hotplug_lock

Patch series "mm: online/offline_pages called w.o. mem_hotplug_lock", v3.

Reading through the code and studying how mem_hotplug_lock is to be used,
I noticed that there are two places where we can end up calling
device_online()/device_offline() - online_pages()/offline_pages() without
the mem_hotplug_lock.  And there are other places where we call
device_online()/device_offline() without the device_hotplug_lock.

While e.g.
	echo "online" > /sys/devices/system/memory/memory9/state
is fine, e.g.
	echo 1 > /sys/devices/system/memory/memory9/online
Will not take the mem_hotplug_lock. However the device_lock() and
device_hotplug_lock.

E.g.  via memory_probe_store(), we can end up calling
add_memory()->online_pages() without the device_hotplug_lock.  So we can
have concurrent callers in online_pages().  We e.g.  touch in
online_pages() basically unprotected zone->present_pages then.

Looks like there is a longer history to that (see Patch #2 for details),
and fixing it to work the way it was intended is not really possible.  We
would e.g.  have to take the mem_hotplug_lock in device/base/core.c, which
sounds wrong.

Summary: We had a lock inversion on mem_hotplug_lock and device_lock().
More details can be found in patch 3 and patch 6.

I propose the general rules (documentation added in patch 6):

1. add_memory/add_memory_resource() must only be called with
   device_hotplug_lock.
2. remove_memory() must only be called with device_hotplug_lock. This is
   already documented and holds for all callers.
3. device_online()/device_offline() must only be called with
   device_hotplug_lock. This is already documented and true for now in core
   code. Other callers (related to memory hotplug) have to be fixed up.
4. mem_hotplug_lock is taken inside of add_memory/remove_memory/
   online_pages/offline_pages.

To me, this looks way cleaner than what we have right now (and easier to
verify).  And looking at the documentation of remove_memory, using
lock_device_hotplug also for add_memory() feels natural.

This patch (of 6):

remove_memory() is exported right now but requires the
device_hotplug_lock, which is not exported.  So let's provide a variant
that takes the lock and only export that one.

The lock is already held in
	arch/powerpc/platforms/pseries/hotplug-memory.c
	drivers/acpi/acpi_memhotplug.c
	arch/powerpc/platforms/powernv/memtrace.c

Apart from that, there are not other users in the tree.

Link: http://lkml.kernel.org/r/20180925091457.28651-2-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Pavel Tatashin <pavel.tatashin@microsoft.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Rashmica Gupta <rashmica.g@gmail.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Len Brown <lenb@kernel.org>
Cc: Rashmica Gupta <rashmica.g@gmail.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Cc: John Allen <jallen@linux.vnet.ibm.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: YASUAKI ISHIMATSU <yasu.isimatu@gmail.com>
Cc: Mathieu Malaterre <malat@debian.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Juergen Gross <jgross@suse.com>
Cc: Kate Stewart <kstewart@linuxfoundation.org>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/platforms/powernv/memtrace.c       | 2 +-
 arch/powerpc/platforms/pseries/hotplug-memory.c | 6 +++---
 drivers/acpi/acpi_memhotplug.c                  | 2 +-
 include/linux/memory_hotplug.h                  | 3 ++-
 mm/memory_hotplug.c                             | 9 ++++++++-
 5 files changed, 15 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index a29fdf8a2e56..773623f6bfb1 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -121,7 +121,7 @@ static u64 memtrace_alloc_node(u32 nid, u64 size)
 			lock_device_hotplug();
 			end_pfn = base_pfn + nr_pages;
 			for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) {
-				remove_memory(nid, pfn << PAGE_SHIFT, bytes);
+				__remove_memory(nid, pfn << PAGE_SHIFT, bytes);
 			}
 			unlock_device_hotplug();
 			return base_pfn << PAGE_SHIFT;
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 2b796da822c2..d79b31e7a6cf 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -300,7 +300,7 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz
 	nid = memory_add_physaddr_to_nid(base);
 
 	for (i = 0; i < sections_per_block; i++) {
-		remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE);
+		__remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE);
 		base += MIN_MEMORY_BLOCK_SIZE;
 	}
 
@@ -389,7 +389,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
 	block_sz = pseries_memory_block_size();
 	nid = memory_add_physaddr_to_nid(lmb->base_addr);
 
-	remove_memory(nid, lmb->base_addr, block_sz);
+	__remove_memory(nid, lmb->base_addr, block_sz);
 
 	/* Update memory regions for memory remove */
 	memblock_remove(lmb->base_addr, block_sz);
@@ -676,7 +676,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
 
 	rc = dlpar_online_lmb(lmb);
 	if (rc) {
-		remove_memory(nid, lmb->base_addr, block_sz);
+		__remove_memory(nid, lmb->base_addr, block_sz);
 		invalidate_lmb_associativity_index(lmb);
 	} else {
 		lmb->flags |= DRCONF_MEM_ASSIGNED;
diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index 6b0d3ef7309c..811148415993 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -282,7 +282,7 @@ static void acpi_memory_remove_memory(struct acpi_memory_device *mem_device)
 			nid = memory_add_physaddr_to_nid(info->start_addr);
 
 		acpi_unbind_memory_blocks(info);
-		remove_memory(nid, info->start_addr, info->length);
+		__remove_memory(nid, info->start_addr, info->length);
 		list_del(&info->list);
 		kfree(info);
 	}
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 34a28227068d..1f096852f479 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -301,6 +301,7 @@ extern bool is_mem_section_removable(unsigned long pfn, unsigned long nr_pages);
 extern void try_offline_node(int nid);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern void remove_memory(int nid, u64 start, u64 size);
+extern void __remove_memory(int nid, u64 start, u64 size);
 
 #else
 static inline bool is_mem_section_removable(unsigned long pfn,
@@ -317,6 +318,7 @@ static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
 }
 
 static inline void remove_memory(int nid, u64 start, u64 size) {}
+static inline void __remove_memory(int nid, u64 start, u64 size) {}
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
 extern void __ref free_area_init_core_hotplug(int nid);
@@ -330,7 +332,6 @@ extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
 		unsigned long nr_pages, struct vmem_altmap *altmap);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern bool is_memblock_offlined(struct memory_block *mem);
-extern void remove_memory(int nid, u64 start, u64 size);
 extern int sparse_add_one_section(struct pglist_data *pgdat,
 		unsigned long start_pfn, struct vmem_altmap *altmap);
 extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 41e326472ef9..8f38e689da25 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1806,7 +1806,7 @@ EXPORT_SYMBOL(try_offline_node);
  * and online/offline operations before this call, as required by
  * try_offline_node().
  */
-void __ref remove_memory(int nid, u64 start, u64 size)
+void __ref __remove_memory(int nid, u64 start, u64 size)
 {
 	int ret;
 
@@ -1835,5 +1835,12 @@ void __ref remove_memory(int nid, u64 start, u64 size)
 
 	mem_hotplug_done();
 }
+
+void remove_memory(int nid, u64 start, u64 size)
+{
+	lock_device_hotplug();
+	__remove_memory(nid, start, size);
+	unlock_device_hotplug();
+}
 EXPORT_SYMBOL_GPL(remove_memory);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
-- 
cgit v1.2.3


From 8df1d0e4a265f25dc1e7e7624ccdbcb4a6630c89 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 30 Oct 2018 15:10:24 -0700
Subject: mm/memory_hotplug: make add_memory() take the device_hotplug_lock

add_memory() currently does not take the device_hotplug_lock, however
is aleady called under the lock from
	arch/powerpc/platforms/pseries/hotplug-memory.c
	drivers/acpi/acpi_memhotplug.c
to synchronize against CPU hot-remove and similar.

In general, we should hold the device_hotplug_lock when adding memory to
synchronize against online/offline request (e.g.  from user space) - which
already resulted in lock inversions due to device_lock() and
mem_hotplug_lock - see 30467e0b3be ("mm, hotplug: fix concurrent memory
hot-add deadlock").  add_memory()/add_memory_resource() will create memory
block devices, so this really feels like the right thing to do.

Holding the device_hotplug_lock makes sure that a memory block device
can really only be accessed (e.g. via .online/.state) from user space,
once the memory has been fully added to the system.

The lock is not held yet in
	drivers/xen/balloon.c
	arch/powerpc/platforms/powernv/memtrace.c
	drivers/s390/char/sclp_cmd.c
	drivers/hv/hv_balloon.c
So, let's either use the locked variants or take the lock.

Don't export add_memory_resource(), as it once was exported to be used by
XEN, which is never built as a module.  If somebody requires it, we also
have to export a locked variant (as device_hotplug_lock is never
exported).

Link: http://lkml.kernel.org/r/20180925091457.28651-3-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Pavel Tatashin <pavel.tatashin@microsoft.com>
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Rashmica Gupta <rashmica.g@gmail.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Len Brown <lenb@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Cc: John Allen <jallen@linux.vnet.ibm.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Mathieu Malaterre <malat@debian.org>
Cc: Pavel Tatashin <pavel.tatashin@microsoft.com>
Cc: YASUAKI ISHIMATSU <yasu.isimatu@gmail.com>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kate Stewart <kstewart@linuxfoundation.org>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Philippe Ombredanne <pombredanne@nexb.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/platforms/pseries/hotplug-memory.c |  2 +-
 drivers/acpi/acpi_memhotplug.c                  |  2 +-
 drivers/base/memory.c                           |  9 +++++++--
 drivers/xen/balloon.c                           |  3 +++
 include/linux/memory_hotplug.h                  |  1 +
 mm/memory_hotplug.c                             | 22 +++++++++++++++++++---
 6 files changed, 32 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index d79b31e7a6cf..2a983b5a52e1 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -668,7 +668,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
 	nid = memory_add_physaddr_to_nid(lmb->base_addr);
 
 	/* Add the memory */
-	rc = add_memory(nid, lmb->base_addr, block_sz);
+	rc = __add_memory(nid, lmb->base_addr, block_sz);
 	if (rc) {
 		invalidate_lmb_associativity_index(lmb);
 		return rc;
diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c
index 811148415993..8fe0960ea572 100644
--- a/drivers/acpi/acpi_memhotplug.c
+++ b/drivers/acpi/acpi_memhotplug.c
@@ -228,7 +228,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
 		if (node < 0)
 			node = memory_add_physaddr_to_nid(info->start_addr);
 
-		result = add_memory(node, info->start_addr, info->length);
+		result = __add_memory(node, info->start_addr, info->length);
 
 		/*
 		 * If the memory block has been used by the kernel, add_memory()
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 817320c7c4c1..40cac122ec73 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -519,15 +519,20 @@ memory_probe_store(struct device *dev, struct device_attribute *attr,
 	if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
 		return -EINVAL;
 
+	ret = lock_device_hotplug_sysfs();
+	if (ret)
+		goto out;
+
 	nid = memory_add_physaddr_to_nid(phys_addr);
-	ret = add_memory(nid, phys_addr,
-			 MIN_MEMORY_BLOCK_SIZE * sections_per_block);
+	ret = __add_memory(nid, phys_addr,
+			   MIN_MEMORY_BLOCK_SIZE * sections_per_block);
 
 	if (ret)
 		goto out;
 
 	ret = count;
 out:
+	unlock_device_hotplug();
 	return ret;
 }
 
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index a3f5cbfcd4a1..fdfc64f5acea 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -395,7 +395,10 @@ static enum bp_state reserve_additional_memory(void)
 	 * callers drop the mutex before trying again.
 	 */
 	mutex_unlock(&balloon_mutex);
+	/* add_memory_resource() requires the device_hotplug lock */
+	lock_device_hotplug();
 	rc = add_memory_resource(nid, resource, memhp_auto_online);
+	unlock_device_hotplug();
 	mutex_lock(&balloon_mutex);
 
 	if (rc) {
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 1f096852f479..ffd9cd10fcf3 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -324,6 +324,7 @@ static inline void __remove_memory(int nid, u64 start, u64 size) {}
 extern void __ref free_area_init_core_hotplug(int nid);
 extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
 		void *arg, int (*func)(struct memory_block *, void *));
+extern int __add_memory(int nid, u64 start, u64 size);
 extern int add_memory(int nid, u64 start, u64 size);
 extern int add_memory_resource(int nid, struct resource *resource, bool online);
 extern int arch_add_memory(int nid, u64 start, u64 size,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 8f38e689da25..39cc887bbdcc 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1068,7 +1068,12 @@ static int online_memory_block(struct memory_block *mem, void *arg)
 	return device_online(&mem->dev);
 }
 
-/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
+/*
+ * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
+ * and online/offline operations (triggered e.g. by sysfs).
+ *
+ * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG
+ */
 int __ref add_memory_resource(int nid, struct resource *res, bool online)
 {
 	u64 start, size;
@@ -1137,9 +1142,9 @@ out:
 	mem_hotplug_done();
 	return ret;
 }
-EXPORT_SYMBOL_GPL(add_memory_resource);
 
-int __ref add_memory(int nid, u64 start, u64 size)
+/* requires device_hotplug_lock, see add_memory_resource() */
+int __ref __add_memory(int nid, u64 start, u64 size)
 {
 	struct resource *res;
 	int ret;
@@ -1153,6 +1158,17 @@ int __ref add_memory(int nid, u64 start, u64 size)
 		release_memory_resource(res);
 	return ret;
 }
+
+int add_memory(int nid, u64 start, u64 size)
+{
+	int rc;
+
+	lock_device_hotplug();
+	rc = __add_memory(nid, start, size);
+	unlock_device_hotplug();
+
+	return rc;
+}
 EXPORT_SYMBOL_GPL(add_memory);
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-- 
cgit v1.2.3


From bb58fd7eeffc9bd5d6e2a16cbf0e9e259f8d09f2 Mon Sep 17 00:00:00 2001
From: Mitch Williams <mitch.a.williams@intel.com>
Date: Fri, 19 Oct 2018 14:11:03 -0700
Subject: i40e: Update status codes

Add a few new status code which will be used by the ice driver, and
rename a few to make them more consistent. Error code are mapped to
similar values as in i40e_status.h, so as to be compatible with older
VF drivers not using this status enum.

Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c |  2 +-
 include/linux/avf/virtchnl.h                       | 12 +++++++++---
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 81b0e1f8d14b..ac5698ed0b11 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -3674,7 +3674,7 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
 		dev_err(&pf->pdev->dev, "Invalid message from VF %d, opcode %d, len %d\n",
 			local_vf_id, v_opcode, msglen);
 		switch (ret) {
-		case VIRTCHNL_ERR_PARAM:
+		case VIRTCHNL_STATUS_ERR_PARAM:
 			return -EPERM;
 		default:
 			return -EINVAL;
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 2c9756bd9c4c..b2488055fd1d 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -62,13 +62,19 @@
 /* Error Codes */
 enum virtchnl_status_code {
 	VIRTCHNL_STATUS_SUCCESS				= 0,
-	VIRTCHNL_ERR_PARAM				= -5,
+	VIRTCHNL_STATUS_ERR_PARAM			= -5,
+	VIRTCHNL_STATUS_ERR_NO_MEMORY			= -18,
 	VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH		= -38,
 	VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR		= -39,
 	VIRTCHNL_STATUS_ERR_INVALID_VF_ID		= -40,
-	VIRTCHNL_STATUS_NOT_SUPPORTED			= -64,
+	VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR		= -53,
+	VIRTCHNL_STATUS_ERR_NOT_SUPPORTED		= -64,
 };
 
+/* Backward compatibility */
+#define VIRTCHNL_ERR_PARAM VIRTCHNL_STATUS_ERR_PARAM
+#define VIRTCHNL_STATUS_NOT_SUPPORTED VIRTCHNL_STATUS_ERR_NOT_SUPPORTED
+
 #define VIRTCHNL_LINK_SPEED_100MB_SHIFT		0x1
 #define VIRTCHNL_LINK_SPEED_1000MB_SHIFT	0x2
 #define VIRTCHNL_LINK_SPEED_10GB_SHIFT		0x3
@@ -831,7 +837,7 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
 	case VIRTCHNL_OP_EVENT:
 	case VIRTCHNL_OP_UNKNOWN:
 	default:
-		return VIRTCHNL_ERR_PARAM;
+		return VIRTCHNL_STATUS_ERR_PARAM;
 	}
 	/* few more checks */
 	if (err_msg_format || valid_len != msglen)
-- 
cgit v1.2.3


From a324e9396ca3d00e1101476ba067b412e0aba232 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Wed, 31 Oct 2018 19:20:20 +0100
Subject: EDAC, skx: Fix randconfig builds

The driver depends on the ADXL component glue and selects it. However,
ADXL itself implicitly depends on ACPI and in nonsensical randconfig
builds like this:

  # CONFIG_ACPI is not set
  CONFIG_ACPI_ADXL=y

where ACPI is not enabled, the build fails with:

  drivers/edac/skx_edac.o: In function `skx_mce_check_error':
  skx_edac.c:(.text+0xab): undefined reference to `adxl_decode'
  drivers/edac/skx_edac.o: In function `skx_init':
  skx_edac.c:(.init.text+0x8bf): undefined reference to `adxl_get_component_names'
  make: *** [vmlinux] Error 1

Add stubs for that case so that the build succeeds. CONFIG_ACPI=n
doesn't make any sense for real configurations but this fix will at
least silence randconfig builds.

Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Tony Luck <tony.luck@intel.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
---
 drivers/edac/Kconfig | 2 +-
 include/linux/adxl.h | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index ffd349c12479..68e479b4d9c9 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -234,7 +234,7 @@ config EDAC_SKX
 	depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
 	depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y
 	select DMI
-	select ACPI_ADXL
+	select ACPI_ADXL if ACPI
 	help
 	  Support for error detection and correction the Intel
 	  Skylake server Integrated Memory Controllers. If your
diff --git a/include/linux/adxl.h b/include/linux/adxl.h
index 2a629acb4c3f..2d29f55923e3 100644
--- a/include/linux/adxl.h
+++ b/include/linux/adxl.h
@@ -7,7 +7,12 @@
 #ifndef _LINUX_ADXL_H
 #define _LINUX_ADXL_H
 
+#ifdef CONFIG_ACPI_ADXL
 const char * const *adxl_get_component_names(void);
 int adxl_decode(u64 addr, u64 component_values[]);
+#else
+static inline const char * const *adxl_get_component_names(void)  { return NULL; }
+static inline int adxl_decode(u64 addr, u64 component_values[])   { return  -EOPNOTSUPP; }
+#endif
 
 #endif /* _LINUX_ADXL_H */
-- 
cgit v1.2.3


From 439cd39ea136d2c026805264d58a91f36b6b64ca Mon Sep 17 00:00:00 2001
From: Stefano Brivio <sbrivio@redhat.com>
Date: Sat, 14 Jul 2018 21:59:43 +0200
Subject: netfilter: ipset: list:set: Decrease refcount synchronously on
 deletion and replace

Commit 45040978c899 ("netfilter: ipset: Fix set:list type crash
when flush/dump set in parallel") postponed decreasing set
reference counters to the RCU callback.

An 'ipset del' command can terminate before the RCU grace period
is elapsed, and if sets are listed before then, the reference
counter shown in userspace will be wrong:

 # ipset create h hash:ip; ipset create l list:set; ipset add l
 # ipset del l h; ipset list h
 Name: h
 Type: hash:ip
 Revision: 4
 Header: family inet hashsize 1024 maxelem 65536
 Size in memory: 88
 References: 1
 Number of entries: 0
 Members:
 # sleep 1; ipset list h
 Name: h
 Type: hash:ip
 Revision: 4
 Header: family inet hashsize 1024 maxelem 65536
 Size in memory: 88
 References: 0
 Number of entries: 0
 Members:

Fix this by making the reference count update synchronous again.

As a result, when sets are listed, ip_set_name_byindex() might
now fetch a set whose reference count is already zero. Instead
of relying on the reference count to protect against concurrent
set renaming, grab ip_set_ref_lock as reader and copy the name,
while holding the same lock in ip_set_rename() as writer
instead.

Reported-by: Li Shuang <shuali@redhat.com>
Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when flush/dump set in parallel")
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set.h |  2 +-
 net/netfilter/ipset/ip_set_core.c      | 23 +++++++++++------------
 net/netfilter/ipset/ip_set_list_set.c  | 17 +++++++++++------
 3 files changed, 23 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 34fc80f3eb90..1d100efe74ec 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -314,7 +314,7 @@ enum {
 extern ip_set_id_t ip_set_get_byname(struct net *net,
 				     const char *name, struct ip_set **set);
 extern void ip_set_put_byindex(struct net *net, ip_set_id_t index);
-extern const char *ip_set_name_byindex(struct net *net, ip_set_id_t index);
+extern void ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name);
 extern ip_set_id_t ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index);
 extern void ip_set_nfnl_put(struct net *net, ip_set_id_t index);
 
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index bc4bd247bb7d..fa15a831aeee 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -693,21 +693,20 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index)
 EXPORT_SYMBOL_GPL(ip_set_put_byindex);
 
 /* Get the name of a set behind a set index.
- * We assume the set is referenced, so it does exist and
- * can't be destroyed. The set cannot be renamed due to
- * the referencing either.
- *
+ * Set itself is protected by RCU, but its name isn't: to protect against
+ * renaming, grab ip_set_ref_lock as reader (see ip_set_rename()) and copy the
+ * name.
  */
-const char *
-ip_set_name_byindex(struct net *net, ip_set_id_t index)
+void
+ip_set_name_byindex(struct net *net, ip_set_id_t index, char *name)
 {
-	const struct ip_set *set = ip_set_rcu_get(net, index);
+	struct ip_set *set = ip_set_rcu_get(net, index);
 
 	BUG_ON(!set);
-	BUG_ON(set->ref == 0);
 
-	/* Referenced, so it's safe */
-	return set->name;
+	read_lock_bh(&ip_set_ref_lock);
+	strncpy(name, set->name, IPSET_MAXNAMELEN);
+	read_unlock_bh(&ip_set_ref_lock);
 }
 EXPORT_SYMBOL_GPL(ip_set_name_byindex);
 
@@ -1153,7 +1152,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
 	if (!set)
 		return -ENOENT;
 
-	read_lock_bh(&ip_set_ref_lock);
+	write_lock_bh(&ip_set_ref_lock);
 	if (set->ref != 0) {
 		ret = -IPSET_ERR_REFERENCED;
 		goto out;
@@ -1170,7 +1169,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
 	strncpy(set->name, name2, IPSET_MAXNAMELEN);
 
 out:
-	read_unlock_bh(&ip_set_ref_lock);
+	write_unlock_bh(&ip_set_ref_lock);
 	return ret;
 }
 
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 072a658fde04..4eef55da0878 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -148,9 +148,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
 {
 	struct set_elem *e = container_of(rcu, struct set_elem, rcu);
 	struct ip_set *set = e->set;
-	struct list_set *map = set->data;
 
-	ip_set_put_byindex(map->net, e->id);
 	ip_set_ext_destroy(set, e);
 	kfree(e);
 }
@@ -158,15 +156,21 @@ __list_set_del_rcu(struct rcu_head * rcu)
 static inline void
 list_set_del(struct ip_set *set, struct set_elem *e)
 {
+	struct list_set *map = set->data;
+
 	set->elements--;
 	list_del_rcu(&e->list);
+	ip_set_put_byindex(map->net, e->id);
 	call_rcu(&e->rcu, __list_set_del_rcu);
 }
 
 static inline void
-list_set_replace(struct set_elem *e, struct set_elem *old)
+list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
 {
+	struct list_set *map = set->data;
+
 	list_replace_rcu(&old->list, &e->list);
+	ip_set_put_byindex(map->net, old->id);
 	call_rcu(&old->rcu, __list_set_del_rcu);
 }
 
@@ -298,7 +302,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	INIT_LIST_HEAD(&e->list);
 	list_set_init_extensions(set, ext, e);
 	if (n)
-		list_set_replace(e, n);
+		list_set_replace(set, e, n);
 	else if (next)
 		list_add_tail_rcu(&e->list, &next->list);
 	else if (prev)
@@ -486,6 +490,7 @@ list_set_list(const struct ip_set *set,
 	const struct list_set *map = set->data;
 	struct nlattr *atd, *nested;
 	u32 i = 0, first = cb->args[IPSET_CB_ARG0];
+	char name[IPSET_MAXNAMELEN];
 	struct set_elem *e;
 	int ret = 0;
 
@@ -504,8 +509,8 @@ list_set_list(const struct ip_set *set,
 		nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
 		if (!nested)
 			goto nla_put_failure;
-		if (nla_put_string(skb, IPSET_ATTR_NAME,
-				   ip_set_name_byindex(map->net, e->id)))
+		ip_set_name_byindex(map->net, e->id, name);
+		if (nla_put_string(skb, IPSET_ATTR_NAME, name))
 			goto nla_put_failure;
 		if (ip_set_put_extensions(skb, set, e, true))
 			goto nla_put_failure;
-- 
cgit v1.2.3


From 0962590e553331db2cc0aef2dc35c57f6300dbbe Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 1 Nov 2018 00:05:52 +0100
Subject: bpf: fix partial copy of map_ptr when dst is scalar

ALU operations on pointers such as scalar_reg += map_value_ptr are
handled in adjust_ptr_min_max_vals(). Problem is however that map_ptr
and range in the register state share a union, so transferring state
through dst_reg->range = ptr_reg->range is just buggy as any new
map_ptr in the dst_reg is then truncated (or null) for subsequent
checks. Fix this by adding a raw member and use it for copying state
over to dst_reg.

Fixes: f1174f77b50c ("bpf/verifier: rework value tracking")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Edward Cree <ecree@solarflare.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |  3 +++
 kernel/bpf/verifier.c        | 10 ++++++----
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 9e8056ec20fa..d93e89761a8b 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -51,6 +51,9 @@ struct bpf_reg_state {
 		 *   PTR_TO_MAP_VALUE_OR_NULL
 		 */
 		struct bpf_map *map_ptr;
+
+		/* Max size from any of the above. */
+		unsigned long raw;
 	};
 	/* Fixed part of pointer offset, pointer types only */
 	s32 off;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 171a2c88e77d..774fa40a32ae 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3046,7 +3046,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 			dst_reg->umax_value = umax_ptr;
 			dst_reg->var_off = ptr_reg->var_off;
 			dst_reg->off = ptr_reg->off + smin_val;
-			dst_reg->range = ptr_reg->range;
+			dst_reg->raw = ptr_reg->raw;
 			break;
 		}
 		/* A new variable offset is created.  Note that off_reg->off
@@ -3076,10 +3076,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 		}
 		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
 		dst_reg->off = ptr_reg->off;
+		dst_reg->raw = ptr_reg->raw;
 		if (reg_is_pkt_pointer(ptr_reg)) {
 			dst_reg->id = ++env->id_gen;
 			/* something was added to pkt_ptr, set range to zero */
-			dst_reg->range = 0;
+			dst_reg->raw = 0;
 		}
 		break;
 	case BPF_SUB:
@@ -3108,7 +3109,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 			dst_reg->var_off = ptr_reg->var_off;
 			dst_reg->id = ptr_reg->id;
 			dst_reg->off = ptr_reg->off - smin_val;
-			dst_reg->range = ptr_reg->range;
+			dst_reg->raw = ptr_reg->raw;
 			break;
 		}
 		/* A new variable offset is created.  If the subtrahend is known
@@ -3134,11 +3135,12 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 		}
 		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
 		dst_reg->off = ptr_reg->off;
+		dst_reg->raw = ptr_reg->raw;
 		if (reg_is_pkt_pointer(ptr_reg)) {
 			dst_reg->id = ++env->id_gen;
 			/* something was added to pkt_ptr, set range to zero */
 			if (smin_val < 0)
-				dst_reg->range = 0;
+				dst_reg->raw = 0;
 		}
 		break;
 	case BPF_AND:
-- 
cgit v1.2.3


From a846446b1914d1e3d996d657754f43fde89bab51 Mon Sep 17 00:00:00 2001
From: Dmitry Safonov <dima@arista.com>
Date: Fri, 12 Oct 2018 14:42:52 +0100
Subject: x86/compat: Adjust in_compat_syscall() to generic code under !COMPAT

The result of in_compat_syscall() can be pictured as:

x86 platform:
    ---------------------------------------------------
    |  Arch\syscall  |  64-bit  |   ia32   |   x32    |
    |-------------------------------------------------|
    |     x86_64     |  false   |   true   |   true   |
    |-------------------------------------------------|
    |      i686      |          |  <true>  |          |
    ---------------------------------------------------

Other platforms:
    -------------------------------------------
    |  Arch\syscall  |  64-bit  |   compat    |
    |-----------------------------------------|
    |     64-bit     |  false   |    true     |
    |-----------------------------------------|
    |    32-bit(?)   |          |   <false>   |
    -------------------------------------------

As seen, the result of in_compat_syscall() on generic 32-bit platform
differs from i686.

There is no reason for in_compat_syscall() == true on native i686.  It also
easy to misread code if the result on native 32-bit platform differs
between arches.

Because of that non arch-specific code has many places with:
    if (IS_ENABLED(CONFIG_COMPAT) && in_compat_syscall())
in different variations.

It looks-like the only non-x86 code which uses in_compat_syscall() not
under CONFIG_COMPAT guard is in amd/amdkfd. But according to the commit
a18069c132cb ("amdkfd: Disable support for 32-bit user processes"), it
actually should be disabled on native i686.

Rename in_compat_syscall() to in_32bit_syscall() for x86-specific code
and make in_compat_syscall() false under !CONFIG_COMPAT.

A follow on patch will clean up generic users which were forced to check
IS_ENABLED(CONFIG_COMPAT) with in_compat_syscall().

Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Andy Lutomirski <luto@kernel.org>
Cc: Dmitry Safonov <0x7f454c46@gmail.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: linux-efi@vger.kernel.org
Cc: netdev@vger.kernel.org
Link: https://lkml.kernel.org/r/20181012134253.23266-2-dima@arista.com
---
 arch/x86/include/asm/compat.h |  9 ++++++++-
 arch/x86/include/asm/ftrace.h |  4 +---
 arch/x86/kernel/process_64.c  |  4 ++--
 arch/x86/kernel/sys_x86_64.c  | 11 ++++++-----
 arch/x86/mm/hugetlbpage.c     |  4 ++--
 arch/x86/mm/mmap.c            |  2 +-
 include/linux/compat.h        |  4 ++--
 7 files changed, 22 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index fab4df16a3c4..22c4dfe65992 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -217,11 +217,18 @@ static inline bool in_x32_syscall(void)
 	return false;
 }
 
-static inline bool in_compat_syscall(void)
+static inline bool in_32bit_syscall(void)
 {
 	return in_ia32_syscall() || in_x32_syscall();
 }
+
+#ifdef CONFIG_COMPAT
+static inline bool in_compat_syscall(void)
+{
+	return in_32bit_syscall();
+}
 #define in_compat_syscall in_compat_syscall	/* override the generic impl */
+#endif
 
 struct compat_siginfo;
 int __copy_siginfo_to_user32(struct compat_siginfo __user *to,
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index c18ed65287d5..cf350639e76d 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -76,9 +76,7 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name
 #define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 1
 static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
 {
-	if (in_compat_syscall())
-		return true;
-	return false;
+	return in_32bit_syscall();
 }
 #endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */
 #endif /* !COMPILE_OFFSETS */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 31b4755369f0..0e0b4288a4b2 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -701,10 +701,10 @@ static void __set_personality_x32(void)
 		current->mm->context.ia32_compat = TIF_X32;
 	current->personality &= ~READ_IMPLIES_EXEC;
 	/*
-	 * in_compat_syscall() uses the presence of the x32 syscall bit
+	 * in_32bit_syscall() uses the presence of the x32 syscall bit
 	 * flag to determine compat status.  The x86 mmap() code relies on
 	 * the syscall bitness so set x32 syscall bit right here to make
-	 * in_compat_syscall() work during exec().
+	 * in_32bit_syscall() work during exec().
 	 *
 	 * Pretend to come from a x32 execve.
 	 */
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 6a78d4b36a79..f7476ce23b6e 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -105,7 +105,7 @@ out:
 static void find_start_end(unsigned long addr, unsigned long flags,
 		unsigned long *begin, unsigned long *end)
 {
-	if (!in_compat_syscall() && (flags & MAP_32BIT)) {
+	if (!in_32bit_syscall() && (flags & MAP_32BIT)) {
 		/* This is usually used needed to map code in small
 		   model, so it needs to be in the first 31bit. Limit
 		   it to that.  This means we need to move the
@@ -122,7 +122,7 @@ static void find_start_end(unsigned long addr, unsigned long flags,
 	}
 
 	*begin	= get_mmap_base(1);
-	if (in_compat_syscall())
+	if (in_32bit_syscall())
 		*end = task_size_32bit();
 	else
 		*end = task_size_64bit(addr > DEFAULT_MAP_WINDOW);
@@ -193,7 +193,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 		return addr;
 
 	/* for MAP_32BIT mappings we force the legacy mmap base */
-	if (!in_compat_syscall() && (flags & MAP_32BIT))
+	if (!in_32bit_syscall() && (flags & MAP_32BIT))
 		goto bottomup;
 
 	/* requesting a specific address */
@@ -217,9 +217,10 @@ get_unmapped_area:
 	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
 	 * in the full address space.
 	 *
-	 * !in_compat_syscall() check to avoid high addresses for x32.
+	 * !in_32bit_syscall() check to avoid high addresses for x32
+	 * (and make it no op on native i386).
 	 */
-	if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall())
+	if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
 		info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
 
 	info.align_mask = 0;
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 00b296617ca4..92e4c4b85bba 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -92,7 +92,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
 	 * in the full address space.
 	 */
-	info.high_limit = in_compat_syscall() ?
+	info.high_limit = in_32bit_syscall() ?
 		task_size_32bit() : task_size_64bit(addr > DEFAULT_MAP_WINDOW);
 
 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
@@ -116,7 +116,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
 	 * If hint address is above DEFAULT_MAP_WINDOW, look for unmapped area
 	 * in the full address space.
 	 */
-	if (addr > DEFAULT_MAP_WINDOW && !in_compat_syscall())
+	if (addr > DEFAULT_MAP_WINDOW && !in_32bit_syscall())
 		info.high_limit += TASK_SIZE_MAX - DEFAULT_MAP_WINDOW;
 
 	info.align_mask = PAGE_MASK & ~huge_page_mask(h);
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 1e95d57760cf..db3165714521 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -166,7 +166,7 @@ unsigned long get_mmap_base(int is_legacy)
 	struct mm_struct *mm = current->mm;
 
 #ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
-	if (in_compat_syscall()) {
+	if (in_32bit_syscall()) {
 		return is_legacy ? mm->mmap_compat_legacy_base
 				 : mm->mmap_compat_base;
 	}
diff --git a/include/linux/compat.h b/include/linux/compat.h
index d30e4dbd4be2..f1ef24c68fcc 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -1029,9 +1029,9 @@ int kcompat_sys_fstatfs64(unsigned int fd, compat_size_t sz,
 #else /* !CONFIG_COMPAT */
 
 #define is_compat_task() (0)
-#ifndef in_compat_syscall
+/* Ensure no one redefines in_compat_syscall() under !CONFIG_COMPAT */
+#define in_compat_syscall in_compat_syscall
 static inline bool in_compat_syscall(void) { return false; }
-#endif
 
 #endif /* CONFIG_COMPAT */
 
-- 
cgit v1.2.3


From c3be6577d82a9f0163eb1e2c37a477414d12a209 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Thu, 1 Nov 2018 17:51:34 +0000
Subject: SUNRPC: Use atomic(64)_t for seq_send(64)

The seq_send & seq_send64 fields in struct krb5_ctx are used as
atomically incrementing counters. This is implemented using cmpxchg() &
cmpxchg64() to implement what amount to custom versions of
atomic_fetch_inc() & atomic64_fetch_inc().

Besides the duplication, using cmpxchg64() has another major drawback in
that some 32 bit architectures don't provide it. As such commit
571ed1fd2390 ("SUNRPC: Replace krb5_seq_lock with a lockless scheme")
resulted in build failures for some architectures.

Change seq_send to be an atomic_t and seq_send64 to be an atomic64_t,
then use atomic(64)_* functions to manipulate the values. The atomic64_t
type & associated functions are provided even on architectures which
lack real 64 bit atomic memory access via CONFIG_GENERIC_ATOMIC64 which
uses spinlocks to serialize access. This fixes the build failures for
architectures lacking cmpxchg64().

A potential alternative that was raised would be to provide cmpxchg64()
on the 32 bit architectures that currently lack it, using spinlocks.
However this would provide a version of cmpxchg64() with semantics a
little different to the implementations on architectures with real 64
bit atomics - the spinlock-based implementation would only work if all
access to the memory used with cmpxchg64() is *always* performed using
cmpxchg64(). That is not currently a requirement for users of
cmpxchg64(), and making it one seems questionable. As such avoiding
cmpxchg64() outside of architecture-specific code seems best,
particularly in cases where atomic64_t seems like a better fit anyway.

The CONFIG_GENERIC_ATOMIC64 implementation of atomic64_* functions will
use spinlocks & so faces the same issue, but with the key difference
that the memory backing an atomic64_t ought to always be accessed via
the atomic64_* functions anyway making the issue moot.

Signed-off-by: Paul Burton <paul.burton@mips.com>
Fixes: 571ed1fd2390 ("SUNRPC: Replace krb5_seq_lock with a lockless scheme")
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: Anna Schumaker <anna.schumaker@netapp.com>
Cc: J. Bruce Fields <bfields@fieldses.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: David S. Miller <davem@davemloft.net>
Cc: linux-nfs@vger.kernel.org
Cc: netdev@vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/gss_krb5.h     |  7 ++-----
 net/sunrpc/auth_gss/gss_krb5_mech.c | 16 ++++++++++------
 net/sunrpc/auth_gss/gss_krb5_seal.c | 28 ++--------------------------
 net/sunrpc/auth_gss/gss_krb5_wrap.c |  4 ++--
 4 files changed, 16 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 69f749afa617..4162de72e95c 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -107,8 +107,8 @@ struct krb5_ctx {
 	u8			Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */
 	u8			cksum[GSS_KRB5_MAX_KEYLEN];
 	s32			endtime;
-	u32			seq_send;
-	u64			seq_send64;
+	atomic_t		seq_send;
+	atomic64_t		seq_send64;
 	struct xdr_netobj	mech_used;
 	u8			initiator_sign[GSS_KRB5_MAX_KEYLEN];
 	u8			acceptor_sign[GSS_KRB5_MAX_KEYLEN];
@@ -118,9 +118,6 @@ struct krb5_ctx {
 	u8			acceptor_integ[GSS_KRB5_MAX_KEYLEN];
 };
 
-extern u32 gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx);
-extern u64 gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx);
-
 /* The length of the Kerberos GSS token header */
 #define GSS_KRB5_TOK_HDR_LEN	(16)
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 7bb2514aadd9..71cb29dc86c2 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -275,6 +275,7 @@ out_err:
 static int
 gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
 {
+	u32 seq_send;
 	int tmp;
 
 	p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
@@ -316,9 +317,10 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
 	p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
 	if (IS_ERR(p))
 		goto out_err;
-	p = simple_get_bytes(p, end, &ctx->seq_send, sizeof(ctx->seq_send));
+	p = simple_get_bytes(p, end, &seq_send, sizeof(seq_send));
 	if (IS_ERR(p))
 		goto out_err;
+	atomic_set(&ctx->seq_send, seq_send);
 	p = simple_get_netobj(p, end, &ctx->mech_used);
 	if (IS_ERR(p))
 		goto out_err;
@@ -610,6 +612,7 @@ static int
 gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
 		gfp_t gfp_mask)
 {
+	u64 seq_send64;
 	int keylen;
 
 	p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
@@ -620,14 +623,15 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
 	p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
 	if (IS_ERR(p))
 		goto out_err;
-	p = simple_get_bytes(p, end, &ctx->seq_send64, sizeof(ctx->seq_send64));
+	p = simple_get_bytes(p, end, &seq_send64, sizeof(seq_send64));
 	if (IS_ERR(p))
 		goto out_err;
+	atomic64_set(&ctx->seq_send64, seq_send64);
 	/* set seq_send for use by "older" enctypes */
-	ctx->seq_send = ctx->seq_send64;
-	if (ctx->seq_send64 != ctx->seq_send) {
-		dprintk("%s: seq_send64 %lx, seq_send %x overflow?\n", __func__,
-			(unsigned long)ctx->seq_send64, ctx->seq_send);
+	atomic_set(&ctx->seq_send, seq_send64);
+	if (seq_send64 != atomic_read(&ctx->seq_send)) {
+		dprintk("%s: seq_send64 %llx, seq_send %x overflow?\n", __func__,
+			seq_send64, atomic_read(&ctx->seq_send));
 		p = ERR_PTR(-EINVAL);
 		goto out_err;
 	}
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index b4adeb06660b..48fe4a591b54 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -123,30 +123,6 @@ setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
 	return krb5_hdr;
 }
 
-u32
-gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx)
-{
-	u32 old, seq_send = READ_ONCE(ctx->seq_send);
-
-	do {
-		old = seq_send;
-		seq_send = cmpxchg(&ctx->seq_send, old, old + 1);
-	} while (old != seq_send);
-	return seq_send;
-}
-
-u64
-gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx)
-{
-	u64 old, seq_send = READ_ONCE(ctx->seq_send);
-
-	do {
-		old = seq_send;
-		seq_send = cmpxchg64(&ctx->seq_send64, old, old + 1);
-	} while (old != seq_send);
-	return seq_send;
-}
-
 static u32
 gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
 		struct xdr_netobj *token)
@@ -177,7 +153,7 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
 
 	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
 
-	seq_send = gss_seq_send_fetch_and_inc(ctx);
+	seq_send = atomic_fetch_inc(&ctx->seq_send);
 
 	if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
 			      seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
@@ -205,7 +181,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
 
 	/* Set up the sequence number. Now 64-bits in clear
 	 * text and w/o direction indicator */
-	seq_send_be64 = cpu_to_be64(gss_seq_send64_fetch_and_inc(ctx));
+	seq_send_be64 = cpu_to_be64(atomic64_fetch_inc(&ctx->seq_send64));
 	memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8);
 
 	if (ctx->initiate) {
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 41cb294cd071..6af6f119d9c1 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -228,7 +228,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 
 	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
 
-	seq_send = gss_seq_send_fetch_and_inc(kctx);
+	seq_send = atomic_fetch_inc(&kctx->seq_send);
 
 	/* XXX would probably be more efficient to compute checksum
 	 * and encrypt at the same time: */
@@ -475,7 +475,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
 	*be16ptr++ = 0;
 
 	be64ptr = (__be64 *)be16ptr;
-	*be64ptr = cpu_to_be64(gss_seq_send64_fetch_and_inc(kctx));
+	*be64ptr = cpu_to_be64(atomic64_fetch_inc(&kctx->seq_send64));
 
 	err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages);
 	if (err)
-- 
cgit v1.2.3


From b5f2954d30c77649bce9c27e7a0a94299d9cfdf8 Mon Sep 17 00:00:00 2001
From: Dennis Zhou <dennis@kernel.org>
Date: Thu, 1 Nov 2018 17:24:10 -0400
Subject: blkcg: revert blkcg cleanups series

This reverts a series committed earlier due to null pointer exception
bug report in [1]. It seems there are edge case interactions that I did
not consider and will need some time to understand what causes the
adverse interactions.

The original series can be found in [2] with a follow up series in [3].

[1] https://www.spinics.net/lists/cgroups/msg20719.html
[2] https://lore.kernel.org/lkml/20180911184137.35897-1-dennisszhou@gmail.com/
[3] https://lore.kernel.org/lkml/20181020185612.51587-1-dennis@kernel.org/

This reverts the following commits:
d459d853c2ed, b2c3fa546705, 101246ec02b5, b3b9f24f5fcc, e2b0989954ae,
f0fcb3ec89f3, c839e7a03f92, bdc2491708c4, 74b7c02a9bc1, 5bf9a1f3b4ef,
a7b39b4e961c, 07b05bcc3213, 49f4c2dc2b50, 27e6fa996c53

Signed-off-by: Dennis Zhou <dennis@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/admin-guide/cgroup-v2.rst |   8 +-
 block/bfq-cgroup.c                      |   4 +-
 block/bfq-iosched.c                     |   2 +-
 block/bio.c                             | 174 +++++++++-----------------------
 block/blk-cgroup.c                      | 123 +++++++---------------
 block/blk-core.c                        |   1 -
 block/blk-iolatency.c                   |  26 ++++-
 block/blk-throttle.c                    |  13 ++-
 block/bounce.c                          |   4 +-
 block/cfq-iosched.c                     |   4 +-
 drivers/block/loop.c                    |   5 +-
 drivers/md/raid0.c                      |   2 +-
 fs/buffer.c                             |  10 +-
 fs/ext4/page-io.c                       |   2 +-
 include/linux/bio.h                     |  26 ++---
 include/linux/blk-cgroup.h              | 145 +++++++++-----------------
 include/linux/blk_types.h               |   1 +
 include/linux/cgroup.h                  |   2 -
 include/linux/writeback.h               |   5 +-
 kernel/cgroup/cgroup.c                  |  48 ++-------
 kernel/trace/blktrace.c                 |   4 +-
 mm/page_io.c                            |   2 +-
 22 files changed, 208 insertions(+), 403 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index caf36105a1c7..184193bcb262 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1857,10 +1857,8 @@ following two functions.
 
   wbc_init_bio(@wbc, @bio)
 	Should be called for each bio carrying writeback data and
-	associates the bio with the inode's owner cgroup and the
-	corresponding request queue.  This must be called after
-	a queue (device) has been associated with the bio and
-	before submission.
+	associates the bio with the inode's owner cgroup.  Can be
+	called anytime between bio allocation and submission.
 
   wbc_account_io(@wbc, @page, @bytes)
 	Should be called for each data segment being written out.
@@ -1879,7 +1877,7 @@ the configuration, the bio may be executed at a lower priority and if
 the writeback session is holding shared resources, e.g. a journal
 entry, may lead to priority inversion.  There is no one easy solution
 for the problem.  Filesystems can try to work around specific problem
-cases by skipping wbc_init_bio() or using bio_associate_create_blkg()
+cases by skipping wbc_init_bio() or using bio_associate_blkcg()
 directly.
 
 
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index d9a7916ff0ab..9fe5952d117d 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
 	uint64_t serial_nr;
 
 	rcu_read_lock();
-	serial_nr = __bio_blkcg(bio)->css.serial_nr;
+	serial_nr = bio_blkcg(bio)->css.serial_nr;
 
 	/*
 	 * Check whether blkcg has changed.  The condition may trigger
@@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
 	if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
 		goto out;
 
-	bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
+	bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
 	/*
 	 * Update blkg_path for bfq_log_* functions. We cache this
 	 * path, and update it here, for the following
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 6075100f03a5..3a27d31fcda6 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4384,7 +4384,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
 
 	rcu_read_lock();
 
-	bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
+	bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
 	if (!bfqg) {
 		bfqq = &bfqd->oom_bfqq;
 		goto out;
diff --git a/block/bio.c b/block/bio.c
index bbfeb4ee2892..4a5a036268fb 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -609,9 +609,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 	bio->bi_iter = bio_src->bi_iter;
 	bio->bi_io_vec = bio_src->bi_io_vec;
 
-	bio_clone_blkg_association(bio, bio_src);
-
-	blkcg_bio_issue_init(bio);
+	bio_clone_blkcg_association(bio, bio_src);
 }
 EXPORT_SYMBOL(__bio_clone_fast);
 
@@ -1956,151 +1954,69 @@ EXPORT_SYMBOL(bioset_init_from_src);
 
 #ifdef CONFIG_BLK_CGROUP
 
-/**
- * bio_associate_blkg - associate a bio with the a blkg
- * @bio: target bio
- * @blkg: the blkg to associate
- *
- * This tries to associate @bio with the specified blkg.  Association failure
- * is handled by walking up the blkg tree.  Therefore, the blkg associated can
- * be anything between @blkg and the root_blkg.  This situation only happens
- * when a cgroup is dying and then the remaining bios will spill to the closest
- * alive blkg.
- *
- * A reference will be taken on the @blkg and will be released when @bio is
- * freed.
- */
-int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
-{
-	if (unlikely(bio->bi_blkg))
-		return -EBUSY;
-	bio->bi_blkg = blkg_tryget_closest(blkg);
-	return 0;
-}
-
-/**
- * __bio_associate_blkg_from_css - internal blkg association function
- *
- * This in the core association function that all association paths rely on.
- * A blkg reference is taken which is released upon freeing of the bio.
- */
-static int __bio_associate_blkg_from_css(struct bio *bio,
-					 struct cgroup_subsys_state *css)
-{
-	struct request_queue *q = bio->bi_disk->queue;
-	struct blkcg_gq *blkg;
-	int ret;
-
-	rcu_read_lock();
-
-	if (!css || !css->parent)
-		blkg = q->root_blkg;
-	else
-		blkg = blkg_lookup_create(css_to_blkcg(css), q);
-
-	ret = bio_associate_blkg(bio, blkg);
-
-	rcu_read_unlock();
-	return ret;
-}
-
-/**
- * bio_associate_blkg_from_css - associate a bio with a specified css
- * @bio: target bio
- * @css: target css
- *
- * Associate @bio with the blkg found by combining the css's blkg and the
- * request_queue of the @bio.  This falls back to the queue's root_blkg if
- * the association fails with the css.
- */
-int bio_associate_blkg_from_css(struct bio *bio,
-				struct cgroup_subsys_state *css)
-{
-	if (unlikely(bio->bi_blkg))
-		return -EBUSY;
-	return __bio_associate_blkg_from_css(bio, css);
-}
-EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
-
 #ifdef CONFIG_MEMCG
 /**
- * bio_associate_blkg_from_page - associate a bio with the page's blkg
+ * bio_associate_blkcg_from_page - associate a bio with the page's blkcg
  * @bio: target bio
  * @page: the page to lookup the blkcg from
  *
- * Associate @bio with the blkg from @page's owning memcg and the respective
- * request_queue.  If cgroup_e_css returns NULL, fall back to the queue's
- * root_blkg.
- *
- * Note: this must be called after bio has an associated device.
+ * Associate @bio with the blkcg from @page's owning memcg.  This works like
+ * every other associate function wrt references.
  */
-int bio_associate_blkg_from_page(struct bio *bio, struct page *page)
+int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
 {
-	struct cgroup_subsys_state *css;
-	int ret;
+	struct cgroup_subsys_state *blkcg_css;
 
-	if (unlikely(bio->bi_blkg))
+	if (unlikely(bio->bi_css))
 		return -EBUSY;
 	if (!page->mem_cgroup)
 		return 0;
-
-	rcu_read_lock();
-
-	css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
-
-	ret = __bio_associate_blkg_from_css(bio, css);
-
-	rcu_read_unlock();
-	return ret;
+	blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
+				     &io_cgrp_subsys);
+	bio->bi_css = blkcg_css;
+	return 0;
 }
 #endif /* CONFIG_MEMCG */
 
 /**
- * bio_associate_create_blkg - associate a bio with a blkg from q
- * @q: request_queue where bio is going
+ * bio_associate_blkcg - associate a bio with the specified blkcg
  * @bio: target bio
+ * @blkcg_css: css of the blkcg to associate
+ *
+ * Associate @bio with the blkcg specified by @blkcg_css.  Block layer will
+ * treat @bio as if it were issued by a task which belongs to the blkcg.
  *
- * Associate @bio with the blkg found from the bio's css and the request_queue.
- * If one is not found, bio_lookup_blkg creates the blkg.  This falls back to
- * the queue's root_blkg if association fails.
+ * This function takes an extra reference of @blkcg_css which will be put
+ * when @bio is released.  The caller must own @bio and is responsible for
+ * synchronizing calls to this function.
  */
-int bio_associate_create_blkg(struct request_queue *q, struct bio *bio)
+int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
 {
-	struct cgroup_subsys_state *css;
-	int ret = 0;
-
-	/* someone has already associated this bio with a blkg */
-	if (bio->bi_blkg)
-		return ret;
-
-	rcu_read_lock();
-
-	css = blkcg_css();
-
-	ret = __bio_associate_blkg_from_css(bio, css);
-
-	rcu_read_unlock();
-	return ret;
+	if (unlikely(bio->bi_css))
+		return -EBUSY;
+	css_get(blkcg_css);
+	bio->bi_css = blkcg_css;
+	return 0;
 }
+EXPORT_SYMBOL_GPL(bio_associate_blkcg);
 
 /**
- * bio_reassociate_blkg - reassociate a bio with a blkg from q
- * @q: request_queue where bio is going
+ * bio_associate_blkg - associate a bio with the specified blkg
  * @bio: target bio
+ * @blkg: the blkg to associate
  *
- * When submitting a bio, multiple recursive calls to make_request() may occur.
- * This causes the initial associate done in blkcg_bio_issue_check() to be
- * incorrect and reference the prior request_queue.  This performs reassociation
- * when this situation happens.
+ * Associate @bio with the blkg specified by @blkg.  This is the queue specific
+ * blkcg information associated with the @bio, a reference will be taken on the
+ * @blkg and will be freed when the bio is freed.
  */
-int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
+int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
 {
-	if (bio->bi_blkg) {
-		blkg_put(bio->bi_blkg);
-		bio->bi_blkg = NULL;
-	}
-
-	return bio_associate_create_blkg(q, bio);
+	if (unlikely(bio->bi_blkg))
+		return -EBUSY;
+	if (!blkg_try_get(blkg))
+		return -ENODEV;
+	bio->bi_blkg = blkg;
+	return 0;
 }
 
 /**
@@ -2113,6 +2029,10 @@ void bio_disassociate_task(struct bio *bio)
 		put_io_context(bio->bi_ioc);
 		bio->bi_ioc = NULL;
 	}
+	if (bio->bi_css) {
+		css_put(bio->bi_css);
+		bio->bi_css = NULL;
+	}
 	if (bio->bi_blkg) {
 		blkg_put(bio->bi_blkg);
 		bio->bi_blkg = NULL;
@@ -2120,16 +2040,16 @@ void bio_disassociate_task(struct bio *bio)
 }
 
 /**
- * bio_clone_blkg_association - clone blkg association from src to dst bio
+ * bio_clone_blkcg_association - clone blkcg association from src to dst bio
  * @dst: destination bio
  * @src: source bio
  */
-void bio_clone_blkg_association(struct bio *dst, struct bio *src)
+void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
 {
-	if (src->bi_blkg)
-		bio_associate_blkg(dst, src->bi_blkg);
+	if (src->bi_css)
+		WARN_ON(bio_associate_blkcg(dst, src->bi_css));
 }
-EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
+EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
 #endif /* CONFIG_BLK_CGROUP */
 
 static void __init biovec_init_slabs(void)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 992da5592c6e..c630e02836a8 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -84,37 +84,6 @@ static void blkg_free(struct blkcg_gq *blkg)
 	kfree(blkg);
 }
 
-static void __blkg_release(struct rcu_head *rcu)
-{
-	struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
-
-	percpu_ref_exit(&blkg->refcnt);
-
-	/* release the blkcg and parent blkg refs this blkg has been holding */
-	css_put(&blkg->blkcg->css);
-	if (blkg->parent)
-		blkg_put(blkg->parent);
-
-	wb_congested_put(blkg->wb_congested);
-
-	blkg_free(blkg);
-}
-
-/*
- * A group is RCU protected, but having an rcu lock does not mean that one
- * can access all the fields of blkg and assume these are valid.  For
- * example, don't try to follow throtl_data and request queue links.
- *
- * Having a reference to blkg under an rcu allows accesses to only values
- * local to groups like group stats and group rate limits.
- */
-static void blkg_release(struct percpu_ref *ref)
-{
-	struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);
-
-	call_rcu(&blkg->rcu_head, __blkg_release);
-}
-
 /**
  * blkg_alloc - allocate a blkg
  * @blkcg: block cgroup the new blkg is associated with
@@ -141,6 +110,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
 	blkg->q = q;
 	INIT_LIST_HEAD(&blkg->q_node);
 	blkg->blkcg = blkcg;
+	atomic_set(&blkg->refcnt, 1);
 
 	/* root blkg uses @q->root_rl, init rl only for !root blkgs */
 	if (blkcg != &blkcg_root) {
@@ -247,11 +217,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
 		blkg_get(blkg->parent);
 	}
 
-	ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0,
-			      GFP_NOWAIT | __GFP_NOWARN);
-	if (ret)
-		goto err_cancel_ref;
-
 	/* invoke per-policy init */
 	for (i = 0; i < BLKCG_MAX_POLS; i++) {
 		struct blkcg_policy *pol = blkcg_policy[i];
@@ -284,8 +249,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
 	blkg_put(blkg);
 	return ERR_PTR(ret);
 
-err_cancel_ref:
-	percpu_ref_exit(&blkg->refcnt);
 err_put_congested:
 	wb_congested_put(wb_congested);
 err_put_css:
@@ -296,7 +259,7 @@ err_free_blkg:
 }
 
 /**
- * __blkg_lookup_create - lookup blkg, try to create one if not there
+ * blkg_lookup_create - lookup blkg, try to create one if not there
  * @blkcg: blkcg of interest
  * @q: request_queue of interest
  *
@@ -305,11 +268,12 @@ err_free_blkg:
  * that all non-root blkg's have access to the parent blkg.  This function
  * should be called under RCU read lock and @q->queue_lock.
  *
- * Returns the blkg or the closest blkg if blkg_create fails as it walks
- * down from root.
+ * Returns pointer to the looked up or created blkg on success, ERR_PTR()
+ * value on error.  If @q is dead, returns ERR_PTR(-EINVAL).  If @q is not
+ * dead and bypassing, returns ERR_PTR(-EBUSY).
  */
-struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
-				      struct request_queue *q)
+struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
+				    struct request_queue *q)
 {
 	struct blkcg_gq *blkg;
 
@@ -321,7 +285,7 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
 	 * we shouldn't allow anything to go through for a bypassing queue.
 	 */
 	if (unlikely(blk_queue_bypass(q)))
-		return q->root_blkg;
+		return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
 
 	blkg = __blkg_lookup(blkcg, q, true);
 	if (blkg)
@@ -329,58 +293,23 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
 
 	/*
 	 * Create blkgs walking down from blkcg_root to @blkcg, so that all
-	 * non-root blkgs have access to their parents.  Returns the closest
-	 * blkg to the intended blkg should blkg_create() fail.
+	 * non-root blkgs have access to their parents.
 	 */
 	while (true) {
 		struct blkcg *pos = blkcg;
 		struct blkcg *parent = blkcg_parent(blkcg);
-		struct blkcg_gq *ret_blkg = q->root_blkg;
-
-		while (parent) {
-			blkg = __blkg_lookup(parent, q, false);
-			if (blkg) {
-				/* remember closest blkg */
-				ret_blkg = blkg;
-				break;
-			}
+
+		while (parent && !__blkg_lookup(parent, q, false)) {
 			pos = parent;
 			parent = blkcg_parent(parent);
 		}
 
 		blkg = blkg_create(pos, q, NULL);
-		if (IS_ERR(blkg))
-			return ret_blkg;
-		if (pos == blkcg)
+		if (pos == blkcg || IS_ERR(blkg))
 			return blkg;
 	}
 }
 
-/**
- * blkg_lookup_create - find or create a blkg
- * @blkcg: target block cgroup
- * @q: target request_queue
- *
- * This looks up or creates the blkg representing the unique pair
- * of the blkcg and the request_queue.
- */
-struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
-				    struct request_queue *q)
-{
-	struct blkcg_gq *blkg = blkg_lookup(blkcg, q);
-	unsigned long flags;
-
-	if (unlikely(!blkg)) {
-		spin_lock_irqsave(q->queue_lock, flags);
-
-		blkg = __blkg_lookup_create(blkcg, q);
-
-		spin_unlock_irqrestore(q->queue_lock, flags);
-	}
-
-	return blkg;
-}
-
 static void blkg_destroy(struct blkcg_gq *blkg)
 {
 	struct blkcg *blkcg = blkg->blkcg;
@@ -424,7 +353,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
 	 * Put the reference taken at the time of creation so that when all
 	 * queues are gone, group can be destroyed.
 	 */
-	percpu_ref_kill(&blkg->refcnt);
+	blkg_put(blkg);
 }
 
 /**
@@ -451,6 +380,29 @@ static void blkg_destroy_all(struct request_queue *q)
 	q->root_rl.blkg = NULL;
 }
 
+/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid.  For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+void __blkg_release_rcu(struct rcu_head *rcu_head)
+{
+	struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
+
+	/* release the blkcg and parent blkg refs this blkg has been holding */
+	css_put(&blkg->blkcg->css);
+	if (blkg->parent)
+		blkg_put(blkg->parent);
+
+	wb_congested_put(blkg->wb_congested);
+
+	blkg_free(blkg);
+}
+EXPORT_SYMBOL_GPL(__blkg_release_rcu);
+
 /*
  * The next function used by blk_queue_for_each_rl().  It's a bit tricky
  * because the root blkg uses @q->root_rl instead of its own rl.
@@ -1796,7 +1748,8 @@ void blkcg_maybe_throttle_current(void)
 	blkg = blkg_lookup(blkcg, q);
 	if (!blkg)
 		goto out;
-	if (!blkg_tryget(blkg))
+	blkg = blkg_try_get(blkg);
+	if (!blkg)
 		goto out;
 	rcu_read_unlock();
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 26a5dac80ed9..ce12515f9b9b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2435,7 +2435,6 @@ blk_qc_t generic_make_request(struct bio *bio)
 			if (q)
 				blk_queue_exit(q);
 			q = bio->bi_disk->queue;
-			bio_reassociate_blkg(q, bio);
 			flags = 0;
 			if (bio->bi_opf & REQ_NOWAIT)
 				flags = BLK_MQ_REQ_NOWAIT;
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 35c48d7b8f78..bb240a0c1309 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -480,12 +480,34 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
 				     spinlock_t *lock)
 {
 	struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
-	struct blkcg_gq *blkg = bio->bi_blkg;
+	struct blkcg *blkcg;
+	struct blkcg_gq *blkg;
+	struct request_queue *q = rqos->q;
 	bool issue_as_root = bio_issue_as_root_blkg(bio);
 
 	if (!blk_iolatency_enabled(blkiolat))
 		return;
 
+	rcu_read_lock();
+	blkcg = bio_blkcg(bio);
+	bio_associate_blkcg(bio, &blkcg->css);
+	blkg = blkg_lookup(blkcg, q);
+	if (unlikely(!blkg)) {
+		if (!lock)
+			spin_lock_irq(q->queue_lock);
+		blkg = blkg_lookup_create(blkcg, q);
+		if (IS_ERR(blkg))
+			blkg = NULL;
+		if (!lock)
+			spin_unlock_irq(q->queue_lock);
+	}
+	if (!blkg)
+		goto out;
+
+	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
+	bio_associate_blkg(bio, blkg);
+out:
+	rcu_read_unlock();
 	while (blkg && blkg->parent) {
 		struct iolatency_grp *iolat = blkg_to_lat(blkg);
 		if (!iolat) {
@@ -706,7 +728,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
 		 * We could be exiting, don't access the pd unless we have a
 		 * ref on the blkg.
 		 */
-		if (!blkg_tryget(blkg))
+		if (!blkg_try_get(blkg))
 			continue;
 
 		iolat = blkg_to_lat(blkg);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 4bda70e8db48..db1a3a2ae006 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2115,11 +2115,21 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
 }
 #endif
 
+static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
+{
+#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
+	/* fallback to root_blkg if we fail to get a blkg ref */
+	if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV))
+		bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg);
+	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
+#endif
+}
+
 bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 		    struct bio *bio)
 {
 	struct throtl_qnode *qn = NULL;
-	struct throtl_grp *tg = blkg_to_tg(blkg);
+	struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
 	struct throtl_service_queue *sq;
 	bool rw = bio_data_dir(bio);
 	bool throttled = false;
@@ -2138,6 +2148,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 	if (unlikely(blk_queue_bypass(q)))
 		goto out_unlock;
 
+	blk_throtl_assoc_bio(tg, bio);
 	blk_throtl_update_idletime(tg);
 
 	sq = &tg->service_queue;
diff --git a/block/bounce.c b/block/bounce.c
index ec0d99995f5f..418677dcec60 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -276,9 +276,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
 		}
 	}
 
-	bio_clone_blkg_association(bio, bio_src);
-
-	blkcg_bio_issue_init(bio);
+	bio_clone_blkcg_association(bio, bio_src);
 
 	return bio;
 }
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6a3d87dd3c1a..ed41aa978c4a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3759,7 +3759,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
 	uint64_t serial_nr;
 
 	rcu_read_lock();
-	serial_nr = __bio_blkcg(bio)->css.serial_nr;
+	serial_nr = bio_blkcg(bio)->css.serial_nr;
 	rcu_read_unlock();
 
 	/*
@@ -3824,7 +3824,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
 	struct cfq_group *cfqg;
 
 	rcu_read_lock();
-	cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio));
+	cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio));
 	if (!cfqg) {
 		cfqq = &cfqd->oom_cfqq;
 		goto out;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index abad6d15f956..ea9debf59b22 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -77,7 +77,6 @@
 #include <linux/falloc.h>
 #include <linux/uio.h>
 #include <linux/ioprio.h>
-#include <linux/blk-cgroup.h>
 
 #include "loop.h"
 
@@ -1761,8 +1760,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	/* always use the first bio's css */
 #ifdef CONFIG_BLK_CGROUP
-	if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) {
-		cmd->css = &bio_blkcg(rq->bio)->css;
+	if (cmd->use_aio && rq->bio && rq->bio->bi_css) {
+		cmd->css = rq->bio->bi_css;
 		css_get(cmd->css);
 	} else
 #endif
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index f3fb5bb8c82a..ac1cffd2a09b 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -542,7 +542,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
 		    !discard_bio)
 			continue;
 		bio_chain(discard_bio, bio);
-		bio_clone_blkg_association(discard_bio, bio);
+		bio_clone_blkcg_association(discard_bio, bio);
 		if (mddev->gendisk)
 			trace_block_bio_remap(bdev_get_queue(rdev->bdev),
 				discard_bio, disk_devt(mddev->gendisk),
diff --git a/fs/buffer.c b/fs/buffer.c
index 109f55196866..6f1ae3ac9789 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3060,6 +3060,11 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 	 */
 	bio = bio_alloc(GFP_NOIO, 1);
 
+	if (wbc) {
+		wbc_init_bio(wbc, bio);
+		wbc_account_io(wbc, bh->b_page, bh->b_size);
+	}
+
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio_set_dev(bio, bh->b_bdev);
 	bio->bi_write_hint = write_hint;
@@ -3079,11 +3084,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
 		op_flags |= REQ_PRIO;
 	bio_set_op_attrs(bio, op, op_flags);
 
-	if (wbc) {
-		wbc_init_bio(wbc, bio);
-		wbc_account_io(wbc, bh->b_page, bh->b_size);
-	}
-
 	submit_bio(bio);
 	return 0;
 }
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 2aa62d58d8dd..db7590178dfc 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -374,13 +374,13 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
 	bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
 	if (!bio)
 		return -ENOMEM;
+	wbc_init_bio(io->io_wbc, bio);
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio_set_dev(bio, bh->b_bdev);
 	bio->bi_end_io = ext4_end_bio;
 	bio->bi_private = ext4_get_io_end(io->io_end);
 	io->io_bio = bio;
 	io->io_next_block = bh->b_blocknr;
-	wbc_init_bio(io->io_wbc, bio);
 	return 0;
 }
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b47c7f716731..056fb627edb3 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -503,31 +503,23 @@ do {						\
 	disk_devt((bio)->bi_disk)
 
 #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
-int bio_associate_blkg_from_page(struct bio *bio, struct page *page);
+int bio_associate_blkcg_from_page(struct bio *bio, struct page *page);
 #else
-static inline int bio_associate_blkg_from_page(struct bio *bio,
-					       struct page *page) { return 0; }
+static inline int bio_associate_blkcg_from_page(struct bio *bio,
+						struct page *page) {  return 0; }
 #endif
 
 #ifdef CONFIG_BLK_CGROUP
+int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
 int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
-int bio_associate_blkg_from_css(struct bio *bio,
-				struct cgroup_subsys_state *css);
-int bio_associate_create_blkg(struct request_queue *q, struct bio *bio);
-int bio_reassociate_blkg(struct request_queue *q, struct bio *bio);
 void bio_disassociate_task(struct bio *bio);
-void bio_clone_blkg_association(struct bio *dst, struct bio *src);
+void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
 #else	/* CONFIG_BLK_CGROUP */
-static inline int bio_associate_blkg_from_css(struct bio *bio,
-					      struct cgroup_subsys_state *css)
-{ return 0; }
-static inline int bio_associate_create_blkg(struct request_queue *q,
-					    struct bio *bio) { return 0; }
-static inline int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
-{ return 0; }
+static inline int bio_associate_blkcg(struct bio *bio,
+			struct cgroup_subsys_state *blkcg_css) { return 0; }
 static inline void bio_disassociate_task(struct bio *bio) { }
-static inline void bio_clone_blkg_association(struct bio *dst,
-					      struct bio *src) { }
+static inline void bio_clone_blkcg_association(struct bio *dst,
+			struct bio *src) { }
 #endif	/* CONFIG_BLK_CGROUP */
 
 #ifdef CONFIG_HIGHMEM
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 1e76ceebeb5d..6d766a19f2bb 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -126,7 +126,7 @@ struct blkcg_gq {
 	struct request_list		rl;
 
 	/* reference count */
-	struct percpu_ref		refcnt;
+	atomic_t			refcnt;
 
 	/* is this blkg online? protected by both blkcg and q locks */
 	bool				online;
@@ -184,8 +184,6 @@ extern struct cgroup_subsys_state * const blkcg_root_css;
 
 struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
 				      struct request_queue *q, bool update_hint);
-struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
-				      struct request_queue *q);
 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 				    struct request_queue *q);
 int blkcg_init_queue(struct request_queue *q);
@@ -232,59 +230,22 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 		   char *input, struct blkg_conf_ctx *ctx);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
 
-/**
- * blkcg_css - find the current css
- *
- * Find the css associated with either the kthread or the current task.
- * This may return a dying css, so it is up to the caller to use tryget logic
- * to confirm it is alive and well.
- */
-static inline struct cgroup_subsys_state *blkcg_css(void)
-{
-	struct cgroup_subsys_state *css;
-
-	css = kthread_blkcg();
-	if (css)
-		return css;
-	return task_css(current, io_cgrp_id);
-}
 
 static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
 {
 	return css ? container_of(css, struct blkcg, css) : NULL;
 }
 
-/**
- * __bio_blkcg - internal version of bio_blkcg for bfq and cfq
- *
- * DO NOT USE.
- * There is a flaw using this version of the function.  In particular, this was
- * used in a broken paradigm where association was called on the given css.  It
- * is possible though that the returned css from task_css() is in the process
- * of dying due to migration of the current task.  So it is improper to assume
- * *_get() is going to succeed.  Both BFQ and CFQ rely on this logic and will
- * take additional work to handle more gracefully.
- */
-static inline struct blkcg *__bio_blkcg(struct bio *bio)
-{
-	if (bio && bio->bi_blkg)
-		return bio->bi_blkg->blkcg;
-	return css_to_blkcg(blkcg_css());
-}
-
-/**
- * bio_blkcg - grab the blkcg associated with a bio
- * @bio: target bio
- *
- * This returns the blkcg associated with a bio, NULL if not associated.
- * Callers are expected to either handle NULL or know association has been
- * done prior to calling this.
- */
 static inline struct blkcg *bio_blkcg(struct bio *bio)
 {
-	if (bio && bio->bi_blkg)
-		return bio->bi_blkg->blkcg;
-	return NULL;
+	struct cgroup_subsys_state *css;
+
+	if (bio && bio->bi_css)
+		return css_to_blkcg(bio->bi_css);
+	css = kthread_blkcg();
+	if (css)
+		return css_to_blkcg(css);
+	return css_to_blkcg(task_css(current, io_cgrp_id));
 }
 
 static inline bool blk_cgroup_congested(void)
@@ -490,35 +451,26 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
  */
 static inline void blkg_get(struct blkcg_gq *blkg)
 {
-	percpu_ref_get(&blkg->refcnt);
+	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
+	atomic_inc(&blkg->refcnt);
 }
 
 /**
- * blkg_tryget - try and get a blkg reference
+ * blkg_try_get - try and get a blkg reference
  * @blkg: blkg to get
  *
  * This is for use when doing an RCU lookup of the blkg.  We may be in the midst
  * of freeing this blkg, so we can only use it if the refcnt is not zero.
  */
-static inline bool blkg_tryget(struct blkcg_gq *blkg)
+static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
 {
-	return percpu_ref_tryget(&blkg->refcnt);
+	if (atomic_inc_not_zero(&blkg->refcnt))
+		return blkg;
+	return NULL;
 }
 
-/**
- * blkg_tryget_closest - try and get a blkg ref on the closet blkg
- * @blkg: blkg to get
- *
- * This walks up the blkg tree to find the closest non-dying blkg and returns
- * the blkg that it did association with as it may not be the passed in blkg.
- */
-static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
-{
-	while (!percpu_ref_tryget(&blkg->refcnt))
-		blkg = blkg->parent;
 
-	return blkg;
-}
+void __blkg_release_rcu(struct rcu_head *rcu);
 
 /**
  * blkg_put - put a blkg reference
@@ -526,7 +478,9 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
  */
 static inline void blkg_put(struct blkcg_gq *blkg)
 {
-	percpu_ref_put(&blkg->refcnt);
+	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
+	if (atomic_dec_and_test(&blkg->refcnt))
+		call_rcu(&blkg->rcu_head, __blkg_release_rcu);
 }
 
 /**
@@ -579,36 +533,25 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
 
 	rcu_read_lock();
 
-	if (bio && bio->bi_blkg) {
-		blkcg = bio->bi_blkg->blkcg;
-		if (blkcg == &blkcg_root)
-			goto rl_use_root;
-
-		blkg_get(bio->bi_blkg);
-		rcu_read_unlock();
-		return &bio->bi_blkg->rl;
-	}
+	blkcg = bio_blkcg(bio);
 
-	blkcg = css_to_blkcg(blkcg_css());
+	/* bypass blkg lookup and use @q->root_rl directly for root */
 	if (blkcg == &blkcg_root)
-		goto rl_use_root;
+		goto root_rl;
 
+	/*
+	 * Try to use blkg->rl.  blkg lookup may fail under memory pressure
+	 * or if either the blkcg or queue is going away.  Fall back to
+	 * root_rl in such cases.
+	 */
 	blkg = blkg_lookup(blkcg, q);
 	if (unlikely(!blkg))
-		blkg = __blkg_lookup_create(blkcg, q);
-
-	if (blkg->blkcg == &blkcg_root || !blkg_tryget(blkg))
-		goto rl_use_root;
+		goto root_rl;
 
+	blkg_get(blkg);
 	rcu_read_unlock();
 	return &blkg->rl;
-
-	/*
-	 * Each blkg has its own request_list, however, the root blkcg
-	 * uses the request_queue's root_rl.  This is to avoid most
-	 * overhead for the root blkcg.
-	 */
-rl_use_root:
+root_rl:
 	rcu_read_unlock();
 	return &q->root_rl;
 }
@@ -854,26 +797,32 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg
 				  struct bio *bio) { return false; }
 #endif
 
-
-static inline void blkcg_bio_issue_init(struct bio *bio)
-{
-	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
-}
-
 static inline bool blkcg_bio_issue_check(struct request_queue *q,
 					 struct bio *bio)
 {
+	struct blkcg *blkcg;
 	struct blkcg_gq *blkg;
 	bool throtl = false;
 
 	rcu_read_lock();
+	blkcg = bio_blkcg(bio);
+
+	/* associate blkcg if bio hasn't attached one */
+	bio_associate_blkcg(bio, &blkcg->css);
 
-	bio_associate_create_blkg(q, bio);
-	blkg = bio->bi_blkg;
+	blkg = blkg_lookup(blkcg, q);
+	if (unlikely(!blkg)) {
+		spin_lock_irq(q->queue_lock);
+		blkg = blkg_lookup_create(blkcg, q);
+		if (IS_ERR(blkg))
+			blkg = NULL;
+		spin_unlock_irq(q->queue_lock);
+	}
 
 	throtl = blk_throtl_bio(q, blkg, bio);
 
 	if (!throtl) {
+		blkg = blkg ?: q->root_blkg;
 		/*
 		 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
 		 * is a split bio and we would have already accounted for the
@@ -885,8 +834,6 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
 		blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
 	}
 
-	blkcg_bio_issue_init(bio);
-
 	rcu_read_unlock();
 	return !throtl;
 }
@@ -983,7 +930,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
 static inline void blkcg_deactivate_policy(struct request_queue *q,
 					   const struct blkcg_policy *pol) { }
 
-static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
 static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
 
 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
@@ -999,7 +945,6 @@ static inline void blk_put_rl(struct request_list *rl) { }
 static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
 static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
 
-static inline void blkcg_bio_issue_init(struct bio *bio) { }
 static inline bool blkcg_bio_issue_check(struct request_queue *q,
 					 struct bio *bio) { return true; }
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 093a818c5b68..1dcf652ba0aa 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -178,6 +178,7 @@ struct bio {
 	 * release.  Read comment on top of bio_associate_current().
 	 */
 	struct io_context	*bi_ioc;
+	struct cgroup_subsys_state *bi_css;
 	struct blkcg_gq		*bi_blkg;
 	struct bio_issue	bi_issue;
 #endif
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b8bcbdeb2eac..32c553556bbd 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -93,8 +93,6 @@ extern struct css_set init_css_set;
 
 bool css_has_online_children(struct cgroup_subsys_state *css);
 struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
-struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup,
-					 struct cgroup_subsys *ss);
 struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
 					     struct cgroup_subsys *ss);
 struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 738a0c24874f..fdfd04e348f6 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -246,8 +246,7 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
  *
  * @bio is a part of the writeback in progress controlled by @wbc.  Perform
  * writeback specific initialization.  This is used to apply the cgroup
- * writeback context.  Must be called after the bio has been associated with
- * a device.
+ * writeback context.
  */
 static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
 {
@@ -258,7 +257,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
 	 * regular writeback instead of writing things out itself.
 	 */
 	if (wbc->wb)
-		bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css);
+		bio_associate_blkcg(bio, wbc->wb->blkcg_css);
 }
 
 #else	/* CONFIG_CGROUP_WRITEBACK */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 4c1cf0969a80..4a3dae2a8283 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -492,7 +492,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
 }
 
 /**
- * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
+ * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
  * @cgrp: the cgroup of interest
  * @ss: the subsystem of interest (%NULL returns @cgrp->self)
  *
@@ -501,8 +501,8 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
  * enabled.  If @ss is associated with the hierarchy @cgrp is on, this
  * function is guaranteed to return non-NULL css.
  */
-static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
-							struct cgroup_subsys *ss)
+static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
+						struct cgroup_subsys *ss)
 {
 	lockdep_assert_held(&cgroup_mutex);
 
@@ -522,35 +522,6 @@ static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
 	return cgroup_css(cgrp, ss);
 }
 
-/**
- * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
- * @cgrp: the cgroup of interest
- * @ss: the subsystem of interest
- *
- * Find and get the effective css of @cgrp for @ss.  The effective css is
- * defined as the matching css of the nearest ancestor including self which
- * has @ss enabled.  If @ss is not mounted on the hierarchy @cgrp is on,
- * the root css is returned, so this function always returns a valid css.
- *
- * The returned css is not guaranteed to be online, and therefore it is the
- * callers responsiblity to tryget a reference for it.
- */
-struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
-					 struct cgroup_subsys *ss)
-{
-	struct cgroup_subsys_state *css;
-
-	do {
-		css = cgroup_css(cgrp, ss);
-
-		if (css)
-			return css;
-		cgrp = cgroup_parent(cgrp);
-	} while (cgrp);
-
-	return init_css_set.subsys[ss->id];
-}
-
 /**
  * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
  * @cgrp: the cgroup of interest
@@ -633,11 +604,10 @@ EXPORT_SYMBOL_GPL(of_css);
  *
  * Should be called under cgroup_[tree_]mutex.
  */
-#define for_each_e_css(css, ssid, cgrp)					    \
-	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	    \
-		if (!((css) = cgroup_e_css_by_mask(cgrp,		    \
-						   cgroup_subsys[(ssid)]))) \
-			;						    \
+#define for_each_e_css(css, ssid, cgrp)					\
+	for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++)	\
+		if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
+			;						\
 		else
 
 /**
@@ -1036,7 +1006,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
 			 * @ss is in this hierarchy, so we want the
 			 * effective css from @cgrp.
 			 */
-			template[i] = cgroup_e_css_by_mask(cgrp, ss);
+			template[i] = cgroup_e_css(cgrp, ss);
 		} else {
 			/*
 			 * @ss is not in this hierarchy, so we don't want
@@ -3053,7 +3023,7 @@ static int cgroup_apply_control(struct cgroup *cgrp)
 		return ret;
 
 	/*
-	 * At this point, cgroup_e_css_by_mask() results reflect the new csses
+	 * At this point, cgroup_e_css() results reflect the new csses
 	 * making the following cgroup_update_dfl_csses() properly update
 	 * css associations of all tasks in the subtree.
 	 */
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index fac0ddf8a8e2..2868d85f1fb1 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
 	if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
 		return NULL;
 
-	if (!bio->bi_blkg)
+	if (!bio->bi_css)
 		return NULL;
-	return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup);
+	return cgroup_get_kernfs_id(bio->bi_css->cgroup);
 }
 #else
 static union kernfs_node_id *
diff --git a/mm/page_io.c b/mm/page_io.c
index 573d3663d846..aafd19ec1db4 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -339,7 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 		goto out;
 	}
 	bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
-	bio_associate_blkg_from_page(bio, page);
+	bio_associate_blkcg_from_page(bio, page);
 	count_swpout_vm_event(page);
 	set_page_writeback(page);
 	unlock_page(page);
-- 
cgit v1.2.3


From 6d212db11947ae5464e4717536ed9faf61c01e86 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 15 Oct 2018 10:30:23 +0200
Subject: mm: add mm_pxd_folded checks to pgtable_bytes accounting functions

The common mm code calls mm_dec_nr_pmds() and mm_dec_nr_puds()
in free_pgtables() if the address range spans a full pud or pmd.
If mm_dec_nr_puds/mm_dec_nr_pmds are non-empty due to configuration
settings they blindly subtract the size of the pmd or pud table from
pgtable_bytes even if the pud or pmd page table layer is folded.

Add explicit mm_[pmd|pud]_folded checks to the four pgtable_bytes
accounting functions mm_inc_nr_puds, mm_inc_nr_pmds, mm_dec_nr_puds
and mm_dec_nr_pmds. As the check for folded page tables can be
overwritten by the architecture, this allows to keep a correct
pgtable_bytes value for platforms that use a dynamic number of
page table levels.

Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 include/linux/mm.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index daa2b8f1e9a8..a3701e91bb57 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1742,11 +1742,15 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address);
 
 static inline void mm_inc_nr_puds(struct mm_struct *mm)
 {
+	if (mm_pud_folded(mm))
+		return;
 	atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_puds(struct mm_struct *mm)
 {
+	if (mm_pud_folded(mm))
+		return;
 	atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes);
 }
 #endif
@@ -1766,11 +1770,15 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
 
 static inline void mm_inc_nr_pmds(struct mm_struct *mm)
 {
+	if (mm_pmd_folded(mm))
+		return;
 	atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 
 static inline void mm_dec_nr_pmds(struct mm_struct *mm)
 {
+	if (mm_pmd_folded(mm))
+		return;
 	atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes);
 }
 #endif
-- 
cgit v1.2.3


From a7ad38b0dd3c1ba8d6e5a55241e875e9db8331ab Mon Sep 17 00:00:00 2001
From: Guo Ren <ren_guo@c-sky.com>
Date: Sat, 3 Nov 2018 00:51:28 +0800
Subject: clocksource/drivers/c-sky: Add C-SKY SMP timer

The driver is for C-SKY SMP timer. It only supports oneshot event
and 32bit overflow for clocksource. Per cpu core has one timer and
all timers share one clock-counter-input from the same clocksource.

This use mfcr&mtcr instructions to access the regs.

Signed-off-by: Guo Ren <ren_guo@c-sky.com>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/clocksource/Kconfig         |  10 +++
 drivers/clocksource/Makefile        |   1 +
 drivers/clocksource/timer-mp-csky.c | 173 ++++++++++++++++++++++++++++++++++++
 include/linux/cpuhotplug.h          |   1 +
 4 files changed, 185 insertions(+)
 create mode 100644 drivers/clocksource/timer-mp-csky.c

(limited to 'include/linux')

diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
index a11f4ba98b05..591c9a8649a5 100644
--- a/drivers/clocksource/Kconfig
+++ b/drivers/clocksource/Kconfig
@@ -620,4 +620,14 @@ config RISCV_TIMER
 	  is accessed via both the SBI and the rdcycle instruction.  This is
 	  required for all RISC-V systems.
 
+config CSKY_MP_TIMER
+	bool "SMP Timer for the C-SKY platform" if COMPILE_TEST
+	depends on CSKY
+	select TIMER_OF
+	help
+	  Say yes here to enable C-SKY SMP timer driver used for C-SKY SMP
+	  system.
+	  csky,mptimer is not only used in SMP system, it also could be used
+	  single core system. It's not a mmio reg and it use mtcr/mfcr instruction.
+
 endmenu
diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile
index db51b2427e8a..5ce82d39cda7 100644
--- a/drivers/clocksource/Makefile
+++ b/drivers/clocksource/Makefile
@@ -79,3 +79,4 @@ obj-$(CONFIG_CLKSRC_ST_LPC)		+= clksrc_st_lpc.o
 obj-$(CONFIG_X86_NUMACHIP)		+= numachip.o
 obj-$(CONFIG_ATCPIT100_TIMER)		+= timer-atcpit100.o
 obj-$(CONFIG_RISCV_TIMER)		+= riscv_timer.o
+obj-$(CONFIG_CSKY_MP_TIMER)		+= timer-mp-csky.o
diff --git a/drivers/clocksource/timer-mp-csky.c b/drivers/clocksource/timer-mp-csky.c
new file mode 100644
index 000000000000..a8acc431a774
--- /dev/null
+++ b/drivers/clocksource/timer-mp-csky.c
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/sched_clock.h>
+#include <linux/cpu.h>
+#include <linux/of_irq.h>
+#include <asm/reg_ops.h>
+
+#include "timer-of.h"
+
+#define PTIM_CCVR	"cr<3, 14>"
+#define PTIM_CTLR	"cr<0, 14>"
+#define PTIM_LVR	"cr<6, 14>"
+#define PTIM_TSR	"cr<1, 14>"
+
+static int csky_mptimer_irq;
+
+static int csky_mptimer_set_next_event(unsigned long delta,
+				       struct clock_event_device *ce)
+{
+	mtcr(PTIM_LVR, delta);
+
+	return 0;
+}
+
+static int csky_mptimer_shutdown(struct clock_event_device *ce)
+{
+	mtcr(PTIM_CTLR, 0);
+
+	return 0;
+}
+
+static int csky_mptimer_oneshot(struct clock_event_device *ce)
+{
+	mtcr(PTIM_CTLR, 1);
+
+	return 0;
+}
+
+static int csky_mptimer_oneshot_stopped(struct clock_event_device *ce)
+{
+	mtcr(PTIM_CTLR, 0);
+
+	return 0;
+}
+
+static DEFINE_PER_CPU(struct timer_of, csky_to) = {
+	.flags					= TIMER_OF_CLOCK,
+	.clkevt = {
+		.rating				= 300,
+		.features			= CLOCK_EVT_FEAT_PERCPU |
+						  CLOCK_EVT_FEAT_ONESHOT,
+		.set_state_shutdown		= csky_mptimer_shutdown,
+		.set_state_oneshot		= csky_mptimer_oneshot,
+		.set_state_oneshot_stopped	= csky_mptimer_oneshot_stopped,
+		.set_next_event			= csky_mptimer_set_next_event,
+	},
+};
+
+static irqreturn_t csky_timer_interrupt(int irq, void *dev)
+{
+	struct timer_of *to = this_cpu_ptr(&csky_to);
+
+	mtcr(PTIM_TSR, 0);
+
+	to->clkevt.event_handler(&to->clkevt);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * clock event for percpu
+ */
+static int csky_mptimer_starting_cpu(unsigned int cpu)
+{
+	struct timer_of *to = per_cpu_ptr(&csky_to, cpu);
+
+	to->clkevt.cpumask = cpumask_of(cpu);
+
+	clockevents_config_and_register(&to->clkevt, timer_of_rate(to),
+					2, ULONG_MAX);
+
+	enable_percpu_irq(csky_mptimer_irq, 0);
+
+	return 0;
+}
+
+static int csky_mptimer_dying_cpu(unsigned int cpu)
+{
+	disable_percpu_irq(csky_mptimer_irq);
+
+	return 0;
+}
+
+/*
+ * clock source
+ */
+static u64 sched_clock_read(void)
+{
+	return (u64)mfcr(PTIM_CCVR);
+}
+
+static u64 clksrc_read(struct clocksource *c)
+{
+	return (u64)mfcr(PTIM_CCVR);
+}
+
+struct clocksource csky_clocksource = {
+	.name	= "csky",
+	.rating	= 400,
+	.mask	= CLOCKSOURCE_MASK(32),
+	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
+	.read	= clksrc_read,
+};
+
+static int __init csky_mptimer_init(struct device_node *np)
+{
+	int ret, cpu, cpu_rollback;
+	struct timer_of *to = NULL;
+
+	/*
+	 * Csky_mptimer is designed for C-SKY SMP multi-processors and
+	 * every core has it's own private irq and regs for clkevt and
+	 * clksrc.
+	 *
+	 * The regs is accessed by cpu instruction: mfcr/mtcr instead of
+	 * mmio map style. So we needn't mmio-address in dts, but we still
+	 * need to give clk and irq number.
+	 *
+	 * We use private irq for the mptimer and irq number is the same
+	 * for every core. So we use request_percpu_irq() in timer_of_init.
+	 */
+	csky_mptimer_irq = irq_of_parse_and_map(np, 0);
+	if (csky_mptimer_irq <= 0)
+		return -EINVAL;
+
+	ret = request_percpu_irq(csky_mptimer_irq, csky_timer_interrupt,
+				 "csky_mp_timer", &csky_to);
+	if (ret)
+		return -EINVAL;
+
+	for_each_possible_cpu(cpu) {
+		to = per_cpu_ptr(&csky_to, cpu);
+		ret = timer_of_init(np, to);
+		if (ret)
+			goto rollback;
+	}
+
+	clocksource_register_hz(&csky_clocksource, timer_of_rate(to));
+	sched_clock_register(sched_clock_read, 32, timer_of_rate(to));
+
+	ret = cpuhp_setup_state(CPUHP_AP_CSKY_TIMER_STARTING,
+				"clockevents/csky/timer:starting",
+				csky_mptimer_starting_cpu,
+				csky_mptimer_dying_cpu);
+	if (ret)
+		return -EINVAL;
+
+	return 0;
+
+rollback:
+	for_each_possible_cpu(cpu_rollback) {
+		if (cpu_rollback == cpu)
+			break;
+
+		to = per_cpu_ptr(&csky_to, cpu_rollback);
+		timer_of_cleanup(to);
+	}
+	return -EINVAL;
+}
+TIMER_OF_DECLARE(csky_mptimer, "csky,mptimer", csky_mptimer_init);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index caf40ad0bbc6..e0cd2baa8380 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -126,6 +126,7 @@ enum cpuhp_state {
 	CPUHP_AP_MIPS_GIC_TIMER_STARTING,
 	CPUHP_AP_ARC_TIMER_STARTING,
 	CPUHP_AP_RISCV_TIMER_STARTING,
+	CPUHP_AP_CSKY_TIMER_STARTING,
 	CPUHP_AP_KVM_STARTING,
 	CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING,
 	CPUHP_AP_KVM_ARM_VGIC_STARTING,
-- 
cgit v1.2.3


From 17b8b74c0f8dbf9b9e3301f9ca5b65dd1c079951 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Fri, 19 Oct 2018 19:35:19 +0200
Subject: netfilter: ipset: Correct rcu_dereference() call in
 ip_set_put_comment()

The function is called when rcu_read_lock() is held and not
when rcu_read_lock_bh() is held.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set_comment.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/ipset/ip_set_comment.h b/include/linux/netfilter/ipset/ip_set_comment.h
index 8e2bab1e8e90..70877f8de7e9 100644
--- a/include/linux/netfilter/ipset/ip_set_comment.h
+++ b/include/linux/netfilter/ipset/ip_set_comment.h
@@ -43,11 +43,11 @@ ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
 	rcu_assign_pointer(comment->c, c);
 }
 
-/* Used only when dumping a set, protected by rcu_read_lock_bh() */
+/* Used only when dumping a set, protected by rcu_read_lock() */
 static inline int
 ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
 {
-	struct ip_set_comment_rcu *c = rcu_dereference_bh(comment->c);
+	struct ip_set_comment_rcu *c = rcu_dereference(comment->c);
 
 	if (!c)
 		return 0;
-- 
cgit v1.2.3


From 94e297c50b529f5d01cfd1dbc808d61e95180ab7 Mon Sep 17 00:00:00 2001
From: Sam Protsenko <semen.protsenko@linaro.org>
Date: Fri, 2 Nov 2018 15:47:53 -0700
Subject: include/linux/notifier.h: SRCU: fix ctags

ctags indexing ("make tags" command) throws this warning:

    ctags: Warning: include/linux/notifier.h:125:
    null expansion of name pattern "\1"

This is the result of DEFINE_PER_CPU() macro expansion.  Fix that by
getting rid of line break.

Similar fix was already done in commit 25528213fe9f ("tags: Fix
DEFINE_PER_CPU expansions"), but this one probably wasn't noticed.

Link: http://lkml.kernel.org/r/20181030202808.28027-1-semen.protsenko@linaro.org
Fixes: 9c80172b902d ("kernel/SRCU: provide a static initializer")
Signed-off-by: Sam Protsenko <semen.protsenko@linaro.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/notifier.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index f35c7bf76143..0096a05395e3 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -122,8 +122,7 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
 
 #ifdef CONFIG_TREE_SRCU
 #define _SRCU_NOTIFIER_HEAD(name, mod)				\
-	static DEFINE_PER_CPU(struct srcu_data,			\
-			name##_head_srcu_data);			\
+	static DEFINE_PER_CPU(struct srcu_data, name##_head_srcu_data); \
 	mod struct srcu_notifier_head name =			\
 			SRCU_NOTIFIER_INIT(name, name##_head_srcu_data)
 
-- 
cgit v1.2.3


From 89c83fb539f95491be80cdd5158e6f0ce329e317 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Fri, 2 Nov 2018 15:48:31 -0700
Subject: mm, thp: consolidate THP gfp handling into
 alloc_hugepage_direct_gfpmask

THP allocation mode is quite complex and it depends on the defrag mode.
This complexity is hidden in alloc_hugepage_direct_gfpmask from a large
part currently. The NUMA special casing (namely __GFP_THISNODE) is
however independent and placed in alloc_pages_vma currently. This both
adds an unnecessary branch to all vma based page allocation requests and
it makes the code more complex unnecessarily as well. Not to mention
that e.g. shmem THP used to do the node reclaiming unconditionally
regardless of the defrag mode until recently. This was not only
unexpected behavior but it was also hardly a good default behavior and I
strongly suspect it was just a side effect of the code sharing more than
a deliberate decision which suggests that such a layering is wrong.

Get rid of the thp special casing from alloc_pages_vma and move the
logic to alloc_hugepage_direct_gfpmask. __GFP_THISNODE is applied to the
resulting gfp mask only when the direct reclaim is not requested and
when there is no explicit numa binding to preserve the current logic.

Please note that there's also a slight difference wrt MPOL_BIND now. The
previous code would avoid using __GFP_THISNODE if the local node was
outside of policy_nodemask(). After this patch __GFP_THISNODE is avoided
for all MPOL_BIND policies. So there's a difference that if local node
is actually allowed by the bind policy's nodemask, previously
__GFP_THISNODE would be added, but now it won't be. From the behavior
POV this is still correct because the policy nodemask is used.

Link: http://lkml.kernel.org/r/20180925120326.24392-3-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Rientjes <rientjes@google.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Stefan Priebe - Profihost AG <s.priebe@profihost.ag>
Cc: Zi Yan <zi.yan@cs.rutgers.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/gfp.h       | 12 +++------
 include/linux/mempolicy.h |  2 ++
 mm/huge_memory.c          | 38 +++++++++++++++++++++-------
 mm/mempolicy.c            | 63 +++--------------------------------------------
 mm/shmem.c                |  2 +-
 5 files changed, 40 insertions(+), 77 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 24bcc5eec6b4..76f8db0b0e71 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -510,22 +510,18 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
-			int node, bool hugepage);
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order)	\
-	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
+			int node);
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
-	alloc_pages(gfp_mask, order)
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order)	\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node)\
 	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr)			\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
 
 extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
 extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5228c62af416..bac395f1d00a 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -139,6 +139,8 @@ struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
 struct mempolicy *get_task_policy(struct task_struct *p);
 struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
 		unsigned long addr);
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+						unsigned long addr);
 bool vma_policy_mof(struct vm_area_struct *vma);
 
 extern void numa_default_policy(void);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4e4ef8fa479d..55478ab3c83b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -629,21 +629,40 @@ release:
  *	    available
  * never: never stall for any thp allocation
  */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma, unsigned long addr)
 {
 	const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
+	gfp_t this_node = 0;
+
+#ifdef CONFIG_NUMA
+	struct mempolicy *pol;
+	/*
+	 * __GFP_THISNODE is used only when __GFP_DIRECT_RECLAIM is not
+	 * specified, to express a general desire to stay on the current
+	 * node for optimistic allocation attempts. If the defrag mode
+	 * and/or madvise hint requires the direct reclaim then we prefer
+	 * to fallback to other node rather than node reclaim because that
+	 * can lead to excessive reclaim even though there is free memory
+	 * on other nodes. We expect that NUMA preferences are specified
+	 * by memory policies.
+	 */
+	pol = get_vma_policy(vma, addr);
+	if (pol->mode != MPOL_BIND)
+		this_node = __GFP_THISNODE;
+	mpol_cond_put(pol);
+#endif
 
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
 		return GFP_TRANSHUGE | (vma_madvised ? 0 : __GFP_NORETRY);
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
-		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM;
+		return GFP_TRANSHUGE_LIGHT | __GFP_KSWAPD_RECLAIM | this_node;
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags))
 		return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-							     __GFP_KSWAPD_RECLAIM);
+							     __GFP_KSWAPD_RECLAIM | this_node);
 	if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
 		return GFP_TRANSHUGE_LIGHT | (vma_madvised ? __GFP_DIRECT_RECLAIM :
-							     0);
-	return GFP_TRANSHUGE_LIGHT;
+							     this_node);
+	return GFP_TRANSHUGE_LIGHT | this_node;
 }
 
 /* Caller must hold page table lock. */
@@ -715,8 +734,8 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
 			pte_free(vma->vm_mm, pgtable);
 		return ret;
 	}
-	gfp = alloc_hugepage_direct_gfpmask(vma);
-	page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
+	gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+	page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
 	if (unlikely(!page)) {
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1286,8 +1305,9 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd)
 alloc:
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow()) {
-		huge_gfp = alloc_hugepage_direct_gfpmask(vma);
-		new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
+		huge_gfp = alloc_hugepage_direct_gfpmask(vma, haddr);
+		new_page = alloc_pages_vma(huge_gfp, HPAGE_PMD_ORDER, vma,
+				haddr, numa_node_id());
 	} else
 		new_page = NULL;
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 58fb833fce0c..5837a067124d 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1116,8 +1116,8 @@ static struct page *new_page(struct page *page, unsigned long start)
 	} else if (PageTransHuge(page)) {
 		struct page *thp;
 
-		thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, address,
-					 HPAGE_PMD_ORDER);
+		thp = alloc_pages_vma(GFP_TRANSHUGE, HPAGE_PMD_ORDER, vma,
+				address, numa_node_id());
 		if (!thp)
 			return NULL;
 		prep_transhuge_page(thp);
@@ -1662,7 +1662,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
  * freeing by another task.  It is the caller's responsibility to free the
  * extra reference for shared policies.
  */
-static struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
+struct mempolicy *get_vma_policy(struct vm_area_struct *vma,
 						unsigned long addr)
 {
 	struct mempolicy *pol = __get_vma_policy(vma, addr);
@@ -2011,7 +2011,6 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  * 	@vma:  Pointer to VMA or NULL if not available.
  *	@addr: Virtual Address of the allocation. Must be inside the VMA.
  *	@node: Which node to prefer for allocation (modulo policy).
- *	@hugepage: for hugepages try only the preferred node if possible
  *
  * 	This function allocates a page from the kernel page pool and applies
  *	a NUMA policy associated with the VMA or the current process.
@@ -2022,7 +2021,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node, bool hugepage)
+		unsigned long addr, int node)
 {
 	struct mempolicy *pol;
 	struct page *page;
@@ -2040,60 +2039,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		goto out;
 	}
 
-	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
-		int hpage_node = node;
-
-		/*
-		 * For hugepage allocation and non-interleave policy which
-		 * allows the current node (or other explicitly preferred
-		 * node) we only try to allocate from the current/preferred
-		 * node and don't fall back to other nodes, as the cost of
-		 * remote accesses would likely offset THP benefits.
-		 *
-		 * If the policy is interleave, or does not allow the current
-		 * node in its nodemask, we allocate the standard way.
-		 */
-		if (pol->mode == MPOL_PREFERRED &&
-						!(pol->flags & MPOL_F_LOCAL))
-			hpage_node = pol->v.preferred_node;
-
-		nmask = policy_nodemask(gfp, pol);
-		if (!nmask || node_isset(hpage_node, *nmask)) {
-			mpol_cond_put(pol);
-			/*
-			 * We cannot invoke reclaim if __GFP_THISNODE
-			 * is set. Invoking reclaim with
-			 * __GFP_THISNODE set, would cause THP
-			 * allocations to trigger heavy swapping
-			 * despite there may be tons of free memory
-			 * (including potentially plenty of THP
-			 * already available in the buddy) on all the
-			 * other NUMA nodes.
-			 *
-			 * At most we could invoke compaction when
-			 * __GFP_THISNODE is set (but we would need to
-			 * refrain from invoking reclaim even if
-			 * compaction returned COMPACT_SKIPPED because
-			 * there wasn't not enough memory to succeed
-			 * compaction). For now just avoid
-			 * __GFP_THISNODE instead of limiting the
-			 * allocation path to a strict and single
-			 * compaction invocation.
-			 *
-			 * Supposedly if direct reclaim was enabled by
-			 * the caller, the app prefers THP regardless
-			 * of the node it comes from so this would be
-			 * more desiderable behavior than only
-			 * providing THP originated from the local
-			 * node in such case.
-			 */
-			if (!(gfp & __GFP_DIRECT_RECLAIM))
-				gfp |= __GFP_THISNODE;
-			page = __alloc_pages_node(hpage_node, gfp, order);
-			goto out;
-		}
-	}
-
 	nmask = policy_nodemask(gfp, pol);
 	preferred_nid = policy_node(gfp, pol, node);
 	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
diff --git a/mm/shmem.c b/mm/shmem.c
index 56bf122e0bb4..ea26d7a0342d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1435,7 +1435,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
 
 	shmem_pseudo_vma_init(&pvma, info, hindex);
 	page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
+			HPAGE_PMD_ORDER, &pvma, 0, numa_node_id());
 	shmem_pseudo_vma_destroy(&pvma);
 	if (page)
 		prep_transhuge_page(page);
-- 
cgit v1.2.3


From 3e59020abf0f88182730527ee5b862e786eb485a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 31 Oct 2018 08:39:12 -0700
Subject: net: bql: add __netdev_tx_sent_queue()

When qdisc_run() tries to use BQL budget to bulk-dequeue a batch
of packets, GSO can later transform this list in another list
of skbs, and each skb is sent to device ndo_start_xmit(),
one at a time, with skb->xmit_more being set to one but
for last skb.

Problem is that very often, BQL limit is hit in the middle of
the packet train, forcing dev_hard_start_xmit() to stop the
bulk send and requeue the end of the list.

BQL role is to avoid head of line blocking, making sure
a qdisc can deliver high priority packets before low priority ones.

But there is no way requeued packets can be bypassed by fresh
packets in the qdisc.

Aborting the bulk send increases TX softirqs, and hot cache
lines (after skb_segment()) are wasted.

Note that for TSO packets, we never split a packet in the middle
because of BQL limit being hit.

Drivers should be able to update BQL counters without
flipping/caring about BQL status, if the current skb
has xmit_more set.

Upper layers are ultimately responsible to stop sending another
packet train when BQL limit is hit.

Code template in a driver might look like the following :

	send_doorbell = __netdev_tx_sent_queue(tx_queue, nr_bytes, skb->xmit_more);

Note that __netdev_tx_sent_queue() use is not mandatory,
since following patch will change dev_hard_start_xmit()
to not care about BQL status.

But it is highly recommended so that xmit_more full benefits
can be reached (less doorbells sent, and less atomic operations as well)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dc1d9ed33b31..857f8abf7b91 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3190,6 +3190,26 @@ static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
 #endif
 }
 
+/* Variant of netdev_tx_sent_queue() for drivers that are aware
+ * that they should not test BQL status themselves.
+ * We do want to change __QUEUE_STATE_STACK_XOFF only for the last
+ * skb of a batch.
+ * Returns true if the doorbell must be used to kick the NIC.
+ */
+static inline bool __netdev_tx_sent_queue(struct netdev_queue *dev_queue,
+					  unsigned int bytes,
+					  bool xmit_more)
+{
+	if (xmit_more) {
+#ifdef CONFIG_BQL
+		dql_queued(&dev_queue->dql, bytes);
+#endif
+		return netif_tx_queue_stopped(dev_queue);
+	}
+	netdev_tx_sent_queue(dev_queue, bytes);
+	return true;
+}
+
 /**
  * 	netdev_sent_queue - report the number of bytes queued to hardware
  * 	@dev: network device
-- 
cgit v1.2.3


From 35b69a420bfb56b7b74cb635ea903db05e357bec Mon Sep 17 00:00:00 2001
From: Michael Kelley <mikelley@microsoft.com>
Date: Sun, 4 Nov 2018 03:48:54 +0000
Subject: clockevents/drivers/i8253: Add support for PIT shutdown quirk

Add support for platforms where pit_shutdown() doesn't work because of a
quirk in the PIT emulation. On these platforms setting the counter register
to zero causes the PIT to start running again, negating the shutdown.

Provide a global variable that controls whether the counter register is
zero'ed, which platform specific code can override.

Signed-off-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "gregkh@linuxfoundation.org" <gregkh@linuxfoundation.org>
Cc: "devel@linuxdriverproject.org" <devel@linuxdriverproject.org>
Cc: "daniel.lezcano@linaro.org" <daniel.lezcano@linaro.org>
Cc: "virtualization@lists.linux-foundation.org" <virtualization@lists.linux-foundation.org>
Cc: "jgross@suse.com" <jgross@suse.com>
Cc: "akataria@vmware.com" <akataria@vmware.com>
Cc: "olaf@aepfle.de" <olaf@aepfle.de>
Cc: "apw@canonical.com" <apw@canonical.com>
Cc: vkuznets <vkuznets@redhat.com>
Cc: "jasowang@redhat.com" <jasowang@redhat.com>
Cc: "marcelo.cerri@canonical.com" <marcelo.cerri@canonical.com>
Cc: KY Srinivasan <kys@microsoft.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/1541303219-11142-2-git-send-email-mikelley@microsoft.com
---
 drivers/clocksource/i8253.c | 14 ++++++++++++--
 include/linux/i8253.h       |  1 +
 2 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/clocksource/i8253.c b/drivers/clocksource/i8253.c
index 9c38895542f4..d4350bb10b83 100644
--- a/drivers/clocksource/i8253.c
+++ b/drivers/clocksource/i8253.c
@@ -20,6 +20,13 @@
 DEFINE_RAW_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
 
+/*
+ * Handle PIT quirk in pit_shutdown() where zeroing the counter register
+ * restarts the PIT, negating the shutdown. On platforms with the quirk,
+ * platform specific code can set this to false.
+ */
+bool i8253_clear_counter_on_shutdown __ro_after_init = true;
+
 #ifdef CONFIG_CLKSRC_I8253
 /*
  * Since the PIT overflows every tick, its not very useful
@@ -109,8 +116,11 @@ static int pit_shutdown(struct clock_event_device *evt)
 	raw_spin_lock(&i8253_lock);
 
 	outb_p(0x30, PIT_MODE);
-	outb_p(0, PIT_CH0);
-	outb_p(0, PIT_CH0);
+
+	if (i8253_clear_counter_on_shutdown) {
+		outb_p(0, PIT_CH0);
+		outb_p(0, PIT_CH0);
+	}
 
 	raw_spin_unlock(&i8253_lock);
 	return 0;
diff --git a/include/linux/i8253.h b/include/linux/i8253.h
index e6bb36a97519..8336b2f6f834 100644
--- a/include/linux/i8253.h
+++ b/include/linux/i8253.h
@@ -21,6 +21,7 @@
 #define PIT_LATCH	((PIT_TICK_RATE + HZ/2) / HZ)
 
 extern raw_spinlock_t i8253_lock;
+extern bool i8253_clear_counter_on_shutdown;
 extern struct clock_event_device i8253_clockevent;
 extern void clockevent_i8253_init(bool oneshot);
 
-- 
cgit v1.2.3


From d098093ba06eb032057d1aca1c2e45889e099d00 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Sun, 28 Oct 2018 12:29:55 +0100
Subject: mtd: nand: Fix nanddev_neraseblocks()

nanddev_neraseblocks() currently returns the number pages per LUN
instead of the total number of eraseblocks.

Fixes: 9c3736a3de21 ("mtd: nand: Add core infrastructure to deal with NAND devices")
Cc: <stable@vger.kernel.org>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/nand.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index abe975c87b90..78b86dea2f29 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -324,9 +324,8 @@ static inline unsigned int nanddev_ntargets(const struct nand_device *nand)
  */
 static inline unsigned int nanddev_neraseblocks(const struct nand_device *nand)
 {
-	return (u64)nand->memorg.luns_per_target *
-	       nand->memorg.eraseblocks_per_lun *
-	       nand->memorg.pages_per_eraseblock;
+	return nand->memorg.ntargets * nand->memorg.luns_per_target *
+	       nand->memorg.eraseblocks_per_lun;
 }
 
 /**
-- 
cgit v1.2.3


From 163c8d54a997153ee1a1e07fcac087492ad85b37 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 5 Nov 2018 07:36:28 +0100
Subject: compiler: remove __no_sanitize_address_or_inline again

The __no_sanitize_address_or_inline and __no_kasan_or_inline defines
are almost identical. The only difference is that __no_kasan_or_inline
does not have the 'notrace' attribute.

To be able to replace __no_sanitize_address_or_inline with the older
definition, add 'notrace' to __no_kasan_or_inline and change to two
users of __no_sanitize_address_or_inline in the s390 code.

The 'notrace' option is necessary for e.g. the __load_psw_mask function
in arch/s390/include/asm/processor.h. Without the option it is possible
to trace __load_psw_mask which leads to kernel stack overflow.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Pointed-out-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/include/asm/processor.h |  4 ++--
 include/linux/compiler-gcc.h      | 12 ------------
 include/linux/compiler.h          |  2 +-
 3 files changed, 3 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 302795c47c06..81038ab357ce 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -236,7 +236,7 @@ static inline unsigned long current_stack_pointer(void)
 	return sp;
 }
 
-static __no_sanitize_address_or_inline unsigned short stap(void)
+static __no_kasan_or_inline unsigned short stap(void)
 {
 	unsigned short cpu_address;
 
@@ -330,7 +330,7 @@ static inline void __load_psw(psw_t psw)
  * Set PSW mask to specified value, while leaving the
  * PSW addr pointing to the next instruction.
  */
-static __no_sanitize_address_or_inline void __load_psw_mask(unsigned long mask)
+static __no_kasan_or_inline void __load_psw_mask(unsigned long mask)
 {
 	unsigned long addr;
 	psw_t psw;
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index c0f5db3a9621..2010493e1040 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -143,18 +143,6 @@
 #define KASAN_ABI_VERSION 3
 #endif
 
-/*
- * Because __no_sanitize_address conflicts with inlining:
- *   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
- * we do one or the other. 
- */
-#ifdef CONFIG_KASAN
-#define __no_sanitize_address_or_inline					\
-	__no_sanitize_address __maybe_unused notrace
-#else
-#define __no_sanitize_address_or_inline inline
-#endif
-
 #if GCC_VERSION >= 50100
 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
 #endif
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 18c80cfa4fc4..06396c1cf127 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -189,7 +189,7 @@ void __read_once_size(const volatile void *p, void *res, int size)
  * 	https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368
  * '__maybe_unused' allows us to avoid defined-but-not-used warnings.
  */
-# define __no_kasan_or_inline __no_sanitize_address __maybe_unused
+# define __no_kasan_or_inline __no_sanitize_address notrace __maybe_unused
 #else
 # define __no_kasan_or_inline __always_inline
 #endif
-- 
cgit v1.2.3


From aa9b760cec2385ad408bb2e346c7f6dc1be69a79 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 4 Nov 2018 11:32:47 +0100
Subject: HID: fix up .raw_event() documentation

The documentation for the .raw_event() callback says that if the
driver return 1, there will be no further processing of the event,
but this is not true, the actual code in hid-core.c looks like this:

  if (hdrv && hdrv->raw_event && hid_match_report(hid, report)) {
           ret = hdrv->raw_event(hid, report, data, size);
           if (ret < 0)
                   goto unlock;
   }

   ret = hid_report_raw_event(hid, type, data, size, interrupt);

The only return value that has any effect on the processing is
a negative error.

Correct this as it seems to confuse people: I found bogus code in
the Razer out-of-tree driver attempting to return 1 here.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/hid.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hid.h b/include/linux/hid.h
index 2827b87590d8..387c70df6f29 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -722,8 +722,8 @@ struct hid_usage_id {
  * input will not be passed to raw_event unless hid_device_io_start is
  * called.
  *
- * raw_event and event should return 0 on no action performed, 1 when no
- * further processing should be done and negative on error
+ * raw_event and event should return negative on error, any other value will
+ * pass the event on to .event() typically return 0 for success.
  *
  * input_mapping shall return a negative value to completely ignore this usage
  * (e.g. doubled or invalid usage), zero to continue with parsing of this
-- 
cgit v1.2.3


From 8bd66d147c88bd441178c7b4c774ae5a185f19b8 Mon Sep 17 00:00:00 2001
From: "ndesaulniers@google.com" <ndesaulniers@google.com>
Date: Wed, 31 Oct 2018 12:39:01 -0700
Subject: include/linux/compiler*.h: define asm_volatile_goto

asm_volatile_goto should also be defined for other compilers that support
asm goto.

Fixes commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h
mutually exclusive").

Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler_types.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 3439d7d0249a..4a3f9c09c92d 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -130,6 +130,10 @@ struct ftrace_likely_data {
 # define randomized_struct_fields_end
 #endif
 
+#ifndef asm_volatile_goto
+#define asm_volatile_goto(x...) asm goto(x)
+#endif
+
 /* Are two types/vars the same type (ignoring qualifiers)? */
 #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
 
-- 
cgit v1.2.3


From 98ee3fc7ef8395f8b7a379e6608aee91efc66d48 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@bootlin.com>
Date: Tue, 6 Nov 2018 17:25:37 +0100
Subject: mtd: nand: Fix nanddev_pos_next_page() kernel-doc header

Function name is wrong in the kernel-doc header.

Fixes: 9c3736a3de21 ("mtd: nand: Add core infrastructure to deal with NAND devices")
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 include/linux/mtd/nand.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 78b86dea2f29..7f53ece2c039 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -568,7 +568,7 @@ static inline void nanddev_pos_next_eraseblock(struct nand_device *nand,
 }
 
 /**
- * nanddev_pos_next_eraseblock() - Move a position to the next page
+ * nanddev_pos_next_page() - Move a position to the next page
  * @nand: NAND device
  * @pos: the position to update
  *
-- 
cgit v1.2.3


From 81bd415c91eb966118d773dddf254aebf3022411 Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 6 Jun 2018 21:42:32 +0200
Subject: watchdog/core: Add missing prototypes for weak functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The split out of the hard lockup detector exposed two new weak functions,
but no prototypes for them, which triggers the build warning:

  kernel/watchdog.c:109:12: warning: no previous prototype for ‘watchdog_nmi_enable’ [-Wmissing-prototypes]
  kernel/watchdog.c:115:13: warning: no previous prototype for ‘watchdog_nmi_disable’ [-Wmissing-prototypes]

Add the prototypes.

Fixes: 73ce0511c436 ("kernel/watchdog.c: move hardlockup detector to separate file")
Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Babu Moger <babu.moger@oracle.com>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180606194232.17653-1-malat@debian.org
---
 include/linux/nmi.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 08f9247e9827..9003e29cde46 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -119,6 +119,8 @@ static inline int hardlockup_detector_perf_init(void) { return 0; }
 void watchdog_nmi_stop(void);
 void watchdog_nmi_start(void);
 int watchdog_nmi_probe(void);
+int watchdog_nmi_enable(unsigned int cpu);
+void watchdog_nmi_disable(unsigned int cpu);
 
 /**
  * touch_nmi_watchdog - restart NMI watchdog timeout.
-- 
cgit v1.2.3


From 24efee412c75843755da0ddd7bbc2db2ce9129f5 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Date: Tue, 6 Nov 2018 22:52:11 +0100
Subject: Compiler Attributes: improve explanation of header

Explain better what "optional" attributes are, and avoid calling
them so to avoid confusion. Simply retain "Optional" as a word
to look for in the comments.

Moreover, add a couple sentences to explain a bit more the intention
and the documentation links.

Signed-off-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
---
 include/linux/compiler_attributes.h | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 6b28c1b7310c..f8c400ba1929 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -4,22 +4,26 @@
 
 /*
  * The attributes in this file are unconditionally defined and they directly
- * map to compiler attribute(s) -- except those that are optional.
+ * map to compiler attribute(s), unless one of the compilers does not support
+ * the attribute. In that case, __has_attribute is used to check for support
+ * and the reason is stated in its comment ("Optional: ...").
  *
  * Any other "attributes" (i.e. those that depend on a configuration option,
  * on a compiler, on an architecture, on plugins, on other attributes...)
  * should be defined elsewhere (e.g. compiler_types.h or compiler-*.h).
+ * The intention is to keep this file as simple as possible, as well as
+ * compiler- and version-agnostic (e.g. avoiding GCC_VERSION checks).
  *
  * This file is meant to be sorted (by actual attribute name,
  * not by #define identifier). Use the __attribute__((__name__)) syntax
  * (i.e. with underscores) to avoid future collisions with other macros.
- * If an attribute is optional, state the reason in the comment.
+ * Provide links to the documentation of each supported compiler, if it exists.
  */
 
 /*
- * To check for optional attributes, we use __has_attribute, which is supported
- * on gcc >= 5, clang >= 2.9 and icc >= 17. In the meantime, to support
- * 4.6 <= gcc < 5, we implement __has_attribute by hand.
+ * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17.
+ * In the meantime, to support 4.6 <= gcc < 5, we implement __has_attribute
+ * by hand.
  *
  * sparse does not support __has_attribute (yet) and defines __GNUC_MINOR__
  * depending on the compiler used to build it; however, these attributes have
-- 
cgit v1.2.3


From 23c625ce3065e40c933a4239efb9b11f1194a343 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Thu, 8 Nov 2018 14:55:21 +0100
Subject: libceph: assume argonaut on the server side

No one is running pre-argonaut.  In addition one of the argonaut
features (NOSRCADDR) has been required since day one (and a half,
2.6.34 vs 2.6.35) of the kernel client.

Allow for the possibility of reusing these feature bits later.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Sage Weil <sage@redhat.com>
---
 fs/ceph/mds_client.c               | 12 +++---------
 include/linux/ceph/ceph_features.h |  8 +-------
 2 files changed, 4 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 67a9aeb2f4ec..bd13a3267ae0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -80,12 +80,8 @@ static int parse_reply_info_in(void **p, void *end,
 	info->symlink = *p;
 	*p += info->symlink_len;
 
-	if (features & CEPH_FEATURE_DIRLAYOUTHASH)
-		ceph_decode_copy_safe(p, end, &info->dir_layout,
-				      sizeof(info->dir_layout), bad);
-	else
-		memset(&info->dir_layout, 0, sizeof(info->dir_layout));
-
+	ceph_decode_copy_safe(p, end, &info->dir_layout,
+			      sizeof(info->dir_layout), bad);
 	ceph_decode_32_safe(p, end, info->xattr_len, bad);
 	ceph_decode_need(p, end, info->xattr_len, bad);
 	info->xattr_data = *p;
@@ -3182,10 +3178,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	recon_state.pagelist = pagelist;
 	if (session->s_con.peer_features & CEPH_FEATURE_MDSENC)
 		recon_state.msg_version = 3;
-	else if (session->s_con.peer_features & CEPH_FEATURE_FLOCK)
-		recon_state.msg_version = 2;
 	else
-		recon_state.msg_version = 1;
+		recon_state.msg_version = 2;
 	err = iterate_session_caps(session, encode_caps_cb, &recon_state);
 	if (err < 0)
 		goto fail;
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 6b92b3395fa9..65a38c4a02a1 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -213,12 +213,6 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
 	 CEPH_FEATURE_NEW_OSDOPREPLY_ENCODING |	\
 	 CEPH_FEATURE_CEPHX_V2)
 
-#define CEPH_FEATURES_REQUIRED_DEFAULT   \
-	(CEPH_FEATURE_NOSRCADDR |	 \
-	 CEPH_FEATURE_SUBSCRIBE2 |	 \
-	 CEPH_FEATURE_RECONNECT_SEQ |	 \
-	 CEPH_FEATURE_PGID64 |		 \
-	 CEPH_FEATURE_PGPOOL3 |		 \
-	 CEPH_FEATURE_OSDENC)
+#define CEPH_FEATURES_REQUIRED_DEFAULT	0
 
 #endif
-- 
cgit v1.2.3